From 8e4c8722268467cde2a04950bba9043fc2e7e136 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sun, 19 Feb 2023 13:41:49 +1100 Subject: [PATCH 01/43] Do not try to set usetex=False or resave if savefig fails. Instead just return error msg. --- jcvi/graphics/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jcvi/graphics/base.py b/jcvi/graphics/base.py index d9b9fa2f..92a81fe3 100644 --- a/jcvi/graphics/base.py +++ b/jcvi/graphics/base.py @@ -316,11 +316,11 @@ def savefig(figname, dpi=150, iopts=None, cleanup=True): try: plt.savefig(figname, dpi=dpi, format=format) except Exception as e: - message = "savefig failed. Reset usetex to False." + message = "savefig failed with message:" message += "\n{0}".format(str(e)) - logging.info(message) - rc("text", usetex=False) - plt.savefig(figname, dpi=dpi) + logging.error(message) + logging.debug(f"Matplotlib backend is: {mpl.get_backend()}") + logging.debug(f"Attempted save as: {format}") msg = "Figure saved to `{0}`".format(figname) if iopts: From 80291758b21dcf4debc1eea12891e9bcb0870de2 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sun, 19 Feb 2023 19:31:53 +1100 Subject: [PATCH 02/43] Replace check_call with check_output in sh() Allows collection of outputs. Added redirect_error arg so stderr can be redirected to stdout and used in error logging. 
--- jcvi/apps/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/jcvi/apps/base.py b/jcvi/apps/base.py index 6689676a..1a377581 100644 --- a/jcvi/apps/base.py +++ b/jcvi/apps/base.py @@ -22,7 +22,7 @@ ParsingError, ) from socket import gethostname -from subprocess import PIPE, call, check_call +from subprocess import PIPE, call, check_output from optparse import OptionParser as OptionP, OptionGroup, SUPPRESS_HELP from typing import Any, Collection, List, Optional, Union @@ -1140,6 +1140,7 @@ def sh( silent=False, shell="/bin/bash", check=False, + redirect_error=None, ): """ simple wrapper for system calls @@ -1184,8 +1185,8 @@ def sh( if log: logging.debug(cmd) - call_func = check_call if check else call - return call_func(cmd, shell=True, executable=shell) + call_func = check_output if check else call + return call_func(cmd, shell=True, executable=shell, stderr=redirect_error) def Popen(cmd, stdin=None, stdout=PIPE, debug=False, shell="/bin/bash"): From a5225ec35d146123e7682ac6b55229957fae8b9f Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sun, 19 Feb 2023 19:33:35 +1100 Subject: [PATCH 03/43] Collect stderr msgs from lastal for use in informative logging. 
--- jcvi/apps/align.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/jcvi/apps/align.py b/jcvi/apps/align.py index ebf34623..7b6a6696 100644 --- a/jcvi/apps/align.py +++ b/jcvi/apps/align.py @@ -10,7 +10,7 @@ import shutil import logging -from subprocess import CalledProcessError +from subprocess import CalledProcessError, STDOUT from jcvi.utils.cbook import depends from jcvi.apps.base import ( @@ -558,12 +558,28 @@ def last(args, dbtype=None): lastfile = get_outfile(subject, query, suffix="last", outdir=opts.outdir) # Make several attempts to run LASTAL try: - sh(cmd + f" -P {cpus} {subjectdb} {query}", outfile=lastfile, check=True) - except CalledProcessError: # multi-threading disabled - logging.error("Failed to run `lastal` with multi-threading. Trying again.") + sh( + cmd + f" -P {cpus} {subjectdb} {query}", + outfile=lastfile, + check=True, + redirect_error=STDOUT, + ) + except CalledProcessError as e: # multi-threading disabled + message = "lastal failed with message:" + message += "\n{0}".format(e.output.decode()) + logging.error(message) try: - sh(cmd + f" -P 1 {subjectdb} {query}", outfile=lastfile, check=True) - except CalledProcessError: + logging.debug("Failed to run `lastal` with multi-threading. Trying again.") + sh( + cmd + f" -P 1 {subjectdb} {query}", + outfile=lastfile, + check=True, + redirect_error=STDOUT, + ) + except CalledProcessError as e: + message = "lastal failed with message:" + message += "\n{0}".format(e.output.decode()) + logging.error(message) logging.fatal("Failed to run `lastal`. 
Aborted.") cleanup(lastfile) sys.exit(1) From 3f7ad6d398d30e8178f878a5d70db9a7dff7a6c4 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sun, 19 Feb 2023 19:57:49 +1100 Subject: [PATCH 04/43] Log suggestion to use --notex if savefig fails --- jcvi/graphics/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/jcvi/graphics/base.py b/jcvi/graphics/base.py index 92a81fe3..d1c407db 100644 --- a/jcvi/graphics/base.py +++ b/jcvi/graphics/base.py @@ -319,6 +319,7 @@ def savefig(figname, dpi=150, iopts=None, cleanup=True): message = "savefig failed with message:" message += "\n{0}".format(str(e)) logging.error(message) + logging.info("Try running again with --notex option to disable latex.") logging.debug(f"Matplotlib backend is: {mpl.get_backend()}") logging.debug(f"Attempted save as: {format}") From 00ff7939dabb862c74bcd844b7965f9a6e3a6521 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Mon, 20 Feb 2023 13:49:14 +1100 Subject: [PATCH 05/43] Add cleanup empty pdf before exit if savefig fails --- jcvi/graphics/base.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/jcvi/graphics/base.py b/jcvi/graphics/base.py index d1c407db..87fc80a9 100644 --- a/jcvi/graphics/base.py +++ b/jcvi/graphics/base.py @@ -3,6 +3,7 @@ import copy import os.path as op +from os import remove import sys import logging @@ -314,14 +315,19 @@ def savefig(figname, dpi=150, iopts=None, cleanup=True): except: format = "pdf" try: + logging.debug(f"Matplotlib backend is: {mpl.get_backend()}") + logging.debug(f"Attempting save as: {figname}") plt.savefig(figname, dpi=dpi, format=format) except Exception as e: message = "savefig failed with message:" message += "\n{0}".format(str(e)) logging.error(message) logging.info("Try running again with --notex option to disable latex.") - logging.debug(f"Matplotlib backend is: {mpl.get_backend()}") - logging.debug(f"Attempted save as: {format}") + if op.exists(figname): + if op.getsize(figname) < 1000: + logging.debug(f"Cleaning up 
empty file: {figname}") + remove(figname) + sys.exit(1) msg = "Figure saved to `{0}`".format(figname) if iopts: From e378e6fec90d871cc05bf5c931f57f1b5689659a Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Tue, 21 Feb 2023 12:09:10 +1100 Subject: [PATCH 06/43] Add logging to clarify why latex is deactivated. --- jcvi/apps/base.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/jcvi/apps/base.py b/jcvi/apps/base.py index 1a377581..0b900e83 100644 --- a/jcvi/apps/base.py +++ b/jcvi/apps/base.py @@ -588,6 +588,15 @@ def set_image_options( assert "x" in opts.figsize iopts = ImageOptions(opts) + + if opts.notex: + logging.info("--notex={}. latex use is disabled.".format(opts.notex)) + elif not is_tex_available(): + if not bool(which("latex")): + logging.info("`latex` not found. latex use is disabled.") + if not bool(which("lp")): + logging.info("`lp` not found. latex use is disabled.") + setup_theme(style=opts.style, font=opts.font, usetex=iopts.usetex) return opts, args, iopts From afb75efe005fee74860c8ee30ba27b27f1b09748 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Thu, 14 Nov 2024 23:41:31 +1100 Subject: [PATCH 07/43] ignore version and cython files --- .gitignore | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c635fa55..9d8bcab7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,18 @@ -# Created by http://www.gitignore.io +# Mac stuff +.DS_Store + +# Versioning +jcvi/version.py + +# Ignore Cython generated C files +jcvi/assembly/chic.c +jcvi/formats/cblast.c ### Python ### # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] +*$py.class # C extensions *.so @@ -14,12 +23,15 @@ env/ build/ develop-eggs/ dist/ +downloads/ eggs/ +.eggs/ lib/ lib64/ parts/ sdist/ var/ +wheels/ *.egg-info/ .installed.cfg *.egg @@ -32,9 +44,12 @@ pip-delete-this-directory.txt htmlcov/ .tox/ .coverage +.coverage.* .cache nosetests.xml coverage.xml +*.cover +.hypothesis/ # Translations *.mo @@ 
-47,4 +62,4 @@ coverage.xml docs/_build/ # gffutils temp DB file -*.db +*.db \ No newline at end of file From 4029d679f0302ad77df570baa42b3355f0cfadc3 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Thu, 14 Nov 2024 23:41:49 +1100 Subject: [PATCH 08/43] mv exclude to pyproject --- MANIFEST.in | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 708975ee..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,4 +0,0 @@ -recursive-exclude .github * -recursive-exclude docker * -recursive-exclude tests * -exclude .* \ No newline at end of file From 6a5359e25123619189d9a80e96ad13aef50c7502 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Thu, 14 Nov 2024 23:42:36 +1100 Subject: [PATCH 09/43] use hatch for build --- pyproject.toml | 139 ++++++++++++++++++++++++++++++++++++++++++++----- setup.cfg | 64 ----------------------- setup.py | 26 --------- 3 files changed, 126 insertions(+), 103 deletions(-) delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml index 3adcf6e8..2ee5ec64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,15 +1,128 @@ +# Specifies the build system requirements and backend [build-system] requires = [ - "Cython", - "numpy", - "setuptools", - "setuptools_scm[toml]", - "setuptools_scm_git_archive", - "wheel", -] -build-backend = "setuptools.build_meta" - -[tool.setuptools_scm] -write_to = "jcvi/version.py" -git_describe_command = "git describe --dirty --tags --long --match v* --first-parent" -version_scheme = "no-guess-dev" \ No newline at end of file + "hatchling", # Build backend + "hatch-vcs", # Version control system plugin for dynamic versioning + "Cython", # Cython for compiling C extensions + "numpy", # NumPy for numerical operations and C extension includes +] + +build-backend = "hatchling.build" + +# Project metadata and configuration +[project] +name = "jcvi" +description = "Python utility libraries on genome 
assembly, annotation and comparative genomics" +readme = "README.md" +requires-python = ">=3.6" +license = {text = "BSD"} +authors = [ + {name = "Haibao Tang", email = "tanghaibao@gmail.com"}, + {name = "Vivek Krishnakumar"}, + {name = "Jingping Li"} +] + +classifiers = [ + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Bio-Informatics" +] + +dependencies = [ + "CrossMap", + "Wand", + "biopython", + "boto3", + "brewer2mpl", + "deap", + "ete3", + "ftpretty", + "gffutils", + "goatools", + "genomepy", + "graphviz", + "jinja2", + "matplotlib", + "more-itertools", + "natsort", + "networkx", + "numpy<2", + "ortools", + "pybedtools", + "pyefd", + "pypdf", + "pytesseract", + "rich", + "scikit-image", + "scipy", + "seaborn", + "webcolors" +] + +# Indicates that the version is dynamically determined +dynamic = ["version"] + +# Optional dependencies for testing +[project.optional-dependencies] +test = [ + "PyYAML", + "pytest", + "pytest-cov", + "pytest-benchmark", + "mock" +] + +# Project URLs +[project.urls] +homepage = "http://github.com/tanghaibao/jcvi" + +# Hatch metadata configuration +[tool.hatch.metadata] +allow-direct-references = true + +# Hatch build configuration +[tool.hatch.build] +include = [ + "jcvi/**", # Include all files in the jcvi directory + "README.md", +] +exclude = [ + ".github/*", # Exclude GitHub workflows and configurations + "docker/*", # Exclude Docker-related files + "tests/*", # Exclude test files + ".*" # Exclude hidden files +] + +# Hatch version configuration using VCS +[tool.hatch.version] +source = "vcs" # Use version control system for versioning + +# Version file location for VCS +[tool.hatch.build.hooks.vcs] +version-file = "jcvi/version.py" # Path to write the version information + +# Version control system (VCS) versioning +[tool.hatch.version.vcs] +tag-pattern = "v*" # Git tags 
starting with 'v' will be used for versioning +fallback-version = "0.0.0" + +# Hatch build targets for source distribution +[tool.hatch.build.targets.sdist] +include = [ + "jcvi/**", + "README.md", +] + +# Hatch build targets for wheel distribution +[tool.hatch.build.targets.wheel] +include = [ + "jcvi/**", + "README.md", +] + +# Cython extensions to be built +[tool.hatch.build.targets.wheel.ext_modules] +jcvi.assembly.chic = {sources = ["jcvi/assembly/chic.pyx"], include-dirs = ["{numpy.get_include()}"], extra-compile-args = ["-O3"]} +jcvi.formats.cblast = {sources = ["jcvi/formats/cblast.pyx"], extra-compile-args = ["-O3"]} \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 547730ae..00000000 --- a/setup.cfg +++ /dev/null @@ -1,64 +0,0 @@ -[metadata] -name = jcvi -author = Haibao Tang, Vivek Krishnakumar, Jingping Li -author_email = tanghaibao@gmail.com -license = BSD -description = Python utility libraries on genome assembly, annotation and comparative genomics -url = http://github.com/tanghaibao/jcvi -long_description = file: README.md -long_description_content_type = text/markdown -classifiers = - Development Status :: 4 - Beta - Intended Audience :: Science/Research - License :: OSI Approved :: BSD License - Programming Language :: Python - Programming Language :: Python :: 3 - Topic :: Scientific/Engineering :: Bio-Informatics - -[options] -packages = - jcvi - jcvi.formats - jcvi.variation - jcvi.compara - jcvi.assembly - jcvi.projects - jcvi.algorithms - jcvi.annotation - jcvi.utils - jcvi.graphics - jcvi.apps - jcvi.utils.data -zip_safe = False -setup_requires = setuptools>=18.0; cython -install_requires = - CrossMap - Wand - biopython - boto3 - brewer2mpl - deap - ete3 - ftpretty - gffutils - goatools - genomepy - graphviz - jinja2 - matplotlib - more-itertools - natsort - networkx - numpy<2 - ortools - pybedtools - pyefd - pypdf - pytesseract - rich - scikit-image - scipy - seaborn - webcolors 
-include_package_data = True -tests_require = PyYAML; pytest; pytest-cov; pytest-benchmark; mock diff --git a/setup.py b/setup.py deleted file mode 100644 index 09fe097b..00000000 --- a/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python - -"""Package setup""" - -from Cython.Build import build_ext -from setuptools import setup, Extension - -import numpy as np - -ext_modules = [ - Extension( - "jcvi.assembly.chic", - ["jcvi/assembly/chic.pyx"], - include_dirs=[np.get_include()], - extra_compile_args=["-O3"], - ), - Extension( - "jcvi.formats.cblast", ["jcvi/formats/cblast.pyx"], extra_compile_args=["-O3"] - ), -] - -if __name__ == "__main__": - setup( - cmdclass={"build_ext": build_ext}, - ext_modules=ext_modules, - ) From eff4eb69ea62076c8d9c2e63c922c8b1fc7b0746 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 15 Nov 2024 16:08:44 +1100 Subject: [PATCH 10/43] use hatch-vcs version --- jcvi/__init__.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/jcvi/__init__.py b/jcvi/__init__.py index 89bf10ae..ad657eb5 100644 --- a/jcvi/__init__.py +++ b/jcvi/__init__.py @@ -1,6 +1,4 @@ from datetime import datetime -from importlib.metadata import version, PackageNotFoundError - __author__ = ( "Haibao Tang", @@ -13,16 +11,11 @@ __license__ = "BSD" __status__ = "Development" - try: - VERSION = version(__name__) -except PackageNotFoundError: # pragma: no cover - try: - from .version import version as VERSION # noqa - except ImportError as exc: # pragma: no cover - raise ImportError( - "Failed to find (autogenerated) version.py. " - "This might be because you are installing from GitHub's tarballs, " - "use the PyPI ones." - ) from exc -__version__ = VERSION + from .version import __version__ # noqa +except ImportError as exc: # pragma: no cover + raise ImportError( + "Failed to find (autogenerated) version.py. " + "This might be because you are installing from GitHub's tarballs, " + "use the PyPI ones." 
+ ) from exc From b77c3093cb762b631109af0f99c5b13bbf6281bf Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 15 Nov 2024 16:09:22 +1100 Subject: [PATCH 11/43] log error when no cblast --- jcvi/formats/blast.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jcvi/formats/blast.py b/jcvi/formats/blast.py index 75de55f6..54b2ede0 100644 --- a/jcvi/formats/blast.py +++ b/jcvi/formats/blast.py @@ -23,10 +23,10 @@ try: from .cblast import BlastLine -except: +except ImportError as e: + logger.error(f"Failed to import cblast: {e}") from .pyblast import BlastLine - - logger.error("Fall back to Python implementation of BlastLine") + logger.warning("Fall back to Python implementation of BlastLine") class BlastSlow(LineFile): From 39b70d73a8a1879d308b8969e00268ed3b7d12b4 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 15 Nov 2024 16:11:04 +1100 Subject: [PATCH 12/43] mv pkg into src --- {jcvi => src/jcvi}/__init__.py | 0 {jcvi => src/jcvi}/algorithms/__init__.py | 0 {jcvi => src/jcvi}/algorithms/__main__.py | 0 {jcvi => src/jcvi}/algorithms/ec.py | 0 {jcvi => src/jcvi}/algorithms/formula.py | 0 {jcvi => src/jcvi}/algorithms/graph.py | 0 {jcvi => src/jcvi}/algorithms/lis.py | 0 {jcvi => src/jcvi}/algorithms/lpsolve.py | 0 {jcvi => src/jcvi}/algorithms/matrix.py | 0 {jcvi => src/jcvi}/algorithms/maxsum.py | 0 {jcvi => src/jcvi}/algorithms/supermap.py | 0 {jcvi => src/jcvi}/algorithms/tsp.py | 0 {jcvi => src/jcvi}/annotation/__init__.py | 0 {jcvi => src/jcvi}/annotation/__main__.py | 0 {jcvi => src/jcvi}/annotation/ahrd.py | 0 {jcvi => src/jcvi}/annotation/automaton.py | 0 {jcvi => src/jcvi}/annotation/depth.py | 0 {jcvi => src/jcvi}/annotation/evm.py | 0 {jcvi => src/jcvi}/annotation/maker.py | 0 {jcvi => src/jcvi}/annotation/pasa.py | 0 {jcvi => src/jcvi}/annotation/qc.py | 0 {jcvi => src/jcvi}/annotation/reformat.py | 0 {jcvi => src/jcvi}/annotation/stats.py | 0 {jcvi => src/jcvi}/annotation/train.py | 0 {jcvi => 
src/jcvi}/annotation/trinity.py | 0 {jcvi => src/jcvi}/apps/__init__.py | 0 {jcvi => src/jcvi}/apps/__main__.py | 0 {jcvi => src/jcvi}/apps/align.py | 0 {jcvi => src/jcvi}/apps/base.py | 0 {jcvi => src/jcvi}/apps/biomart.py | 0 {jcvi => src/jcvi}/apps/blastplus.py | 0 {jcvi => src/jcvi}/apps/bowtie.py | 0 {jcvi => src/jcvi}/apps/bwa.py | 0 {jcvi => src/jcvi}/apps/cdhit.py | 0 {jcvi => src/jcvi}/apps/emboss.py | 0 {jcvi => src/jcvi}/apps/fetch.py | 0 {jcvi => src/jcvi}/apps/gbsubmit.py | 0 {jcvi => src/jcvi}/apps/gmap.py | 0 {jcvi => src/jcvi}/apps/grid.py | 0 {jcvi => src/jcvi}/apps/lastz.py | 0 {jcvi => src/jcvi}/apps/mask.py | 0 {jcvi => src/jcvi}/apps/phylo.py | 0 {jcvi => src/jcvi}/apps/r.py | 0 {jcvi => src/jcvi}/apps/restriction.py | 0 {jcvi => src/jcvi}/apps/softlink.py | 0 {jcvi => src/jcvi}/apps/uclust.py | 0 {jcvi => src/jcvi}/apps/uniprot.py | 0 {jcvi => src/jcvi}/apps/vecscreen.py | 0 {jcvi => src/jcvi}/assembly/__init__.py | 0 {jcvi => src/jcvi}/assembly/__main__.py | 0 {jcvi => src/jcvi}/assembly/allmaps.py | 0 {jcvi => src/jcvi}/assembly/allpaths.py | 0 {jcvi => src/jcvi}/assembly/automaton.py | 0 {jcvi => src/jcvi}/assembly/base.py | 0 {jcvi => src/jcvi}/assembly/chic.pyx | 0 {jcvi => src/jcvi}/assembly/coverage.py | 0 {jcvi => src/jcvi}/assembly/gaps.py | 0 {jcvi => src/jcvi}/assembly/geneticmap.py | 0 {jcvi => src/jcvi}/assembly/goldenpath.py | 0 {jcvi => src/jcvi}/assembly/hic.py | 0 {jcvi => src/jcvi}/assembly/kmer.py | 0 {jcvi => src/jcvi}/assembly/opticalmap.py | 0 {jcvi => src/jcvi}/assembly/patch.py | 0 {jcvi => src/jcvi}/assembly/postprocess.py | 0 {jcvi => src/jcvi}/assembly/preprocess.py | 0 {jcvi => src/jcvi}/assembly/sim.py | 0 {jcvi => src/jcvi}/assembly/soap.py | 0 {jcvi => src/jcvi}/assembly/syntenypath.py | 0 {jcvi => src/jcvi}/compara/__init__.py | 0 {jcvi => src/jcvi}/compara/__main__.py | 0 {jcvi => src/jcvi}/compara/base.py | 0 {jcvi => src/jcvi}/compara/blastfilter.py | 0 {jcvi => src/jcvi}/compara/catalog.py | 0 {jcvi => 
src/jcvi}/compara/fractionation.py | 0 {jcvi => src/jcvi}/compara/ks.py | 0 {jcvi => src/jcvi}/compara/pad.py | 0 {jcvi => src/jcvi}/compara/pedigree.py | 0 {jcvi => src/jcvi}/compara/phylogeny.py | 0 {jcvi => src/jcvi}/compara/quota.py | 0 {jcvi => src/jcvi}/compara/reconstruct.py | 0 {jcvi => src/jcvi}/compara/synfind.py | 0 {jcvi => src/jcvi}/compara/synteny.py | 0 {jcvi => src/jcvi}/formats/__init__.py | 0 {jcvi => src/jcvi}/formats/__main__.py | 0 {jcvi => src/jcvi}/formats/agp.py | 0 {jcvi => src/jcvi}/formats/base.py | 0 {jcvi => src/jcvi}/formats/bed.py | 0 {jcvi => src/jcvi}/formats/blast.py | 0 src/jcvi/formats/cblast.c | 16859 ++++++++++++++++ {jcvi => src/jcvi}/formats/cblast.pyx | 0 {jcvi => src/jcvi}/formats/cdt.py | 0 {jcvi => src/jcvi}/formats/chain.py | 0 {jcvi => src/jcvi}/formats/contig.py | 0 {jcvi => src/jcvi}/formats/coords.py | 0 {jcvi => src/jcvi}/formats/excel.py | 0 {jcvi => src/jcvi}/formats/fasta.py | 0 {jcvi => src/jcvi}/formats/fastq.py | 0 {jcvi => src/jcvi}/formats/genbank.py | 0 {jcvi => src/jcvi}/formats/gff.py | 0 {jcvi => src/jcvi}/formats/html.py | 0 {jcvi => src/jcvi}/formats/maf.py | 0 {jcvi => src/jcvi}/formats/obo.py | 0 {jcvi => src/jcvi}/formats/paf.py | 0 {jcvi => src/jcvi}/formats/pdf.py | 0 {jcvi => src/jcvi}/formats/psl.py | 0 {jcvi => src/jcvi}/formats/pyblast.py | 0 {jcvi => src/jcvi}/formats/sam.py | 0 {jcvi => src/jcvi}/formats/sizes.py | 0 {jcvi => src/jcvi}/formats/vcf.py | 0 {jcvi => src/jcvi}/graphics/__init__.py | 0 {jcvi => src/jcvi}/graphics/__main__.py | 0 {jcvi => src/jcvi}/graphics/align.py | 0 {jcvi => src/jcvi}/graphics/assembly.py | 0 {jcvi => src/jcvi}/graphics/base.py | 0 {jcvi => src/jcvi}/graphics/blastplot.py | 0 {jcvi => src/jcvi}/graphics/chromosome.py | 0 {jcvi => src/jcvi}/graphics/coverage.py | 0 {jcvi => src/jcvi}/graphics/dotplot.py | 0 {jcvi => src/jcvi}/graphics/glyph.py | 0 {jcvi => src/jcvi}/graphics/grabseeds.py | 0 {jcvi => src/jcvi}/graphics/heatmap.py | 0 {jcvi => 
src/jcvi}/graphics/histogram.py | 0 {jcvi => src/jcvi}/graphics/karyotype.py | 0 {jcvi => src/jcvi}/graphics/landscape.py | 0 {jcvi => src/jcvi}/graphics/mummerplot.py | 0 {jcvi => src/jcvi}/graphics/synteny.py | 0 {jcvi => src/jcvi}/graphics/table.py | 0 {jcvi => src/jcvi}/graphics/tree.py | 0 {jcvi => src/jcvi}/graphics/wheel.py | 0 {jcvi => src/jcvi}/projects/__init__.py | 0 {jcvi => src/jcvi}/projects/__main__.py | 0 {jcvi => src/jcvi}/projects/age.py | 0 {jcvi => src/jcvi}/projects/allmaps.py | 0 {jcvi => src/jcvi}/projects/bites.py | 0 {jcvi => src/jcvi}/projects/ies.py | 0 {jcvi => src/jcvi}/projects/jcvi.py | 0 {jcvi => src/jcvi}/projects/misc.py | 0 {jcvi => src/jcvi}/projects/napus.py | 0 {jcvi => src/jcvi}/projects/pineapple.py | 0 {jcvi => src/jcvi}/projects/str.py | 0 {jcvi => src/jcvi}/projects/sugarcane.py | 0 {jcvi => src/jcvi}/projects/synfind.py | 0 {jcvi => src/jcvi}/projects/tgbs.py | 0 {jcvi => src/jcvi}/projects/vanilla.py | 0 {jcvi => src/jcvi}/utils/__init__.py | 0 {jcvi => src/jcvi}/utils/__main__.py | 0 {jcvi => src/jcvi}/utils/aws.py | 0 {jcvi => src/jcvi}/utils/cbook.py | 0 {jcvi => src/jcvi}/utils/console.py | 0 {jcvi => src/jcvi}/utils/data/Airswing.ttf | Bin {jcvi => src/jcvi}/utils/data/Collegia.ttf | Bin {jcvi => src/jcvi}/utils/data/HookedUp.ttf | Bin {jcvi => src/jcvi}/utils/data/Humor-Sans.ttf | Bin {jcvi => src/jcvi}/utils/data/TREDs.meta.csv | 0 {jcvi => src/jcvi}/utils/data/__init__.py | 0 {jcvi => src/jcvi}/utils/data/adapters.fasta | 0 {jcvi => src/jcvi}/utils/data/blosum80.mat | 0 .../jcvi}/utils/data/chrY.hg38.unique_ccn.gc | 0 .../jcvi}/utils/data/colorchecker.txt | 0 {jcvi => src/jcvi}/utils/data/hg38.band.txt | 0 .../jcvi}/utils/data/hg38.chrom.sizes | 0 {jcvi => src/jcvi}/utils/data/instance.json | 0 {jcvi => src/jcvi}/utils/db.py | 0 {jcvi => src/jcvi}/utils/ez_setup.py | 0 {jcvi => src/jcvi}/utils/grouper.py | 0 .../jcvi}/utils/orderedcollections.py | 0 {jcvi => src/jcvi}/utils/range.py | 0 {jcvi => 
src/jcvi}/utils/table.py | 0 {jcvi => src/jcvi}/utils/taxonomy.py | 0 {jcvi => src/jcvi}/utils/validator.py | 0 {jcvi => src/jcvi}/utils/webcolors.py | 0 {jcvi => src/jcvi}/variation/__init__.py | 0 {jcvi => src/jcvi}/variation/__main__.py | 0 {jcvi => src/jcvi}/variation/cnv.py | 0 {jcvi => src/jcvi}/variation/deconvolute.py | 0 {jcvi => src/jcvi}/variation/delly.py | 0 {jcvi => src/jcvi}/variation/impute.py | 0 {jcvi => src/jcvi}/variation/phase.py | 0 {jcvi => src/jcvi}/variation/snp.py | 0 {jcvi => src/jcvi}/variation/str.py | 0 src/jcvi/version.py | 16 + 181 files changed, 16875 insertions(+) rename {jcvi => src/jcvi}/__init__.py (100%) rename {jcvi => src/jcvi}/algorithms/__init__.py (100%) rename {jcvi => src/jcvi}/algorithms/__main__.py (100%) rename {jcvi => src/jcvi}/algorithms/ec.py (100%) rename {jcvi => src/jcvi}/algorithms/formula.py (100%) rename {jcvi => src/jcvi}/algorithms/graph.py (100%) rename {jcvi => src/jcvi}/algorithms/lis.py (100%) rename {jcvi => src/jcvi}/algorithms/lpsolve.py (100%) rename {jcvi => src/jcvi}/algorithms/matrix.py (100%) rename {jcvi => src/jcvi}/algorithms/maxsum.py (100%) rename {jcvi => src/jcvi}/algorithms/supermap.py (100%) rename {jcvi => src/jcvi}/algorithms/tsp.py (100%) rename {jcvi => src/jcvi}/annotation/__init__.py (100%) rename {jcvi => src/jcvi}/annotation/__main__.py (100%) rename {jcvi => src/jcvi}/annotation/ahrd.py (100%) rename {jcvi => src/jcvi}/annotation/automaton.py (100%) rename {jcvi => src/jcvi}/annotation/depth.py (100%) rename {jcvi => src/jcvi}/annotation/evm.py (100%) rename {jcvi => src/jcvi}/annotation/maker.py (100%) rename {jcvi => src/jcvi}/annotation/pasa.py (100%) rename {jcvi => src/jcvi}/annotation/qc.py (100%) rename {jcvi => src/jcvi}/annotation/reformat.py (100%) rename {jcvi => src/jcvi}/annotation/stats.py (100%) rename {jcvi => src/jcvi}/annotation/train.py (100%) rename {jcvi => src/jcvi}/annotation/trinity.py (100%) rename {jcvi => src/jcvi}/apps/__init__.py (100%) rename 
{jcvi => src/jcvi}/apps/__main__.py (100%) rename {jcvi => src/jcvi}/apps/align.py (100%) rename {jcvi => src/jcvi}/apps/base.py (100%) rename {jcvi => src/jcvi}/apps/biomart.py (100%) rename {jcvi => src/jcvi}/apps/blastplus.py (100%) rename {jcvi => src/jcvi}/apps/bowtie.py (100%) rename {jcvi => src/jcvi}/apps/bwa.py (100%) rename {jcvi => src/jcvi}/apps/cdhit.py (100%) rename {jcvi => src/jcvi}/apps/emboss.py (100%) rename {jcvi => src/jcvi}/apps/fetch.py (100%) rename {jcvi => src/jcvi}/apps/gbsubmit.py (100%) rename {jcvi => src/jcvi}/apps/gmap.py (100%) rename {jcvi => src/jcvi}/apps/grid.py (100%) rename {jcvi => src/jcvi}/apps/lastz.py (100%) rename {jcvi => src/jcvi}/apps/mask.py (100%) rename {jcvi => src/jcvi}/apps/phylo.py (100%) rename {jcvi => src/jcvi}/apps/r.py (100%) rename {jcvi => src/jcvi}/apps/restriction.py (100%) rename {jcvi => src/jcvi}/apps/softlink.py (100%) rename {jcvi => src/jcvi}/apps/uclust.py (100%) rename {jcvi => src/jcvi}/apps/uniprot.py (100%) rename {jcvi => src/jcvi}/apps/vecscreen.py (100%) rename {jcvi => src/jcvi}/assembly/__init__.py (100%) rename {jcvi => src/jcvi}/assembly/__main__.py (100%) rename {jcvi => src/jcvi}/assembly/allmaps.py (100%) rename {jcvi => src/jcvi}/assembly/allpaths.py (100%) rename {jcvi => src/jcvi}/assembly/automaton.py (100%) rename {jcvi => src/jcvi}/assembly/base.py (100%) rename {jcvi => src/jcvi}/assembly/chic.pyx (100%) rename {jcvi => src/jcvi}/assembly/coverage.py (100%) rename {jcvi => src/jcvi}/assembly/gaps.py (100%) rename {jcvi => src/jcvi}/assembly/geneticmap.py (100%) rename {jcvi => src/jcvi}/assembly/goldenpath.py (100%) rename {jcvi => src/jcvi}/assembly/hic.py (100%) rename {jcvi => src/jcvi}/assembly/kmer.py (100%) rename {jcvi => src/jcvi}/assembly/opticalmap.py (100%) rename {jcvi => src/jcvi}/assembly/patch.py (100%) rename {jcvi => src/jcvi}/assembly/postprocess.py (100%) rename {jcvi => src/jcvi}/assembly/preprocess.py (100%) rename {jcvi => src/jcvi}/assembly/sim.py 
(100%) rename {jcvi => src/jcvi}/assembly/soap.py (100%) rename {jcvi => src/jcvi}/assembly/syntenypath.py (100%) rename {jcvi => src/jcvi}/compara/__init__.py (100%) rename {jcvi => src/jcvi}/compara/__main__.py (100%) rename {jcvi => src/jcvi}/compara/base.py (100%) rename {jcvi => src/jcvi}/compara/blastfilter.py (100%) rename {jcvi => src/jcvi}/compara/catalog.py (100%) rename {jcvi => src/jcvi}/compara/fractionation.py (100%) rename {jcvi => src/jcvi}/compara/ks.py (100%) rename {jcvi => src/jcvi}/compara/pad.py (100%) rename {jcvi => src/jcvi}/compara/pedigree.py (100%) rename {jcvi => src/jcvi}/compara/phylogeny.py (100%) rename {jcvi => src/jcvi}/compara/quota.py (100%) rename {jcvi => src/jcvi}/compara/reconstruct.py (100%) rename {jcvi => src/jcvi}/compara/synfind.py (100%) rename {jcvi => src/jcvi}/compara/synteny.py (100%) rename {jcvi => src/jcvi}/formats/__init__.py (100%) rename {jcvi => src/jcvi}/formats/__main__.py (100%) rename {jcvi => src/jcvi}/formats/agp.py (100%) rename {jcvi => src/jcvi}/formats/base.py (100%) rename {jcvi => src/jcvi}/formats/bed.py (100%) rename {jcvi => src/jcvi}/formats/blast.py (100%) create mode 100644 src/jcvi/formats/cblast.c rename {jcvi => src/jcvi}/formats/cblast.pyx (100%) rename {jcvi => src/jcvi}/formats/cdt.py (100%) rename {jcvi => src/jcvi}/formats/chain.py (100%) rename {jcvi => src/jcvi}/formats/contig.py (100%) rename {jcvi => src/jcvi}/formats/coords.py (100%) rename {jcvi => src/jcvi}/formats/excel.py (100%) rename {jcvi => src/jcvi}/formats/fasta.py (100%) rename {jcvi => src/jcvi}/formats/fastq.py (100%) rename {jcvi => src/jcvi}/formats/genbank.py (100%) rename {jcvi => src/jcvi}/formats/gff.py (100%) rename {jcvi => src/jcvi}/formats/html.py (100%) rename {jcvi => src/jcvi}/formats/maf.py (100%) rename {jcvi => src/jcvi}/formats/obo.py (100%) rename {jcvi => src/jcvi}/formats/paf.py (100%) rename {jcvi => src/jcvi}/formats/pdf.py (100%) rename {jcvi => src/jcvi}/formats/psl.py (100%) rename {jcvi => 
src/jcvi}/formats/pyblast.py (100%) rename {jcvi => src/jcvi}/formats/sam.py (100%) rename {jcvi => src/jcvi}/formats/sizes.py (100%) rename {jcvi => src/jcvi}/formats/vcf.py (100%) rename {jcvi => src/jcvi}/graphics/__init__.py (100%) rename {jcvi => src/jcvi}/graphics/__main__.py (100%) rename {jcvi => src/jcvi}/graphics/align.py (100%) rename {jcvi => src/jcvi}/graphics/assembly.py (100%) rename {jcvi => src/jcvi}/graphics/base.py (100%) rename {jcvi => src/jcvi}/graphics/blastplot.py (100%) rename {jcvi => src/jcvi}/graphics/chromosome.py (100%) rename {jcvi => src/jcvi}/graphics/coverage.py (100%) rename {jcvi => src/jcvi}/graphics/dotplot.py (100%) rename {jcvi => src/jcvi}/graphics/glyph.py (100%) rename {jcvi => src/jcvi}/graphics/grabseeds.py (100%) rename {jcvi => src/jcvi}/graphics/heatmap.py (100%) rename {jcvi => src/jcvi}/graphics/histogram.py (100%) rename {jcvi => src/jcvi}/graphics/karyotype.py (100%) rename {jcvi => src/jcvi}/graphics/landscape.py (100%) rename {jcvi => src/jcvi}/graphics/mummerplot.py (100%) rename {jcvi => src/jcvi}/graphics/synteny.py (100%) rename {jcvi => src/jcvi}/graphics/table.py (100%) rename {jcvi => src/jcvi}/graphics/tree.py (100%) rename {jcvi => src/jcvi}/graphics/wheel.py (100%) rename {jcvi => src/jcvi}/projects/__init__.py (100%) rename {jcvi => src/jcvi}/projects/__main__.py (100%) rename {jcvi => src/jcvi}/projects/age.py (100%) rename {jcvi => src/jcvi}/projects/allmaps.py (100%) rename {jcvi => src/jcvi}/projects/bites.py (100%) rename {jcvi => src/jcvi}/projects/ies.py (100%) rename {jcvi => src/jcvi}/projects/jcvi.py (100%) rename {jcvi => src/jcvi}/projects/misc.py (100%) rename {jcvi => src/jcvi}/projects/napus.py (100%) rename {jcvi => src/jcvi}/projects/pineapple.py (100%) rename {jcvi => src/jcvi}/projects/str.py (100%) rename {jcvi => src/jcvi}/projects/sugarcane.py (100%) rename {jcvi => src/jcvi}/projects/synfind.py (100%) rename {jcvi => src/jcvi}/projects/tgbs.py (100%) rename {jcvi => 
src/jcvi}/projects/vanilla.py (100%) rename {jcvi => src/jcvi}/utils/__init__.py (100%) rename {jcvi => src/jcvi}/utils/__main__.py (100%) rename {jcvi => src/jcvi}/utils/aws.py (100%) rename {jcvi => src/jcvi}/utils/cbook.py (100%) rename {jcvi => src/jcvi}/utils/console.py (100%) rename {jcvi => src/jcvi}/utils/data/Airswing.ttf (100%) rename {jcvi => src/jcvi}/utils/data/Collegia.ttf (100%) rename {jcvi => src/jcvi}/utils/data/HookedUp.ttf (100%) rename {jcvi => src/jcvi}/utils/data/Humor-Sans.ttf (100%) rename {jcvi => src/jcvi}/utils/data/TREDs.meta.csv (100%) rename {jcvi => src/jcvi}/utils/data/__init__.py (100%) rename {jcvi => src/jcvi}/utils/data/adapters.fasta (100%) rename {jcvi => src/jcvi}/utils/data/blosum80.mat (100%) rename {jcvi => src/jcvi}/utils/data/chrY.hg38.unique_ccn.gc (100%) rename {jcvi => src/jcvi}/utils/data/colorchecker.txt (100%) rename {jcvi => src/jcvi}/utils/data/hg38.band.txt (100%) rename {jcvi => src/jcvi}/utils/data/hg38.chrom.sizes (100%) rename {jcvi => src/jcvi}/utils/data/instance.json (100%) rename {jcvi => src/jcvi}/utils/db.py (100%) rename {jcvi => src/jcvi}/utils/ez_setup.py (100%) rename {jcvi => src/jcvi}/utils/grouper.py (100%) rename {jcvi => src/jcvi}/utils/orderedcollections.py (100%) rename {jcvi => src/jcvi}/utils/range.py (100%) rename {jcvi => src/jcvi}/utils/table.py (100%) rename {jcvi => src/jcvi}/utils/taxonomy.py (100%) rename {jcvi => src/jcvi}/utils/validator.py (100%) rename {jcvi => src/jcvi}/utils/webcolors.py (100%) rename {jcvi => src/jcvi}/variation/__init__.py (100%) rename {jcvi => src/jcvi}/variation/__main__.py (100%) rename {jcvi => src/jcvi}/variation/cnv.py (100%) rename {jcvi => src/jcvi}/variation/deconvolute.py (100%) rename {jcvi => src/jcvi}/variation/delly.py (100%) rename {jcvi => src/jcvi}/variation/impute.py (100%) rename {jcvi => src/jcvi}/variation/phase.py (100%) rename {jcvi => src/jcvi}/variation/snp.py (100%) rename {jcvi => src/jcvi}/variation/str.py (100%) create mode 
100644 src/jcvi/version.py diff --git a/jcvi/__init__.py b/src/jcvi/__init__.py similarity index 100% rename from jcvi/__init__.py rename to src/jcvi/__init__.py diff --git a/jcvi/algorithms/__init__.py b/src/jcvi/algorithms/__init__.py similarity index 100% rename from jcvi/algorithms/__init__.py rename to src/jcvi/algorithms/__init__.py diff --git a/jcvi/algorithms/__main__.py b/src/jcvi/algorithms/__main__.py similarity index 100% rename from jcvi/algorithms/__main__.py rename to src/jcvi/algorithms/__main__.py diff --git a/jcvi/algorithms/ec.py b/src/jcvi/algorithms/ec.py similarity index 100% rename from jcvi/algorithms/ec.py rename to src/jcvi/algorithms/ec.py diff --git a/jcvi/algorithms/formula.py b/src/jcvi/algorithms/formula.py similarity index 100% rename from jcvi/algorithms/formula.py rename to src/jcvi/algorithms/formula.py diff --git a/jcvi/algorithms/graph.py b/src/jcvi/algorithms/graph.py similarity index 100% rename from jcvi/algorithms/graph.py rename to src/jcvi/algorithms/graph.py diff --git a/jcvi/algorithms/lis.py b/src/jcvi/algorithms/lis.py similarity index 100% rename from jcvi/algorithms/lis.py rename to src/jcvi/algorithms/lis.py diff --git a/jcvi/algorithms/lpsolve.py b/src/jcvi/algorithms/lpsolve.py similarity index 100% rename from jcvi/algorithms/lpsolve.py rename to src/jcvi/algorithms/lpsolve.py diff --git a/jcvi/algorithms/matrix.py b/src/jcvi/algorithms/matrix.py similarity index 100% rename from jcvi/algorithms/matrix.py rename to src/jcvi/algorithms/matrix.py diff --git a/jcvi/algorithms/maxsum.py b/src/jcvi/algorithms/maxsum.py similarity index 100% rename from jcvi/algorithms/maxsum.py rename to src/jcvi/algorithms/maxsum.py diff --git a/jcvi/algorithms/supermap.py b/src/jcvi/algorithms/supermap.py similarity index 100% rename from jcvi/algorithms/supermap.py rename to src/jcvi/algorithms/supermap.py diff --git a/jcvi/algorithms/tsp.py b/src/jcvi/algorithms/tsp.py similarity index 100% rename from jcvi/algorithms/tsp.py 
rename to src/jcvi/algorithms/tsp.py diff --git a/jcvi/annotation/__init__.py b/src/jcvi/annotation/__init__.py similarity index 100% rename from jcvi/annotation/__init__.py rename to src/jcvi/annotation/__init__.py diff --git a/jcvi/annotation/__main__.py b/src/jcvi/annotation/__main__.py similarity index 100% rename from jcvi/annotation/__main__.py rename to src/jcvi/annotation/__main__.py diff --git a/jcvi/annotation/ahrd.py b/src/jcvi/annotation/ahrd.py similarity index 100% rename from jcvi/annotation/ahrd.py rename to src/jcvi/annotation/ahrd.py diff --git a/jcvi/annotation/automaton.py b/src/jcvi/annotation/automaton.py similarity index 100% rename from jcvi/annotation/automaton.py rename to src/jcvi/annotation/automaton.py diff --git a/jcvi/annotation/depth.py b/src/jcvi/annotation/depth.py similarity index 100% rename from jcvi/annotation/depth.py rename to src/jcvi/annotation/depth.py diff --git a/jcvi/annotation/evm.py b/src/jcvi/annotation/evm.py similarity index 100% rename from jcvi/annotation/evm.py rename to src/jcvi/annotation/evm.py diff --git a/jcvi/annotation/maker.py b/src/jcvi/annotation/maker.py similarity index 100% rename from jcvi/annotation/maker.py rename to src/jcvi/annotation/maker.py diff --git a/jcvi/annotation/pasa.py b/src/jcvi/annotation/pasa.py similarity index 100% rename from jcvi/annotation/pasa.py rename to src/jcvi/annotation/pasa.py diff --git a/jcvi/annotation/qc.py b/src/jcvi/annotation/qc.py similarity index 100% rename from jcvi/annotation/qc.py rename to src/jcvi/annotation/qc.py diff --git a/jcvi/annotation/reformat.py b/src/jcvi/annotation/reformat.py similarity index 100% rename from jcvi/annotation/reformat.py rename to src/jcvi/annotation/reformat.py diff --git a/jcvi/annotation/stats.py b/src/jcvi/annotation/stats.py similarity index 100% rename from jcvi/annotation/stats.py rename to src/jcvi/annotation/stats.py diff --git a/jcvi/annotation/train.py b/src/jcvi/annotation/train.py similarity index 100% rename 
from jcvi/annotation/train.py rename to src/jcvi/annotation/train.py diff --git a/jcvi/annotation/trinity.py b/src/jcvi/annotation/trinity.py similarity index 100% rename from jcvi/annotation/trinity.py rename to src/jcvi/annotation/trinity.py diff --git a/jcvi/apps/__init__.py b/src/jcvi/apps/__init__.py similarity index 100% rename from jcvi/apps/__init__.py rename to src/jcvi/apps/__init__.py diff --git a/jcvi/apps/__main__.py b/src/jcvi/apps/__main__.py similarity index 100% rename from jcvi/apps/__main__.py rename to src/jcvi/apps/__main__.py diff --git a/jcvi/apps/align.py b/src/jcvi/apps/align.py similarity index 100% rename from jcvi/apps/align.py rename to src/jcvi/apps/align.py diff --git a/jcvi/apps/base.py b/src/jcvi/apps/base.py similarity index 100% rename from jcvi/apps/base.py rename to src/jcvi/apps/base.py diff --git a/jcvi/apps/biomart.py b/src/jcvi/apps/biomart.py similarity index 100% rename from jcvi/apps/biomart.py rename to src/jcvi/apps/biomart.py diff --git a/jcvi/apps/blastplus.py b/src/jcvi/apps/blastplus.py similarity index 100% rename from jcvi/apps/blastplus.py rename to src/jcvi/apps/blastplus.py diff --git a/jcvi/apps/bowtie.py b/src/jcvi/apps/bowtie.py similarity index 100% rename from jcvi/apps/bowtie.py rename to src/jcvi/apps/bowtie.py diff --git a/jcvi/apps/bwa.py b/src/jcvi/apps/bwa.py similarity index 100% rename from jcvi/apps/bwa.py rename to src/jcvi/apps/bwa.py diff --git a/jcvi/apps/cdhit.py b/src/jcvi/apps/cdhit.py similarity index 100% rename from jcvi/apps/cdhit.py rename to src/jcvi/apps/cdhit.py diff --git a/jcvi/apps/emboss.py b/src/jcvi/apps/emboss.py similarity index 100% rename from jcvi/apps/emboss.py rename to src/jcvi/apps/emboss.py diff --git a/jcvi/apps/fetch.py b/src/jcvi/apps/fetch.py similarity index 100% rename from jcvi/apps/fetch.py rename to src/jcvi/apps/fetch.py diff --git a/jcvi/apps/gbsubmit.py b/src/jcvi/apps/gbsubmit.py similarity index 100% rename from jcvi/apps/gbsubmit.py rename to 
src/jcvi/apps/gbsubmit.py diff --git a/jcvi/apps/gmap.py b/src/jcvi/apps/gmap.py similarity index 100% rename from jcvi/apps/gmap.py rename to src/jcvi/apps/gmap.py diff --git a/jcvi/apps/grid.py b/src/jcvi/apps/grid.py similarity index 100% rename from jcvi/apps/grid.py rename to src/jcvi/apps/grid.py diff --git a/jcvi/apps/lastz.py b/src/jcvi/apps/lastz.py similarity index 100% rename from jcvi/apps/lastz.py rename to src/jcvi/apps/lastz.py diff --git a/jcvi/apps/mask.py b/src/jcvi/apps/mask.py similarity index 100% rename from jcvi/apps/mask.py rename to src/jcvi/apps/mask.py diff --git a/jcvi/apps/phylo.py b/src/jcvi/apps/phylo.py similarity index 100% rename from jcvi/apps/phylo.py rename to src/jcvi/apps/phylo.py diff --git a/jcvi/apps/r.py b/src/jcvi/apps/r.py similarity index 100% rename from jcvi/apps/r.py rename to src/jcvi/apps/r.py diff --git a/jcvi/apps/restriction.py b/src/jcvi/apps/restriction.py similarity index 100% rename from jcvi/apps/restriction.py rename to src/jcvi/apps/restriction.py diff --git a/jcvi/apps/softlink.py b/src/jcvi/apps/softlink.py similarity index 100% rename from jcvi/apps/softlink.py rename to src/jcvi/apps/softlink.py diff --git a/jcvi/apps/uclust.py b/src/jcvi/apps/uclust.py similarity index 100% rename from jcvi/apps/uclust.py rename to src/jcvi/apps/uclust.py diff --git a/jcvi/apps/uniprot.py b/src/jcvi/apps/uniprot.py similarity index 100% rename from jcvi/apps/uniprot.py rename to src/jcvi/apps/uniprot.py diff --git a/jcvi/apps/vecscreen.py b/src/jcvi/apps/vecscreen.py similarity index 100% rename from jcvi/apps/vecscreen.py rename to src/jcvi/apps/vecscreen.py diff --git a/jcvi/assembly/__init__.py b/src/jcvi/assembly/__init__.py similarity index 100% rename from jcvi/assembly/__init__.py rename to src/jcvi/assembly/__init__.py diff --git a/jcvi/assembly/__main__.py b/src/jcvi/assembly/__main__.py similarity index 100% rename from jcvi/assembly/__main__.py rename to src/jcvi/assembly/__main__.py diff --git 
a/jcvi/assembly/allmaps.py b/src/jcvi/assembly/allmaps.py similarity index 100% rename from jcvi/assembly/allmaps.py rename to src/jcvi/assembly/allmaps.py diff --git a/jcvi/assembly/allpaths.py b/src/jcvi/assembly/allpaths.py similarity index 100% rename from jcvi/assembly/allpaths.py rename to src/jcvi/assembly/allpaths.py diff --git a/jcvi/assembly/automaton.py b/src/jcvi/assembly/automaton.py similarity index 100% rename from jcvi/assembly/automaton.py rename to src/jcvi/assembly/automaton.py diff --git a/jcvi/assembly/base.py b/src/jcvi/assembly/base.py similarity index 100% rename from jcvi/assembly/base.py rename to src/jcvi/assembly/base.py diff --git a/jcvi/assembly/chic.pyx b/src/jcvi/assembly/chic.pyx similarity index 100% rename from jcvi/assembly/chic.pyx rename to src/jcvi/assembly/chic.pyx diff --git a/jcvi/assembly/coverage.py b/src/jcvi/assembly/coverage.py similarity index 100% rename from jcvi/assembly/coverage.py rename to src/jcvi/assembly/coverage.py diff --git a/jcvi/assembly/gaps.py b/src/jcvi/assembly/gaps.py similarity index 100% rename from jcvi/assembly/gaps.py rename to src/jcvi/assembly/gaps.py diff --git a/jcvi/assembly/geneticmap.py b/src/jcvi/assembly/geneticmap.py similarity index 100% rename from jcvi/assembly/geneticmap.py rename to src/jcvi/assembly/geneticmap.py diff --git a/jcvi/assembly/goldenpath.py b/src/jcvi/assembly/goldenpath.py similarity index 100% rename from jcvi/assembly/goldenpath.py rename to src/jcvi/assembly/goldenpath.py diff --git a/jcvi/assembly/hic.py b/src/jcvi/assembly/hic.py similarity index 100% rename from jcvi/assembly/hic.py rename to src/jcvi/assembly/hic.py diff --git a/jcvi/assembly/kmer.py b/src/jcvi/assembly/kmer.py similarity index 100% rename from jcvi/assembly/kmer.py rename to src/jcvi/assembly/kmer.py diff --git a/jcvi/assembly/opticalmap.py b/src/jcvi/assembly/opticalmap.py similarity index 100% rename from jcvi/assembly/opticalmap.py rename to src/jcvi/assembly/opticalmap.py diff --git 
a/jcvi/assembly/patch.py b/src/jcvi/assembly/patch.py similarity index 100% rename from jcvi/assembly/patch.py rename to src/jcvi/assembly/patch.py diff --git a/jcvi/assembly/postprocess.py b/src/jcvi/assembly/postprocess.py similarity index 100% rename from jcvi/assembly/postprocess.py rename to src/jcvi/assembly/postprocess.py diff --git a/jcvi/assembly/preprocess.py b/src/jcvi/assembly/preprocess.py similarity index 100% rename from jcvi/assembly/preprocess.py rename to src/jcvi/assembly/preprocess.py diff --git a/jcvi/assembly/sim.py b/src/jcvi/assembly/sim.py similarity index 100% rename from jcvi/assembly/sim.py rename to src/jcvi/assembly/sim.py diff --git a/jcvi/assembly/soap.py b/src/jcvi/assembly/soap.py similarity index 100% rename from jcvi/assembly/soap.py rename to src/jcvi/assembly/soap.py diff --git a/jcvi/assembly/syntenypath.py b/src/jcvi/assembly/syntenypath.py similarity index 100% rename from jcvi/assembly/syntenypath.py rename to src/jcvi/assembly/syntenypath.py diff --git a/jcvi/compara/__init__.py b/src/jcvi/compara/__init__.py similarity index 100% rename from jcvi/compara/__init__.py rename to src/jcvi/compara/__init__.py diff --git a/jcvi/compara/__main__.py b/src/jcvi/compara/__main__.py similarity index 100% rename from jcvi/compara/__main__.py rename to src/jcvi/compara/__main__.py diff --git a/jcvi/compara/base.py b/src/jcvi/compara/base.py similarity index 100% rename from jcvi/compara/base.py rename to src/jcvi/compara/base.py diff --git a/jcvi/compara/blastfilter.py b/src/jcvi/compara/blastfilter.py similarity index 100% rename from jcvi/compara/blastfilter.py rename to src/jcvi/compara/blastfilter.py diff --git a/jcvi/compara/catalog.py b/src/jcvi/compara/catalog.py similarity index 100% rename from jcvi/compara/catalog.py rename to src/jcvi/compara/catalog.py diff --git a/jcvi/compara/fractionation.py b/src/jcvi/compara/fractionation.py similarity index 100% rename from jcvi/compara/fractionation.py rename to 
src/jcvi/compara/fractionation.py diff --git a/jcvi/compara/ks.py b/src/jcvi/compara/ks.py similarity index 100% rename from jcvi/compara/ks.py rename to src/jcvi/compara/ks.py diff --git a/jcvi/compara/pad.py b/src/jcvi/compara/pad.py similarity index 100% rename from jcvi/compara/pad.py rename to src/jcvi/compara/pad.py diff --git a/jcvi/compara/pedigree.py b/src/jcvi/compara/pedigree.py similarity index 100% rename from jcvi/compara/pedigree.py rename to src/jcvi/compara/pedigree.py diff --git a/jcvi/compara/phylogeny.py b/src/jcvi/compara/phylogeny.py similarity index 100% rename from jcvi/compara/phylogeny.py rename to src/jcvi/compara/phylogeny.py diff --git a/jcvi/compara/quota.py b/src/jcvi/compara/quota.py similarity index 100% rename from jcvi/compara/quota.py rename to src/jcvi/compara/quota.py diff --git a/jcvi/compara/reconstruct.py b/src/jcvi/compara/reconstruct.py similarity index 100% rename from jcvi/compara/reconstruct.py rename to src/jcvi/compara/reconstruct.py diff --git a/jcvi/compara/synfind.py b/src/jcvi/compara/synfind.py similarity index 100% rename from jcvi/compara/synfind.py rename to src/jcvi/compara/synfind.py diff --git a/jcvi/compara/synteny.py b/src/jcvi/compara/synteny.py similarity index 100% rename from jcvi/compara/synteny.py rename to src/jcvi/compara/synteny.py diff --git a/jcvi/formats/__init__.py b/src/jcvi/formats/__init__.py similarity index 100% rename from jcvi/formats/__init__.py rename to src/jcvi/formats/__init__.py diff --git a/jcvi/formats/__main__.py b/src/jcvi/formats/__main__.py similarity index 100% rename from jcvi/formats/__main__.py rename to src/jcvi/formats/__main__.py diff --git a/jcvi/formats/agp.py b/src/jcvi/formats/agp.py similarity index 100% rename from jcvi/formats/agp.py rename to src/jcvi/formats/agp.py diff --git a/jcvi/formats/base.py b/src/jcvi/formats/base.py similarity index 100% rename from jcvi/formats/base.py rename to src/jcvi/formats/base.py diff --git a/jcvi/formats/bed.py 
b/src/jcvi/formats/bed.py similarity index 100% rename from jcvi/formats/bed.py rename to src/jcvi/formats/bed.py diff --git a/jcvi/formats/blast.py b/src/jcvi/formats/blast.py similarity index 100% rename from jcvi/formats/blast.py rename to src/jcvi/formats/blast.py diff --git a/src/jcvi/formats/cblast.c b/src/jcvi/formats/cblast.c new file mode 100644 index 00000000..1be00a77 --- /dev/null +++ b/src/jcvi/formats/cblast.c @@ -0,0 +1,16859 @@ +/* Generated by Cython 3.0.11 */ + +/* BEGIN: Cython Metadata +{ + "distutils": { + "depends": [], + "name": "jcvi.formats.cblast", + "sources": [ + "/Users/adamtaranto/Documents/Adam/jcvi-dev/jcvi/jcvi/formats/cblast.pyx" + ] + }, + "module_name": "jcvi.formats.cblast" +} +END: Cython Metadata */ + +#ifndef PY_SSIZE_T_CLEAN +#define PY_SSIZE_T_CLEAN +#endif /* PY_SSIZE_T_CLEAN */ +#if defined(CYTHON_LIMITED_API) && 0 + #ifndef Py_LIMITED_API + #if CYTHON_LIMITED_API+0 > 0x03030000 + #define Py_LIMITED_API CYTHON_LIMITED_API + #else + #define Py_LIMITED_API 0x03030000 + #endif + #endif +#endif + +#include "Python.h" +#ifndef Py_PYTHON_H + #error Python headers needed to compile C extensions, please install development version of Python. +#elif PY_VERSION_HEX < 0x02070000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) + #error Cython requires Python 2.7+ or Python 3.3+. +#else +#if defined(CYTHON_LIMITED_API) && CYTHON_LIMITED_API +#define __PYX_EXTRA_ABI_MODULE_NAME "limited" +#else +#define __PYX_EXTRA_ABI_MODULE_NAME "" +#endif +#define CYTHON_ABI "3_0_11" __PYX_EXTRA_ABI_MODULE_NAME +#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI +#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "." 
+#define CYTHON_HEX_VERSION 0x03000BF0 +#define CYTHON_FUTURE_DIVISION 0 +#include +#ifndef offsetof + #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(_WIN32) && !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#define __PYX_COMMA , +#ifndef HAVE_LONG_LONG + #define HAVE_LONG_LONG +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#define __PYX_LIMITED_VERSION_HEX PY_VERSION_HEX +#if defined(GRAALVM_PYTHON) + /* For very preliminary testing purposes. Most variables are set the same as PyPy. + The existence of this section does not imply that anything works or is even tested */ + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 1 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + 
#undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(PYPY_VERSION) + #define CYTHON_COMPILING_IN_PYPY 1 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define 
CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + #endif + #if PY_VERSION_HEX < 0x03090000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00) + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(CYTHON_LIMITED_API) + #ifdef Py_LIMITED_API + #undef __PYX_LIMITED_VERSION_HEX + #define __PYX_LIMITED_VERSION_HEX Py_LIMITED_API + #endif + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 1 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_CLINE_IN_TRACEBACK + #define CYTHON_CLINE_IN_TRACEBACK 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 1 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define 
CYTHON_USE_UNICODE_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #endif + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 1 + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(Py_GIL_DISABLED) || defined(Py_NOGIL) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 1 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #ifndef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef 
CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 1 + #endif + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #ifndef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 + #endif +#else + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 1 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #ifndef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 1 + #endif + #if PY_MAJOR_VERSION < 3 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 1 + #endif + 
#ifndef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 1 + #endif + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #elif !defined(CYTHON_USE_UNICODE_WRITER) + #define CYTHON_USE_UNICODE_WRITER 1 + #endif + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #ifndef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 1 + #endif + #ifndef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL (PY_MAJOR_VERSION < 3 || PY_VERSION_HEX >= 0x03060000 && PY_VERSION_HEX < 0x030C00A6) + #endif + #ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL (PY_VERSION_HEX >= 0x030700A1) + #endif + #ifndef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 1 + #endif + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #endif + #if PY_VERSION_HEX < 0x030400a1 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #elif !defined(CYTHON_USE_TP_FINALIZE) + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #if PY_VERSION_HEX < 0x030600B1 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #elif !defined(CYTHON_USE_DICT_VERSIONS) + #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX < 0x030C00A5) + #endif + #if PY_VERSION_HEX < 0x030700A3 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #elif !defined(CYTHON_USE_EXC_INFO_STACK) + #define 
CYTHON_USE_EXC_INFO_STACK 1 + #endif + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 1 + #endif +#endif +#if !defined(CYTHON_FAST_PYCCALL) +#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) +#endif +#if !defined(CYTHON_VECTORCALL) +#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1) +#endif +#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1) +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_MAJOR_VERSION < 3 + #include "longintrepr.h" + #endif + #undef SHIFT + #undef BASE + #undef MASK + #ifdef SIZEOF_VOID_P + enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; + #endif +#endif +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#ifndef __has_cpp_attribute + #define __has_cpp_attribute(x) 0 +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#ifndef CYTHON_UNUSED + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(maybe_unused) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(maybe_unused) + #define CYTHON_UNUSED [[maybe_unused]] + #endif + #endif + #endif +#endif +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED 
__attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_UNUSED_VAR +# if defined(__cplusplus) + template void CYTHON_UNUSED_VAR( const T& ) { } +# else +# define CYTHON_UNUSED_VAR(x) (void)(x) +# endif +#endif +#ifndef CYTHON_MAYBE_UNUSED_VAR + #define CYTHON_MAYBE_UNUSED_VAR(x) CYTHON_UNUSED_VAR(x) +#endif +#ifndef CYTHON_NCP_UNUSED +# if CYTHON_COMPILING_IN_CPYTHON +# define CYTHON_NCP_UNUSED +# else +# define CYTHON_NCP_UNUSED CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_USE_CPP_STD_MOVE + #if defined(__cplusplus) && (\ + __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1600)) + #define CYTHON_USE_CPP_STD_MOVE 1 + #else + #define CYTHON_USE_CPP_STD_MOVE 0 + #endif +#endif +#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) +#ifdef _MSC_VER + #ifndef _MSC_STDINT_H_ + #if _MSC_VER < 1300 + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + #else + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + #endif + #endif + #if _MSC_VER < 1300 + #ifdef _WIN64 + typedef unsigned long long __pyx_uintptr_t; + #else + typedef unsigned int __pyx_uintptr_t; + #endif + #else + #ifdef _WIN64 + typedef unsigned __int64 __pyx_uintptr_t; + #else + typedef unsigned __int32 __pyx_uintptr_t; + #endif + #endif +#else + #include + typedef uintptr_t __pyx_uintptr_t; +#endif +#ifndef CYTHON_FALLTHROUGH + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(fallthrough) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(fallthrough) + #define CYTHON_FALLTHROUGH [[fallthrough]] + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_cpp_attribute(clang::fallthrough) + #define CYTHON_FALLTHROUGH [[clang::fallthrough]] + #elif 
__has_cpp_attribute(gnu::fallthrough) + #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] + #endif + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_attribute(fallthrough) + #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) + #else + #define CYTHON_FALLTHROUGH + #endif + #endif + #if defined(__clang__) && defined(__apple_build_version__) + #if __apple_build_version__ < 7000000 + #undef CYTHON_FALLTHROUGH + #define CYTHON_FALLTHROUGH + #endif + #endif +#endif +#ifdef __cplusplus + template + struct __PYX_IS_UNSIGNED_IMPL {static const bool value = T(0) < T(-1);}; + #define __PYX_IS_UNSIGNED(type) (__PYX_IS_UNSIGNED_IMPL::value) +#else + #define __PYX_IS_UNSIGNED(type) (((type)-1) > 0) +#endif +#if CYTHON_COMPILING_IN_PYPY == 1 + #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x030A0000) +#else + #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000) +#endif +#define __PYX_REINTERPRET_FUNCION(func_pointer, other_pointer) ((func_pointer)(void(*)(void))(other_pointer)) + +#ifndef CYTHON_INLINE + #if defined(__clang__) + #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) + #elif defined(__GNUC__) + #define CYTHON_INLINE __inline__ + #elif defined(_MSC_VER) + #define CYTHON_INLINE __inline + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_INLINE inline + #else + #define CYTHON_INLINE + #endif +#endif + +#define __PYX_BUILD_PY_SSIZE_T "n" +#define CYTHON_FORMAT_SSIZE_T "z" +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_DefaultClassType PyClass_Type + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_BUILTIN_MODULE_NAME "builtins" + #define __Pyx_DefaultClassType PyType_Type +#if CYTHON_COMPILING_IN_LIMITED_API + static CYTHON_INLINE PyObject* 
__Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyObject *exception_table = NULL; + PyObject *types_module=NULL, *code_type=NULL, *result=NULL; + #if __PYX_LIMITED_VERSION_HEX < 0x030B0000 + PyObject *version_info; + PyObject *py_minor_version = NULL; + #endif + long minor_version = 0; + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000 + minor_version = 11; + #else + if (!(version_info = PySys_GetObject("version_info"))) goto end; + if (!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end; + minor_version = PyLong_AsLong(py_minor_version); + Py_DECREF(py_minor_version); + if (minor_version == -1 && PyErr_Occurred()) goto end; + #endif + if (!(types_module = PyImport_ImportModule("types"))) goto end; + if (!(code_type = PyObject_GetAttrString(types_module, "CodeType"))) goto end; + if (minor_version <= 7) { + (void)p; + result = PyObject_CallFunction(code_type, "iiiiiOOOOOOiOO", a, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else if (minor_version <= 10) { + result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOiOO", a,p, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else { + if (!(exception_table = PyBytes_FromStringAndSize(NULL, 0))) goto end; + result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOOiOO", a,p, k, l, s, f, code, + c, n, v, fn, name, name, fline, lnos, exception_table, fv, cell); + } + end: + Py_XDECREF(code_type); + Py_XDECREF(exception_table); + Py_XDECREF(types_module); + if (type) { + PyErr_Restore(type, value, traceback); + } + return result; + } + #ifndef CO_OPTIMIZED + #define CO_OPTIMIZED 0x0001 + #endif + #ifndef CO_NEWLOCALS + #define CO_NEWLOCALS 0x0002 + #endif + #ifndef CO_VARARGS + #define CO_VARARGS 0x0004 + #endif + #ifndef CO_VARKEYWORDS + 
#define CO_VARKEYWORDS 0x0008 + #endif + #ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x0200 + #endif + #ifndef CO_GENERATOR + #define CO_GENERATOR 0x0020 + #endif + #ifndef CO_COROUTINE + #define CO_COROUTINE 0x0080 + #endif +#elif PY_VERSION_HEX >= 0x030B0000 + static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyCodeObject *result; + PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); + if (!empty_bytes) return NULL; + result = + #if PY_VERSION_HEX >= 0x030C0000 + PyUnstable_Code_NewWithPosOnlyArgs + #else + PyCode_NewWithPosOnlyArgs + #endif + (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, empty_bytes); + Py_DECREF(empty_bytes); + return result; + } +#elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_NewWithPosOnlyArgs(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#endif +#endif +#if PY_VERSION_HEX >= 0x030900A4 || defined(Py_IS_TYPE) + #define __Pyx_IS_TYPE(ob, type) Py_IS_TYPE(ob, type) +#else + #define __Pyx_IS_TYPE(ob, type) (((const PyObject*)ob)->ob_type == (type)) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_Is) + #define __Pyx_Py_Is(x, y) Py_Is(x, y) +#else + #define __Pyx_Py_Is(x, y) ((x) == (y)) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsNone) + #define __Pyx_Py_IsNone(ob) Py_IsNone(ob) +#else + #define __Pyx_Py_IsNone(ob) __Pyx_Py_Is((ob), Py_None) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsTrue) + #define __Pyx_Py_IsTrue(ob) Py_IsTrue(ob) +#else + #define 
__Pyx_Py_IsTrue(ob) __Pyx_Py_Is((ob), Py_True) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsFalse) + #define __Pyx_Py_IsFalse(ob) Py_IsFalse(ob) +#else + #define __Pyx_Py_IsFalse(ob) __Pyx_Py_Is((ob), Py_False) +#endif +#define __Pyx_NoneAsNull(obj) (__Pyx_Py_IsNone(obj) ? NULL : (obj)) +#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o) +#else + #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o) +#endif +#ifndef CO_COROUTINE + #define CO_COROUTINE 0x80 +#endif +#ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x200 +#endif +#ifndef Py_TPFLAGS_CHECKTYPES + #define Py_TPFLAGS_CHECKTYPES 0 +#endif +#ifndef Py_TPFLAGS_HAVE_INDEX + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#ifndef Py_TPFLAGS_HAVE_NEWBUFFER + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#ifndef Py_TPFLAGS_HAVE_FINALIZE + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#ifndef Py_TPFLAGS_SEQUENCE + #define Py_TPFLAGS_SEQUENCE 0 +#endif +#ifndef Py_TPFLAGS_MAPPING + #define Py_TPFLAGS_MAPPING 0 +#endif +#ifndef METH_STACKLESS + #define METH_STACKLESS 0 +#endif +#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) + #ifndef METH_FASTCALL + #define METH_FASTCALL 0x80 + #endif + typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); + typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames); +#else + #if PY_VERSION_HEX >= 0x030d00A4 + # define __Pyx_PyCFunctionFast PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords PyCFunctionFastWithKeywords + #else + # define __Pyx_PyCFunctionFast _PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords + #endif +#endif +#if CYTHON_METH_FASTCALL + #define __Pyx_METH_FASTCALL METH_FASTCALL + #define __Pyx_PyCFunction_FastCall __Pyx_PyCFunctionFast + #define __Pyx_PyCFunction_FastCallWithKeywords 
__Pyx_PyCFunctionFastWithKeywords +#else + #define __Pyx_METH_FASTCALL METH_VARARGS + #define __Pyx_PyCFunction_FastCall PyCFunction + #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords +#endif +#if CYTHON_VECTORCALL + #define __pyx_vectorcallfunc vectorcallfunc + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET + #define __Pyx_PyVectorcall_NARGS(n) PyVectorcall_NARGS((size_t)(n)) +#elif CYTHON_BACKPORT_VECTORCALL + typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args, + size_t nargsf, PyObject *kwnames); + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1)) + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(((size_t)(n)) & ~__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)) +#else + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET 0 + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(n)) +#endif +#if PY_MAJOR_VERSION >= 0x030900B1 +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_CheckExact(func) +#else +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_Check(func) +#endif +#define __Pyx_CyOrPyCFunction_Check(func) PyCFunction_Check(func) +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) (((PyCFunctionObject*)(func))->m_ml->ml_meth) +#elif !CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) PyCFunction_GET_FUNCTION(func) +#endif +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CyOrPyCFunction_GET_FLAGS(func) (((PyCFunctionObject*)(func))->m_ml->ml_flags) +static CYTHON_INLINE PyObject* __Pyx_CyOrPyCFunction_GET_SELF(PyObject *func) { + return (__Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_STATIC) ? 
NULL : ((PyCFunctionObject*)func)->m_self; +} +#endif +static CYTHON_INLINE int __Pyx__IsSameCFunction(PyObject *func, void *cfunc) { +#if CYTHON_COMPILING_IN_LIMITED_API + return PyCFunction_Check(func) && PyCFunction_GetFunction(func) == (PyCFunction) cfunc; +#else + return PyCFunction_Check(func) && PyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +#endif +} +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCFunction(func, cfunc) +#if __PYX_LIMITED_VERSION_HEX < 0x030900B1 + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) ((void)m, PyType_FromSpecWithBases(s, b)) + typedef PyObject *(*__Pyx_PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, size_t, PyObject *); +#else + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) PyType_FromModuleAndSpec(m, s, b) + #define __Pyx_PyCMethod PyCMethod +#endif +#ifndef METH_METHOD + #define METH_METHOD 0x200 +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) + #define PyObject_Malloc(s) PyMem_Malloc(s) + #define PyObject_Free(p) PyMem_Free(p) + #define PyObject_Realloc(p) PyMem_Realloc(p) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) +#else + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyThreadState_Current PyThreadState_Get() +#elif !CYTHON_FAST_THREAD_STATE + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#elif PY_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyThreadState_Current PyThreadState_GetUnchecked() +#elif PY_VERSION_HEX >= 0x03060000 + #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() +#elif PY_VERSION_HEX >= 0x03000000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#else + #define __Pyx_PyThreadState_Current _PyThreadState_Current +#endif +#if 
CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_INLINE void *__Pyx_PyModule_GetState(PyObject *op) +{ + void *result; + result = PyModule_GetState(op); + if (!result) + Py_FatalError("Couldn't find the module state"); + return result; +} +#endif +#define __Pyx_PyObject_GetSlot(obj, name, func_ctype) __Pyx_PyType_GetSlot(Py_TYPE(obj), name, func_ctype) +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((func_ctype) PyType_GetSlot((type), Py_##name)) +#else + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((type)->name) +#endif +#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) +#include "pythread.h" +#define Py_tss_NEEDS_INIT 0 +typedef int Py_tss_t; +static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { + *key = PyThread_create_key(); + return 0; +} +static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { + Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); + *key = Py_tss_NEEDS_INIT; + return key; +} +static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { + PyObject_Free(key); +} +static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { + return *key != Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { + PyThread_delete_key(*key); + *key = Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { + return PyThread_set_key_value(*key, value); +} +static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { + return PyThread_get_key_value(*key); +} +#endif +#if PY_MAJOR_VERSION < 3 + #if CYTHON_COMPILING_IN_PYPY + #if PYPY_VERSION_NUM < 0x07030600 + #if defined(__cplusplus) && __cplusplus >= 201402L + [[deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")]] + #elif defined(__GNUC__) || defined(__clang__) + __attribute__ ((__deprecated__("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6"))) + 
#elif defined(_MSC_VER) + __declspec(deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")) + #endif + static CYTHON_INLINE int PyGILState_Check(void) { + return 0; + } + #else // PYPY_VERSION_NUM < 0x07030600 + #endif // PYPY_VERSION_NUM < 0x07030600 + #else + static CYTHON_INLINE int PyGILState_Check(void) { + PyThreadState * tstate = _PyThreadState_Current; + return tstate && (tstate == PyGILState_GetThisThreadState()); + } + #endif +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 || defined(_PyDict_NewPresized) +#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) +#else +#define __Pyx_PyDict_NewPresized(n) PyDict_New() +#endif +#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX > 0x030600B4 && PY_VERSION_HEX < 0x030d0000 && CYTHON_USE_UNICODE_INTERNALS +#define __Pyx_PyDict_GetItemStrWithError(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStr(PyObject *dict, PyObject *name) { + PyObject *res = __Pyx_PyDict_GetItemStrWithError(dict, name); + if (res == NULL) PyErr_Clear(); + return res; +} +#elif PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000) +#define __Pyx_PyDict_GetItemStrWithError PyDict_GetItemWithError +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#else +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStrWithError(PyObject *dict, PyObject *name) { +#if CYTHON_COMPILING_IN_PYPY + return PyDict_GetItem(dict, name); +#else + PyDictEntry *ep; + PyDictObject *mp = (PyDictObject*) dict; + long hash = 
((PyStringObject *) name)->ob_shash; + assert(hash != -1); + ep = (mp->ma_lookup)(mp, name, hash); + if (ep == NULL) { + return NULL; + } + return ep->me_value; +#endif +} +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#endif +#if CYTHON_USE_TYPE_SLOTS + #define __Pyx_PyType_GetFlags(tp) (((PyTypeObject *)tp)->tp_flags) + #define __Pyx_PyType_HasFeature(type, feature) ((__Pyx_PyType_GetFlags(type) & (feature)) != 0) + #define __Pyx_PyObject_GetIterNextFunc(obj) (Py_TYPE(obj)->tp_iternext) +#else + #define __Pyx_PyType_GetFlags(tp) (PyType_GetFlags((PyTypeObject *)tp)) + #define __Pyx_PyType_HasFeature(type, feature) PyType_HasFeature(type, feature) + #define __Pyx_PyObject_GetIterNextFunc(obj) PyIter_Next +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyObject_GenericSetAttr((PyObject*)tp, k, v) +#else + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyDict_SetItem(tp->tp_dict, k, v) +#endif +#if CYTHON_USE_TYPE_SPECS && PY_VERSION_HEX >= 0x03080000 +#define __Pyx_PyHeapTypeObject_GC_Del(obj) {\ + PyTypeObject *type = Py_TYPE((PyObject*)obj);\ + assert(__Pyx_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE));\ + PyObject_GC_Del(obj);\ + Py_DECREF(type);\ +} +#else +#define __Pyx_PyHeapTypeObject_GC_Del(obj) PyObject_GC_Del(obj) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define CYTHON_PEP393_ENABLED 1 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GetLength(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_ReadChar(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((void)u, 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((void)u, (0)) + #define __Pyx_PyUnicode_DATA(u) ((void*)u) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)k, PyUnicode_ReadChar((PyObject*)(d), i)) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GetLength(u)) +#elif PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #if PY_VERSION_HEX >= 0x030C0000 + #define 
__Pyx_PyUnicode_READY(op) (0) + #else + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #endif + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) + #define __Pyx_PyUnicode_KIND(u) ((int)PyUnicode_KIND(u)) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, (Py_UCS4) ch) + #if PY_VERSION_HEX >= 0x030C0000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) + #else + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) + #else + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) + #endif + #endif +#else + #define CYTHON_PEP393_ENABLED 0 + #define PyUnicode_1BYTE_KIND 1 + #define PyUnicode_2BYTE_KIND 2 + #define PyUnicode_4BYTE_KIND 4 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 
65535U : 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((int)sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = (Py_UNICODE) ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #if !defined(PyUnicode_DecodeUnicodeEscape) + #define PyUnicode_DecodeUnicodeEscape(s, size, errors) PyUnicode_Decode(s, size, "unicode_escape", errors) + #endif + #if !defined(PyUnicode_Contains) || (PY_MAJOR_VERSION == 2 && PYPY_VERSION_NUM < 0x07030500) + #undef PyUnicode_Contains + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) + #endif + #if !defined(PyByteArray_Check) + #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) + #endif + #if !defined(PyObject_Format) + #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) + #endif +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? 
PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) + #define PyObject_ASCII(o) PyObject_Repr(o) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#ifndef PyObject_Unicode + #define PyObject_Unicode PyObject_Str +#endif +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#if CYTHON_COMPILING_IN_CPYTHON + #define __Pyx_PySequence_ListKeepNew(obj)\ + (likely(PyList_CheckExact(obj) && Py_REFCNT(obj) == 1) ? 
__Pyx_NewRef(obj) : PySequence_List(obj)) +#else + #define __Pyx_PySequence_ListKeepNew(obj) PySequence_List(obj) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) __Pyx_IS_TYPE(obj, &PySet_Type) +#endif +#if PY_VERSION_HEX >= 0x030900A4 + #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) +#else + #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) +#endif +#if CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_PySequence_ITEM(o, i) PySequence_ITEM(o, i) + #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) (PyTuple_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyList_SET_ITEM(o, i, v) (PyList_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_GET_SIZE(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_GET_SIZE(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_GET_SIZE(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_GET_SIZE(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_GET_SIZE(o) +#else + #define __Pyx_PySequence_ITEM(o, i) PySequence_GetItem(o, i) + #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) PyTuple_SetItem(o, i, v) + #define __Pyx_PyList_SET_ITEM(o, i, v) PyList_SetItem(o, i, v) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_Size(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_Size(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_Size(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_Size(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_Size(o) +#endif +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyImport_AddModuleRef(name) PyImport_AddModuleRef(name) +#else + static CYTHON_INLINE PyObject *__Pyx_PyImport_AddModuleRef(const char *name) { + PyObject *module = PyImport_AddModule(name); + Py_XINCREF(module); + return module; + } +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define 
PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define __Pyx_Py3Int_Check(op) PyLong_Check(op) + #define __Pyx_Py3Int_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#else + #define __Pyx_Py3Int_Check(op) (PyLong_Check(op) || PyInt_Check(op)) + #define __Pyx_Py3Int_CheckExact(op) (PyLong_CheckExact(op) || PyInt_CheckExact(op)) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t +#endif +#if CYTHON_USE_ASYNC_SLOTS + #if PY_VERSION_HEX >= 0x030500B1 + #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods + #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) + #else + #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) + #endif +#else + #define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef __Pyx_PyAsyncMethodsStruct + typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; + } __Pyx_PyAsyncMethodsStruct; +#endif + +#if defined(_WIN32) || defined(WIN32) || 
defined(MS_WINDOWS) + #if !defined(_USE_MATH_DEFINES) + #define _USE_MATH_DEFINES + #endif +#endif +#include +#ifdef NAN +#define __PYX_NAN() ((float) NAN) +#else +static CYTHON_INLINE float __PYX_NAN() { + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif +#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) +#define __Pyx_truncl trunc +#else +#define __Pyx_truncl truncl +#endif + +#define __PYX_MARK_ERR_POS(f_index, lineno) \ + { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } +#define __PYX_ERR(f_index, lineno, Ln_error) \ + { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } + +#ifdef CYTHON_EXTERN_C + #undef __PYX_EXTERN_C + #define __PYX_EXTERN_C CYTHON_EXTERN_C +#elif defined(__PYX_EXTERN_C) + #ifdef _MSC_VER + #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") + #else + #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. 
+ #endif +#else + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#define __PYX_HAVE__jcvi__formats__cblast +#define __PYX_HAVE_API__jcvi__formats__cblast +/* Early includes */ +#include +#include +#ifdef _OPENMP +#include +#endif /* _OPENMP */ + +#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define __Pyx_uchar_cast(c) ((unsigned char)c) +#define __Pyx_long_cast(x) ((long)x) +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ + (sizeof(type) < sizeof(Py_ssize_t)) ||\ + (sizeof(type) > sizeof(Py_ssize_t) &&\ + likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX) &&\ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ + v == (type)PY_SSIZE_T_MIN))) ||\ + (sizeof(type) == sizeof(Py_ssize_t) &&\ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX))) ) +static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { + return (size_t) i < (size_t) limit; +} +#if defined (__cplusplus) && __cplusplus >= 201103L + #include + #define __Pyx_sst_abs(value) std::abs(value) +#elif SIZEOF_INT >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) abs(value) +#elif SIZEOF_LONG >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) labs(value) +#elif defined (_MSC_VER) + #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) +#elif 
defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define __Pyx_sst_abs(value) llabs(value) +#elif defined (__GNUC__) + #define __Pyx_sst_abs(value) __builtin_llabs(value) +#else + #define __Pyx_sst_abs(value) ((value<0) ? -value : value) +#endif +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s); +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char*); +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyObject_AsWritableString(s) ((char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableSString(s) ((signed char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsSString(s) ((const 
signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +#define __Pyx_PyUnicode_FromOrdinal(o) PyUnicode_FromOrdinal((int)o) +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +#define __Pyx_PySequence_Tuple(obj)\ + (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); +#if CYTHON_ASSUME_SAFE_MACROS +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) +#else +#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? 
__Pyx_NewRef(x) : PyNumber_Int(x)) +#endif +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_VERSION_HEX >= 0x030C00A7 + #ifndef _PyLong_SIGN_MASK + #define _PyLong_SIGN_MASK 3 + #endif + #ifndef _PyLong_NON_SIZE_BITS + #define _PyLong_NON_SIZE_BITS 3 + #endif + #define __Pyx_PyLong_Sign(x) (((PyLongObject*)x)->long_value.lv_tag & _PyLong_SIGN_MASK) + #define __Pyx_PyLong_IsNeg(x) ((__Pyx_PyLong_Sign(x) & 2) != 0) + #define __Pyx_PyLong_IsNonNeg(x) (!__Pyx_PyLong_IsNeg(x)) + #define __Pyx_PyLong_IsZero(x) (__Pyx_PyLong_Sign(x) & 1) + #define __Pyx_PyLong_IsPos(x) (__Pyx_PyLong_Sign(x) == 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) (__Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) ((Py_ssize_t) (((PyLongObject*)x)->long_value.lv_tag >> _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_SignedDigitCount(x)\ + ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * __Pyx_PyLong_DigitCount(x)) + #if defined(PyUnstable_Long_IsCompact) && defined(PyUnstable_Long_CompactValue) + #define __Pyx_PyLong_IsCompact(x) PyUnstable_Long_IsCompact((PyLongObject*) x) + #define __Pyx_PyLong_CompactValue(x) PyUnstable_Long_CompactValue((PyLongObject*) x) + #else + #define __Pyx_PyLong_IsCompact(x) (((PyLongObject*)x)->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_CompactValue(x) ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * (Py_ssize_t) __Pyx_PyLong_Digits(x)[0]) + #endif + typedef Py_ssize_t __Pyx_compact_pylong; + typedef size_t __Pyx_compact_upylong; + #else + #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0) + #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0) + #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0) + #define __Pyx_PyLong_IsPos(x) (Py_SIZE(x) > 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) ((Py_SIZE(x) == 0) ? 
0 : __Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) __Pyx_sst_abs(Py_SIZE(x)) + #define __Pyx_PyLong_SignedDigitCount(x) Py_SIZE(x) + #define __Pyx_PyLong_IsCompact(x) (Py_SIZE(x) == 0 || Py_SIZE(x) == 1 || Py_SIZE(x) == -1) + #define __Pyx_PyLong_CompactValue(x)\ + ((Py_SIZE(x) == 0) ? (sdigit) 0 : ((Py_SIZE(x) < 0) ? -(sdigit)__Pyx_PyLong_Digits(x)[0] : (sdigit)__Pyx_PyLong_Digits(x)[0])) + typedef sdigit __Pyx_compact_pylong; + typedef digit __Pyx_compact_upylong; + #endif + #if PY_VERSION_HEX >= 0x030C00A5 + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit) + #else + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->ob_digit) + #endif +#endif +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +#include +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = (char) c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default 
encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#include +static char* __PYX_DEFAULT_STRING_ENCODING; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); + if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ +static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } + +#if !CYTHON_USE_MODULE_STATE +static PyObject *__pyx_m = NULL; +#endif +static int __pyx_lineno; +static int __pyx_clineno = 0; 
+static const char * __pyx_cfilenm = __FILE__; +static const char *__pyx_filename; + +/* #### Code section: filename_table ### */ + +static const char *__pyx_f[] = { + "cblast.pyx", + "", +}; +/* #### Code section: utility_code_proto_before_types ### */ +/* ForceInitThreads.proto */ +#ifndef __PYX_FORCE_INIT_THREADS + #define __PYX_FORCE_INIT_THREADS 0 +#endif + +/* #### Code section: numeric_typedefs ### */ +/* #### Code section: complex_type_declarations ### */ +/* #### Code section: type_declarations ### */ + +/*--- Type declarations ---*/ +struct __pyx_obj_4jcvi_7formats_6cblast_Blast; +struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine; +struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; +struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; + +/* "jcvi/formats/cblast.pyx":21 + * + * + * cdef class Blast: # <<<<<<<<<<<<<< + * cdef: + * FILE* fh + */ +struct __pyx_obj_4jcvi_7formats_6cblast_Blast { + PyObject_HEAD + FILE *fh; + PyObject *filename; +}; + + +/* "jcvi/formats/cblast.pyx":66 + * + * + * cdef class BlastLine: # <<<<<<<<<<<<<< + * """ + * Given a string of tab-delimited (-m 8) blast output, parse it and create + */ +struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine { + PyObject_HEAD + char _query[0x80]; + char _subject[0x80]; + int hitlen; + int nmismatch; + int ngaps; + int qstart; + int qstop; + int sstart; + int sstop; + float pctid; + float score; + double evalue; + PyObject *qseqid; + PyObject *sseqid; + int qi; + int si; + char orientation; +}; + + +/* "jcvi/formats/cblast.pyx":172 + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + * b = "\t".join(str(x) for x in args) # <<<<<<<<<<<<<< + * return BlastLine(b) + * + */ +struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr { + PyObject_HEAD + PyObject *__pyx_genexpr_arg_0; + PyObject *__pyx_v_x; + PyObject *__pyx_t_0; + 
Py_ssize_t __pyx_t_1; +}; + + +/* "cfunc.to_py":66 + * + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + */ +struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc { + PyObject_HEAD + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*__pyx_v_f)(char *, char *, float, int, int, int, int, int, int, int, float, float); +}; + +/* #### Code section: utility_code_proto ### */ + +/* --- Runtime support code (head) --- */ +/* Refnanny.proto */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, Py_ssize_t); + void (*DECREF)(void*, PyObject*, Py_ssize_t); + void (*GOTREF)(void*, PyObject*, Py_ssize_t); + void (*GIVEREF)(void*, PyObject*, Py_ssize_t); + void* (*SetupContext)(const char*, Py_ssize_t, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define 
__Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + } + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__)) + #define __Pyx_RefNannyFinishContextNogil() __Pyx_RefNannyFinishContext() +#endif + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_XINCREF(r) do { if((r) == NULL); else {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) == NULL); else {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) == NULL); else {__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) == NULL); else {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContextNogil() + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + 
#define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_Py_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; Py_XDECREF(tmp);\ + } while (0) +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +/* PyErrExceptionMatches.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) +static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); +#else +#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) +#endif + +/* PyThreadStateGet.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; +#if PY_VERSION_HEX >= 0x030C00A6 +#define __Pyx_PyErr_Occurred() (__pyx_tstate->current_exception != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->current_exception ? 
(PyObject*) Py_TYPE(__pyx_tstate->current_exception) : (PyObject*) NULL) +#else +#define __Pyx_PyErr_Occurred() (__pyx_tstate->curexc_type != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->curexc_type) +#endif +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#define __Pyx_PyErr_Occurred() (PyErr_Occurred() != NULL) +#define __Pyx_PyErr_CurrentExceptionType() PyErr_Occurred() +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A6 +#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) +#else +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#endif +#else +#define __Pyx_PyErr_Clear() PyErr_Clear() +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) 
PyErr_Fetch(type, value, tb) +#endif + +/* PyObjectGetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +/* PyObjectGetAttrStrNoError.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name); + +/* GetBuiltinName.proto */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + +/* TupleAndListFromArray.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n); +static CYTHON_INLINE PyObject* __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n); +#endif + +/* IncludeStringH.proto */ +#include + +/* BytesEquals.proto */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); + +/* UnicodeEquals.proto */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); + +/* fastcall.proto */ +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_VARARGS(args, i) PySequence_GetItem(args, i) +#elif CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GET_ITEM(args, i) +#else + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GetItem(args, i) +#endif +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg) + #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg) +#else + #define __Pyx_Arg_NewRef_VARARGS(arg) arg + #define __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds) +#define __Pyx_KwValues_VARARGS(args, nargs) NULL +#define __Pyx_GetKwValue_VARARGS(kw, kwvalues, s) __Pyx_PyDict_GetItemStrWithError(kw, s) +#define __Pyx_KwargsAsDict_VARARGS(kw, kwvalues) PyDict_Copy(kw) +#if CYTHON_METH_FASTCALL + #define __Pyx_Arg_FASTCALL(args, i) args[i] + #define __Pyx_NumKwargs_FASTCALL(kwds) PyTuple_GET_SIZE(kwds) + #define 
__Pyx_KwValues_FASTCALL(args, nargs) ((args) + (nargs)) + static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 + CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues); + #else + #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw) + #endif + #define __Pyx_Arg_NewRef_FASTCALL(arg) arg /* no-op, __Pyx_Arg_FASTCALL is direct and this needs + to have the same reference counting */ + #define __Pyx_Arg_XDECREF_FASTCALL(arg) +#else + #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS + #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS + #define __Pyx_KwValues_FASTCALL __Pyx_KwValues_VARARGS + #define __Pyx_GetKwValue_FASTCALL __Pyx_GetKwValue_VARARGS + #define __Pyx_KwargsAsDict_FASTCALL __Pyx_KwargsAsDict_VARARGS + #define __Pyx_Arg_NewRef_FASTCALL(arg) __Pyx_Arg_NewRef_VARARGS(arg) + #define __Pyx_Arg_XDECREF_FASTCALL(arg) __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_VARARGS(args, start), stop - start) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_FASTCALL(args, start), stop - start) +#else +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) PyTuple_GetSlice(args, start, stop) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) PyTuple_GetSlice(args, start, stop) +#endif + +/* RaiseArgTupleInvalid.proto */ +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* RaiseDoubleKeywords.proto */ +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, 
PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, + const char* function_name); + +/* IncludeStructmemberH.proto */ +#include + +/* FixUpExtensionType.proto */ +#if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type); +#endif + +/* FetchSharedCythonModule.proto */ +static PyObject *__Pyx_FetchSharedCythonABIModule(void); + +/* FetchCommonType.proto */ +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type); +#else +static PyTypeObject* __Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases); +#endif + +/* PyMethodNew.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + PyObject *typesModule=NULL, *methodType=NULL, *result=NULL; + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + typesModule = PyImport_ImportModule("types"); + if (!typesModule) return NULL; + methodType = PyObject_GetAttrString(typesModule, "MethodType"); + Py_DECREF(typesModule); + if (!methodType) return NULL; + result = PyObject_CallFunctionObjArgs(methodType, func, self, NULL); + Py_DECREF(methodType); + return result; +} +#elif PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + return PyMethod_New(func, self); +} +#else + #define __Pyx_PyMethod_New PyMethod_New +#endif + +/* PyVectorcallFastCallDict.proto */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw); +#endif + +/* CythonFunctionShared.proto */ +#define __Pyx_CyFunction_USED +#define __Pyx_CYFUNCTION_STATICMETHOD 0x01 +#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02 +#define __Pyx_CYFUNCTION_CCLASS 0x04 
+#define __Pyx_CYFUNCTION_COROUTINE 0x08 +#define __Pyx_CyFunction_GetClosure(f)\ + (((__pyx_CyFunctionObject *) (f))->func_closure) +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_CyFunction_GetClassObj(f)\ + (((__pyx_CyFunctionObject *) (f))->func_classobj) +#else + #define __Pyx_CyFunction_GetClassObj(f)\ + ((PyObject*) ((PyCMethodObject *) (f))->mm_class) +#endif +#define __Pyx_CyFunction_SetClassObj(f, classobj)\ + __Pyx__CyFunction_SetClassObj((__pyx_CyFunctionObject *) (f), (classobj)) +#define __Pyx_CyFunction_Defaults(type, f)\ + ((type *)(((__pyx_CyFunctionObject *) (f))->defaults)) +#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\ + ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g) +typedef struct { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject_HEAD + PyObject *func; +#elif PY_VERSION_HEX < 0x030900B1 + PyCFunctionObject func; +#else + PyCMethodObject func; +#endif +#if CYTHON_BACKPORT_VECTORCALL + __pyx_vectorcallfunc func_vectorcall; +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_weakreflist; +#endif + PyObject *func_dict; + PyObject *func_name; + PyObject *func_qualname; + PyObject *func_doc; + PyObject *func_globals; + PyObject *func_code; + PyObject *func_closure; +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_classobj; +#endif + void *defaults; + int defaults_pyobjects; + size_t defaults_size; + int flags; + PyObject *defaults_tuple; + PyObject *defaults_kwdict; + PyObject *(*defaults_getter)(PyObject *); + PyObject *func_annotations; + PyObject *func_is_coroutine; +} __pyx_CyFunctionObject; +#undef __Pyx_CyOrPyCFunction_Check +#define __Pyx_CyFunction_Check(obj) __Pyx_TypeCheck(obj, __pyx_CyFunctionType) +#define __Pyx_CyOrPyCFunction_Check(obj) __Pyx_TypeCheck2(obj, __pyx_CyFunctionType, &PyCFunction_Type) +#define __Pyx_CyFunction_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_CyFunctionType) +static CYTHON_INLINE int 
__Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc); +#undef __Pyx_IsSameCFunction +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCyOrCFunction(func, cfunc) +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj); +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m, + size_t size, + int pyobjects); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m, + PyObject *tuple); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m, + PyObject *dict); +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m, + PyObject *dict); +static int __pyx_CyFunction_init(PyObject *module); +#if CYTHON_METH_FASTCALL +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +#if CYTHON_BACKPORT_VECTORCALL +#define __Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall) +#else +#define __Pyx_CyFunction_func_vectorcall(f) (((PyCFunctionObject*)f)->vectorcall) +#endif +#endif + +/* CythonFunction.proto */ +static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); + +/* GetTopmostException.proto */ +#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE 
+static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate); +#endif + +/* SaveResetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +#else +#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) +#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) +#endif + +/* FastTypeChecks.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) __Pyx_IsAnySubtype2(Py_TYPE(obj), (PyTypeObject *)type1, (PyTypeObject *)type2) +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); +#else +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) (PyObject_TypeCheck(obj, (PyTypeObject *)type1) || PyObject_TypeCheck(obj, (PyTypeObject *)type2)) +#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) +#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) +#endif +#define __Pyx_PyErr_ExceptionMatches2(err1, err2) 
__Pyx_PyErr_GivenExceptionMatches2(__Pyx_PyErr_CurrentExceptionType(), err1, err2) +#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) + +/* KeywordStringCheck.proto */ +static int __Pyx_CheckKeywordStrings(PyObject *kw, const char* function_name, int kw_allowed); + +/* RaiseException.proto */ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); + +/* PyObjectCall.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +/* UnpackUnboundCMethod.proto */ +typedef struct { + PyObject *type; + PyObject **method_name; + PyCFunction func; + PyObject *method; + int flag; +} __Pyx_CachedCFunction; + +/* CallUnboundCMethod1.proto */ +static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg); +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg); +#else +#define __Pyx_CallUnboundCMethod1(cfunc, self, arg) __Pyx__CallUnboundCMethod1(cfunc, self, arg) +#endif + +/* RaiseUnexpectedTypeError.proto */ +static int __Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj); + +/* decode_c_string_utf16.proto */ +static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16(const char *s, Py_ssize_t size, const char *errors) { + int byteorder = 0; + return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); +} +static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16LE(const char *s, Py_ssize_t size, const char *errors) { + int byteorder = -1; + return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); +} +static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16BE(const char *s, Py_ssize_t size, const char *errors) { + int byteorder = 1; + return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); +} 
+ +/* decode_c_bytes.proto */ +static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes( + const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop, + const char* encoding, const char* errors, + PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)); + +/* decode_bytes.proto */ +static CYTHON_INLINE PyObject* __Pyx_decode_bytes( + PyObject* string, Py_ssize_t start, Py_ssize_t stop, + const char* encoding, const char* errors, + PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) { + char* as_c_string; + Py_ssize_t size; +#if CYTHON_ASSUME_SAFE_MACROS + as_c_string = PyBytes_AS_STRING(string); + size = PyBytes_GET_SIZE(string); +#else + if (PyBytes_AsStringAndSize(string, &as_c_string, &size) < 0) { + return NULL; + } +#endif + return __Pyx_decode_c_bytes( + as_c_string, size, + start, stop, encoding, errors, decode_func); +} + +/* ArgTypeTest.proto */ +#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ + ((likely(__Pyx_IS_TYPE(obj, type) | (none_allowed && (obj == Py_None)))) ? 
1 :\ + __Pyx__ArgTypeTest(obj, type, name, exact)) +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); + +/* PyFunctionFastCall.proto */ +#if CYTHON_FAST_PYCALL +#if !CYTHON_VECTORCALL +#define __Pyx_PyFunction_FastCall(func, args, nargs)\ + __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); +#endif +#define __Pyx_BUILD_ASSERT_EXPR(cond)\ + (sizeof(char [1 - 2*!(cond)]) - 1) +#ifndef Py_MEMBER_SIZE +#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) +#endif +#if !CYTHON_VECTORCALL +#if PY_VERSION_HEX >= 0x03080000 + #include "frameobject.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif + #define __Pxy_PyFrame_Initialize_Offsets() + #define __Pyx_PyFrame_GetLocalsplus(frame) ((frame)->f_localsplus) +#else + static size_t __pyx_pyframe_localsplus_offset = 0; + #include "frameobject.h" + #define __Pxy_PyFrame_Initialize_Offsets()\ + ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ + (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) + #define __Pyx_PyFrame_GetLocalsplus(frame)\ + (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) +#endif +#endif +#endif + +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +/* PyObjectFastCall.proto */ +#define __Pyx_PyObject_FastCall(func, args, nargs) __Pyx_PyObject_FastCallDict(func, args, (size_t)(nargs), NULL) +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, 
PyObject **args, size_t nargs, PyObject *kwargs); + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + +/* SliceObject.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice( + PyObject* obj, Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** py_start, PyObject** py_stop, PyObject** py_slice, + int has_cstart, int has_cstop, int wraparound); + +/* ListCompAppend.proto */ +#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS +static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) { + PyListObject* L = (PyListObject*) list; + Py_ssize_t len = Py_SIZE(list); + if (likely(L->allocated > len)) { + Py_INCREF(x); + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 + L->ob_item[len] = x; + #else + PyList_SET_ITEM(list, len, x); + #endif + __Pyx_SET_SIZE(list, len + 1); + return 0; + } + return PyList_Append(list, x); +} +#else +#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x) +#endif + +/* GetAttr.proto */ +static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *, PyObject *); + +/* SetItemInt.proto */ +#define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) :\ + (is_list ? 
(PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) :\ + __Pyx_SetItemInt_Generic(o, to_py_func(i), v))) +static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v); +static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, + int is_list, int wraparound, int boundscheck); + +/* HasAttr.proto */ +static CYTHON_INLINE int __Pyx_HasAttr(PyObject *, PyObject *); + +/* RaiseUnboundLocalError.proto */ +static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname); + +/* PyObject_Str.proto */ +#define __Pyx_PyObject_Str(obj)\ + (likely(PyString_CheckExact(obj)) ? __Pyx_NewRef(obj) : PyObject_Str(obj)) + +/* SliceObject.proto */ +#define __Pyx_PyObject_DelSlice(obj, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound)\ + __Pyx_PyObject_SetSlice(obj, (PyObject*)NULL, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound) +static CYTHON_INLINE int __Pyx_PyObject_SetSlice( + PyObject* obj, PyObject* value, Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** py_start, PyObject** py_stop, PyObject** py_slice, + int has_cstart, int has_cstop, int wraparound); + +/* PyObjectCall2Args.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2); + +/* PyObjectGetMethod.proto */ +static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method); + +/* PyObjectCallMethod1.proto */ +static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg); + +/* StringJoin.proto */ +#if PY_MAJOR_VERSION < 3 +#define __Pyx_PyString_Join __Pyx_PyBytes_Join +#define __Pyx_PyBaseString_Join(s, v) (PyUnicode_CheckExact(s) ? 
PyUnicode_Join(s, v) : __Pyx_PyBytes_Join(s, v)) +#else +#define __Pyx_PyString_Join PyUnicode_Join +#define __Pyx_PyBaseString_Join PyUnicode_Join +#endif +static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values); + +/* PyObjectSetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +#define __Pyx_PyObject_DelAttrStr(o,n) __Pyx_PyObject_SetAttrStr(o, n, NULL) +static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value); +#else +#define __Pyx_PyObject_DelAttrStr(o,n) PyObject_DelAttr(o,n) +#define __Pyx_PyObject_SetAttrStr(o,n,v) PyObject_SetAttr(o,n,v) +#endif + +/* PyObjectCallNoArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); + +/* PyObjectCallMethod0.proto */ +static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name); + +/* ValidateBasesTuple.proto */ +#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS +static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases); +#endif + +/* PyType_Ready.proto */ +CYTHON_UNUSED static int __Pyx_PyType_Ready(PyTypeObject *t); + +/* PyObject_GenericGetAttrNoDict.proto */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GenericGetAttrNoDict PyObject_GenericGetAttr +#endif + +/* PyObject_GenericGetAttr.proto */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GenericGetAttr PyObject_GenericGetAttr +#endif + +/* SetupReduce.proto */ +#if !CYTHON_COMPILING_IN_LIMITED_API +static int __Pyx_setup_reduce(PyObject* type_obj); +#endif + +/* Import.proto */ +static PyObject *__Pyx_Import(PyObject *name, PyObject 
*from_list, int level); + +/* ImportDottedModule.proto */ +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple); +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple); +#endif + +/* ImportDottedModuleRelFirst.proto */ +static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject *name, PyObject *parts_tuple); + +/* PyDictVersioning.proto */ +#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) +#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ + (version_var) = __PYX_GET_DICT_VERSION(dict);\ + (cache_var) = (value); +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ + (VAR) = __pyx_dict_cached_value;\ + } else {\ + (VAR) = __pyx_dict_cached_value = (LOOKUP);\ + __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ + }\ +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); +#else +#define __PYX_GET_DICT_VERSION(dict) (0) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); +#endif + +/* CLineInTraceback.proto */ +#ifdef CYTHON_CLINE_IN_TRACEBACK +#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? 
c_line : 0) +#else +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); +#endif + +/* CodeObjectCache.proto */ +#if !CYTHON_COMPILING_IN_LIMITED_API +typedef struct { + PyCodeObject* code_object; + int code_line; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); +#endif + +/* AddTraceback.proto */ +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +/* GCCDiagnostics.proto */ +#if !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) +#define __Pyx_HAS_GCC_DIAGNOSTIC +#endif + +/* CIntFromPy.proto */ +static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +/* CIntFromPy.proto */ +static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_char(char value); + +/* FormatTypeName.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +typedef PyObject *__Pyx_TypeName; +#define __Pyx_FMT_TYPENAME "%U" +static __Pyx_TypeName __Pyx_PyType_GetName(PyTypeObject* tp); +#define __Pyx_DECREF_TypeName(obj) Py_XDECREF(obj) +#else +typedef const char *__Pyx_TypeName; +#define __Pyx_FMT_TYPENAME "%.200s" +#define __Pyx_PyType_GetName(tp) ((tp)->tp_name) +#define __Pyx_DECREF_TypeName(obj) +#endif + +/* CIntFromPy.proto */ +static 
CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +/* SwapException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_ExceptionSwap(type, value, tb) __Pyx__ExceptionSwap(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#else +static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb); +#endif + +/* CoroutineBase.proto */ +struct __pyx_CoroutineObject; +typedef PyObject *(*__pyx_coroutine_body_t)(struct __pyx_CoroutineObject *, PyThreadState *, PyObject *); +#if CYTHON_USE_EXC_INFO_STACK +#define __Pyx_ExcInfoStruct _PyErr_StackItem +#else +typedef struct { + PyObject *exc_type; + PyObject *exc_value; + PyObject *exc_traceback; +} __Pyx_ExcInfoStruct; +#endif +typedef struct __pyx_CoroutineObject { + PyObject_HEAD + __pyx_coroutine_body_t body; + PyObject *closure; + __Pyx_ExcInfoStruct gi_exc_state; + PyObject *gi_weakreflist; + PyObject *classobj; + PyObject *yieldfrom; + PyObject *gi_name; + PyObject *gi_qualname; + PyObject *gi_modulename; + PyObject *gi_code; + PyObject *gi_frame; + int resume_label; + char is_running; +} __pyx_CoroutineObject; +static __pyx_CoroutineObject *__Pyx__Coroutine_New( + PyTypeObject *type, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, + PyObject *name, PyObject *qualname, PyObject *module_name); +static __pyx_CoroutineObject *__Pyx__Coroutine_NewInit( + __pyx_CoroutineObject *gen, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, + PyObject *name, PyObject *qualname, PyObject *module_name); +static CYTHON_INLINE void __Pyx_Coroutine_ExceptionClear(__Pyx_ExcInfoStruct *self); +static int __Pyx_Coroutine_clear(PyObject *self); +static PyObject *__Pyx_Coroutine_Send(PyObject *self, PyObject *value); +static PyObject *__Pyx_Coroutine_Close(PyObject *self); +static PyObject *__Pyx_Coroutine_Throw(PyObject *gen, PyObject *args); +#if CYTHON_USE_EXC_INFO_STACK 
+#define __Pyx_Coroutine_SwapException(self) +#define __Pyx_Coroutine_ResetAndClearException(self) __Pyx_Coroutine_ExceptionClear(&(self)->gi_exc_state) +#else +#define __Pyx_Coroutine_SwapException(self) {\ + __Pyx_ExceptionSwap(&(self)->gi_exc_state.exc_type, &(self)->gi_exc_state.exc_value, &(self)->gi_exc_state.exc_traceback);\ + __Pyx_Coroutine_ResetFrameBackpointer(&(self)->gi_exc_state);\ + } +#define __Pyx_Coroutine_ResetAndClearException(self) {\ + __Pyx_ExceptionReset((self)->gi_exc_state.exc_type, (self)->gi_exc_state.exc_value, (self)->gi_exc_state.exc_traceback);\ + (self)->gi_exc_state.exc_type = (self)->gi_exc_state.exc_value = (self)->gi_exc_state.exc_traceback = NULL;\ + } +#endif +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyGen_FetchStopIterationValue(pvalue)\ + __Pyx_PyGen__FetchStopIterationValue(__pyx_tstate, pvalue) +#else +#define __Pyx_PyGen_FetchStopIterationValue(pvalue)\ + __Pyx_PyGen__FetchStopIterationValue(__Pyx_PyThreadState_Current, pvalue) +#endif +static int __Pyx_PyGen__FetchStopIterationValue(PyThreadState *tstate, PyObject **pvalue); +static CYTHON_INLINE void __Pyx_Coroutine_ResetFrameBackpointer(__Pyx_ExcInfoStruct *exc_state); + +/* PatchModuleWithCoroutine.proto */ +static PyObject* __Pyx_Coroutine_patch_module(PyObject* module, const char* py_code); + +/* PatchGeneratorABC.proto */ +static int __Pyx_patch_abc(void); + +/* Generator.proto */ +#define __Pyx_Generator_USED +#define __Pyx_Generator_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_GeneratorType) +#define __Pyx_Generator_New(body, code, closure, name, qualname, module_name)\ + __Pyx__Coroutine_New(__pyx_GeneratorType, body, code, closure, name, qualname, module_name) +static PyObject *__Pyx_Generator_Next(PyObject *self); +static int __pyx_Generator_init(PyObject *module); + +/* CheckBinaryVersion.proto */ +static unsigned long __Pyx_get_runtime_version(void); +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer); + 
+/* InitStrings.proto */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + +/* #### Code section: module_declarations ### */ + +/* Module declarations from "libc.string" */ + +/* Module declarations from "libc.stdio" */ + +/* Module declarations from "jcvi.formats.cblast" */ +static char const *__pyx_v_4jcvi_7formats_6cblast_blast_format; +static char const *__pyx_v_4jcvi_7formats_6cblast_blast_format_line; +static char const *__pyx_v_4jcvi_7formats_6cblast_blast_output; +static char const *__pyx_v_4jcvi_7formats_6cblast_bed_output; +static PyObject *__pyx_f_4jcvi_7formats_6cblast_c_str(PyObject *); /*proto*/ +static PyObject *__pyx_f_4jcvi_7formats_6cblast_py_str(PyObject *); /*proto*/ +static struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_f_4jcvi_7formats_6cblast_create_blast_line(char *, char *, float, int, int, int, int, int, int, int, float, float); /*proto*/ +static PyObject *__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*)(char *, char *, float, int, int, int, int, int, int, int, float, float)); /*proto*/ +static int __Pyx_carray_from_py_char(PyObject *, char *, Py_ssize_t); /*proto*/ +/* #### Code section: typeinfo ### */ +/* #### Code section: before_global_var ### */ +#define __Pyx_MODULE_NAME "jcvi.formats.cblast" +extern int __pyx_module_is_main_jcvi__formats__cblast; +int __pyx_module_is_main_jcvi__formats__cblast = 0; + +/* Implementation of "jcvi.formats.cblast" */ +/* #### Code section: global_var ### */ +static PyObject *__pyx_builtin_StopIteration; +static PyObject *__pyx_builtin_TypeError; +static PyObject *__pyx_builtin_id; +static PyObject *__pyx_builtin_OverflowError; +static PyObject *__pyx_builtin_enumerate; +static PyObject *__pyx_builtin_IndexError; +/* #### Code section: string_decls ### */ +static const char __pyx_k_s[] = "s"; +static const char __pyx_k__5[] = "\t"; +static 
const char __pyx_k__6[] = "*"; +static const char __pyx_k_gc[] = "gc"; +static const char __pyx_k_id[] = "id"; +static const char __pyx_k_qi[] = "qi"; +static const char __pyx_k_si[] = "si"; +static const char __pyx_k__13[] = "?"; +static const char __pyx_k_sys[] = "sys"; +static const char __pyx_k_args[] = "args"; +static const char __pyx_k_join[] = "join"; +static const char __pyx_k_main[] = "__main__"; +static const char __pyx_k_name[] = "__name__"; +static const char __pyx_k_self[] = "self"; +static const char __pyx_k_send[] = "send"; +static const char __pyx_k_spec[] = "__spec__"; +static const char __pyx_k_test[] = "__test__"; +static const char __pyx_k_wrap[] = "wrap"; +static const char __pyx_k_Blast[] = "Blast"; +static const char __pyx_k_UTF_8[] = "UTF-8"; +static const char __pyx_k_close[] = "close"; +static const char __pyx_k_ngaps[] = "ngaps"; +static const char __pyx_k_pctid[] = "pctid"; +static const char __pyx_k_qstop[] = "qstop"; +static const char __pyx_k_query[] = "query"; +static const char __pyx_k_score[] = "score"; +static const char __pyx_k_slots[] = "__slots__"; +static const char __pyx_k_sstop[] = "sstop"; +static const char __pyx_k_throw[] = "throw"; +static const char __pyx_k_enable[] = "enable"; +static const char __pyx_k_encode[] = "encode"; +static const char __pyx_k_evalue[] = "evalue"; +static const char __pyx_k_hitlen[] = "hitlen"; +static const char __pyx_k_import[] = "__import__"; +static const char __pyx_k_qseqid[] = "qseqid"; +static const char __pyx_k_qstart[] = "qstart"; +static const char __pyx_k_reduce[] = "__reduce__"; +static const char __pyx_k_sseqid[] = "sseqid"; +static const char __pyx_k_sstart[] = "sstart"; +static const char __pyx_k_Blast_s[] = "Blast('%s')"; +static const char __pyx_k_disable[] = "disable"; +static const char __pyx_k_genexpr[] = "genexpr"; +static const char __pyx_k_richcmp[] = "__richcmp__"; +static const char __pyx_k_subject[] = "subject"; +static const char __pyx_k_filename[] = "filename"; 
+static const char __pyx_k_getstate[] = "__getstate__"; +static const char __pyx_k_setstate[] = "__setstate__"; +static const char __pyx_k_BlastLine[] = "BlastLine"; +static const char __pyx_k_TypeError[] = "TypeError"; +static const char __pyx_k_enumerate[] = "enumerate"; +static const char __pyx_k_isenabled[] = "isenabled"; +static const char __pyx_k_nmismatch[] = "nmismatch"; +static const char __pyx_k_pyx_state[] = "__pyx_state"; +static const char __pyx_k_reduce_ex[] = "__reduce_ex__"; +static const char __pyx_k_IndexError[] = "IndexError"; +static const char __pyx_k_cblast_pyx[] = "cblast.pyx"; +static const char __pyx_k_cfunc_to_py[] = "cfunc.to_py"; +static const char __pyx_k_orientation[] = "orientation"; +static const char __pyx_k_initializing[] = "_initializing"; +static const char __pyx_k_is_coroutine[] = "_is_coroutine"; +static const char __pyx_k_stringsource[] = ""; +static const char __pyx_k_OverflowError[] = "OverflowError"; +static const char __pyx_k_StopIteration[] = "StopIteration"; +static const char __pyx_k_reduce_cython[] = "__reduce_cython__"; +static const char __pyx_k_setstate_cython[] = "__setstate_cython__"; +static const char __pyx_k_BlastLine___reduce[] = "BlastLine.__reduce__"; +static const char __pyx_k_asyncio_coroutines[] = "asyncio.coroutines"; +static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; +static const char __pyx_k_jcvi_formats_cblast[] = "jcvi.formats.cblast"; +static const char __pyx_k_Blast___reduce_cython[] = "Blast.__reduce_cython__"; +static const char __pyx_k_Blast___setstate_cython[] = "Blast.__setstate_cython__"; +static const char __pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma[] = "__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc..wrap"; +static const char __pyx_k_Cythonized_fast_version_of_Blas[] = "\nCythonized (fast) version of BlastLine\n\nStolen from brentp's biostuff (thanks):\n\n"; +static const char 
__pyx_k_that_comparison_not_implemented[] = "that comparison not implemented"; +static const char __pyx_k_BlastLine___get___locals_genexpr[] = "BlastLine.__get__..genexpr"; +static const char __pyx_k_BlastLine_s_to_s_eval_3f_score_1[] = "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)"; +static const char __pyx_k_no_default___reduce___due_to_non[] = "no default __reduce__ due to non-trivial __cinit__"; +/* #### Code section: decls ### */ +static PyObject *__pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(PyObject *__pyx_self, char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, char *__pyx_v_filename); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ +static void __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, CYTHON_UNUSED PyObject 
*__pyx_v___pyx_state); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_s); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_other, size_t __pyx_v_op); /* proto */ +static Py_hash_t __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_genexpr_arg_0); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine 
*__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ 
+static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject 
*__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject 
*__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_Blast(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ +static PyObject *__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ +static __Pyx_CachedCFunction __pyx_umethod_PyString_Type_encode = {0, 0, 0, 0, 0}; +/* #### Code section: late_includes ### */ +/* #### Code section: module_state ### */ +typedef struct { + PyObject *__pyx_d; + PyObject *__pyx_b; + PyObject *__pyx_cython_runtime; + PyObject *__pyx_empty_tuple; + PyObject *__pyx_empty_bytes; + PyObject *__pyx_empty_unicode; + #ifdef __Pyx_CyFunction_USED + PyTypeObject *__pyx_CyFunctionType; + #endif + #ifdef __Pyx_FusedFunction_USED + PyTypeObject *__pyx_FusedFunctionType; + #endif + #ifdef __Pyx_Generator_USED + PyTypeObject *__pyx_GeneratorType; + #endif + #ifdef __Pyx_IterableCoroutine_USED + PyTypeObject *__pyx_IterableCoroutineType; + #endif + #ifdef __Pyx_Coroutine_USED + PyTypeObject *__pyx_CoroutineAwaitType; + #endif + #ifdef __Pyx_Coroutine_USED + PyTypeObject *__pyx_CoroutineType; + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + PyObject *__pyx_type_4jcvi_7formats_6cblast_Blast; + PyObject 
*__pyx_type_4jcvi_7formats_6cblast_BlastLine; + PyObject *__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; + PyObject *__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; + #endif + PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast_Blast; + PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast_BlastLine; + PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; + PyTypeObject *__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; + PyObject *__pyx_n_s_Blast; + PyObject *__pyx_n_s_BlastLine; + PyObject *__pyx_n_s_BlastLine___get___locals_genexpr; + PyObject *__pyx_n_s_BlastLine___reduce; + PyObject *__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1; + PyObject *__pyx_n_s_Blast___reduce_cython; + PyObject *__pyx_n_s_Blast___setstate_cython; + PyObject *__pyx_kp_s_Blast_s; + PyObject *__pyx_n_s_IndexError; + PyObject *__pyx_n_s_OverflowError; + PyObject *__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma; + PyObject *__pyx_n_s_StopIteration; + PyObject *__pyx_n_s_TypeError; + PyObject *__pyx_kp_s_UTF_8; + PyObject *__pyx_n_s__13; + PyObject *__pyx_kp_s__5; + PyObject *__pyx_n_s__6; + PyObject *__pyx_n_s_args; + PyObject *__pyx_n_s_asyncio_coroutines; + PyObject *__pyx_kp_s_cblast_pyx; + PyObject *__pyx_n_s_cfunc_to_py; + PyObject *__pyx_n_s_cline_in_traceback; + PyObject *__pyx_n_s_close; + PyObject *__pyx_kp_u_disable; + PyObject *__pyx_kp_u_enable; + PyObject *__pyx_n_s_encode; + PyObject *__pyx_n_s_enumerate; + PyObject *__pyx_n_s_evalue; + PyObject *__pyx_n_s_filename; + PyObject *__pyx_kp_u_gc; + PyObject *__pyx_n_s_genexpr; + PyObject *__pyx_n_s_getstate; + PyObject *__pyx_n_s_hitlen; + PyObject *__pyx_n_s_id; + PyObject *__pyx_n_s_import; + PyObject *__pyx_n_s_initializing; + PyObject *__pyx_n_s_is_coroutine; + PyObject 
*__pyx_kp_u_isenabled; + PyObject *__pyx_n_s_jcvi_formats_cblast; + PyObject *__pyx_n_s_join; + PyObject *__pyx_n_s_main; + PyObject *__pyx_n_s_name; + PyObject *__pyx_n_s_ngaps; + PyObject *__pyx_n_s_nmismatch; + PyObject *__pyx_kp_s_no_default___reduce___due_to_non; + PyObject *__pyx_n_s_orientation; + PyObject *__pyx_n_s_pctid; + PyObject *__pyx_n_s_pyx_state; + PyObject *__pyx_n_s_qi; + PyObject *__pyx_n_s_qseqid; + PyObject *__pyx_n_s_qstart; + PyObject *__pyx_n_s_qstop; + PyObject *__pyx_n_s_query; + PyObject *__pyx_n_s_reduce; + PyObject *__pyx_n_s_reduce_cython; + PyObject *__pyx_n_s_reduce_ex; + PyObject *__pyx_n_s_richcmp; + PyObject *__pyx_n_s_s; + PyObject *__pyx_n_s_score; + PyObject *__pyx_n_s_self; + PyObject *__pyx_n_s_send; + PyObject *__pyx_n_s_setstate; + PyObject *__pyx_n_s_setstate_cython; + PyObject *__pyx_n_s_si; + PyObject *__pyx_n_s_slots; + PyObject *__pyx_n_s_spec; + PyObject *__pyx_n_s_sseqid; + PyObject *__pyx_n_s_sstart; + PyObject *__pyx_n_s_sstop; + PyObject *__pyx_kp_s_stringsource; + PyObject *__pyx_n_s_subject; + PyObject *__pyx_n_s_sys; + PyObject *__pyx_n_s_test; + PyObject *__pyx_kp_s_that_comparison_not_implemented; + PyObject *__pyx_n_s_throw; + PyObject *__pyx_n_s_wrap; + PyObject *__pyx_int_2; + PyObject *__pyx_int_12; + PyObject *__pyx_tuple_; + PyObject *__pyx_slice__4; + PyObject *__pyx_tuple__3; + PyObject *__pyx_tuple__7; + PyObject *__pyx_tuple__9; + PyObject *__pyx_tuple__11; + PyObject *__pyx_codeobj__2; + PyObject *__pyx_codeobj__8; + PyObject *__pyx_codeobj__10; + PyObject *__pyx_codeobj__12; +} __pyx_mstate; + +#if CYTHON_USE_MODULE_STATE +#ifdef __cplusplus +namespace { + extern struct PyModuleDef __pyx_moduledef; +} /* anonymous namespace */ +#else +static struct PyModuleDef __pyx_moduledef; +#endif + +#define __pyx_mstate(o) ((__pyx_mstate *)__Pyx_PyModule_GetState(o)) + +#define __pyx_mstate_global (__pyx_mstate(PyState_FindModule(&__pyx_moduledef))) + +#define __pyx_m (PyState_FindModule(&__pyx_moduledef)) 
+#else +static __pyx_mstate __pyx_mstate_global_static = +#ifdef __cplusplus + {}; +#else + {0}; +#endif +static __pyx_mstate *__pyx_mstate_global = &__pyx_mstate_global_static; +#endif +/* #### Code section: module_state_clear ### */ +#if CYTHON_USE_MODULE_STATE +static int __pyx_m_clear(PyObject *m) { + __pyx_mstate *clear_module_state = __pyx_mstate(m); + if (!clear_module_state) return 0; + Py_CLEAR(clear_module_state->__pyx_d); + Py_CLEAR(clear_module_state->__pyx_b); + Py_CLEAR(clear_module_state->__pyx_cython_runtime); + Py_CLEAR(clear_module_state->__pyx_empty_tuple); + Py_CLEAR(clear_module_state->__pyx_empty_bytes); + Py_CLEAR(clear_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_CLEAR(clear_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_CLEAR(clear_module_state->__pyx_FusedFunctionType); + #endif + Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast_Blast); + Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast_Blast); + Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); + Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast_BlastLine); + Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); + Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); + Py_CLEAR(clear_module_state->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); + Py_CLEAR(clear_module_state->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); + Py_CLEAR(clear_module_state->__pyx_n_s_Blast); + Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine); + Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine___get___locals_genexpr); + 
Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine___reduce); + Py_CLEAR(clear_module_state->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1); + Py_CLEAR(clear_module_state->__pyx_n_s_Blast___reduce_cython); + Py_CLEAR(clear_module_state->__pyx_n_s_Blast___setstate_cython); + Py_CLEAR(clear_module_state->__pyx_kp_s_Blast_s); + Py_CLEAR(clear_module_state->__pyx_n_s_IndexError); + Py_CLEAR(clear_module_state->__pyx_n_s_OverflowError); + Py_CLEAR(clear_module_state->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma); + Py_CLEAR(clear_module_state->__pyx_n_s_StopIteration); + Py_CLEAR(clear_module_state->__pyx_n_s_TypeError); + Py_CLEAR(clear_module_state->__pyx_kp_s_UTF_8); + Py_CLEAR(clear_module_state->__pyx_n_s__13); + Py_CLEAR(clear_module_state->__pyx_kp_s__5); + Py_CLEAR(clear_module_state->__pyx_n_s__6); + Py_CLEAR(clear_module_state->__pyx_n_s_args); + Py_CLEAR(clear_module_state->__pyx_n_s_asyncio_coroutines); + Py_CLEAR(clear_module_state->__pyx_kp_s_cblast_pyx); + Py_CLEAR(clear_module_state->__pyx_n_s_cfunc_to_py); + Py_CLEAR(clear_module_state->__pyx_n_s_cline_in_traceback); + Py_CLEAR(clear_module_state->__pyx_n_s_close); + Py_CLEAR(clear_module_state->__pyx_kp_u_disable); + Py_CLEAR(clear_module_state->__pyx_kp_u_enable); + Py_CLEAR(clear_module_state->__pyx_n_s_encode); + Py_CLEAR(clear_module_state->__pyx_n_s_enumerate); + Py_CLEAR(clear_module_state->__pyx_n_s_evalue); + Py_CLEAR(clear_module_state->__pyx_n_s_filename); + Py_CLEAR(clear_module_state->__pyx_kp_u_gc); + Py_CLEAR(clear_module_state->__pyx_n_s_genexpr); + Py_CLEAR(clear_module_state->__pyx_n_s_getstate); + Py_CLEAR(clear_module_state->__pyx_n_s_hitlen); + Py_CLEAR(clear_module_state->__pyx_n_s_id); + Py_CLEAR(clear_module_state->__pyx_n_s_import); + Py_CLEAR(clear_module_state->__pyx_n_s_initializing); + Py_CLEAR(clear_module_state->__pyx_n_s_is_coroutine); + Py_CLEAR(clear_module_state->__pyx_kp_u_isenabled); + Py_CLEAR(clear_module_state->__pyx_n_s_jcvi_formats_cblast); + 
Py_CLEAR(clear_module_state->__pyx_n_s_join); + Py_CLEAR(clear_module_state->__pyx_n_s_main); + Py_CLEAR(clear_module_state->__pyx_n_s_name); + Py_CLEAR(clear_module_state->__pyx_n_s_ngaps); + Py_CLEAR(clear_module_state->__pyx_n_s_nmismatch); + Py_CLEAR(clear_module_state->__pyx_kp_s_no_default___reduce___due_to_non); + Py_CLEAR(clear_module_state->__pyx_n_s_orientation); + Py_CLEAR(clear_module_state->__pyx_n_s_pctid); + Py_CLEAR(clear_module_state->__pyx_n_s_pyx_state); + Py_CLEAR(clear_module_state->__pyx_n_s_qi); + Py_CLEAR(clear_module_state->__pyx_n_s_qseqid); + Py_CLEAR(clear_module_state->__pyx_n_s_qstart); + Py_CLEAR(clear_module_state->__pyx_n_s_qstop); + Py_CLEAR(clear_module_state->__pyx_n_s_query); + Py_CLEAR(clear_module_state->__pyx_n_s_reduce); + Py_CLEAR(clear_module_state->__pyx_n_s_reduce_cython); + Py_CLEAR(clear_module_state->__pyx_n_s_reduce_ex); + Py_CLEAR(clear_module_state->__pyx_n_s_richcmp); + Py_CLEAR(clear_module_state->__pyx_n_s_s); + Py_CLEAR(clear_module_state->__pyx_n_s_score); + Py_CLEAR(clear_module_state->__pyx_n_s_self); + Py_CLEAR(clear_module_state->__pyx_n_s_send); + Py_CLEAR(clear_module_state->__pyx_n_s_setstate); + Py_CLEAR(clear_module_state->__pyx_n_s_setstate_cython); + Py_CLEAR(clear_module_state->__pyx_n_s_si); + Py_CLEAR(clear_module_state->__pyx_n_s_slots); + Py_CLEAR(clear_module_state->__pyx_n_s_spec); + Py_CLEAR(clear_module_state->__pyx_n_s_sseqid); + Py_CLEAR(clear_module_state->__pyx_n_s_sstart); + Py_CLEAR(clear_module_state->__pyx_n_s_sstop); + Py_CLEAR(clear_module_state->__pyx_kp_s_stringsource); + Py_CLEAR(clear_module_state->__pyx_n_s_subject); + Py_CLEAR(clear_module_state->__pyx_n_s_sys); + Py_CLEAR(clear_module_state->__pyx_n_s_test); + Py_CLEAR(clear_module_state->__pyx_kp_s_that_comparison_not_implemented); + Py_CLEAR(clear_module_state->__pyx_n_s_throw); + Py_CLEAR(clear_module_state->__pyx_n_s_wrap); + Py_CLEAR(clear_module_state->__pyx_int_2); + Py_CLEAR(clear_module_state->__pyx_int_12); + 
Py_CLEAR(clear_module_state->__pyx_tuple_); + Py_CLEAR(clear_module_state->__pyx_slice__4); + Py_CLEAR(clear_module_state->__pyx_tuple__3); + Py_CLEAR(clear_module_state->__pyx_tuple__7); + Py_CLEAR(clear_module_state->__pyx_tuple__9); + Py_CLEAR(clear_module_state->__pyx_tuple__11); + Py_CLEAR(clear_module_state->__pyx_codeobj__2); + Py_CLEAR(clear_module_state->__pyx_codeobj__8); + Py_CLEAR(clear_module_state->__pyx_codeobj__10); + Py_CLEAR(clear_module_state->__pyx_codeobj__12); + return 0; +} +#endif +/* #### Code section: module_state_traverse ### */ +#if CYTHON_USE_MODULE_STATE +static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) { + __pyx_mstate *traverse_module_state = __pyx_mstate(m); + if (!traverse_module_state) return 0; + Py_VISIT(traverse_module_state->__pyx_d); + Py_VISIT(traverse_module_state->__pyx_b); + Py_VISIT(traverse_module_state->__pyx_cython_runtime); + Py_VISIT(traverse_module_state->__pyx_empty_tuple); + Py_VISIT(traverse_module_state->__pyx_empty_bytes); + Py_VISIT(traverse_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_VISIT(traverse_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_VISIT(traverse_module_state->__pyx_FusedFunctionType); + #endif + Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast_Blast); + Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast_Blast); + Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); + Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast_BlastLine); + Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); + Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); + Py_VISIT(traverse_module_state->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); + 
Py_VISIT(traverse_module_state->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); + Py_VISIT(traverse_module_state->__pyx_n_s_Blast); + Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine); + Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine___get___locals_genexpr); + Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine___reduce); + Py_VISIT(traverse_module_state->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1); + Py_VISIT(traverse_module_state->__pyx_n_s_Blast___reduce_cython); + Py_VISIT(traverse_module_state->__pyx_n_s_Blast___setstate_cython); + Py_VISIT(traverse_module_state->__pyx_kp_s_Blast_s); + Py_VISIT(traverse_module_state->__pyx_n_s_IndexError); + Py_VISIT(traverse_module_state->__pyx_n_s_OverflowError); + Py_VISIT(traverse_module_state->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma); + Py_VISIT(traverse_module_state->__pyx_n_s_StopIteration); + Py_VISIT(traverse_module_state->__pyx_n_s_TypeError); + Py_VISIT(traverse_module_state->__pyx_kp_s_UTF_8); + Py_VISIT(traverse_module_state->__pyx_n_s__13); + Py_VISIT(traverse_module_state->__pyx_kp_s__5); + Py_VISIT(traverse_module_state->__pyx_n_s__6); + Py_VISIT(traverse_module_state->__pyx_n_s_args); + Py_VISIT(traverse_module_state->__pyx_n_s_asyncio_coroutines); + Py_VISIT(traverse_module_state->__pyx_kp_s_cblast_pyx); + Py_VISIT(traverse_module_state->__pyx_n_s_cfunc_to_py); + Py_VISIT(traverse_module_state->__pyx_n_s_cline_in_traceback); + Py_VISIT(traverse_module_state->__pyx_n_s_close); + Py_VISIT(traverse_module_state->__pyx_kp_u_disable); + Py_VISIT(traverse_module_state->__pyx_kp_u_enable); + Py_VISIT(traverse_module_state->__pyx_n_s_encode); + Py_VISIT(traverse_module_state->__pyx_n_s_enumerate); + Py_VISIT(traverse_module_state->__pyx_n_s_evalue); + Py_VISIT(traverse_module_state->__pyx_n_s_filename); + Py_VISIT(traverse_module_state->__pyx_kp_u_gc); + 
Py_VISIT(traverse_module_state->__pyx_n_s_genexpr); + Py_VISIT(traverse_module_state->__pyx_n_s_getstate); + Py_VISIT(traverse_module_state->__pyx_n_s_hitlen); + Py_VISIT(traverse_module_state->__pyx_n_s_id); + Py_VISIT(traverse_module_state->__pyx_n_s_import); + Py_VISIT(traverse_module_state->__pyx_n_s_initializing); + Py_VISIT(traverse_module_state->__pyx_n_s_is_coroutine); + Py_VISIT(traverse_module_state->__pyx_kp_u_isenabled); + Py_VISIT(traverse_module_state->__pyx_n_s_jcvi_formats_cblast); + Py_VISIT(traverse_module_state->__pyx_n_s_join); + Py_VISIT(traverse_module_state->__pyx_n_s_main); + Py_VISIT(traverse_module_state->__pyx_n_s_name); + Py_VISIT(traverse_module_state->__pyx_n_s_ngaps); + Py_VISIT(traverse_module_state->__pyx_n_s_nmismatch); + Py_VISIT(traverse_module_state->__pyx_kp_s_no_default___reduce___due_to_non); + Py_VISIT(traverse_module_state->__pyx_n_s_orientation); + Py_VISIT(traverse_module_state->__pyx_n_s_pctid); + Py_VISIT(traverse_module_state->__pyx_n_s_pyx_state); + Py_VISIT(traverse_module_state->__pyx_n_s_qi); + Py_VISIT(traverse_module_state->__pyx_n_s_qseqid); + Py_VISIT(traverse_module_state->__pyx_n_s_qstart); + Py_VISIT(traverse_module_state->__pyx_n_s_qstop); + Py_VISIT(traverse_module_state->__pyx_n_s_query); + Py_VISIT(traverse_module_state->__pyx_n_s_reduce); + Py_VISIT(traverse_module_state->__pyx_n_s_reduce_cython); + Py_VISIT(traverse_module_state->__pyx_n_s_reduce_ex); + Py_VISIT(traverse_module_state->__pyx_n_s_richcmp); + Py_VISIT(traverse_module_state->__pyx_n_s_s); + Py_VISIT(traverse_module_state->__pyx_n_s_score); + Py_VISIT(traverse_module_state->__pyx_n_s_self); + Py_VISIT(traverse_module_state->__pyx_n_s_send); + Py_VISIT(traverse_module_state->__pyx_n_s_setstate); + Py_VISIT(traverse_module_state->__pyx_n_s_setstate_cython); + Py_VISIT(traverse_module_state->__pyx_n_s_si); + Py_VISIT(traverse_module_state->__pyx_n_s_slots); + Py_VISIT(traverse_module_state->__pyx_n_s_spec); + 
Py_VISIT(traverse_module_state->__pyx_n_s_sseqid); + Py_VISIT(traverse_module_state->__pyx_n_s_sstart); + Py_VISIT(traverse_module_state->__pyx_n_s_sstop); + Py_VISIT(traverse_module_state->__pyx_kp_s_stringsource); + Py_VISIT(traverse_module_state->__pyx_n_s_subject); + Py_VISIT(traverse_module_state->__pyx_n_s_sys); + Py_VISIT(traverse_module_state->__pyx_n_s_test); + Py_VISIT(traverse_module_state->__pyx_kp_s_that_comparison_not_implemented); + Py_VISIT(traverse_module_state->__pyx_n_s_throw); + Py_VISIT(traverse_module_state->__pyx_n_s_wrap); + Py_VISIT(traverse_module_state->__pyx_int_2); + Py_VISIT(traverse_module_state->__pyx_int_12); + Py_VISIT(traverse_module_state->__pyx_tuple_); + Py_VISIT(traverse_module_state->__pyx_slice__4); + Py_VISIT(traverse_module_state->__pyx_tuple__3); + Py_VISIT(traverse_module_state->__pyx_tuple__7); + Py_VISIT(traverse_module_state->__pyx_tuple__9); + Py_VISIT(traverse_module_state->__pyx_tuple__11); + Py_VISIT(traverse_module_state->__pyx_codeobj__2); + Py_VISIT(traverse_module_state->__pyx_codeobj__8); + Py_VISIT(traverse_module_state->__pyx_codeobj__10); + Py_VISIT(traverse_module_state->__pyx_codeobj__12); + return 0; +} +#endif +/* #### Code section: module_state_defines ### */ +#define __pyx_d __pyx_mstate_global->__pyx_d +#define __pyx_b __pyx_mstate_global->__pyx_b +#define __pyx_cython_runtime __pyx_mstate_global->__pyx_cython_runtime +#define __pyx_empty_tuple __pyx_mstate_global->__pyx_empty_tuple +#define __pyx_empty_bytes __pyx_mstate_global->__pyx_empty_bytes +#define __pyx_empty_unicode __pyx_mstate_global->__pyx_empty_unicode +#ifdef __Pyx_CyFunction_USED +#define __pyx_CyFunctionType __pyx_mstate_global->__pyx_CyFunctionType +#endif +#ifdef __Pyx_FusedFunction_USED +#define __pyx_FusedFunctionType __pyx_mstate_global->__pyx_FusedFunctionType +#endif +#ifdef __Pyx_Generator_USED +#define __pyx_GeneratorType __pyx_mstate_global->__pyx_GeneratorType +#endif +#ifdef __Pyx_IterableCoroutine_USED +#define 
__pyx_IterableCoroutineType __pyx_mstate_global->__pyx_IterableCoroutineType +#endif +#ifdef __Pyx_Coroutine_USED +#define __pyx_CoroutineAwaitType __pyx_mstate_global->__pyx_CoroutineAwaitType +#endif +#ifdef __Pyx_Coroutine_USED +#define __pyx_CoroutineType __pyx_mstate_global->__pyx_CoroutineType +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#define __pyx_type_4jcvi_7formats_6cblast_Blast __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast_Blast +#define __pyx_type_4jcvi_7formats_6cblast_BlastLine __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast_BlastLine +#define __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr +#define __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc __pyx_mstate_global->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc +#endif +#define __pyx_ptype_4jcvi_7formats_6cblast_Blast __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast_Blast +#define __pyx_ptype_4jcvi_7formats_6cblast_BlastLine __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine +#define __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr +#define __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc __pyx_mstate_global->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc +#define __pyx_n_s_Blast __pyx_mstate_global->__pyx_n_s_Blast +#define __pyx_n_s_BlastLine 
__pyx_mstate_global->__pyx_n_s_BlastLine +#define __pyx_n_s_BlastLine___get___locals_genexpr __pyx_mstate_global->__pyx_n_s_BlastLine___get___locals_genexpr +#define __pyx_n_s_BlastLine___reduce __pyx_mstate_global->__pyx_n_s_BlastLine___reduce +#define __pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1 __pyx_mstate_global->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1 +#define __pyx_n_s_Blast___reduce_cython __pyx_mstate_global->__pyx_n_s_Blast___reduce_cython +#define __pyx_n_s_Blast___setstate_cython __pyx_mstate_global->__pyx_n_s_Blast___setstate_cython +#define __pyx_kp_s_Blast_s __pyx_mstate_global->__pyx_kp_s_Blast_s +#define __pyx_n_s_IndexError __pyx_mstate_global->__pyx_n_s_IndexError +#define __pyx_n_s_OverflowError __pyx_mstate_global->__pyx_n_s_OverflowError +#define __pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma __pyx_mstate_global->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma +#define __pyx_n_s_StopIteration __pyx_mstate_global->__pyx_n_s_StopIteration +#define __pyx_n_s_TypeError __pyx_mstate_global->__pyx_n_s_TypeError +#define __pyx_kp_s_UTF_8 __pyx_mstate_global->__pyx_kp_s_UTF_8 +#define __pyx_n_s__13 __pyx_mstate_global->__pyx_n_s__13 +#define __pyx_kp_s__5 __pyx_mstate_global->__pyx_kp_s__5 +#define __pyx_n_s__6 __pyx_mstate_global->__pyx_n_s__6 +#define __pyx_n_s_args __pyx_mstate_global->__pyx_n_s_args +#define __pyx_n_s_asyncio_coroutines __pyx_mstate_global->__pyx_n_s_asyncio_coroutines +#define __pyx_kp_s_cblast_pyx __pyx_mstate_global->__pyx_kp_s_cblast_pyx +#define __pyx_n_s_cfunc_to_py __pyx_mstate_global->__pyx_n_s_cfunc_to_py +#define __pyx_n_s_cline_in_traceback __pyx_mstate_global->__pyx_n_s_cline_in_traceback +#define __pyx_n_s_close __pyx_mstate_global->__pyx_n_s_close +#define __pyx_kp_u_disable __pyx_mstate_global->__pyx_kp_u_disable +#define __pyx_kp_u_enable __pyx_mstate_global->__pyx_kp_u_enable +#define __pyx_n_s_encode __pyx_mstate_global->__pyx_n_s_encode +#define __pyx_n_s_enumerate __pyx_mstate_global->__pyx_n_s_enumerate +#define 
__pyx_n_s_evalue __pyx_mstate_global->__pyx_n_s_evalue +#define __pyx_n_s_filename __pyx_mstate_global->__pyx_n_s_filename +#define __pyx_kp_u_gc __pyx_mstate_global->__pyx_kp_u_gc +#define __pyx_n_s_genexpr __pyx_mstate_global->__pyx_n_s_genexpr +#define __pyx_n_s_getstate __pyx_mstate_global->__pyx_n_s_getstate +#define __pyx_n_s_hitlen __pyx_mstate_global->__pyx_n_s_hitlen +#define __pyx_n_s_id __pyx_mstate_global->__pyx_n_s_id +#define __pyx_n_s_import __pyx_mstate_global->__pyx_n_s_import +#define __pyx_n_s_initializing __pyx_mstate_global->__pyx_n_s_initializing +#define __pyx_n_s_is_coroutine __pyx_mstate_global->__pyx_n_s_is_coroutine +#define __pyx_kp_u_isenabled __pyx_mstate_global->__pyx_kp_u_isenabled +#define __pyx_n_s_jcvi_formats_cblast __pyx_mstate_global->__pyx_n_s_jcvi_formats_cblast +#define __pyx_n_s_join __pyx_mstate_global->__pyx_n_s_join +#define __pyx_n_s_main __pyx_mstate_global->__pyx_n_s_main +#define __pyx_n_s_name __pyx_mstate_global->__pyx_n_s_name +#define __pyx_n_s_ngaps __pyx_mstate_global->__pyx_n_s_ngaps +#define __pyx_n_s_nmismatch __pyx_mstate_global->__pyx_n_s_nmismatch +#define __pyx_kp_s_no_default___reduce___due_to_non __pyx_mstate_global->__pyx_kp_s_no_default___reduce___due_to_non +#define __pyx_n_s_orientation __pyx_mstate_global->__pyx_n_s_orientation +#define __pyx_n_s_pctid __pyx_mstate_global->__pyx_n_s_pctid +#define __pyx_n_s_pyx_state __pyx_mstate_global->__pyx_n_s_pyx_state +#define __pyx_n_s_qi __pyx_mstate_global->__pyx_n_s_qi +#define __pyx_n_s_qseqid __pyx_mstate_global->__pyx_n_s_qseqid +#define __pyx_n_s_qstart __pyx_mstate_global->__pyx_n_s_qstart +#define __pyx_n_s_qstop __pyx_mstate_global->__pyx_n_s_qstop +#define __pyx_n_s_query __pyx_mstate_global->__pyx_n_s_query +#define __pyx_n_s_reduce __pyx_mstate_global->__pyx_n_s_reduce +#define __pyx_n_s_reduce_cython __pyx_mstate_global->__pyx_n_s_reduce_cython +#define __pyx_n_s_reduce_ex __pyx_mstate_global->__pyx_n_s_reduce_ex +#define __pyx_n_s_richcmp 
__pyx_mstate_global->__pyx_n_s_richcmp +#define __pyx_n_s_s __pyx_mstate_global->__pyx_n_s_s +#define __pyx_n_s_score __pyx_mstate_global->__pyx_n_s_score +#define __pyx_n_s_self __pyx_mstate_global->__pyx_n_s_self +#define __pyx_n_s_send __pyx_mstate_global->__pyx_n_s_send +#define __pyx_n_s_setstate __pyx_mstate_global->__pyx_n_s_setstate +#define __pyx_n_s_setstate_cython __pyx_mstate_global->__pyx_n_s_setstate_cython +#define __pyx_n_s_si __pyx_mstate_global->__pyx_n_s_si +#define __pyx_n_s_slots __pyx_mstate_global->__pyx_n_s_slots +#define __pyx_n_s_spec __pyx_mstate_global->__pyx_n_s_spec +#define __pyx_n_s_sseqid __pyx_mstate_global->__pyx_n_s_sseqid +#define __pyx_n_s_sstart __pyx_mstate_global->__pyx_n_s_sstart +#define __pyx_n_s_sstop __pyx_mstate_global->__pyx_n_s_sstop +#define __pyx_kp_s_stringsource __pyx_mstate_global->__pyx_kp_s_stringsource +#define __pyx_n_s_subject __pyx_mstate_global->__pyx_n_s_subject +#define __pyx_n_s_sys __pyx_mstate_global->__pyx_n_s_sys +#define __pyx_n_s_test __pyx_mstate_global->__pyx_n_s_test +#define __pyx_kp_s_that_comparison_not_implemented __pyx_mstate_global->__pyx_kp_s_that_comparison_not_implemented +#define __pyx_n_s_throw __pyx_mstate_global->__pyx_n_s_throw +#define __pyx_n_s_wrap __pyx_mstate_global->__pyx_n_s_wrap +#define __pyx_int_2 __pyx_mstate_global->__pyx_int_2 +#define __pyx_int_12 __pyx_mstate_global->__pyx_int_12 +#define __pyx_tuple_ __pyx_mstate_global->__pyx_tuple_ +#define __pyx_slice__4 __pyx_mstate_global->__pyx_slice__4 +#define __pyx_tuple__3 __pyx_mstate_global->__pyx_tuple__3 +#define __pyx_tuple__7 __pyx_mstate_global->__pyx_tuple__7 +#define __pyx_tuple__9 __pyx_mstate_global->__pyx_tuple__9 +#define __pyx_tuple__11 __pyx_mstate_global->__pyx_tuple__11 +#define __pyx_codeobj__2 __pyx_mstate_global->__pyx_codeobj__2 +#define __pyx_codeobj__8 __pyx_mstate_global->__pyx_codeobj__8 +#define __pyx_codeobj__10 __pyx_mstate_global->__pyx_codeobj__10 +#define __pyx_codeobj__12 
__pyx_mstate_global->__pyx_codeobj__12 +/* #### Code section: module_code ### */ + +/* "cfunc.to_py":67 + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +PyDoc_STRVAR(__pyx_doc_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap, "wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'"); +static PyMethodDef 
__pyx_mdef_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap = {"wrap", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap}; +static PyObject *__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + char *__pyx_v_query; + char *__pyx_v_subject; + float __pyx_v_pctid; + int __pyx_v_hitlen; + int __pyx_v_nmismatch; + int __pyx_v_ngaps; + int __pyx_v_qstart; + int __pyx_v_qstop; + int __pyx_v_sstart; + int __pyx_v_sstop; + float __pyx_v_evalue; + float __pyx_v_score; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[12] = {0,0,0,0,0,0,0,0,0,0,0,0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("wrap (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = 
{&__pyx_n_s_query,&__pyx_n_s_subject,&__pyx_n_s_pctid,&__pyx_n_s_hitlen,&__pyx_n_s_nmismatch,&__pyx_n_s_ngaps,&__pyx_n_s_qstart,&__pyx_n_s_qstop,&__pyx_n_s_sstart,&__pyx_n_s_sstop,&__pyx_n_s_evalue,&__pyx_n_s_score,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 12: values[11] = __Pyx_Arg_FASTCALL(__pyx_args, 11); + CYTHON_FALLTHROUGH; + case 11: values[10] = __Pyx_Arg_FASTCALL(__pyx_args, 10); + CYTHON_FALLTHROUGH; + case 10: values[9] = __Pyx_Arg_FASTCALL(__pyx_args, 9); + CYTHON_FALLTHROUGH; + case 9: values[8] = __Pyx_Arg_FASTCALL(__pyx_args, 8); + CYTHON_FALLTHROUGH; + case 8: values[7] = __Pyx_Arg_FASTCALL(__pyx_args, 7); + CYTHON_FALLTHROUGH; + case 7: values[6] = __Pyx_Arg_FASTCALL(__pyx_args, 6); + CYTHON_FALLTHROUGH; + case 6: values[5] = __Pyx_Arg_FASTCALL(__pyx_args, 5); + CYTHON_FALLTHROUGH; + case 5: values[4] = __Pyx_Arg_FASTCALL(__pyx_args, 4); + CYTHON_FALLTHROUGH; + case 4: values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3); + CYTHON_FALLTHROUGH; + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_query)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_subject)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[1]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 
1); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (likely((values[2] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pctid)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[2]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 2); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 3: + if (likely((values[3] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_hitlen)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[3]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 3); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 4: + if (likely((values[4] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_nmismatch)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[4]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 4); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 5: + if (likely((values[5] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_ngaps)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[5]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 5); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 6: + if (likely((values[6] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_qstart)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[6]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 6); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 7: + if (likely((values[7] = 
__Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_qstop)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[7]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 7); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 8: + if (likely((values[8] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_sstart)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[8]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 8); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 9: + if (likely((values[9] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_sstop)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[9]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 9); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 10: + if (likely((values[10] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_evalue)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[10]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 10); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 11: + if (likely((values[11] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_score)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[11]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 11); __PYX_ERR(1, 67, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, 
kwd_pos_args, "wrap") < 0)) __PYX_ERR(1, 67, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 12)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3); + values[4] = __Pyx_Arg_FASTCALL(__pyx_args, 4); + values[5] = __Pyx_Arg_FASTCALL(__pyx_args, 5); + values[6] = __Pyx_Arg_FASTCALL(__pyx_args, 6); + values[7] = __Pyx_Arg_FASTCALL(__pyx_args, 7); + values[8] = __Pyx_Arg_FASTCALL(__pyx_args, 8); + values[9] = __Pyx_Arg_FASTCALL(__pyx_args, 9); + values[10] = __Pyx_Arg_FASTCALL(__pyx_args, 10); + values[11] = __Pyx_Arg_FASTCALL(__pyx_args, 11); + } + __pyx_v_query = __Pyx_PyObject_AsWritableString(values[0]); if (unlikely((!__pyx_v_query) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_subject = __Pyx_PyObject_AsWritableString(values[1]); if (unlikely((!__pyx_v_subject) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_pctid = __pyx_PyFloat_AsFloat(values[2]); if (unlikely((__pyx_v_pctid == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_hitlen = __Pyx_PyInt_As_int(values[3]); if (unlikely((__pyx_v_hitlen == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_nmismatch = __Pyx_PyInt_As_int(values[4]); if (unlikely((__pyx_v_nmismatch == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_ngaps = __Pyx_PyInt_As_int(values[5]); if (unlikely((__pyx_v_ngaps == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_qstart = __Pyx_PyInt_As_int(values[6]); if (unlikely((__pyx_v_qstart == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_qstop = __Pyx_PyInt_As_int(values[7]); if (unlikely((__pyx_v_qstop == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_sstart = __Pyx_PyInt_As_int(values[8]); if 
(unlikely((__pyx_v_sstart == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_sstop = __Pyx_PyInt_As_int(values[9]); if (unlikely((__pyx_v_sstop == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_evalue = __pyx_PyFloat_AsFloat(values[10]); if (unlikely((__pyx_v_evalue == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_score = __pyx_PyFloat_AsFloat(values[11]); if (unlikely((__pyx_v_score == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, __pyx_nargs); __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc.wrap", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(__pyx_self, __pyx_v_query, __pyx_v_subject, __pyx_v_pctid, __pyx_v_hitlen, __pyx_v_nmismatch, __pyx_v_ngaps, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_score); + + /* function exit code */ + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject 
*__pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(PyObject *__pyx_self, char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score) { + struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_cur_scope; + struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_outer_scope; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("wrap", 1); + __pyx_outer_scope = (struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *) __Pyx_CyFunction_GetClosure(__pyx_self); + __pyx_cur_scope = __pyx_outer_scope; + + /* "cfunc.to_py":69 + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) # <<<<<<<<<<<<<< + * return wrap + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = ((PyObject *)__pyx_cur_scope->__pyx_v_f(__pyx_v_query, 
__pyx_v_subject, __pyx_v_pctid, __pyx_v_hitlen, __pyx_v_nmismatch, __pyx_v_ngaps, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_score)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 69, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "cfunc.to_py":67 + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc.wrap", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "cfunc.to_py":66 + * + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object 
__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + */ + +static PyObject *__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*__pyx_v_f)(char *, char *, float, int, int, int, int, int, int, int, float, float)) { + struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_cur_scope; + PyObject *__pyx_v_wrap = 0; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", 0); + __pyx_cur_scope = (struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc 
*)__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, __pyx_empty_tuple, NULL); + if (unlikely(!__pyx_cur_scope)) { + __pyx_cur_scope = ((struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *)Py_None); + __Pyx_INCREF(Py_None); + __PYX_ERR(1, 66, __pyx_L1_error) + } else { + __Pyx_GOTREF((PyObject *)__pyx_cur_scope); + } + __pyx_cur_scope->__pyx_v_f = __pyx_v_f; + + /* "cfunc.to_py":67 + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) + */ + __pyx_t_1 = __Pyx_CyFunction_New(&__pyx_mdef_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap, 0, __pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma, 
((PyObject*)__pyx_cur_scope), __pyx_n_s_cfunc_to_py, __pyx_d, ((PyObject *)__pyx_codeobj__2)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 67, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_wrap = __pyx_t_1; + __pyx_t_1 = 0; + + /* "cfunc.to_py":70 + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) + * return wrap # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_wrap); + __pyx_r = __pyx_v_wrap; + goto __pyx_L0; + + /* "cfunc.to_py":66 + * + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_wrap); + __Pyx_DECREF((PyObject *)__pyx_cur_scope); + __Pyx_XGIVEREF(__pyx_r); + 
__Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "carray.from_py":79 + * + * @cname("__Pyx_carray_from_py_char") + * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: # <<<<<<<<<<<<<< + * cdef Py_ssize_t i = length + * try: + */ + +static int __Pyx_carray_from_py_char(PyObject *__pyx_v_o, char *__pyx_v_v, Py_ssize_t __pyx_v_length) { + Py_ssize_t __pyx_v_i; + PyObject *__pyx_v_item = NULL; + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + Py_ssize_t __pyx_t_4; + int __pyx_t_5; + int __pyx_t_6; + PyObject *__pyx_t_7 = NULL; + Py_ssize_t __pyx_t_8; + PyObject *(*__pyx_t_9)(PyObject *); + PyObject *__pyx_t_10 = NULL; + char __pyx_t_11; + char const *__pyx_t_12; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__Pyx_carray_from_py_char", 1); + + /* "carray.from_py":80 + * @cname("__Pyx_carray_from_py_char") + * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: + * cdef Py_ssize_t i = length # <<<<<<<<<<<<<< + * try: + * i = len(o) + */ + __pyx_v_i = __pyx_v_length; + + /* "carray.from_py":81 + * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: + * cdef Py_ssize_t i = length + * try: # <<<<<<<<<<<<<< + * i = len(o) + * except (TypeError, OverflowError): + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "carray.from_py":82 + * cdef Py_ssize_t i = length + * try: + * i = len(o) # <<<<<<<<<<<<<< + * except (TypeError, OverflowError): + * pass + */ + __pyx_t_4 = PyObject_Length(__pyx_v_o); if (unlikely(__pyx_t_4 == ((Py_ssize_t)-1))) __PYX_ERR(1, 82, __pyx_L3_error) + __pyx_v_i = __pyx_t_4; + + /* "carray.from_py":81 + * 
cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: + * cdef Py_ssize_t i = length + * try: # <<<<<<<<<<<<<< + * i = len(o) + * except (TypeError, OverflowError): + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "carray.from_py":83 + * try: + * i = len(o) + * except (TypeError, OverflowError): # <<<<<<<<<<<<<< + * pass + * if i == length: + */ + __pyx_t_5 = __Pyx_PyErr_ExceptionMatches2(__pyx_builtin_TypeError, __pyx_builtin_OverflowError); + if (__pyx_t_5) { + __Pyx_ErrRestore(0,0,0); + goto __pyx_L4_exception_handled; + } + goto __pyx_L5_except_error; + + /* "carray.from_py":81 + * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: + * cdef Py_ssize_t i = length + * try: # <<<<<<<<<<<<<< + * i = len(o) + * except (TypeError, OverflowError): + */ + __pyx_L5_except_error:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L4_exception_handled:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + __pyx_L8_try_end:; + } + + /* "carray.from_py":85 + * except (TypeError, OverflowError): + * pass + * if i == length: # <<<<<<<<<<<<<< + * for i, item in enumerate(o): + * if i >= length: + */ + __pyx_t_6 = (__pyx_v_i == __pyx_v_length); + if (__pyx_t_6) { + + /* "carray.from_py":86 + * pass + * if i == length: + * for i, item in enumerate(o): # <<<<<<<<<<<<<< + * if i >= length: + * break + */ + __pyx_t_4 = 0; + if (likely(PyList_CheckExact(__pyx_v_o)) || PyTuple_CheckExact(__pyx_v_o)) { + __pyx_t_7 = __pyx_v_o; __Pyx_INCREF(__pyx_t_7); + __pyx_t_8 = 0; + __pyx_t_9 = NULL; + } else { + __pyx_t_8 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_v_o); if 
(unlikely(!__pyx_t_7)) __PYX_ERR(1, 86, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_9 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 86, __pyx_L1_error) + } + for (;;) { + if (likely(!__pyx_t_9)) { + if (likely(PyList_CheckExact(__pyx_t_7))) { + { + Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_7); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(1, 86, __pyx_L1_error) + #endif + if (__pyx_t_8 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_10 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_8); __Pyx_INCREF(__pyx_t_10); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(1, 86, __pyx_L1_error) + #else + __pyx_t_10 = __Pyx_PySequence_ITEM(__pyx_t_7, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 86, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_10); + #endif + } else { + { + Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_7); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(1, 86, __pyx_L1_error) + #endif + if (__pyx_t_8 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_8); __Pyx_INCREF(__pyx_t_10); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(1, 86, __pyx_L1_error) + #else + __pyx_t_10 = __Pyx_PySequence_ITEM(__pyx_t_7, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 86, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_10); + #endif + } + } else { + __pyx_t_10 = __pyx_t_9(__pyx_t_7); + if (unlikely(!__pyx_t_10)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(1, 86, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_10); + } + __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_10); + __pyx_t_10 = 0; + __pyx_v_i = __pyx_t_4; + __pyx_t_4 = (__pyx_t_4 + 1); + + /* "carray.from_py":87 + * if i == 
length: + * for i, item in enumerate(o): + * if i >= length: # <<<<<<<<<<<<<< + * break + * v[i] = item + */ + __pyx_t_6 = (__pyx_v_i >= __pyx_v_length); + if (__pyx_t_6) { + + /* "carray.from_py":88 + * for i, item in enumerate(o): + * if i >= length: + * break # <<<<<<<<<<<<<< + * v[i] = item + * else: + */ + goto __pyx_L11_break; + + /* "carray.from_py":87 + * if i == length: + * for i, item in enumerate(o): + * if i >= length: # <<<<<<<<<<<<<< + * break + * v[i] = item + */ + } + + /* "carray.from_py":89 + * if i >= length: + * break + * v[i] = item # <<<<<<<<<<<<<< + * else: + * i += 1 # convert index to length + */ + __pyx_t_11 = __Pyx_PyInt_As_char(__pyx_v_item); if (unlikely((__pyx_t_11 == (char)-1) && PyErr_Occurred())) __PYX_ERR(1, 89, __pyx_L1_error) + (__pyx_v_v[__pyx_v_i]) = __pyx_t_11; + + /* "carray.from_py":86 + * pass + * if i == length: + * for i, item in enumerate(o): # <<<<<<<<<<<<<< + * if i >= length: + * break + */ + } + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + goto __pyx_L13_for_else; + __pyx_L11_break:; + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + goto __pyx_L14_for_end; + /*else*/ { + __pyx_L13_for_else:; + + /* "carray.from_py":91 + * v[i] = item + * else: + * i += 1 # convert index to length # <<<<<<<<<<<<<< + * if i == length: + * return 0 + */ + __pyx_v_i = (__pyx_v_i + 1); + + /* "carray.from_py":92 + * else: + * i += 1 # convert index to length + * if i == length: # <<<<<<<<<<<<<< + * return 0 + * + */ + __pyx_t_6 = (__pyx_v_i == __pyx_v_length); + if (__pyx_t_6) { + + /* "carray.from_py":93 + * i += 1 # convert index to length + * if i == length: + * return 0 # <<<<<<<<<<<<<< + * + * PyErr_Format( + */ + __pyx_r = 0; + goto __pyx_L0; + + /* "carray.from_py":92 + * else: + * i += 1 # convert index to length + * if i == length: # <<<<<<<<<<<<<< + * return 0 + * + */ + } + } + __pyx_L14_for_end:; + + /* "carray.from_py":85 + * except (TypeError, OverflowError): + * pass + * if i == length: # <<<<<<<<<<<<<< + * for i, item in 
enumerate(o): + * if i >= length: + */ + } + + /* "carray.from_py":98 + * IndexError, + * ("too many values found during array assignment, expected %zd" + * if i >= length else # <<<<<<<<<<<<<< + * "not enough values found during array assignment, expected %zd, got %zd"), + * length, i) + */ + __pyx_t_6 = (__pyx_v_i >= __pyx_v_length); + if (__pyx_t_6) { + __pyx_t_12 = ((char const *)"too many values found during array assignment, expected %zd"); + } else { + __pyx_t_12 = ((char const *)"not enough values found during array assignment, expected %zd, got %zd"); + } + + /* "carray.from_py":95 + * return 0 + * + * PyErr_Format( # <<<<<<<<<<<<<< + * IndexError, + * ("too many values found during array assignment, expected %zd" + */ + __pyx_t_7 = PyErr_Format(__pyx_builtin_IndexError, __pyx_t_12, __pyx_v_length, __pyx_v_i); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 95, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + + /* "carray.from_py":79 + * + * @cname("__Pyx_carray_from_py_char") + * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: # <<<<<<<<<<<<<< + * cdef Py_ssize_t i = length + * try: + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_10); + __Pyx_AddTraceback("carray.from_py.__Pyx_carray_from_py_char", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_item); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":26 + * object filename + * + * def __cinit__(self, char* filename): # <<<<<<<<<<<<<< + * self.fh = fopen(filename, 'r') + * self.filename = filename + */ + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(PyObject *__pyx_v_self, PyObject 
*__pyx_args, PyObject *__pyx_kwds) { + char *__pyx_v_filename; + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[1] = {0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return -1; + #endif + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_filename,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_filename)) != 0)) { + (void)__Pyx_Arg_NewRef_VARARGS(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 26, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__cinit__") < 0)) __PYX_ERR(0, 26, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 1)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); + } + __pyx_v_filename = __Pyx_PyObject_AsWritableString(values[0]); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) __PYX_ERR(0, 26, __pyx_L3_error) + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 26, __pyx_L3_error) + __pyx_L6_skip:; + goto 
__pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return -1; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self), __pyx_v_filename); + + /* function exit code */ + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, char *__pyx_v_filename) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__cinit__", 1); + + /* "jcvi/formats/cblast.pyx":27 + * + * def __cinit__(self, char* filename): + * self.fh = fopen(filename, 'r') # <<<<<<<<<<<<<< + * self.filename = filename + * + */ + __pyx_v_self->fh = fopen(__pyx_v_filename, ((char const *)"r")); + + /* "jcvi/formats/cblast.pyx":28 + * def __cinit__(self, char* filename): + * self.fh = fopen(filename, 'r') + * self.filename = filename # <<<<<<<<<<<<<< + * + * def __iter__(self): + */ + __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 28, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_GIVEREF(__pyx_t_1); + __Pyx_GOTREF(__pyx_v_self->filename); + __Pyx_DECREF(__pyx_v_self->filename); + __pyx_v_self->filename = __pyx_t_1; + __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":26 + * object filename + * + * def 
__cinit__(self, char* filename): # <<<<<<<<<<<<<< + * self.fh = fopen(filename, 'r') + * self.filename = filename + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":30 + * self.filename = filename + * + * def __iter__(self): # <<<<<<<<<<<<<< + * rewind(self.fh) + * return self + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__iter__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__iter__", 1); + + /* "jcvi/formats/cblast.pyx":31 + * + * def __iter__(self): + * rewind(self.fh) # <<<<<<<<<<<<<< + * return self + * + */ + rewind(__pyx_v_self->fh); + + /* "jcvi/formats/cblast.pyx":32 + * def __iter__(self): + * rewind(self.fh) + * return self # <<<<<<<<<<<<<< + * + * def __next__(self): + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_self); + __pyx_r = ((PyObject *)__pyx_v_self); + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":30 + * self.filename = filename + * + * def __iter__(self): # <<<<<<<<<<<<<< + * 
rewind(self.fh) + * return self + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":34 + * return self + * + * def __next__(self): # <<<<<<<<<<<<<< + * cdef: + * float pct = 0.0, evalue = 0.0, bit = 0.0 + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__next__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { + float __pyx_v_pct; + float __pyx_v_evalue; + float __pyx_v_bit; + char __pyx_v_qname[0x80]; + char __pyx_v_sname[0x80]; + int __pyx_v_hlen; + int __pyx_v_nmiss; + int __pyx_v_ngap; + int __pyx_v_qstart; + int __pyx_v_qstop; + int __pyx_v_sstart; + int __pyx_v_sstop; + int __pyx_v_success; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_error_without_exception = 0; /* StopIteration */ + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__next__", 1); + + /* "jcvi/formats/cblast.pyx":36 + * def __next__(self): + * cdef: + * float pct = 0.0, evalue = 0.0, bit = 0.0 # <<<<<<<<<<<<<< + * char qname[128] + * char sname[128] + */ + __pyx_v_pct = 0.0; + __pyx_v_evalue = 0.0; + __pyx_v_bit = 0.0; + + /* "jcvi/formats/cblast.pyx":43 + * int success + * + * 
success = fscanf(self.fh, blast_format_line, qname, sname, \ # <<<<<<<<<<<<<< + * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ + * &sstart, &sstop, &evalue, &bit ) + */ + __pyx_v_success = fscanf(__pyx_v_self->fh, __pyx_v_4jcvi_7formats_6cblast_blast_format_line, __pyx_v_qname, __pyx_v_sname, (&__pyx_v_pct), (&__pyx_v_hlen), (&__pyx_v_nmiss), (&__pyx_v_ngap), (&__pyx_v_qstart), (&__pyx_v_qstop), (&__pyx_v_sstart), (&__pyx_v_sstop), (&__pyx_v_evalue), (&__pyx_v_bit)); + + /* "jcvi/formats/cblast.pyx":46 + * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ + * &sstart, &sstop, &evalue, &bit ) + * if success == EOF: # <<<<<<<<<<<<<< + * raise StopIteration + * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, + */ + __pyx_t_1 = (__pyx_v_success == EOF); + if (unlikely(__pyx_t_1)) { + + /* "jcvi/formats/cblast.pyx":47 + * &sstart, &sstop, &evalue, &bit ) + * if success == EOF: + * raise StopIteration # <<<<<<<<<<<<<< + * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, + * qstart, qstop, sstart, sstop, evalue, bit) + */ + __pyx_error_without_exception = 1; + goto __pyx_L1_error;; + + /* "jcvi/formats/cblast.pyx":46 + * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ + * &sstart, &sstop, &evalue, &bit ) + * if success == EOF: # <<<<<<<<<<<<<< + * raise StopIteration + * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, + */ + } + + /* "jcvi/formats/cblast.pyx":48 + * if success == EOF: + * raise StopIteration + * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, # <<<<<<<<<<<<<< + * qstart, qstop, sstart, sstop, evalue, bit) + * + */ + __Pyx_XDECREF(__pyx_r); + + /* "jcvi/formats/cblast.pyx":49 + * raise StopIteration + * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, + * qstart, qstop, sstart, sstop, evalue, bit) # <<<<<<<<<<<<<< + * + * def __dealloc__(self): + */ + __pyx_t_2 = ((PyObject *)__pyx_f_4jcvi_7formats_6cblast_create_blast_line(__pyx_v_qname, __pyx_v_sname, __pyx_v_pct, __pyx_v_hlen, 
__pyx_v_nmiss, __pyx_v_ngap, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_bit)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 48, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":34 + * return self + * + * def __next__(self): # <<<<<<<<<<<<<< + * cdef: + * float pct = 0.0, evalue = 0.0, bit = 0.0 + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + if (!__pyx_error_without_exception) { + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__next__", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":51 + * qstart, qstop, sstart, sstop, evalue, bit) + * + * def __dealloc__(self): # <<<<<<<<<<<<<< + * fclose(self.fh) + * + */ + +/* Python wrapper */ +static void __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(PyObject *__pyx_v_self); /*proto*/ +static void __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); +} + +static void __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { + + /* "jcvi/formats/cblast.pyx":52 + * + * def __dealloc__(self): + * fclose(self.fh) # <<<<<<<<<<<<<< + * + * def __repr__(self): + */ + (void)(fclose(__pyx_v_self->fh)); + + /* "jcvi/formats/cblast.pyx":51 + * qstart, qstop, sstart, sstop, evalue, bit) + * + * def __dealloc__(self): # <<<<<<<<<<<<<< + * fclose(self.fh) + * + */ + + /* 
function exit code */ +} + +/* "jcvi/formats/cblast.pyx":54 + * fclose(self.fh) + * + * def __repr__(self): # <<<<<<<<<<<<<< + * return "Blast('%s')" % (self.filename, ) + * + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__repr__", 1); + + /* "jcvi/formats/cblast.pyx":55 + * + * def __repr__(self): + * return "Blast('%s')" % (self.filename, ) # <<<<<<<<<<<<<< + * + * # Python 2 and 3 differ in str and unicode handling + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 55, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_INCREF(__pyx_v_self->filename); + __Pyx_GIVEREF(__pyx_v_self->filename); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_self->filename)) __PYX_ERR(0, 55, __pyx_L1_error); + __pyx_t_2 = __Pyx_PyString_Format(__pyx_kp_s_Blast_s, __pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 55, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* 
"jcvi/formats/cblast.pyx":54 + * fclose(self.fh) + * + * def __repr__(self): # <<<<<<<<<<<<<< + * return "Blast('%s')" % (self.filename, ) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "(tree fragment)":1 + * def __reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__ = {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = 
__Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + if (unlikely(__pyx_nargs > 0)) { + __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} + if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__reduce_cython__", 1); + + /* "(tree fragment)":2 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< + * def __setstate_cython__(self, __pyx_state): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); + __PYX_ERR(1, 2, __pyx_L1_error) + + /* "(tree fragment)":1 + * def __reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no 
default __reduce__ due to non-trivial __cinit__" + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__ = {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[1] = {0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = 
__Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 1)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + } + __pyx_v___pyx_state = values[0]; + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self), __pyx_v___pyx_state); + + /* function exit code */ + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { + PyObject *__pyx_r = NULL; + 
__Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__setstate_cython__", 1); + + /* "(tree fragment)":4 + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< + */ + __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); + __PYX_ERR(1, 4, __pyx_L1_error) + + /* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":59 + * # Python 2 and 3 differ in str and unicode handling + * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython + * cdef bytes c_str(str s): # <<<<<<<<<<<<<< + * return s.encode("UTF-8") + * + */ + +static PyObject *__pyx_f_4jcvi_7formats_6cblast_c_str(PyObject *__pyx_v_s) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("c_str", 1); + + /* "jcvi/formats/cblast.pyx":60 + * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython + * cdef bytes c_str(str s): + * return s.encode("UTF-8") # <<<<<<<<<<<<<< + * + * cdef str py_str(bytes s): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PyString_Type_encode, __pyx_v_s, __pyx_kp_s_UTF_8); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(0, 60, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(PyBytes_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("bytes", __pyx_t_1))) __PYX_ERR(0, 60, __pyx_L1_error) + __pyx_r = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":59 + * # Python 2 and 3 differ in str and unicode handling + * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython + * cdef bytes c_str(str s): # <<<<<<<<<<<<<< + * return s.encode("UTF-8") + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.c_str", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":62 + * return s.encode("UTF-8") + * + * cdef str py_str(bytes s): # <<<<<<<<<<<<<< + * return s.decode("UTF-8", "replace") + * + */ + +static PyObject *__pyx_f_4jcvi_7formats_6cblast_py_str(PyObject *__pyx_v_s) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("py_str", 1); + + /* "jcvi/formats/cblast.pyx":63 + * + * cdef str py_str(bytes s): + * return s.decode("UTF-8", "replace") # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + if (unlikely(__pyx_v_s == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "decode"); + __PYX_ERR(0, 63, __pyx_L1_error) + } + __pyx_t_1 = __Pyx_decode_bytes(__pyx_v_s, 0, PY_SSIZE_T_MAX, NULL, ((char const *)"replace"), PyUnicode_DecodeUTF8); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 63, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(PyString_CheckExact(__pyx_t_1)) || __Pyx_RaiseUnexpectedTypeError("str", __pyx_t_1))) __PYX_ERR(0, 63, __pyx_L1_error) + __pyx_r = 
((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":62 + * return s.encode("UTF-8") + * + * cdef str py_str(bytes s): # <<<<<<<<<<<<<< + * return s.decode("UTF-8", "replace") + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.py_str", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":95 + * + * property query: + * def __get__(self): # <<<<<<<<<<<<<< + * return py_str(self._query) + * def __set__(self, val: str): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + + /* "jcvi/formats/cblast.pyx":96 + * property query: + * def __get__(self): + * return py_str(self._query) # <<<<<<<<<<<<<< + * def __set__(self, val: str): + * strcpy(self._query, c_str(val)) + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__Pyx_PyObject_FromString(__pyx_v_self->_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 96, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 96, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":95 + * + * property query: + * def __get__(self): # <<<<<<<<<<<<<< + * return py_str(self._query) + * def __set__(self, val: str): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.query.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":97 + * def __get__(self): + * return py_str(self._query) + * def __set__(self, val: str): # <<<<<<<<<<<<<< + * strcpy(self._query, c_str(val)) + * + */ + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_val), (&PyString_Type), 0, "val", 1))) __PYX_ERR(0, 97, __pyx_L1_error) + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject*)__pyx_v_val)); + + /* function exit code */ + goto __pyx_L0; + 
__pyx_L1_error:; + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + char const *__pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__set__", 1); + + /* "jcvi/formats/cblast.pyx":98 + * return py_str(self._query) + * def __set__(self, val: str): + * strcpy(self._query, c_str(val)) # <<<<<<<<<<<<<< + * + * property subject: + */ + __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(__pyx_v_val); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 98, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (unlikely(__pyx_t_1 == Py_None)) { + PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); + __PYX_ERR(0, 98, __pyx_L1_error) + } + __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_t_1); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 98, __pyx_L1_error) + (void)(strcpy(__pyx_v_self->_query, __pyx_t_2)); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":97 + * def __get__(self): + * return py_str(self._query) + * def __set__(self, val: str): # <<<<<<<<<<<<<< + * strcpy(self._query, c_str(val)) + * + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.query.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":101 + * + * property subject: + * def __get__(self): # <<<<<<<<<<<<<< + * return py_str(self._subject) + * def __set__(self, val: str): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(PyObject *__pyx_v_self); /*proto*/ 
+static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + + /* "jcvi/formats/cblast.pyx":102 + * property subject: + * def __get__(self): + * return py_str(self._subject) # <<<<<<<<<<<<<< + * def __set__(self, val: str): + * strcpy(self._subject, c_str(val)) + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":101 + * + * property subject: + * def __get__(self): # <<<<<<<<<<<<<< + * return py_str(self._subject) + * def __set__(self, val: str): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.subject.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + 
__pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":103 + * def __get__(self): + * return py_str(self._subject) + * def __set__(self, val: str): # <<<<<<<<<<<<<< + * strcpy(self._subject, c_str(val)) + * + */ + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_val), (&PyString_Type), 0, "val", 1))) __PYX_ERR(0, 103, __pyx_L1_error) + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject*)__pyx_v_val)); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + char const *__pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__set__", 1); + + /* "jcvi/formats/cblast.pyx":104 + * return py_str(self._subject) + * def __set__(self, val: str): + * strcpy(self._subject, c_str(val)) # <<<<<<<<<<<<<< + * + * def __init__(self, s): + */ + __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(__pyx_v_val); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(0, 104, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (unlikely(__pyx_t_1 == Py_None)) { + PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); + __PYX_ERR(0, 104, __pyx_L1_error) + } + __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_t_1); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 104, __pyx_L1_error) + (void)(strcpy(__pyx_v_self->_subject, __pyx_t_2)); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":103 + * def __get__(self): + * return py_str(self._subject) + * def __set__(self, val: str): # <<<<<<<<<<<<<< + * strcpy(self._subject, c_str(val)) + * + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.subject.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":106 + * strcpy(self._subject, c_str(val)) + * + * def __init__(self, s): # <<<<<<<<<<<<<< + * sline = c_str(s) + * sscanf(sline, blast_format, self._query, self._subject, + */ + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_s = 0; + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[1] = {0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__init__ (wrapper)", 0); + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return -1; + #endif + __pyx_kwvalues = 
__Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_s,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_s)) != 0)) { + (void)__Pyx_Arg_NewRef_VARARGS(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 106, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__init__") < 0)) __PYX_ERR(0, 106, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 1)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); + } + __pyx_v_s = values[0]; + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 106, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return -1; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), __pyx_v_s); + + /* function exit code */ + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + 
__Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_s) { + PyObject *__pyx_v_sline = NULL; + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + char const *__pyx_t_2; + int __pyx_t_3; + int __pyx_t_4; + int __pyx_t_5; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__init__", 1); + + /* "jcvi/formats/cblast.pyx":107 + * + * def __init__(self, s): + * sline = c_str(s) # <<<<<<<<<<<<<< + * sscanf(sline, blast_format, self._query, self._subject, + * &self.pctid, &self.hitlen, &self.nmismatch, &self.ngaps, + */ + if (!(likely(PyString_CheckExact(__pyx_v_s))||((__pyx_v_s) == Py_None) || __Pyx_RaiseUnexpectedTypeError("str", __pyx_v_s))) __PYX_ERR(0, 107, __pyx_L1_error) + __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(((PyObject*)__pyx_v_s)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 107, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_sline = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":108 + * def __init__(self, s): + * sline = c_str(s) + * sscanf(sline, blast_format, self._query, self._subject, # <<<<<<<<<<<<<< + * &self.pctid, &self.hitlen, &self.nmismatch, &self.ngaps, + * &self.qstart, &self.qstop, + */ + if (unlikely(__pyx_v_sline == Py_None)) { + PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); + __PYX_ERR(0, 108, __pyx_L1_error) + } + __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_v_sline); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 108, __pyx_L1_error) + + /* "jcvi/formats/cblast.pyx":112 + * &self.qstart, &self.qstop, + * &self.sstart, &self.sstop, + * &self.evalue, &self.score) # <<<<<<<<<<<<<< + * + * self.orientation = '+' + */ + (void)(sscanf(__pyx_t_2, __pyx_v_4jcvi_7formats_6cblast_blast_format, 
__pyx_v_self->_query, __pyx_v_self->_subject, (&__pyx_v_self->pctid), (&__pyx_v_self->hitlen), (&__pyx_v_self->nmismatch), (&__pyx_v_self->ngaps), (&__pyx_v_self->qstart), (&__pyx_v_self->qstop), (&__pyx_v_self->sstart), (&__pyx_v_self->sstop), (&__pyx_v_self->evalue), (&__pyx_v_self->score))); + + /* "jcvi/formats/cblast.pyx":114 + * &self.evalue, &self.score) + * + * self.orientation = '+' # <<<<<<<<<<<<<< + * if self.qstart > self.qstop: + * self.qstart, self.qstop = self.qstop, self.qstart + */ + __pyx_v_self->orientation = '+'; + + /* "jcvi/formats/cblast.pyx":115 + * + * self.orientation = '+' + * if self.qstart > self.qstop: # <<<<<<<<<<<<<< + * self.qstart, self.qstop = self.qstop, self.qstart + * self.orientation = '-' + */ + __pyx_t_3 = (__pyx_v_self->qstart > __pyx_v_self->qstop); + if (__pyx_t_3) { + + /* "jcvi/formats/cblast.pyx":116 + * self.orientation = '+' + * if self.qstart > self.qstop: + * self.qstart, self.qstop = self.qstop, self.qstart # <<<<<<<<<<<<<< + * self.orientation = '-' + * if self.sstart > self.sstop: + */ + __pyx_t_4 = __pyx_v_self->qstop; + __pyx_t_5 = __pyx_v_self->qstart; + __pyx_v_self->qstart = __pyx_t_4; + __pyx_v_self->qstop = __pyx_t_5; + + /* "jcvi/formats/cblast.pyx":117 + * if self.qstart > self.qstop: + * self.qstart, self.qstop = self.qstop, self.qstart + * self.orientation = '-' # <<<<<<<<<<<<<< + * if self.sstart > self.sstop: + * self.sstart, self.sstop = self.sstop, self.sstart + */ + __pyx_v_self->orientation = '-'; + + /* "jcvi/formats/cblast.pyx":115 + * + * self.orientation = '+' + * if self.qstart > self.qstop: # <<<<<<<<<<<<<< + * self.qstart, self.qstop = self.qstop, self.qstart + * self.orientation = '-' + */ + } + + /* "jcvi/formats/cblast.pyx":118 + * self.qstart, self.qstop = self.qstop, self.qstart + * self.orientation = '-' + * if self.sstart > self.sstop: # <<<<<<<<<<<<<< + * self.sstart, self.sstop = self.sstop, self.sstart + * self.orientation = '-' + */ + __pyx_t_3 = (__pyx_v_self->sstart > 
__pyx_v_self->sstop); + if (__pyx_t_3) { + + /* "jcvi/formats/cblast.pyx":119 + * self.orientation = '-' + * if self.sstart > self.sstop: + * self.sstart, self.sstop = self.sstop, self.sstart # <<<<<<<<<<<<<< + * self.orientation = '-' + * + */ + __pyx_t_5 = __pyx_v_self->sstop; + __pyx_t_4 = __pyx_v_self->sstart; + __pyx_v_self->sstart = __pyx_t_5; + __pyx_v_self->sstop = __pyx_t_4; + + /* "jcvi/formats/cblast.pyx":120 + * if self.sstart > self.sstop: + * self.sstart, self.sstop = self.sstop, self.sstart + * self.orientation = '-' # <<<<<<<<<<<<<< + * + * def __richcmp__(BlastLine self, BlastLine other, size_t op): + */ + __pyx_v_self->orientation = '-'; + + /* "jcvi/formats/cblast.pyx":118 + * self.qstart, self.qstop = self.qstop, self.qstart + * self.orientation = '-' + * if self.sstart > self.sstop: # <<<<<<<<<<<<<< + * self.sstart, self.sstop = self.sstop, self.sstart + * self.orientation = '-' + */ + } + + /* "jcvi/formats/cblast.pyx":106 + * strcpy(self._subject, c_str(val)) + * + * def __init__(self, s): # <<<<<<<<<<<<<< + * sline = c_str(s) + * sscanf(sline, blast_format, self._query, self._subject, + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_sline); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":122 + * self.orientation = '-' + * + * def __richcmp__(BlastLine self, BlastLine other, size_t op): # <<<<<<<<<<<<<< + * if op == 2: # == + * if self.query != other.query and self.qstart != other.qstart: + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__(PyObject *__pyx_v_self, PyObject *__pyx_v_other, int __pyx_arg_op); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_other, int __pyx_arg_op) { + size_t __pyx_v_op; + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__richcmp__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_v_op = __pyx_arg_op; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_other), __pyx_ptype_4jcvi_7formats_6cblast_BlastLine, 1, "other", 0))) __PYX_ERR(0, 122, __pyx_L1_error) + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_other), ((size_t)__pyx_v_op)); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_other, size_t __pyx_v_op) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + int __pyx_t_5; + PyObject *__pyx_t_6 = NULL; + unsigned int __pyx_t_7; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__richcmp__", 1); + + /* "jcvi/formats/cblast.pyx":123 + * + * def __richcmp__(BlastLine self, BlastLine other, size_t op): + * if op == 2: # == # <<<<<<<<<<<<<< + * if self.query != other.query and self.qstart != other.qstart: + * return False + */ + switch (__pyx_v_op) { + case 2: + + /* "jcvi/formats/cblast.pyx":124 + * def __richcmp__(BlastLine self, BlastLine other, size_t op): + * if op == 2: # == + * if self.query != other.query and self.qstart != other.qstart: # 
<<<<<<<<<<<<<< + * return False + * return self.subject == other.subject and \ + */ + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 124, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_other), __pyx_n_s_query); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 124, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_t_2, __pyx_t_3, Py_NE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 124, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 124, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_5) { + } else { + __pyx_t_1 = __pyx_t_5; + goto __pyx_L4_bool_binop_done; + } + __pyx_t_5 = (__pyx_v_self->qstart != __pyx_v_other->qstart); + __pyx_t_1 = __pyx_t_5; + __pyx_L4_bool_binop_done:; + if (__pyx_t_1) { + + /* "jcvi/formats/cblast.pyx":125 + * if op == 2: # == + * if self.query != other.query and self.qstart != other.qstart: + * return False # <<<<<<<<<<<<<< + * return self.subject == other.subject and \ + * self.qstop == other.qstop and \ + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(Py_False); + __pyx_r = Py_False; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":124 + * def __richcmp__(BlastLine self, BlastLine other, size_t op): + * if op == 2: # == + * if self.query != other.query and self.qstart != other.qstart: # <<<<<<<<<<<<<< + * return False + * return self.subject == other.subject and \ + */ + } + + /* "jcvi/formats/cblast.pyx":126 + * if self.query != other.query and self.qstart != other.qstart: + * return False + * return self.subject == other.subject and \ # <<<<<<<<<<<<<< + * self.qstop == other.qstop and \ + * self.sstop == other.sstop and \ + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_3 = 
__Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 126, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_other), __pyx_n_s_subject); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 126, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_6 = PyObject_RichCompare(__pyx_t_3, __pyx_t_2, Py_EQ); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 126, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 126, __pyx_L1_error) + if (__pyx_t_1) { + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + } else { + __Pyx_INCREF(__pyx_t_6); + __pyx_t_4 = __pyx_t_6; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + goto __pyx_L6_bool_binop_done; + } + + /* "jcvi/formats/cblast.pyx":127 + * return False + * return self.subject == other.subject and \ + * self.qstop == other.qstop and \ # <<<<<<<<<<<<<< + * self.sstop == other.sstop and \ + * self.evalue == other.evalue and \ + */ + __pyx_t_1 = (__pyx_v_self->qstop == __pyx_v_other->qstop); + if (__pyx_t_1) { + } else { + __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 127, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_4 = __pyx_t_6; + __pyx_t_6 = 0; + goto __pyx_L6_bool_binop_done; + } + + /* "jcvi/formats/cblast.pyx":128 + * return self.subject == other.subject and \ + * self.qstop == other.qstop and \ + * self.sstop == other.sstop and \ # <<<<<<<<<<<<<< + * self.evalue == other.evalue and \ + * self.hitlen == other.hitlen + */ + __pyx_t_1 = (__pyx_v_self->sstop == __pyx_v_other->sstop); + if (__pyx_t_1) { + } else { + __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 128, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_4 = __pyx_t_6; + __pyx_t_6 = 0; + goto __pyx_L6_bool_binop_done; + } + + /* 
"jcvi/formats/cblast.pyx":129 + * self.qstop == other.qstop and \ + * self.sstop == other.sstop and \ + * self.evalue == other.evalue and \ # <<<<<<<<<<<<<< + * self.hitlen == other.hitlen + * + */ + __pyx_t_1 = (__pyx_v_self->evalue == __pyx_v_other->evalue); + if (__pyx_t_1) { + } else { + __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 129, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_4 = __pyx_t_6; + __pyx_t_6 = 0; + goto __pyx_L6_bool_binop_done; + } + + /* "jcvi/formats/cblast.pyx":130 + * self.sstop == other.sstop and \ + * self.evalue == other.evalue and \ + * self.hitlen == other.hitlen # <<<<<<<<<<<<<< + * + * elif op == 3: # != + */ + __pyx_t_1 = (__pyx_v_self->hitlen == __pyx_v_other->hitlen); + __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 130, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_4 = __pyx_t_6; + __pyx_t_6 = 0; + __pyx_L6_bool_binop_done:; + __pyx_r = __pyx_t_4; + __pyx_t_4 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":123 + * + * def __richcmp__(BlastLine self, BlastLine other, size_t op): + * if op == 2: # == # <<<<<<<<<<<<<< + * if self.query != other.query and self.qstart != other.qstart: + * return False + */ + break; + case 3: + + /* "jcvi/formats/cblast.pyx":133 + * + * elif op == 3: # != + * return not self.__richcmp__(other, 2) # <<<<<<<<<<<<<< + * else: + * raise Exception("that comparison not implemented") + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_richcmp); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 133, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_2 = NULL; + __pyx_t_7 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_6); + if (likely(__pyx_t_2)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); + __Pyx_INCREF(__pyx_t_2); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_6, function); + 
__pyx_t_7 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[3] = {__pyx_t_2, ((PyObject *)__pyx_v_other), __pyx_int_2}; + __pyx_t_4 = __Pyx_PyObject_FastCall(__pyx_t_6, __pyx_callargs+1-__pyx_t_7, 2+__pyx_t_7); + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 133, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + } + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 133, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_PyBool_FromLong((!__pyx_t_1)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 133, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_r = __pyx_t_4; + __pyx_t_4 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":132 + * self.hitlen == other.hitlen + * + * elif op == 3: # != # <<<<<<<<<<<<<< + * return not self.__richcmp__(other, 2) + * else: + */ + break; + default: + + /* "jcvi/formats/cblast.pyx":135 + * return not self.__richcmp__(other, 2) + * else: + * raise Exception("that comparison not implemented") # <<<<<<<<<<<<<< + * + * def __hash__(self): + */ + __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])), __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 135, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_Raise(__pyx_t_4, 0, 0, 0); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __PYX_ERR(0, 135, __pyx_L1_error) + break; + } + + /* "jcvi/formats/cblast.pyx":122 + * self.orientation = '-' + * + * def __richcmp__(BlastLine self, BlastLine other, size_t op): # <<<<<<<<<<<<<< + * if op == 2: # == + * if self.query != other.query and self.qstart != other.qstart: + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__richcmp__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + 
__Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":137 + * raise Exception("that comparison not implemented") + * + * def __hash__(self): # <<<<<<<<<<<<<< + * return id(self) + * + */ + +/* Python wrapper */ +static Py_hash_t __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__(PyObject *__pyx_v_self); /*proto*/ +static Py_hash_t __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + Py_hash_t __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__hash__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static Py_hash_t __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + Py_hash_t __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + Py_hash_t __pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__hash__", 1); + + /* "jcvi/formats/cblast.pyx":138 + * + * def __hash__(self): + * return id(self) # <<<<<<<<<<<<<< + * + * def __repr__(self): + */ + __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_builtin_id, ((PyObject *)__pyx_v_self)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 138, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_AsHash_t(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_hash_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 138, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = __pyx_t_2; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":137 + * raise Exception("that comparison not implemented") + * + * def __hash__(self): # <<<<<<<<<<<<<< + * return id(self) + * + */ + + /* 
function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__hash__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + if (unlikely(__pyx_r == -1) && !PyErr_Occurred()) __pyx_r = -2; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":140 + * return id(self) + * + * def __repr__(self): # <<<<<<<<<<<<<< + * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ + * (self.query, self.subject, self.evalue, self.score) + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__repr__", 1); + + /* "jcvi/formats/cblast.pyx":141 + * + * def __repr__(self): + * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ # <<<<<<<<<<<<<< + * (self.query, self.subject, self.evalue, self.score) + * + */ + __Pyx_XDECREF(__pyx_r); + + /* "jcvi/formats/cblast.pyx":142 + * def __repr__(self): 
+ * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ + * (self.query, self.subject, self.evalue, self.score) # <<<<<<<<<<<<<< + * + * def __str__(self): + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = PyTuple_New(4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_1)) __PYX_ERR(0, 142, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_2)) __PYX_ERR(0, 142, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_3); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_3)) __PYX_ERR(0, 142, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_4); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_4)) __PYX_ERR(0, 142, __pyx_L1_error); + __pyx_t_1 = 0; + __pyx_t_2 = 0; + __pyx_t_3 = 0; + __pyx_t_4 = 0; + + /* "jcvi/formats/cblast.pyx":141 + * + * def __repr__(self): + * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ # <<<<<<<<<<<<<< + * (self.query, self.subject, self.evalue, self.score) + * + */ + __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1, __pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_r = __pyx_t_4; + __pyx_t_4 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":140 + * return 
id(self) + * + * def __repr__(self): # <<<<<<<<<<<<<< + * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ + * (self.query, self.subject, self.evalue, self.score) + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":144 + * (self.query, self.subject, self.evalue, self.score) + * + * def __str__(self): # <<<<<<<<<<<<<< + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * if self.orientation == '-': + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__str__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_v_args = NULL; + char __pyx_v_result[0x200]; + PyObject *__pyx_v_attr = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + Py_ssize_t __pyx_t_4; + PyObject *(*__pyx_t_5)(PyObject *); + int __pyx_t_6; + int __pyx_lineno = 0; + const char 
*__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__str__", 1); + + /* "jcvi/formats/cblast.pyx":145 + * + * def __str__(self): + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + */ + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_n_s_slots); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetSlice(__pyx_t_2, 0, 12, NULL, NULL, &__pyx_slice__4, 0, 1, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + if (likely(PyList_CheckExact(__pyx_t_3)) || PyTuple_CheckExact(__pyx_t_3)) { + __pyx_t_2 = __pyx_t_3; __Pyx_INCREF(__pyx_t_2); + __pyx_t_4 = 0; + __pyx_t_5 = NULL; + } else { + __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_5 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 145, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + for (;;) { + if (likely(!__pyx_t_5)) { + if (likely(PyList_CheckExact(__pyx_t_2))) { + { + Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 145, __pyx_L1_error) + #endif + if (__pyx_t_4 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 145, __pyx_L1_error) + #else + __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, 
__pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } else { + { + Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 145, __pyx_L1_error) + #endif + if (__pyx_t_4 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 145, __pyx_L1_error) + #else + __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } + } else { + __pyx_t_3 = __pyx_t_5(__pyx_t_2); + if (unlikely(!__pyx_t_3)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 145, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_3); + } + __Pyx_XDECREF_SET(__pyx_v_attr, __pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_GetAttr(((PyObject *)__pyx_v_self), __pyx_v_attr); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_3))) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_v_args = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":146 + * def __str__(self): + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * if self.orientation == '-': # <<<<<<<<<<<<<< + * args[8], args[9] = args[9], args[8] + * + */ + __pyx_t_6 = (__pyx_v_self->orientation == '-'); + if (__pyx_t_6) { + + /* "jcvi/formats/cblast.pyx":147 + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] # <<<<<<<<<<<<<< + * + * cdef char result[512] + */ + 
__pyx_t_1 = PyList_GET_ITEM(__pyx_v_args, 9); + __Pyx_INCREF(__pyx_t_1); + __pyx_t_2 = PyList_GET_ITEM(__pyx_v_args, 8); + __Pyx_INCREF(__pyx_t_2); + if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 8, __pyx_t_1, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 147, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 9, __pyx_t_2, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 147, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "jcvi/formats/cblast.pyx":146 + * def __str__(self): + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * if self.orientation == '-': # <<<<<<<<<<<<<< + * args[8], args[9] = args[9], args[8] + * + */ + } + + /* "jcvi/formats/cblast.pyx":150 + * + * cdef char result[512] + * sprintf(result, blast_output, self._query, self._subject, # <<<<<<<<<<<<<< + * self.pctid, self.hitlen, self.nmismatch, self.ngaps, + * self.qstart, self.qstop, + */ + (void)(sprintf(__pyx_v_result, __pyx_v_4jcvi_7formats_6cblast_blast_output, __pyx_v_self->_query, __pyx_v_self->_subject, __pyx_v_self->pctid, __pyx_v_self->hitlen, __pyx_v_self->nmismatch, __pyx_v_self->ngaps, __pyx_v_self->qstart, __pyx_v_self->qstop, __pyx_v_self->sstart, __pyx_v_self->sstop, __pyx_v_self->evalue, __pyx_v_self->score)); + + /* "jcvi/formats/cblast.pyx":156 + * self.evalue, self.score) + * + * return py_str(result) # <<<<<<<<<<<<<< + * + * @property + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_2 = __Pyx_PyObject_FromString(__pyx_v_result); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 156, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_2)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 156, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":144 + * (self.query, self.subject, self.evalue, self.score) + * + 
* def __str__(self): # <<<<<<<<<<<<<< + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * if self.orientation == '-': + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__str__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_args); + __Pyx_XDECREF(__pyx_v_attr); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":158 + * return py_str(result) + * + * @property # <<<<<<<<<<<<<< + * def has_score(self): + * return hasattr(self, "score") + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + + /* "jcvi/formats/cblast.pyx":160 + * @property + * def has_score(self): + * return hasattr(self, "score") # <<<<<<<<<<<<<< + * + * @property + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__Pyx_HasAttr(((PyObject *)__pyx_v_self), __pyx_n_s_score); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(0, 160, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 160, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":158 + * return py_str(result) + * + * @property # <<<<<<<<<<<<<< + * def has_score(self): + * return hasattr(self, "score") + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.has_score.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":162 + * return hasattr(self, "score") + * + * @property # <<<<<<<<<<<<<< + * def swapped(self): + * """ + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} +static PyObject *__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value); /* proto */ + +/* "jcvi/formats/cblast.pyx":172 + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + * b = "\t".join(str(x) for x in args) # 
<<<<<<<<<<<<<< + * return BlastLine(b) + * + */ + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_genexpr_arg_0) { + struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_cur_scope; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("genexpr", 0); + __pyx_cur_scope = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, __pyx_empty_tuple, NULL); + if (unlikely(!__pyx_cur_scope)) { + __pyx_cur_scope = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)Py_None); + __Pyx_INCREF(Py_None); + __PYX_ERR(0, 172, __pyx_L1_error) + } else { + __Pyx_GOTREF((PyObject *)__pyx_cur_scope); + } + __pyx_cur_scope->__pyx_genexpr_arg_0 = __pyx_genexpr_arg_0; + __Pyx_INCREF(__pyx_cur_scope->__pyx_genexpr_arg_0); + __Pyx_GIVEREF(__pyx_cur_scope->__pyx_genexpr_arg_0); + { + __pyx_CoroutineObject *gen = __Pyx_Generator_New((__pyx_coroutine_body_t) __pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator, NULL, (PyObject *) __pyx_cur_scope, __pyx_n_s_genexpr, __pyx_n_s_BlastLine___get___locals_genexpr, __pyx_n_s_jcvi_formats_cblast); if (unlikely(!gen)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_DECREF(__pyx_cur_scope); + __Pyx_RefNannyFinishContext(); + return (PyObject *) gen; + } + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.swapped.__get__.genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __Pyx_DECREF((PyObject *)__pyx_cur_scope); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject 
*__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value) /* generator body */ +{ + struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_cur_scope = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)__pyx_generator->closure); + PyObject *__pyx_r = NULL; + PyObject *__pyx_t_1 = NULL; + Py_ssize_t __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("genexpr", 0); + switch (__pyx_generator->resume_label) { + case 0: goto __pyx_L3_first_run; + case 1: goto __pyx_L6_resume_from_yield; + default: /* CPython raises the right error here */ + __Pyx_RefNannyFinishContext(); + return NULL; + } + __pyx_L3_first_run:; + if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 172, __pyx_L1_error) + if (unlikely(!__pyx_cur_scope->__pyx_genexpr_arg_0)) { __Pyx_RaiseUnboundLocalError(".0"); __PYX_ERR(0, 172, __pyx_L1_error) } + __pyx_t_1 = __pyx_cur_scope->__pyx_genexpr_arg_0; __Pyx_INCREF(__pyx_t_1); + __pyx_t_2 = 0; + for (;;) { + { + Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 172, __pyx_L1_error) + #endif + if (__pyx_t_2 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely((0 < 0))) __PYX_ERR(0, 172, __pyx_L1_error) + #else + __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_x); + __Pyx_XDECREF_SET(__pyx_cur_scope->__pyx_v_x, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = 
__Pyx_PyObject_Str(__pyx_cur_scope->__pyx_v_x); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_r = __pyx_t_3; + __pyx_t_3 = 0; + __Pyx_XGIVEREF(__pyx_t_1); + __pyx_cur_scope->__pyx_t_0 = __pyx_t_1; + __pyx_cur_scope->__pyx_t_1 = __pyx_t_2; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + __Pyx_Coroutine_ResetAndClearException(__pyx_generator); + /* return from generator, yielding value */ + __pyx_generator->resume_label = 1; + return __pyx_r; + __pyx_L6_resume_from_yield:; + __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; + __pyx_cur_scope->__pyx_t_0 = 0; + __Pyx_XGOTREF(__pyx_t_1); + __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; + if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 172, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + CYTHON_MAYBE_UNUSED_VAR(__pyx_cur_scope); + + /* function exit code */ + PyErr_SetNone(PyExc_StopIteration); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_AddTraceback("genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_L0:; + __Pyx_XDECREF(__pyx_r); __pyx_r = 0; + #if !CYTHON_USE_EXC_INFO_STACK + __Pyx_Coroutine_ResetAndClearException(__pyx_generator); + #endif + __pyx_generator->resume_label = -1; + __Pyx_Coroutine_clear((PyObject*)__pyx_generator); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":162 + * return hasattr(self, "score") + * + * @property # <<<<<<<<<<<<<< + * def swapped(self): + * """ + */ + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_v_args = NULL; + PyObject *__pyx_v_b = NULL; + PyObject *__pyx_v_attr = NULL; + PyObject *__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator = 0; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; 
+ Py_ssize_t __pyx_t_4; + PyObject *(*__pyx_t_5)(PyObject *); + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + int __pyx_t_8; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + + /* "jcvi/formats/cblast.pyx":167 + * Swap query and subject. + * """ + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< + * args[0:2] = [self.subject, self.query] + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + */ + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_n_s_slots); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetSlice(__pyx_t_2, 0, 12, NULL, NULL, &__pyx_slice__4, 0, 1, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + if (likely(PyList_CheckExact(__pyx_t_3)) || PyTuple_CheckExact(__pyx_t_3)) { + __pyx_t_2 = __pyx_t_3; __Pyx_INCREF(__pyx_t_2); + __pyx_t_4 = 0; + __pyx_t_5 = NULL; + } else { + __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_5 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 167, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + for (;;) { + if (likely(!__pyx_t_5)) { + if (likely(PyList_CheckExact(__pyx_t_2))) { + { + Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 167, __pyx_L1_error) + #endif + if (__pyx_t_4 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); 
__Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 167, __pyx_L1_error) + #else + __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } else { + { + Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 167, __pyx_L1_error) + #endif + if (__pyx_t_4 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 167, __pyx_L1_error) + #else + __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } + } else { + __pyx_t_3 = __pyx_t_5(__pyx_t_2); + if (unlikely(!__pyx_t_3)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 167, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_3); + } + __Pyx_XDECREF_SET(__pyx_v_attr, __pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_GetAttr(((PyObject *)__pyx_v_self), __pyx_v_attr); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_3))) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_v_args = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":168 + * """ + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * args[0:2] = [self.subject, self.query] # <<<<<<<<<<<<<< + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + * if self.orientation == '-': + */ + __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 168, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = PyList_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 168, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 0, __pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 1, __pyx_t_2)) __PYX_ERR(0, 168, __pyx_L1_error); + __pyx_t_1 = 0; + __pyx_t_2 = 0; + if (__Pyx_PyObject_SetSlice(__pyx_v_args, __pyx_t_3, 0, 2, NULL, NULL, NULL, 1, 1, 0) < 0) __PYX_ERR(0, 168, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "jcvi/formats/cblast.pyx":169 + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * args[0:2] = [self.subject, self.query] + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] # <<<<<<<<<<<<<< + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + */ + __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_7 = PyList_New(4); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __Pyx_GIVEREF(__pyx_t_3); + if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 0, __pyx_t_3)) __PYX_ERR(0, 169, 
__pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 1, __pyx_t_2)) __PYX_ERR(0, 169, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 2, __pyx_t_1)) __PYX_ERR(0, 169, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 3, __pyx_t_6)) __PYX_ERR(0, 169, __pyx_L1_error); + __pyx_t_3 = 0; + __pyx_t_2 = 0; + __pyx_t_1 = 0; + __pyx_t_6 = 0; + if (__Pyx_PyObject_SetSlice(__pyx_v_args, __pyx_t_7, 6, 10, NULL, NULL, NULL, 1, 1, 0) < 0) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + + /* "jcvi/formats/cblast.pyx":170 + * args[0:2] = [self.subject, self.query] + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + * if self.orientation == '-': # <<<<<<<<<<<<<< + * args[8], args[9] = args[9], args[8] + * b = "\t".join(str(x) for x in args) + */ + __pyx_t_8 = (__pyx_v_self->orientation == '-'); + if (__pyx_t_8) { + + /* "jcvi/formats/cblast.pyx":171 + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] # <<<<<<<<<<<<<< + * b = "\t".join(str(x) for x in args) + * return BlastLine(b) + */ + __pyx_t_7 = PyList_GET_ITEM(__pyx_v_args, 9); + __Pyx_INCREF(__pyx_t_7); + __pyx_t_6 = PyList_GET_ITEM(__pyx_v_args, 8); + __Pyx_INCREF(__pyx_t_6); + if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 8, __pyx_t_7, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 171, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 9, __pyx_t_6, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 171, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + + /* "jcvi/formats/cblast.pyx":170 + * args[0:2] = [self.subject, self.query] + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + * if self.orientation == '-': # <<<<<<<<<<<<<< + * args[8], args[9] = args[9], args[8] + * b = 
"\t".join(str(x) for x in args) + */ + } + + /* "jcvi/formats/cblast.pyx":172 + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + * b = "\t".join(str(x) for x in args) # <<<<<<<<<<<<<< + * return BlastLine(b) + * + */ + __pyx_t_6 = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(NULL, __pyx_v_args); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_7 = __Pyx_PyString_Join(__pyx_kp_s__5, __pyx_t_6); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_v_b = ((PyObject*)__pyx_t_7); + __pyx_t_7 = 0; + + /* "jcvi/formats/cblast.pyx":173 + * args[8], args[9] = args[9], args[8] + * b = "\t".join(str(x) for x in args) + * return BlastLine(b) # <<<<<<<<<<<<<< + * + * @property + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_7 = __Pyx_PyObject_CallOneArg(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_v_b); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 173, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __pyx_r = __pyx_t_7; + __pyx_t_7 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":162 + * return hasattr(self, "score") + * + * @property # <<<<<<<<<<<<<< + * def swapped(self): + * """ + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.swapped.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_args); + __Pyx_XDECREF(__pyx_v_b); + __Pyx_XDECREF(__pyx_v_attr); + __Pyx_XDECREF(__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":175 + * return BlastLine(b) + * + * @property # <<<<<<<<<<<<<< + * def bedline(self): + * 
cdef char result[512] + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + char __pyx_v_result[0x200]; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + + /* "jcvi/formats/cblast.pyx":178 + * def bedline(self): + * cdef char result[512] + * sprintf(result, bed_output, # <<<<<<<<<<<<<< + * self._subject, self.sstart - 1, self.sstop, + * self._query, self.qstart, self.qstop, + */ + (void)(sprintf(__pyx_v_result, __pyx_v_4jcvi_7formats_6cblast_bed_output, __pyx_v_self->_subject, (__pyx_v_self->sstart - 1), __pyx_v_self->sstop, __pyx_v_self->_query, __pyx_v_self->qstart, __pyx_v_self->qstop, __pyx_v_self->score, __pyx_v_self->orientation)); + + /* "jcvi/formats/cblast.pyx":183 + * self.score, self.orientation) + * + * return py_str(result) # <<<<<<<<<<<<<< + * + * def __reduce__(self): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_result); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 183, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = 
__pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 183, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":175 + * return BlastLine(b) + * + * @property # <<<<<<<<<<<<<< + * def bedline(self): + * cdef char result[512] + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.bedline.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":185 + * return py_str(result) + * + * def __reduce__(self): # <<<<<<<<<<<<<< + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_9BlastLine_11__reduce__ = {"__reduce__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__reduce__ (wrapper)", 0); + 
#if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + if (unlikely(__pyx_nargs > 0)) { + __Pyx_RaiseArgtupleInvalid("__reduce__", 1, 0, 0, __pyx_nargs); return NULL;} + if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce__", 0))) return NULL; + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + PyObject *__pyx_t_9 = NULL; + PyObject *__pyx_t_10 = NULL; + PyObject *__pyx_t_11 = NULL; + PyObject *__pyx_t_12 = NULL; + PyObject *__pyx_t_13 = NULL; + PyObject *__pyx_t_14 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__reduce__", 1); + + /* "jcvi/formats/cblast.pyx":186 + * + * def __reduce__(self): + * return create_blast_line, ( # <<<<<<<<<<<<<< + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(__pyx_f_4jcvi_7formats_6cblast_create_blast_line); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 186, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + + /* "jcvi/formats/cblast.pyx":187 + * def __reduce__(self): + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, # <<<<<<<<<<<<<< + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, + * self.evalue, self.score) + */ + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyFloat_FromDouble(__pyx_v_self->pctid); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_self->hitlen); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_self->nmismatch); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + + /* "jcvi/formats/cblast.pyx":188 + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, # <<<<<<<<<<<<<< + * self.evalue, self.score) + * + */ + __pyx_t_7 = __Pyx_PyInt_From_int(__pyx_v_self->ngaps); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_9 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 188, 
__pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_11 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_11); + + /* "jcvi/formats/cblast.pyx":189 + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, + * self.evalue, self.score) # <<<<<<<<<<<<<< + * + * + */ + __pyx_t_12 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 189, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_12); + __pyx_t_13 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 189, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + + /* "jcvi/formats/cblast.pyx":187 + * def __reduce__(self): + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, # <<<<<<<<<<<<<< + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, + * self.evalue, self.score) + */ + __pyx_t_14 = PyTuple_New(12); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_t_2)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_3); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 1, __pyx_t_3)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_4); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 2, __pyx_t_4)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_5); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 3, __pyx_t_5)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 4, __pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_7); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 5, __pyx_t_7)) __PYX_ERR(0, 187, __pyx_L1_error); + 
__Pyx_GIVEREF(__pyx_t_8); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 6, __pyx_t_8)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_9); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 7, __pyx_t_9)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_10); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 8, __pyx_t_10)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_11); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 9, __pyx_t_11)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_12); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 10, __pyx_t_12)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_13); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 11, __pyx_t_13)) __PYX_ERR(0, 187, __pyx_L1_error); + __pyx_t_2 = 0; + __pyx_t_3 = 0; + __pyx_t_4 = 0; + __pyx_t_5 = 0; + __pyx_t_6 = 0; + __pyx_t_7 = 0; + __pyx_t_8 = 0; + __pyx_t_9 = 0; + __pyx_t_10 = 0; + __pyx_t_11 = 0; + __pyx_t_12 = 0; + __pyx_t_13 = 0; + + /* "jcvi/formats/cblast.pyx":186 + * + * def __reduce__(self): + * return create_blast_line, ( # <<<<<<<<<<<<<< + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, + */ + __pyx_t_13 = PyTuple_New(2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 186, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1)) __PYX_ERR(0, 186, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_14); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_t_14)) __PYX_ERR(0, 186, __pyx_L1_error); + __pyx_t_1 = 0; + __pyx_t_14 = 0; + __pyx_r = __pyx_t_13; + __pyx_t_13 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":185 + * return py_str(result) + * + * def __reduce__(self): # <<<<<<<<<<<<<< + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + 
__Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_XDECREF(__pyx_t_9); + __Pyx_XDECREF(__pyx_t_10); + __Pyx_XDECREF(__pyx_t_11); + __Pyx_XDECREF(__pyx_t_12); + __Pyx_XDECREF(__pyx_t_13); + __Pyx_XDECREF(__pyx_t_14); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__reduce__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":85 + * + * cdef public: + * char _query[128] # <<<<<<<<<<<<<< + * char _subject[128] + * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 85, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto 
__pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._query.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + char __pyx_t_1[0x80]; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + if (unlikely((__Pyx_carray_from_py_char(__pyx_v_value, __pyx_t_1, 0x80) < 0))) __PYX_ERR(0, 85, __pyx_L1_error) + if (unlikely((0x80) != (0x80))) { + PyErr_Format(PyExc_ValueError, "Assignment to slice of wrong length, expected %" CYTHON_FORMAT_SSIZE_T "d, got %" CYTHON_FORMAT_SSIZE_T "d", (Py_ssize_t)(0x80), (Py_ssize_t)(0x80)); + __PYX_ERR(0, 85, __pyx_L1_error) + } + memcpy(&(__pyx_v_self->_query[0]), __pyx_t_1, sizeof(__pyx_v_self->_query[0]) * (0x80)); + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._query.__set__", __pyx_clineno, __pyx_lineno, 
__pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":86 + * cdef public: + * char _query[128] + * char _subject[128] # <<<<<<<<<<<<<< + * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop + * float pctid, score + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 86, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._subject.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(PyObject *__pyx_v_self, 
PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + char __pyx_t_1[0x80]; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + if (unlikely((__Pyx_carray_from_py_char(__pyx_v_value, __pyx_t_1, 0x80) < 0))) __PYX_ERR(0, 86, __pyx_L1_error) + if (unlikely((0x80) != (0x80))) { + PyErr_Format(PyExc_ValueError, "Assignment to slice of wrong length, expected %" CYTHON_FORMAT_SSIZE_T "d, got %" CYTHON_FORMAT_SSIZE_T "d", (Py_ssize_t)(0x80), (Py_ssize_t)(0x80)); + __PYX_ERR(0, 86, __pyx_L1_error) + } + memcpy(&(__pyx_v_self->_subject[0]), __pyx_t_1, sizeof(__pyx_v_self->_subject[0]) * (0x80)); + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._subject.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":87 + * char _query[128] + * char _subject[128] + * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop # <<<<<<<<<<<<<< + * float pctid, score + * double evalue + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject 
*__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->hitlen); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.hitlen.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = 
__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->hitlen = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.hitlen.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + 
__Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->nmismatch); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.nmismatch.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->nmismatch = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.nmismatch.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); 
+ __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->ngaps); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.ngaps.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + 
__Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->ngaps = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.ngaps.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + 
int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstart.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->qstart = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + 
__Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstart.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstop.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->qstop = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstop.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(struct 
__pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstart.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + 
__pyx_v_self->sstart = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstart.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstop.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->sstop = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstop.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":88 + * char _subject[128] + * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop + * float pctid, score # <<<<<<<<<<<<<< + * double evalue + * object qseqid, sseqid + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + 
__pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->pctid); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 88, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.pctid.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, 
PyObject *__pyx_v_value) { + int __pyx_r; + float __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __pyx_PyFloat_AsFloat(__pyx_v_value); if (unlikely((__pyx_t_1 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 88, __pyx_L1_error) + __pyx_v_self->pctid = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.pctid.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 88, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.score.__get__", 
__pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + float __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __pyx_PyFloat_AsFloat(__pyx_v_value); if (unlikely((__pyx_t_1 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 88, __pyx_L1_error) + __pyx_v_self->score = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.score.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":89 + * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop + * float pctid, score + * double evalue # <<<<<<<<<<<<<< + * object qseqid, sseqid + * int qi, si + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject 
*__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 89, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.evalue.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = 
__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + double __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __pyx_PyFloat_AsDouble(__pyx_v_value); if (unlikely((__pyx_t_1 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 89, __pyx_L1_error) + __pyx_v_self->evalue = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.evalue.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":90 + * float pctid, score + * double evalue + * object qseqid, sseqid # <<<<<<<<<<<<<< + * int qi, si + * char orientation + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + 
__Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_self->qseqid); + __pyx_r = __pyx_v_self->qseqid; + goto __pyx_L0; + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__", 1); + __Pyx_INCREF(__pyx_v_value); + __Pyx_GIVEREF(__pyx_v_value); + __Pyx_GOTREF(__pyx_v_self->qseqid); + __Pyx_DECREF(__pyx_v_self->qseqid); + __pyx_v_self->qseqid = __pyx_v_value; + + /* function exit code */ + __pyx_r = 0; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(PyObject *__pyx_v_self); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__del__ (wrapper)", 0); + __pyx_kwvalues = 
__Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__del__", 1); + __Pyx_INCREF(Py_None); + __Pyx_GIVEREF(Py_None); + __Pyx_GOTREF(__pyx_v_self->qseqid); + __Pyx_DECREF(__pyx_v_self->qseqid); + __pyx_v_self->qseqid = Py_None; + + /* function exit code */ + __pyx_r = 0; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_self->sseqid); + __pyx_r = __pyx_v_self->sseqid; + goto __pyx_L0; + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__", 1); + __Pyx_INCREF(__pyx_v_value); + __Pyx_GIVEREF(__pyx_v_value); + __Pyx_GOTREF(__pyx_v_self->sseqid); + __Pyx_DECREF(__pyx_v_self->sseqid); + __pyx_v_self->sseqid = __pyx_v_value; + + /* function exit code */ + __pyx_r = 0; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(PyObject *__pyx_v_self); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__del__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(struct 
__pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__del__", 1); + __Pyx_INCREF(Py_None); + __Pyx_GIVEREF(Py_None); + __Pyx_GOTREF(__pyx_v_self->sseqid); + __Pyx_DECREF(__pyx_v_self->sseqid); + __pyx_v_self->sseqid = Py_None; + + /* function exit code */ + __pyx_r = 0; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":91 + * double evalue + * object qseqid, sseqid + * int qi, si # <<<<<<<<<<<<<< + * char orientation + * + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 91, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qi.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + 
__pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 91, __pyx_L1_error) + __pyx_v_self->qi = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qi.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + 
__pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->si); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 91, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.si.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + 
int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 91, __pyx_L1_error) + __pyx_v_self->si = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.si.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":92 + * object qseqid, sseqid + * int qi, si + * char orientation # <<<<<<<<<<<<<< + * + * property query: + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_char(__pyx_v_self->orientation); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 92, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function 
exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.orientation.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + char __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_char(__pyx_v_value); if (unlikely((__pyx_t_1 == (char)-1) && PyErr_Occurred())) __PYX_ERR(0, 92, __pyx_L1_error) + __pyx_v_self->orientation = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.orientation.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":192 + * + * + * cdef BlastLine create_blast_line(char *query, char *subject, float pctid, int hitlen, # <<<<<<<<<<<<<< + * int nmismatch, int ngaps, int qstart, int qstop, + * int 
sstart, int sstop, float evalue, float score): + */ + +static struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_f_4jcvi_7formats_6cblast_create_blast_line(char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score) { + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_b = 0; + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("create_blast_line", 1); + + /* "jcvi/formats/cblast.pyx":197 + * """ Factory method. + * """ + * cdef BlastLine b = BlastLine.__new__(BlastLine) # <<<<<<<<<<<<<< + * b.query = query + * b.subject = subject + */ + __pyx_t_1 = ((PyObject *)__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(((PyTypeObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_empty_tuple, NULL)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error) + __Pyx_GOTREF((PyObject *)__pyx_t_1); + __pyx_v_b = ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":198 + * """ + * cdef BlastLine b = BlastLine.__new__(BlastLine) + * b.query = query # <<<<<<<<<<<<<< + * b.subject = subject + * b.pctid = pctid + */ + __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 198, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (__Pyx_PyObject_SetAttrStr(((PyObject *)__pyx_v_b), __pyx_n_s_query, __pyx_t_1) < 0) __PYX_ERR(0, 198, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":199 + * cdef BlastLine b = BlastLine.__new__(BlastLine) + * b.query = query + * b.subject = subject # <<<<<<<<<<<<<< + * b.pctid = pctid + * b.hitlen = hitlen + */ + __pyx_t_1 = 
__Pyx_PyBytes_FromString(__pyx_v_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 199, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (__Pyx_PyObject_SetAttrStr(((PyObject *)__pyx_v_b), __pyx_n_s_subject, __pyx_t_1) < 0) __PYX_ERR(0, 199, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":200 + * b.query = query + * b.subject = subject + * b.pctid = pctid # <<<<<<<<<<<<<< + * b.hitlen = hitlen + * b.nmismatch = nmismatch + */ + __pyx_v_b->pctid = __pyx_v_pctid; + + /* "jcvi/formats/cblast.pyx":201 + * b.subject = subject + * b.pctid = pctid + * b.hitlen = hitlen # <<<<<<<<<<<<<< + * b.nmismatch = nmismatch + * b.ngaps = ngaps + */ + __pyx_v_b->hitlen = __pyx_v_hitlen; + + /* "jcvi/formats/cblast.pyx":202 + * b.pctid = pctid + * b.hitlen = hitlen + * b.nmismatch = nmismatch # <<<<<<<<<<<<<< + * b.ngaps = ngaps + * b.qstart = qstart + */ + __pyx_v_b->nmismatch = __pyx_v_nmismatch; + + /* "jcvi/formats/cblast.pyx":203 + * b.hitlen = hitlen + * b.nmismatch = nmismatch + * b.ngaps = ngaps # <<<<<<<<<<<<<< + * b.qstart = qstart + * b.qstop = qstop + */ + __pyx_v_b->ngaps = __pyx_v_ngaps; + + /* "jcvi/formats/cblast.pyx":204 + * b.nmismatch = nmismatch + * b.ngaps = ngaps + * b.qstart = qstart # <<<<<<<<<<<<<< + * b.qstop = qstop + * b.sstart = sstart + */ + __pyx_v_b->qstart = __pyx_v_qstart; + + /* "jcvi/formats/cblast.pyx":205 + * b.ngaps = ngaps + * b.qstart = qstart + * b.qstop = qstop # <<<<<<<<<<<<<< + * b.sstart = sstart + * b.sstop = sstop + */ + __pyx_v_b->qstop = __pyx_v_qstop; + + /* "jcvi/formats/cblast.pyx":206 + * b.qstart = qstart + * b.qstop = qstop + * b.sstart = sstart # <<<<<<<<<<<<<< + * b.sstop = sstop + * b.evalue = evalue + */ + __pyx_v_b->sstart = __pyx_v_sstart; + + /* "jcvi/formats/cblast.pyx":207 + * b.qstop = qstop + * b.sstart = sstart + * b.sstop = sstop # <<<<<<<<<<<<<< + * b.evalue = evalue + * b.score = score + */ + __pyx_v_b->sstop = __pyx_v_sstop; + + /* "jcvi/formats/cblast.pyx":208 + * b.sstart 
= sstart + * b.sstop = sstop + * b.evalue = evalue # <<<<<<<<<<<<<< + * b.score = score + * return b + */ + __pyx_v_b->evalue = __pyx_v_evalue; + + /* "jcvi/formats/cblast.pyx":209 + * b.sstop = sstop + * b.evalue = evalue + * b.score = score # <<<<<<<<<<<<<< + * return b + */ + __pyx_v_b->score = __pyx_v_score; + + /* "jcvi/formats/cblast.pyx":210 + * b.evalue = evalue + * b.score = score + * return b # <<<<<<<<<<<<<< + */ + __Pyx_XDECREF((PyObject *)__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_b); + __pyx_r = __pyx_v_b; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":192 + * + * + * cdef BlastLine create_blast_line(char *query, char *subject, float pctid, int hitlen, # <<<<<<<<<<<<<< + * int nmismatch, int ngaps, int qstart, int qstop, + * int sstart, int sstop, float evalue, float score): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.create_blast_line", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF((PyObject *)__pyx_v_b); + __Pyx_XGIVEREF((PyObject *)__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_Blast(PyTypeObject *t, PyObject *a, PyObject *k) { + struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p; + PyObject *o; + #if CYTHON_COMPILING_IN_LIMITED_API + allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); + o = alloc_func(t, 0); + #else + if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { + o = (*t->tp_alloc)(t, 0); + } else { + o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); + } + if (unlikely(!o)) return 0; + #endif + p = ((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o); + p->filename = Py_None; Py_INCREF(Py_None); + if (unlikely(__pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(o, a, k) < 0)) goto bad; + return o; + bad: + Py_DECREF(o); o = 0; + return NULL; +} + +static void 
__pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast(PyObject *o) { + struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; + #if CYTHON_USE_TP_FINALIZE + if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { + if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast) { + if (PyObject_CallFinalizerFromDealloc(o)) return; + } + } + #endif + PyObject_GC_UnTrack(o); + { + PyObject *etype, *eval, *etb; + PyErr_Fetch(&etype, &eval, &etb); + __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1); + __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(o); + __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1); + PyErr_Restore(etype, eval, etb); + } + Py_CLEAR(p->filename); + #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + (*Py_TYPE(o)->tp_free)(o); + #else + { + freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); + if (tp_free) tp_free(o); + } + #endif +} + +static int __pyx_tp_traverse_4jcvi_7formats_6cblast_Blast(PyObject *o, visitproc v, void *a) { + int e; + struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; + if (p->filename) { + e = (*v)(p->filename, a); if (e) return e; + } + return 0; +} + +static int __pyx_tp_clear_4jcvi_7formats_6cblast_Blast(PyObject *o) { + PyObject* tmp; + struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; + tmp = ((PyObject*)p->filename); + p->filename = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + return 0; +} + +static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *self, CYTHON_UNUSED PyObject *arg) { + PyObject *res = __pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(self); + if (!res && !PyErr_Occurred()) { PyErr_SetNone(PyExc_StopIteration); } + return res; +} 
+static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *self, CYTHON_UNUSED PyObject *arg) { + return __pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(self); +} + +static PyMethodDef __pyx_methods_4jcvi_7formats_6cblast_Blast[] = { + {"__next__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__, METH_NOARGS|METH_COEXIST, 0}, + {"__repr__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__, METH_NOARGS|METH_COEXIST, 0}, + {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, + {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, + {0, 0, 0, 0} +}; +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_type_4jcvi_7formats_6cblast_Blast_slots[] = { + {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast}, + {Py_tp_repr, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__}, + {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast_Blast}, + {Py_tp_clear, (void *)__pyx_tp_clear_4jcvi_7formats_6cblast_Blast}, + {Py_tp_iter, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__}, + {Py_tp_iternext, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__}, + {Py_tp_methods, (void *)__pyx_methods_4jcvi_7formats_6cblast_Blast}, + {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast_Blast}, + {0, 0}, +}; +static PyType_Spec __pyx_type_4jcvi_7formats_6cblast_Blast_spec = { + "jcvi.formats.cblast.Blast", + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_Blast), + 0, + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, + __pyx_type_4jcvi_7formats_6cblast_Blast_slots, +}; +#else + +static PyTypeObject 
__pyx_type_4jcvi_7formats_6cblast_Blast = { + PyVarObject_HEAD_INIT(0, 0) + "jcvi.formats.cblast.""Blast", /*tp_name*/ + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_Blast), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast, /*tp_dealloc*/ + #if PY_VERSION_HEX < 0x030800b4 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030800b4 + 0, /*tp_vectorcall_offset*/ + #endif + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #endif + #if PY_MAJOR_VERSION >= 3 + 0, /*tp_as_async*/ + #endif + __pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + 0, /*tp_doc*/ + __pyx_tp_traverse_4jcvi_7formats_6cblast_Blast, /*tp_traverse*/ + __pyx_tp_clear_4jcvi_7formats_6cblast_Blast, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + __pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__, /*tp_iter*/ + __pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__, /*tp_iternext*/ + __pyx_methods_4jcvi_7formats_6cblast_Blast, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + #if !CYTHON_USE_TYPE_SPECS + 0, /*tp_dictoffset*/ + #endif + 0, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new_4jcvi_7formats_6cblast_Blast, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0, /*tp_version_tag*/ + #if PY_VERSION_HEX >= 0x030400a1 + #if CYTHON_USE_TP_FINALIZE + 0, /*tp_finalize*/ + #else + NULL, /*tp_finalize*/ + #endif + #endif + #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || 
PYPY_VERSION_NUM >= 0x07030800) + 0, /*tp_vectorcall*/ + #endif + #if __PYX_NEED_TP_PRINT_SLOT == 1 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030C0000 + 0, /*tp_watched*/ + #endif + #if PY_VERSION_HEX >= 0x030d00A4 + 0, /*tp_versions_used*/ + #endif + #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, /*tp_pypy_flags*/ + #endif +}; +#endif + +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p; + PyObject *o; + #if CYTHON_COMPILING_IN_LIMITED_API + allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); + o = alloc_func(t, 0); + #else + if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { + o = (*t->tp_alloc)(t, 0); + } else { + o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); + } + if (unlikely(!o)) return 0; + #endif + p = ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o); + p->qseqid = Py_None; Py_INCREF(Py_None); + p->sseqid = Py_None; Py_INCREF(Py_None); + return o; +} + +static void __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine(PyObject *o) { + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; + #if CYTHON_USE_TP_FINALIZE + if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { + if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine) { + if (PyObject_CallFinalizerFromDealloc(o)) return; + } + } + #endif + PyObject_GC_UnTrack(o); + Py_CLEAR(p->qseqid); + Py_CLEAR(p->sseqid); + #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + (*Py_TYPE(o)->tp_free)(o); + #else + { + freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); + if (tp_free) 
tp_free(o); + } + #endif +} + +static int __pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine(PyObject *o, visitproc v, void *a) { + int e; + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; + if (p->qseqid) { + e = (*v)(p->qseqid, a); if (e) return e; + } + if (p->sseqid) { + e = (*v)(p->sseqid, a); if (e) return e; + } + return 0; +} + +static int __pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine(PyObject *o) { + PyObject* tmp; + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; + tmp = ((PyObject*)p->qseqid); + p->qseqid = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->sseqid); + p->sseqid = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + return 0; +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_query(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_query(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_subject(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_subject(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_has_score(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(o); +} + +static PyObject 
*__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_swapped(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(o); +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_bedline(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(o); +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__query(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__query(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__subject(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__subject(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_hitlen(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_hitlen(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch(PyObject *o, CYTHON_UNUSED void *x) { + return 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_ngaps(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_ngaps(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstart(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstart(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstop(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstop(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstart(PyObject *o, CYTHON_UNUSED void *x) { + return 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstart(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstop(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstop(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_pctid(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_pctid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_score(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_score(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_evalue(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(o); +} 
+ +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_evalue(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qseqid(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qseqid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(o, v); + } + else { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(o); + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sseqid(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sseqid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(o, v); + } + else { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(o); + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qi(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qi(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_si(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_si(PyObject 
*o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_orientation(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_orientation(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *self, CYTHON_UNUSED PyObject *arg) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(self); +} + +static PyMethodDef __pyx_methods_4jcvi_7formats_6cblast_BlastLine[] = { + {"__repr__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__, METH_NOARGS|METH_COEXIST, 0}, + {"__reduce__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, + {0, 0, 0, 0} +}; + +static struct PyGetSetDef __pyx_getsets_4jcvi_7formats_6cblast_BlastLine[] = { + {(char *)"query", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_query, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_query, (char *)0, 0}, + {(char *)"subject", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_subject, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_subject, (char *)0, 0}, + {(char *)"has_score", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_has_score, 0, (char *)0, 0}, + {(char *)"swapped", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_swapped, 0, (char *)PyDoc_STR("\n Swap query and subject.\n "), 0}, + {(char *)"bedline", 
__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_bedline, 0, (char *)0, 0}, + {(char *)"_query", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__query, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__query, (char *)0, 0}, + {(char *)"_subject", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__subject, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__subject, (char *)0, 0}, + {(char *)"hitlen", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_hitlen, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_hitlen, (char *)0, 0}, + {(char *)"nmismatch", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch, (char *)0, 0}, + {(char *)"ngaps", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_ngaps, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_ngaps, (char *)0, 0}, + {(char *)"qstart", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstart, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstart, (char *)0, 0}, + {(char *)"qstop", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstop, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstop, (char *)0, 0}, + {(char *)"sstart", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstart, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstart, (char *)0, 0}, + {(char *)"sstop", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstop, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstop, (char *)0, 0}, + {(char *)"pctid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_pctid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_pctid, (char *)0, 0}, + {(char *)"score", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_score, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_score, (char *)0, 0}, + {(char *)"evalue", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_evalue, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_evalue, (char *)0, 0}, + {(char *)"qseqid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qseqid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qseqid, (char *)0, 0}, 
+ {(char *)"sseqid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sseqid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sseqid, (char *)0, 0}, + {(char *)"qi", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qi, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qi, (char *)0, 0}, + {(char *)"si", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_si, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_si, (char *)0, 0}, + {(char *)"orientation", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_orientation, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_orientation, (char *)0, 0}, + {0, 0, 0, 0, 0} +}; +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_type_4jcvi_7formats_6cblast_BlastLine_slots[] = { + {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine}, + {Py_tp_repr, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__}, + {Py_tp_hash, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__}, + {Py_tp_str, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__}, + {Py_tp_doc, (void *)PyDoc_STR("\n Given a string of tab-delimited (-m 8) blast output, parse it and create\n an object with the usual attrs:\n\n >>> b = BlastLine(\"Os09g11510\tOs08g13650\t92.31\t39\t3\t0\t2273\t2311\t3237\t3199\t0.001\t54.0\")\n >>> b.query\n 'Os09g11510'\n >>> attrs = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', ... 
'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score')\n >>> [getattr(b, attr) for attr in attrs] # doctest: +ELLIPSIS\n ['Os09g11510', 'Os08g13650', 92.3..., 39, 3, 0, 2273, 2311, 3237, 3199, 0.001..., 54.0]\n ")}, + {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine}, + {Py_tp_clear, (void *)__pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine}, + {Py_tp_richcompare, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__}, + {Py_tp_methods, (void *)__pyx_methods_4jcvi_7formats_6cblast_BlastLine}, + {Py_tp_getset, (void *)__pyx_getsets_4jcvi_7formats_6cblast_BlastLine}, + {Py_tp_init, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__}, + {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine}, + {0, 0}, +}; +static PyType_Spec __pyx_type_4jcvi_7formats_6cblast_BlastLine_spec = { + "jcvi.formats.cblast.BlastLine", + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine), + 0, + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, + __pyx_type_4jcvi_7formats_6cblast_BlastLine_slots, +}; +#else + +static PyTypeObject __pyx_type_4jcvi_7formats_6cblast_BlastLine = { + PyVarObject_HEAD_INIT(0, 0) + "jcvi.formats.cblast.""BlastLine", /*tp_name*/ + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine, /*tp_dealloc*/ + #if PY_VERSION_HEX < 0x030800b4 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030800b4 + 0, /*tp_vectorcall_offset*/ + #endif + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #endif + #if PY_MAJOR_VERSION >= 3 + 0, /*tp_as_async*/ + #endif + __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__, /*tp_hash*/ + 0, /*tp_call*/ + 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + PyDoc_STR("\n Given a string of tab-delimited (-m 8) blast output, parse it and create\n an object with the usual attrs:\n\n >>> b = BlastLine(\"Os09g11510\tOs08g13650\t92.31\t39\t3\t0\t2273\t2311\t3237\t3199\t0.001\t54.0\")\n >>> b.query\n 'Os09g11510'\n >>> attrs = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', ... 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score')\n >>> [getattr(b, attr) for attr in attrs] # doctest: +ELLIPSIS\n ['Os09g11510', 'Os08g13650', 92.3..., 39, 3, 0, 2273, 2311, 3237, 3199, 0.001..., 54.0]\n "), /*tp_doc*/ + __pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine, /*tp_traverse*/ + __pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine, /*tp_clear*/ + __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + __pyx_methods_4jcvi_7formats_6cblast_BlastLine, /*tp_methods*/ + 0, /*tp_members*/ + __pyx_getsets_4jcvi_7formats_6cblast_BlastLine, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + #if !CYTHON_USE_TYPE_SPECS + 0, /*tp_dictoffset*/ + #endif + __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new_4jcvi_7formats_6cblast_BlastLine, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0, /*tp_version_tag*/ + #if PY_VERSION_HEX >= 0x030400a1 + #if CYTHON_USE_TP_FINALIZE + 0, /*tp_finalize*/ + #else + NULL, /*tp_finalize*/ + #endif + #endif + #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, /*tp_vectorcall*/ + #endif + #if 
__PYX_NEED_TP_PRINT_SLOT == 1 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030C0000 + 0, /*tp_watched*/ + #endif + #if PY_VERSION_HEX >= 0x030d00A4 + 0, /*tp_versions_used*/ + #endif + #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, /*tp_pypy_flags*/ + #endif +}; +#endif + +#if CYTHON_USE_FREELISTS +static struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[8]; +static int __pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = 0; +#endif + +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { + PyObject *o; + #if CYTHON_COMPILING_IN_LIMITED_API + allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); + o = alloc_func(t, 0); + #else + #if CYTHON_USE_FREELISTS + if (likely((int)(__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr > 0) & (int)(t->tp_basicsize == sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)))) { + o = (PyObject*)__pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[--__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr]; + memset(o, 0, sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)); + (void) PyObject_INIT(o, t); + PyObject_GC_Track(o); + } else + #endif + { + o = (*t->tp_alloc)(t, 0); + if (unlikely(!o)) return 0; + } + #endif + return o; +} + +static void __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyObject *o) { + struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *p = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o; + #if CYTHON_USE_TP_FINALIZE + if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && 
!__Pyx_PyObject_GC_IsFinalized(o)) { + if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) { + if (PyObject_CallFinalizerFromDealloc(o)) return; + } + } + #endif + PyObject_GC_UnTrack(o); + Py_CLEAR(p->__pyx_genexpr_arg_0); + Py_CLEAR(p->__pyx_v_x); + Py_CLEAR(p->__pyx_t_0); + #if CYTHON_USE_FREELISTS + if (((int)(__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)))) { + __pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr++] = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o); + } else + #endif + { + #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + (*Py_TYPE(o)->tp_free)(o); + #else + { + freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); + if (tp_free) tp_free(o); + } + #endif + } +} + +static int __pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyObject *o, visitproc v, void *a) { + int e; + struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *p = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o; + if (p->__pyx_genexpr_arg_0) { + e = (*v)(p->__pyx_genexpr_arg_0, a); if (e) return e; + } + if (p->__pyx_v_x) { + e = (*v)(p->__pyx_v_x, a); if (e) return e; + } + if (p->__pyx_t_0) { + e = (*v)(p->__pyx_t_0, a); if (e) return e; + } + return 0; +} +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_slots[] = { + {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, + {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, + {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, + {0, 0}, +}; +static PyType_Spec 
__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec = { + "jcvi.formats.cblast.__pyx_scope_struct__genexpr", + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr), + 0, + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, + __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_slots, +}; +#else + +static PyTypeObject __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = { + PyVarObject_HEAD_INIT(0, 0) + "jcvi.formats.cblast.""__pyx_scope_struct__genexpr", /*tp_name*/ + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_dealloc*/ + #if PY_VERSION_HEX < 0x030800b4 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030800b4 + 0, /*tp_vectorcall_offset*/ + #endif + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #endif + #if PY_MAJOR_VERSION >= 3 + 0, /*tp_as_async*/ + #endif + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ + 0, /*tp_doc*/ + __pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + 0, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + #if !CYTHON_USE_TYPE_SPECS + 0, /*tp_dictoffset*/ + #endif + 0, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_new*/ + 0, 
/*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0, /*tp_version_tag*/ + #if PY_VERSION_HEX >= 0x030400a1 + #if CYTHON_USE_TP_FINALIZE + 0, /*tp_finalize*/ + #else + NULL, /*tp_finalize*/ + #endif + #endif + #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, /*tp_vectorcall*/ + #endif + #if __PYX_NEED_TP_PRINT_SLOT == 1 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030C0000 + 0, /*tp_watched*/ + #endif + #if PY_VERSION_HEX >= 0x030d00A4 + 0, /*tp_versions_used*/ + #endif + #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, /*tp_pypy_flags*/ + #endif +}; +#endif + +#if CYTHON_USE_FREELISTS +static struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[8]; +static int __pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = 0; +#endif + +static PyObject *__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { + PyObject *o; + #if CYTHON_COMPILING_IN_LIMITED_API + allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); + o = alloc_func(t, 0); + #else + #if CYTHON_USE_FREELISTS + if (likely((int)(__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc > 0) & 
(int)(t->tp_basicsize == sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)))) { + o = (PyObject*)__pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[--__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc]; + memset(o, 0, sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)); + (void) PyObject_INIT(o, t); + } else + #endif + { + o = (*t->tp_alloc)(t, 0); + if (unlikely(!o)) return 0; + } + #endif + return o; +} + +static void __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyObject *o) { + #if CYTHON_USE_TP_FINALIZE + if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && (!PyType_IS_GC(Py_TYPE(o)) || !__Pyx_PyObject_GC_IsFinalized(o))) { + if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) { + if (PyObject_CallFinalizerFromDealloc(o)) return; + } + } + #endif + #if CYTHON_USE_FREELISTS + if (((int)(__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct 
__pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)))) { + __pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc++] = ((struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *)o); + } else + #endif + { + #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + (*Py_TYPE(o)->tp_free)(o); + #else + { + freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); + if (tp_free) tp_free(o); + } + #endif + } +} +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_slots[] = { + {Py_tp_dealloc, (void *)__pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc}, + {Py_tp_new, (void *)__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc}, + {0, 0}, +}; +static PyType_Spec __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec = { + "jcvi.formats.cblast.__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", + sizeof(struct 
__pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc), + 0, + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_FINALIZE, + __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_slots, +}; +#else + +static PyTypeObject __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = { + PyVarObject_HEAD_INIT(0, 0) + "jcvi.formats.cblast.""__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", /*tp_name*/ + sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, /*tp_dealloc*/ + #if PY_VERSION_HEX < 0x030800b4 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030800b4 + 0, /*tp_vectorcall_offset*/ + #endif + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #endif + #if PY_MAJOR_VERSION >= 3 + 0, /*tp_as_async*/ + #endif + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ + 0, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, 
/*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + 0, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + #if !CYTHON_USE_TYPE_SPECS + 0, /*tp_dictoffset*/ + #endif + 0, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0, /*tp_version_tag*/ + #if PY_VERSION_HEX >= 0x030400a1 + #if CYTHON_USE_TP_FINALIZE + 0, /*tp_finalize*/ + #else + NULL, /*tp_finalize*/ + #endif + #endif + #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, /*tp_vectorcall*/ + #endif + #if __PYX_NEED_TP_PRINT_SLOT == 1 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030C0000 + 0, /*tp_watched*/ + #endif + #if PY_VERSION_HEX >= 0x030d00A4 + 0, /*tp_versions_used*/ + #endif + #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, /*tp_pypy_flags*/ + #endif +}; +#endif + +static PyMethodDef __pyx_methods[] = { + {0, 0, 0, 0} +}; +#ifndef CYTHON_SMALL_CODE +#if defined(__clang__) + #define CYTHON_SMALL_CODE +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define CYTHON_SMALL_CODE __attribute__((cold)) +#else + #define CYTHON_SMALL_CODE +#endif +#endif +/* #### Code section: pystring_table ### */ + +static int __Pyx_CreateStringTabAndInitStrings(void) { + __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_n_s_Blast, __pyx_k_Blast, sizeof(__pyx_k_Blast), 0, 0, 1, 1}, + {&__pyx_n_s_BlastLine, __pyx_k_BlastLine, sizeof(__pyx_k_BlastLine), 0, 0, 1, 1}, + {&__pyx_n_s_BlastLine___get___locals_genexpr, __pyx_k_BlastLine___get___locals_genexpr, 
sizeof(__pyx_k_BlastLine___get___locals_genexpr), 0, 0, 1, 1}, + {&__pyx_n_s_BlastLine___reduce, __pyx_k_BlastLine___reduce, sizeof(__pyx_k_BlastLine___reduce), 0, 0, 1, 1}, + {&__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1, __pyx_k_BlastLine_s_to_s_eval_3f_score_1, sizeof(__pyx_k_BlastLine_s_to_s_eval_3f_score_1), 0, 0, 1, 0}, + {&__pyx_n_s_Blast___reduce_cython, __pyx_k_Blast___reduce_cython, sizeof(__pyx_k_Blast___reduce_cython), 0, 0, 1, 1}, + {&__pyx_n_s_Blast___setstate_cython, __pyx_k_Blast___setstate_cython, sizeof(__pyx_k_Blast___setstate_cython), 0, 0, 1, 1}, + {&__pyx_kp_s_Blast_s, __pyx_k_Blast_s, sizeof(__pyx_k_Blast_s), 0, 0, 1, 0}, + {&__pyx_n_s_IndexError, __pyx_k_IndexError, sizeof(__pyx_k_IndexError), 0, 0, 1, 1}, + {&__pyx_n_s_OverflowError, __pyx_k_OverflowError, sizeof(__pyx_k_OverflowError), 0, 0, 1, 1}, + {&__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma, __pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma, sizeof(__pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma), 0, 0, 1, 1}, + {&__pyx_n_s_StopIteration, __pyx_k_StopIteration, sizeof(__pyx_k_StopIteration), 0, 0, 1, 1}, + {&__pyx_n_s_TypeError, __pyx_k_TypeError, sizeof(__pyx_k_TypeError), 0, 0, 1, 1}, + {&__pyx_kp_s_UTF_8, __pyx_k_UTF_8, sizeof(__pyx_k_UTF_8), 0, 0, 1, 0}, + {&__pyx_n_s__13, __pyx_k__13, sizeof(__pyx_k__13), 0, 0, 1, 1}, + {&__pyx_kp_s__5, __pyx_k__5, sizeof(__pyx_k__5), 0, 0, 1, 0}, + {&__pyx_n_s__6, __pyx_k__6, sizeof(__pyx_k__6), 0, 0, 1, 1}, + {&__pyx_n_s_args, __pyx_k_args, sizeof(__pyx_k_args), 0, 0, 1, 1}, + {&__pyx_n_s_asyncio_coroutines, __pyx_k_asyncio_coroutines, sizeof(__pyx_k_asyncio_coroutines), 0, 0, 1, 1}, + {&__pyx_kp_s_cblast_pyx, __pyx_k_cblast_pyx, sizeof(__pyx_k_cblast_pyx), 0, 0, 1, 0}, + {&__pyx_n_s_cfunc_to_py, __pyx_k_cfunc_to_py, sizeof(__pyx_k_cfunc_to_py), 0, 0, 1, 1}, + {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, + {&__pyx_n_s_close, __pyx_k_close, sizeof(__pyx_k_close), 0, 0, 1, 1}, + {&__pyx_kp_u_disable, 
__pyx_k_disable, sizeof(__pyx_k_disable), 0, 1, 0, 0}, + {&__pyx_kp_u_enable, __pyx_k_enable, sizeof(__pyx_k_enable), 0, 1, 0, 0}, + {&__pyx_n_s_encode, __pyx_k_encode, sizeof(__pyx_k_encode), 0, 0, 1, 1}, + {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, + {&__pyx_n_s_evalue, __pyx_k_evalue, sizeof(__pyx_k_evalue), 0, 0, 1, 1}, + {&__pyx_n_s_filename, __pyx_k_filename, sizeof(__pyx_k_filename), 0, 0, 1, 1}, + {&__pyx_kp_u_gc, __pyx_k_gc, sizeof(__pyx_k_gc), 0, 1, 0, 0}, + {&__pyx_n_s_genexpr, __pyx_k_genexpr, sizeof(__pyx_k_genexpr), 0, 0, 1, 1}, + {&__pyx_n_s_getstate, __pyx_k_getstate, sizeof(__pyx_k_getstate), 0, 0, 1, 1}, + {&__pyx_n_s_hitlen, __pyx_k_hitlen, sizeof(__pyx_k_hitlen), 0, 0, 1, 1}, + {&__pyx_n_s_id, __pyx_k_id, sizeof(__pyx_k_id), 0, 0, 1, 1}, + {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, + {&__pyx_n_s_initializing, __pyx_k_initializing, sizeof(__pyx_k_initializing), 0, 0, 1, 1}, + {&__pyx_n_s_is_coroutine, __pyx_k_is_coroutine, sizeof(__pyx_k_is_coroutine), 0, 0, 1, 1}, + {&__pyx_kp_u_isenabled, __pyx_k_isenabled, sizeof(__pyx_k_isenabled), 0, 1, 0, 0}, + {&__pyx_n_s_jcvi_formats_cblast, __pyx_k_jcvi_formats_cblast, sizeof(__pyx_k_jcvi_formats_cblast), 0, 0, 1, 1}, + {&__pyx_n_s_join, __pyx_k_join, sizeof(__pyx_k_join), 0, 0, 1, 1}, + {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, + {&__pyx_n_s_ngaps, __pyx_k_ngaps, sizeof(__pyx_k_ngaps), 0, 0, 1, 1}, + {&__pyx_n_s_nmismatch, __pyx_k_nmismatch, sizeof(__pyx_k_nmismatch), 0, 0, 1, 1}, + {&__pyx_kp_s_no_default___reduce___due_to_non, __pyx_k_no_default___reduce___due_to_non, sizeof(__pyx_k_no_default___reduce___due_to_non), 0, 0, 1, 0}, + {&__pyx_n_s_orientation, __pyx_k_orientation, sizeof(__pyx_k_orientation), 0, 0, 1, 1}, + {&__pyx_n_s_pctid, __pyx_k_pctid, sizeof(__pyx_k_pctid), 0, 0, 1, 1}, + {&__pyx_n_s_pyx_state, __pyx_k_pyx_state, 
sizeof(__pyx_k_pyx_state), 0, 0, 1, 1}, + {&__pyx_n_s_qi, __pyx_k_qi, sizeof(__pyx_k_qi), 0, 0, 1, 1}, + {&__pyx_n_s_qseqid, __pyx_k_qseqid, sizeof(__pyx_k_qseqid), 0, 0, 1, 1}, + {&__pyx_n_s_qstart, __pyx_k_qstart, sizeof(__pyx_k_qstart), 0, 0, 1, 1}, + {&__pyx_n_s_qstop, __pyx_k_qstop, sizeof(__pyx_k_qstop), 0, 0, 1, 1}, + {&__pyx_n_s_query, __pyx_k_query, sizeof(__pyx_k_query), 0, 0, 1, 1}, + {&__pyx_n_s_reduce, __pyx_k_reduce, sizeof(__pyx_k_reduce), 0, 0, 1, 1}, + {&__pyx_n_s_reduce_cython, __pyx_k_reduce_cython, sizeof(__pyx_k_reduce_cython), 0, 0, 1, 1}, + {&__pyx_n_s_reduce_ex, __pyx_k_reduce_ex, sizeof(__pyx_k_reduce_ex), 0, 0, 1, 1}, + {&__pyx_n_s_richcmp, __pyx_k_richcmp, sizeof(__pyx_k_richcmp), 0, 0, 1, 1}, + {&__pyx_n_s_s, __pyx_k_s, sizeof(__pyx_k_s), 0, 0, 1, 1}, + {&__pyx_n_s_score, __pyx_k_score, sizeof(__pyx_k_score), 0, 0, 1, 1}, + {&__pyx_n_s_self, __pyx_k_self, sizeof(__pyx_k_self), 0, 0, 1, 1}, + {&__pyx_n_s_send, __pyx_k_send, sizeof(__pyx_k_send), 0, 0, 1, 1}, + {&__pyx_n_s_setstate, __pyx_k_setstate, sizeof(__pyx_k_setstate), 0, 0, 1, 1}, + {&__pyx_n_s_setstate_cython, __pyx_k_setstate_cython, sizeof(__pyx_k_setstate_cython), 0, 0, 1, 1}, + {&__pyx_n_s_si, __pyx_k_si, sizeof(__pyx_k_si), 0, 0, 1, 1}, + {&__pyx_n_s_slots, __pyx_k_slots, sizeof(__pyx_k_slots), 0, 0, 1, 1}, + {&__pyx_n_s_spec, __pyx_k_spec, sizeof(__pyx_k_spec), 0, 0, 1, 1}, + {&__pyx_n_s_sseqid, __pyx_k_sseqid, sizeof(__pyx_k_sseqid), 0, 0, 1, 1}, + {&__pyx_n_s_sstart, __pyx_k_sstart, sizeof(__pyx_k_sstart), 0, 0, 1, 1}, + {&__pyx_n_s_sstop, __pyx_k_sstop, sizeof(__pyx_k_sstop), 0, 0, 1, 1}, + {&__pyx_kp_s_stringsource, __pyx_k_stringsource, sizeof(__pyx_k_stringsource), 0, 0, 1, 0}, + {&__pyx_n_s_subject, __pyx_k_subject, sizeof(__pyx_k_subject), 0, 0, 1, 1}, + {&__pyx_n_s_sys, __pyx_k_sys, sizeof(__pyx_k_sys), 0, 0, 1, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_kp_s_that_comparison_not_implemented, 
__pyx_k_that_comparison_not_implemented, sizeof(__pyx_k_that_comparison_not_implemented), 0, 0, 1, 0}, + {&__pyx_n_s_throw, __pyx_k_throw, sizeof(__pyx_k_throw), 0, 0, 1, 1}, + {&__pyx_n_s_wrap, __pyx_k_wrap, sizeof(__pyx_k_wrap), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} + }; + return __Pyx_InitStrings(__pyx_string_tab); +} +/* #### Code section: cached_builtins ### */ +static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_StopIteration = __Pyx_GetBuiltinName(__pyx_n_s_StopIteration); if (!__pyx_builtin_StopIteration) __PYX_ERR(0, 47, __pyx_L1_error) + __pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) __PYX_ERR(1, 2, __pyx_L1_error) + __pyx_builtin_id = __Pyx_GetBuiltinName(__pyx_n_s_id); if (!__pyx_builtin_id) __PYX_ERR(0, 138, __pyx_L1_error) + __pyx_builtin_OverflowError = __Pyx_GetBuiltinName(__pyx_n_s_OverflowError); if (!__pyx_builtin_OverflowError) __PYX_ERR(1, 83, __pyx_L1_error) + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(1, 86, __pyx_L1_error) + __pyx_builtin_IndexError = __Pyx_GetBuiltinName(__pyx_n_s_IndexError); if (!__pyx_builtin_IndexError) __PYX_ERR(1, 96, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: cached_constants ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "cfunc.to_py":67 + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int 
ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) + */ + __pyx_tuple_ = PyTuple_Pack(12, __pyx_n_s_query, __pyx_n_s_subject, __pyx_n_s_pctid, __pyx_n_s_hitlen, __pyx_n_s_nmismatch, __pyx_n_s_ngaps, __pyx_n_s_qstart, __pyx_n_s_qstop, __pyx_n_s_sstart, __pyx_n_s_sstop, __pyx_n_s_evalue, __pyx_n_s_score); if (unlikely(!__pyx_tuple_)) __PYX_ERR(1, 67, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple_); + __Pyx_GIVEREF(__pyx_tuple_); + __pyx_codeobj__2 = (PyObject*)__Pyx_PyCode_New(12, 0, 0, 12, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple_, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_wrap, 67, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__2)) __PYX_ERR(1, 67, __pyx_L1_error) + + /* "jcvi/formats/cblast.pyx":135 + * return not self.__richcmp__(other, 2) + * else: + * raise Exception("that comparison not implemented") # <<<<<<<<<<<<<< + * + * def __hash__(self): + */ + __pyx_tuple__3 = PyTuple_Pack(1, __pyx_kp_s_that_comparison_not_implemented); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 135, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__3); + __Pyx_GIVEREF(__pyx_tuple__3); + + /* "jcvi/formats/cblast.pyx":145 + * + * def __str__(self): + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + */ + __pyx_slice__4 = PySlice_New(Py_None, __pyx_int_12, Py_None); if (unlikely(!__pyx_slice__4)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_slice__4); + __Pyx_GIVEREF(__pyx_slice__4); + + /* "(tree fragment)":1 + * def 
__reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + __pyx_tuple__7 = PyTuple_Pack(1, __pyx_n_s_self); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(1, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__7); + __Pyx_GIVEREF(__pyx_tuple__7); + __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_reduce_cython, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(1, 1, __pyx_L1_error) + + /* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + __pyx_tuple__9 = PyTuple_Pack(2, __pyx_n_s_self, __pyx_n_s_pyx_state); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(1, 3, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__9); + __Pyx_GIVEREF(__pyx_tuple__9); + __pyx_codeobj__10 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__9, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_setstate_cython, 3, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__10)) __PYX_ERR(1, 3, __pyx_L1_error) + + /* "jcvi/formats/cblast.pyx":80 + * """ + * + * __slots__ = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ # <<<<<<<<<<<<<< + * 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score', \ + * 'qseqid', 'sseqid', 'qi', 'si', 'orientation') + */ + __pyx_tuple__11 = PyTuple_Pack(17, __pyx_n_s_query, __pyx_n_s_subject, __pyx_n_s_pctid, __pyx_n_s_hitlen, __pyx_n_s_nmismatch, __pyx_n_s_ngaps, __pyx_n_s_qstart, __pyx_n_s_qstop, __pyx_n_s_sstart, __pyx_n_s_sstop, 
__pyx_n_s_evalue, __pyx_n_s_score, __pyx_n_s_qseqid, __pyx_n_s_sseqid, __pyx_n_s_qi, __pyx_n_s_si, __pyx_n_s_orientation); if (unlikely(!__pyx_tuple__11)) __PYX_ERR(0, 80, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__11); + __Pyx_GIVEREF(__pyx_tuple__11); + + /* "jcvi/formats/cblast.pyx":185 + * return py_str(result) + * + * def __reduce__(self): # <<<<<<<<<<<<<< + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + */ + __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_cblast_pyx, __pyx_n_s_reduce, 185, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) __PYX_ERR(0, 185, __pyx_L1_error) + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} +/* #### Code section: init_constants ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) { + __pyx_umethod_PyString_Type_encode.type = (PyObject*)&PyString_Type; + __pyx_umethod_PyString_Type_encode.method_name = &__pyx_n_s_encode; + if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error); + __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_12 = PyInt_FromLong(12); if (unlikely(!__pyx_int_12)) __PYX_ERR(0, 1, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: init_globals ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { + return 0; +} +/* #### Code section: init_module ### */ + +static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int 
__Pyx_modinit_type_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ + +static int __Pyx_modinit_global_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); + /*--- Global init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_variable_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); + /*--- Variable export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); + /*--- Function export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_init_code(void) { + __Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); + /*--- Type init code ---*/ + #if CYTHON_USE_TYPE_SPECS + __pyx_ptype_4jcvi_7formats_6cblast_Blast = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast_Blast_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast_Blast)) __PYX_ERR(0, 21, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast_Blast_spec, __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) + #else + __pyx_ptype_4jcvi_7formats_6cblast_Blast = &__pyx_type_4jcvi_7formats_6cblast_Blast; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + #endif + #if !CYTHON_USE_TYPE_SPECS + if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) + #endif + #if PY_MAJOR_VERSION < 3 + __pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_print 
= 0; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_getattro == PyObject_GenericGetAttr)) { + __pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_getattro = __Pyx_PyObject_GenericGetAttr; + } + #endif + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_Blast, (PyObject *) __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) + #if !CYTHON_COMPILING_IN_LIMITED_API + if (__Pyx_setup_reduce((PyObject *) __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) + #endif + #if CYTHON_USE_TYPE_SPECS + __pyx_ptype_4jcvi_7formats_6cblast_BlastLine = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast_BlastLine_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast_BlastLine)) __PYX_ERR(0, 66, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast_BlastLine_spec, __pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) + #else + __pyx_ptype_4jcvi_7formats_6cblast_BlastLine = &__pyx_type_4jcvi_7formats_6cblast_BlastLine; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + #endif + #if !CYTHON_USE_TYPE_SPECS + if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) + #endif + #if PY_MAJOR_VERSION < 3 + __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_print = 0; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_getattro == PyObject_GenericGetAttr)) { + __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_getattro = __Pyx_PyObject_GenericGetAttr; + } + #endif + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_BlastLine, (PyObject *) 
__pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) + #if CYTHON_USE_TYPE_SPECS + __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)) __PYX_ERR(0, 172, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec, __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) < 0) __PYX_ERR(0, 172, __pyx_L1_error) + #else + __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = &__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + #endif + #if !CYTHON_USE_TYPE_SPECS + if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) < 0) __PYX_ERR(0, 172, __pyx_L1_error) + #endif + #if PY_MAJOR_VERSION < 3 + __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_print = 0; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_getattro == PyObject_GenericGetAttr)) { + __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; + } + #endif + #if CYTHON_USE_TYPE_SPECS + __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec, NULL); if 
(unlikely(!__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)) __PYX_ERR(1, 66, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec, __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) < 0) __PYX_ERR(1, 66, __pyx_L1_error) + #else + __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = &__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + #endif + #if !CYTHON_USE_TYPE_SPECS + if (__Pyx_PyType_Ready(__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) < 0) __PYX_ERR(1, 66, __pyx_L1_error) + #endif + #if PY_MAJOR_VERSION < 3 + __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_print = 0; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_dictoffset && 
__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_getattro == PyObject_GenericGetAttr)) { + __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; + } + #endif + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_modinit_type_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); + /*--- Type import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_variable_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); + /*--- Variable import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); + /*--- Function import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + + +#if PY_MAJOR_VERSION >= 3 +#if CYTHON_PEP489_MULTI_PHASE_INIT +static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ +static int __pyx_pymod_exec_cblast(PyObject* module); /*proto*/ +static PyModuleDef_Slot __pyx_moduledef_slots[] = { + {Py_mod_create, (void*)__pyx_pymod_create}, + {Py_mod_exec, (void*)__pyx_pymod_exec_cblast}, + {0, NULL} +}; +#endif + +#ifdef __cplusplus +namespace { + struct PyModuleDef __pyx_moduledef = + #else + static struct PyModuleDef __pyx_moduledef = + #endif + { + PyModuleDef_HEAD_INIT, + "cblast", + __pyx_k_Cythonized_fast_version_of_Blas, /* m_doc */ + #if CYTHON_PEP489_MULTI_PHASE_INIT + 0, /* m_size */ + #elif CYTHON_USE_MODULE_STATE + 
sizeof(__pyx_mstate), /* m_size */ + #else + -1, /* m_size */ + #endif + __pyx_methods /* m_methods */, + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_moduledef_slots, /* m_slots */ + #else + NULL, /* m_reload */ + #endif + #if CYTHON_USE_MODULE_STATE + __pyx_m_traverse, /* m_traverse */ + __pyx_m_clear, /* m_clear */ + NULL /* m_free */ + #else + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ + #endif + }; + #ifdef __cplusplus +} /* anonymous namespace */ +#endif +#endif + +#ifndef CYTHON_NO_PYINIT_EXPORT +#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC +#elif PY_MAJOR_VERSION < 3 +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" void +#else +#define __Pyx_PyMODINIT_FUNC void +#endif +#else +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * +#else +#define __Pyx_PyMODINIT_FUNC PyObject * +#endif +#endif + + +#if PY_MAJOR_VERSION < 3 +__Pyx_PyMODINIT_FUNC initcblast(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC initcblast(void) +#else +__Pyx_PyMODINIT_FUNC PyInit_cblast(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC PyInit_cblast(void) +#if CYTHON_PEP489_MULTI_PHASE_INIT +{ + return PyModuleDef_Init(&__pyx_moduledef); +} +static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) { + #if PY_VERSION_HEX >= 0x030700A1 + static PY_INT64_T main_interpreter_id = -1; + PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); + if (main_interpreter_id == -1) { + main_interpreter_id = current_id; + return (unlikely(current_id == -1)) ? 
-1 : 0; + } else if (unlikely(main_interpreter_id != current_id)) + #else + static PyInterpreterState *main_interpreter = NULL; + PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; + if (!main_interpreter) { + main_interpreter = current_interpreter; + } else if (unlikely(main_interpreter != current_interpreter)) + #endif + { + PyErr_SetString( + PyExc_ImportError, + "Interpreter change detected - this module can only be loaded into one interpreter per process."); + return -1; + } + return 0; +} +#if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *module, const char* from_name, const char* to_name, int allow_none) +#else +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) +#endif +{ + PyObject *value = PyObject_GetAttrString(spec, from_name); + int result = 0; + if (likely(value)) { + if (allow_none || value != Py_None) { +#if CYTHON_COMPILING_IN_LIMITED_API + result = PyModule_AddObject(module, to_name, value); +#else + result = PyDict_SetItemString(moddict, to_name, value); +#endif + } + Py_DECREF(value); + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + result = -1; + } + return result; +} +static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def) { + PyObject *module = NULL, *moddict, *modname; + CYTHON_UNUSED_VAR(def); + if (__Pyx_check_single_interpreter()) + return NULL; + if (__pyx_m) + return __Pyx_NewRef(__pyx_m); + modname = PyObject_GetAttrString(spec, "name"); + if (unlikely(!modname)) goto bad; + module = PyModule_NewObject(modname); + Py_DECREF(modname); + if (unlikely(!module)) goto bad; +#if CYTHON_COMPILING_IN_LIMITED_API + moddict = module; +#else + moddict = PyModule_GetDict(module); + if (unlikely(!moddict)) goto bad; +#endif + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", 
"__loader__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; + return module; +bad: + Py_XDECREF(module); + return NULL; +} + + +static CYTHON_SMALL_CODE int __pyx_pymod_exec_cblast(PyObject *__pyx_pyinit_module) +#endif +#endif +{ + int stringtab_initialized = 0; + #if CYTHON_USE_MODULE_STATE + int pystate_addmodule_run = 0; + #endif + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + #if CYTHON_PEP489_MULTI_PHASE_INIT + if (__pyx_m) { + if (__pyx_m == __pyx_pyinit_module) return 0; + PyErr_SetString(PyExc_RuntimeError, "Module 'cblast' has already been imported. Re-initialisation is not supported."); + return -1; + } + #elif PY_MAJOR_VERSION >= 3 + if (__pyx_m) return __Pyx_NewRef(__pyx_m); + #endif + /*--- Module creation code ---*/ + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_m = __pyx_pyinit_module; + Py_INCREF(__pyx_m); + #else + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("cblast", __pyx_methods, __pyx_k_Cythonized_fast_version_of_Blas, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #elif CYTHON_USE_MODULE_STATE + __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) + { + int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef); + __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to "cblast" pseudovariable */ + if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + pystate_addmodule_run = 1; + } + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) 
+ #endif + #endif + CYTHON_UNUSED_VAR(__pyx_t_1); + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_d); + __pyx_b = __Pyx_PyImport_AddModuleRef(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_cython_runtime = __Pyx_PyImport_AddModuleRef((const char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if CYTHON_REFNANNY +__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); +if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); +} +#endif + __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_cblast(void)", 0); + if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, __Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pxy_PyFrame_Initialize_Offsets + __Pxy_PyFrame_Initialize_Offsets(); + #endif + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Coroutine_USED + if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_AsyncGen_USED + if 
(__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + PyEval_InitThreads(); + #endif + /*--- Initialize various global constants etc. ---*/ + if (__Pyx_InitConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + stringtab_initialized = 1; + if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + if (__pyx_module_is_main_jcvi__formats__cblast) { + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "jcvi.formats.cblast")) { + if (unlikely((PyDict_SetItemString(modules, "jcvi.formats.cblast", __pyx_m) < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Global type/function init code ---*/ + (void)__Pyx_modinit_global_init_code(); + (void)__Pyx_modinit_variable_export_code(); + (void)__Pyx_modinit_function_export_code(); + if (unlikely((__Pyx_modinit_type_init_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + (void)__Pyx_modinit_type_import_code(); + (void)__Pyx_modinit_variable_import_code(); + (void)__Pyx_modinit_function_import_code(); + /*--- Execution code ---*/ + 
#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + + /* "jcvi/formats/cblast.pyx":9 + * + * """ + * import sys # <<<<<<<<<<<<<< + * from libc.stdio cimport FILE, EOF, fopen, fscanf, rewind, fclose, sscanf, \ + * fgets, sprintf + */ + __pyx_t_2 = __Pyx_ImportDottedModuleRelFirst(__pyx_n_s_sys, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_sys, __pyx_t_2) < 0) __PYX_ERR(0, 9, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "jcvi/formats/cblast.pyx":15 + * + * + * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" # <<<<<<<<<<<<<< + * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" + * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" + */ + __pyx_v_4jcvi_7formats_6cblast_blast_format = ((char const *)"%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f"); + + /* "jcvi/formats/cblast.pyx":16 + * + * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" + * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" # <<<<<<<<<<<<<< + * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" + * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" + */ + __pyx_v_4jcvi_7formats_6cblast_blast_format_line = ((char const *)"%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n"); + + /* "jcvi/formats/cblast.pyx":17 + * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" + * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" + * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" # <<<<<<<<<<<<<< + * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" + * + */ + 
__pyx_v_4jcvi_7formats_6cblast_blast_output = ((char const *)"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g"); + + /* "jcvi/formats/cblast.pyx":18 + * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" + * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" + * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" # <<<<<<<<<<<<<< + * + * + */ + __pyx_v_4jcvi_7formats_6cblast_bed_output = ((char const *)"%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c"); + + /* "(tree fragment)":1 + * def __reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Blast___reduce_cython, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__8)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_reduce_cython, __pyx_t_2) < 0) __PYX_ERR(1, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Blast___setstate_cython, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__10)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 3, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_setstate_cython, __pyx_t_2) < 0) __PYX_ERR(1, 3, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "jcvi/formats/cblast.pyx":80 + * """ + * + 
* __slots__ = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ # <<<<<<<<<<<<<< + * 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score', \ + * 'qseqid', 'sseqid', 'qi', 'si', 'orientation') + */ + if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine, __pyx_n_s_slots, __pyx_tuple__11) < 0) __PYX_ERR(0, 80, __pyx_L1_error) + PyType_Modified(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); + + /* "jcvi/formats/cblast.pyx":185 + * return py_str(result) + * + * def __reduce__(self): # <<<<<<<<<<<<<< + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + */ + __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_BlastLine___reduce, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__12)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 185, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine, __pyx_n_s_reduce, __pyx_t_2) < 0) __PYX_ERR(0, 185, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + PyType_Modified(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); + + /* "jcvi/formats/cblast.pyx":1 + * # cython: language_level=2, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True # <<<<<<<<<<<<<< + * + * """ + */ + __pyx_t_2 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_2) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /*--- Wrapped vars code ---*/ + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + if (__pyx_m) { + if (__pyx_d && stringtab_initialized) { + __Pyx_AddTraceback("init jcvi.formats.cblast", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + #if !CYTHON_USE_MODULE_STATE + Py_CLEAR(__pyx_m); + #else + 
Py_DECREF(__pyx_m); + if (pystate_addmodule_run) { + PyObject *tp, *value, *tb; + PyErr_Fetch(&tp, &value, &tb); + PyState_RemoveModule(&__pyx_moduledef); + PyErr_Restore(tp, value, tb); + } + #endif + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init jcvi.formats.cblast"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if CYTHON_PEP489_MULTI_PHASE_INIT + return (__pyx_m != NULL) ? 0 : -1; + #elif PY_MAJOR_VERSION >= 3 + return __pyx_m; + #else + return; + #endif +} +/* #### Code section: cleanup_globals ### */ +/* #### Code section: cleanup_module ### */ +/* #### Code section: main_method ### */ +/* #### Code section: utility_code_pragmas ### */ +#ifdef _MSC_VER +#pragma warning( push ) +/* Warning 4127: conditional expression is constant + * Cython uses constant conditional expressions to allow in inline functions to be optimized at + * compile-time, so this warning is not useful + */ +#pragma warning( disable : 4127 ) +#endif + + + +/* #### Code section: utility_code_def ### */ + +/* --- Runtime support code --- */ +/* Refnanny */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule(modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, "RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif + +/* PyErrExceptionMatches */ +#if CYTHON_FAST_THREAD_STATE +static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030C00A6 + PyObject *current_exception = tstate->current_exception; + if (unlikely(!current_exception)) return 0; + exc_type = (PyObject*) Py_TYPE(current_exception); + if (exc_type == err) return 1; +#else + exc_type = tstate->curexc_type; + if (exc_type == err) return 1; + if 
(unlikely(!exc_type)) return 0; +#endif + #if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(exc_type); + #endif + if (unlikely(PyTuple_Check(err))) { + result = __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); + } else { + result = __Pyx_PyErr_GivenExceptionMatches(exc_type, err); + } + #if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(exc_type); + #endif + return result; +} +#endif + +/* PyErrFetchRestore */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject *tmp_value; + assert(type == NULL || (value != NULL && type == (PyObject*) Py_TYPE(value))); + if (value) { + #if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(((PyBaseExceptionObject*) value)->traceback != tb)) + #endif + PyException_SetTraceback(value, tb); + } + tmp_value = tstate->current_exception; + tstate->current_exception = value; + Py_XDECREF(tmp_value); + Py_XDECREF(type); + Py_XDECREF(tb); +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#endif +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject* exc_value; + exc_value = tstate->current_exception; + tstate->current_exception = 0; + *value = exc_value; + *type = NULL; + *tb = NULL; + if (exc_value) { + *type = (PyObject*) Py_TYPE(exc_value); + Py_INCREF(*type); + #if CYTHON_COMPILING_IN_CPYTHON + *tb = ((PyBaseExceptionObject*) exc_value)->traceback; + Py_XINCREF(*tb); + #else + *tb = PyException_GetTraceback(exc_value); + #endif + } +#else + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb = 
tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +#endif +} +#endif + +/* PyObjectGetAttrStr */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#endif + +/* PyObjectGetAttrStrNoError */ +#if __PYX_LIMITED_VERSION_HEX < 0x030d00A1 +static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError))) + __Pyx_PyErr_Clear(); +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) { + PyObject *result; +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + (void) PyObject_GetOptionalAttr(obj, attr_name, &result); + return result; +#else +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1 + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) { + return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1); + } +#endif + result = __Pyx_PyObject_GetAttrStr(obj, attr_name); + if (unlikely(!result)) { + __Pyx_PyObject_GetAttrStr_ClearAttributeError(); + } + return result; +#endif +} + +/* GetBuiltinName */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStrNoError(__pyx_b, name); + if (unlikely(!result) && !PyErr_Occurred()) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +/* TupleAndListFromArray */ +#if 
CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE void __Pyx_copy_object_array(PyObject *const *CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) { + PyObject *v; + Py_ssize_t i; + for (i = 0; i < length; i++) { + v = dest[i] = src[i]; + Py_INCREF(v); + } +} +static CYTHON_INLINE PyObject * +__Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + Py_INCREF(__pyx_empty_tuple); + return __pyx_empty_tuple; + } + res = PyTuple_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyTupleObject*)res)->ob_item, n); + return res; +} +static CYTHON_INLINE PyObject * +__Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + return PyList_New(0); + } + res = PyList_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyListObject*)res)->ob_item, n); + return res; +} +#endif + +/* BytesEquals */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else + if (s1 == s2) { + return (equals == Py_EQ); + } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { + const char *ps1, *ps2; + Py_ssize_t length = PyBytes_GET_SIZE(s1); + if (length != PyBytes_GET_SIZE(s2)) + return (equals == Py_NE); + ps1 = PyBytes_AS_STRING(s1); + ps2 = PyBytes_AS_STRING(s2); + if (ps1[0] != ps2[0]) { + return (equals == Py_NE); + } else if (length == 1) { + return (equals == Py_EQ); + } else { + int result; +#if CYTHON_USE_UNICODE_INTERNALS && (PY_VERSION_HEX < 0x030B0000) + Py_hash_t hash1, hash2; + hash1 = ((PyBytesObject*)s1)->ob_shash; + hash2 = ((PyBytesObject*)s2)->ob_shash; + if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { + return (equals == Py_NE); + } +#endif + result = memcmp(ps1, ps2, (size_t)length); + return (equals == Py_EQ) ? 
(result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { + return (equals == Py_NE); + } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { + return (equals == Py_NE); + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +#endif +} + +/* UnicodeEquals */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else +#if PY_MAJOR_VERSION < 3 + PyObject* owned_ref = NULL; +#endif + int s1_is_unicode, s2_is_unicode; + if (s1 == s2) { + goto return_eq; + } + s1_is_unicode = PyUnicode_CheckExact(s1); + s2_is_unicode = PyUnicode_CheckExact(s2); +#if PY_MAJOR_VERSION < 3 + if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { + owned_ref = PyUnicode_FromObject(s2); + if (unlikely(!owned_ref)) + return -1; + s2 = owned_ref; + s2_is_unicode = 1; + } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) { + owned_ref = PyUnicode_FromObject(s1); + if (unlikely(!owned_ref)) + return -1; + s1 = owned_ref; + s1_is_unicode = 1; + } else if (((!s2_is_unicode) & (!s1_is_unicode))) { + return __Pyx_PyBytes_Equals(s1, s2, equals); + } +#endif + if (s1_is_unicode & s2_is_unicode) { + Py_ssize_t length; + int kind; + void *data1, *data2; + if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0)) + return -1; + length = __Pyx_PyUnicode_GET_LENGTH(s1); + if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { + goto return_ne; + } +#if CYTHON_USE_UNICODE_INTERNALS + { + Py_hash_t hash1, hash2; + #if CYTHON_PEP393_ENABLED + hash1 = ((PyASCIIObject*)s1)->hash; + hash2 = ((PyASCIIObject*)s2)->hash; + #else + hash1 = ((PyUnicodeObject*)s1)->hash; + hash2 = ((PyUnicodeObject*)s2)->hash; + #endif + if 
(hash1 != hash2 && hash1 != -1 && hash2 != -1) { + goto return_ne; + } + } +#endif + kind = __Pyx_PyUnicode_KIND(s1); + if (kind != __Pyx_PyUnicode_KIND(s2)) { + goto return_ne; + } + data1 = __Pyx_PyUnicode_DATA(s1); + data2 = __Pyx_PyUnicode_DATA(s2); + if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { + goto return_ne; + } else if (length == 1) { + goto return_eq; + } else { + int result = memcmp(data1, data2, (size_t)(length * kind)); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ) ? (result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & s2_is_unicode) { + goto return_ne; + } else if ((s2 == Py_None) & s1_is_unicode) { + goto return_ne; + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +return_eq: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ); +return_ne: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_NE); +#endif +} + +/* fastcall */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s) +{ + Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames); + for (i = 0; i < n; i++) + { + if (s == PyTuple_GET_ITEM(kwnames, i)) return kwvalues[i]; + } + for (i = 0; i < n; i++) + { + int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ); + if (unlikely(eq != 0)) { + if (unlikely(eq < 0)) return NULL; + return kwvalues[i]; + } + } + return NULL; +} +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 +CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues) { + Py_ssize_t i, nkwargs = PyTuple_GET_SIZE(kwnames); + PyObject *dict; + dict = 
PyDict_New(); + if (unlikely(!dict)) + return NULL; + for (i=0; i= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + int kwds_is_tuple = CYTHON_METH_FASTCALL && likely(PyTuple_Check(kwds)); + while (1) { + Py_XDECREF(key); key = NULL; + Py_XDECREF(value); value = NULL; + if (kwds_is_tuple) { + Py_ssize_t size; +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(kwds); +#else + size = PyTuple_Size(kwds); + if (size < 0) goto bad; +#endif + if (pos >= size) break; +#if CYTHON_AVOID_BORROWED_REFS + key = __Pyx_PySequence_ITEM(kwds, pos); + if (!key) goto bad; +#elif CYTHON_ASSUME_SAFE_MACROS + key = PyTuple_GET_ITEM(kwds, pos); +#else + key = PyTuple_GetItem(kwds, pos); + if (!key) goto bad; +#endif + value = kwvalues[pos]; + pos++; + } + else + { + if (!PyDict_Next(kwds, &pos, &key, &value)) break; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + } + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(value); + Py_DECREF(key); +#endif + key = NULL; + value = NULL; + continue; + } +#if !CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + Py_INCREF(value); + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + 
value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = ( + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : + #endif + PyUnicode_Compare(**name, key) + ); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 
1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + Py_XDECREF(key); + Py_XDECREF(value); + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + #if PY_MAJOR_VERSION < 3 + PyErr_Format(PyExc_TypeError, + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + PyErr_Format(PyExc_TypeError, + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + Py_XDECREF(key); + Py_XDECREF(value); + return -1; +} + +/* FixUpExtensionType */ +#if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type) { +#if PY_VERSION_HEX > 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + CYTHON_UNUSED_VAR(spec); + CYTHON_UNUSED_VAR(type); +#else + const PyType_Slot *slot = spec->slots; + while (slot && slot->slot && slot->slot != Py_tp_members) + slot++; + if (slot && slot->slot == Py_tp_members) { + int changed = 0; +#if !(PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON) + const +#endif + PyMemberDef *memb = (PyMemberDef*) slot->pfunc; + while (memb && memb->name) { + if (memb->name[0] == '_' && memb->name[1] == '_') { +#if PY_VERSION_HEX < 0x030900b1 + if (strcmp(memb->name, "__weaklistoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_weaklistoffset = memb->offset; + changed = 1; + } + else if (strcmp(memb->name, "__dictoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_dictoffset = memb->offset; + 
changed = 1; + } +#if CYTHON_METH_FASTCALL + else if (strcmp(memb->name, "__vectorcalloffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); +#if PY_VERSION_HEX >= 0x030800b4 + type->tp_vectorcall_offset = memb->offset; +#else + type->tp_print = (printfunc) memb->offset; +#endif + changed = 1; + } +#endif +#else + if ((0)); +#endif +#if PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON + else if (strcmp(memb->name, "__module__") == 0) { + PyObject *descr; + assert(memb->type == T_OBJECT); + assert(memb->flags == 0 || memb->flags == READONLY); + descr = PyDescr_NewMember(type, memb); + if (unlikely(!descr)) + return -1; + if (unlikely(PyDict_SetItem(type->tp_dict, PyDescr_NAME(descr), descr) < 0)) { + Py_DECREF(descr); + return -1; + } + Py_DECREF(descr); + changed = 1; + } +#endif + } + memb++; + } + if (changed) + PyType_Modified(type); + } +#endif + return 0; +} +#endif + +/* FetchSharedCythonModule */ +static PyObject *__Pyx_FetchSharedCythonABIModule(void) { + return __Pyx_PyImport_AddModuleRef((char*) __PYX_ABI_MODULE_NAME); +} + +/* FetchCommonType */ +static int __Pyx_VerifyCachedType(PyObject *cached_type, + const char *name, + Py_ssize_t basicsize, + Py_ssize_t expected_basicsize) { + if (!PyType_Check(cached_type)) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s is not a type object", name); + return -1; + } + if (basicsize != expected_basicsize) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s has the wrong size, try recompiling", + name); + return -1; + } + return 0; +} +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) { + PyObject* abi_module; + const char* object_name; + PyTypeObject *cached_type = NULL; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + object_name = strrchr(type->tp_name, '.'); + object_name = object_name ? 
object_name+1 : type->tp_name; + cached_type = (PyTypeObject*) PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + if (__Pyx_VerifyCachedType( + (PyObject *)cached_type, + object_name, + cached_type->tp_basicsize, + type->tp_basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + if (PyType_Ready(type) < 0) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, (PyObject *)type) < 0) + goto bad; + Py_INCREF(type); + cached_type = type; +done: + Py_DECREF(abi_module); + return cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#else +static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) { + PyObject *abi_module, *cached_type = NULL; + const char* object_name = strrchr(spec->name, '.'); + object_name = object_name ? object_name+1 : spec->name; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + cached_type = PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + Py_ssize_t basicsize; +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *py_basicsize; + py_basicsize = PyObject_GetAttrString(cached_type, "__basicsize__"); + if (unlikely(!py_basicsize)) goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (unlikely(basicsize == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; +#else + basicsize = likely(PyType_Check(cached_type)) ? 
((PyTypeObject*) cached_type)->tp_basicsize : -1; +#endif + if (__Pyx_VerifyCachedType( + cached_type, + object_name, + basicsize, + spec->basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + CYTHON_UNUSED_VAR(module); + cached_type = __Pyx_PyType_FromModuleAndSpec(abi_module, spec, bases); + if (unlikely(!cached_type)) goto bad; + if (unlikely(__Pyx_fix_up_extension_type_from_spec(spec, (PyTypeObject *) cached_type) < 0)) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, cached_type) < 0) goto bad; +done: + Py_DECREF(abi_module); + assert(cached_type == NULL || PyType_Check(cached_type)); + return (PyTypeObject *) cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#endif + +/* PyVectorcallFastCallDict */ +#if CYTHON_METH_FASTCALL +static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + PyObject *res = NULL; + PyObject *kwnames; + PyObject **newargs; + PyObject **kwvalues; + Py_ssize_t i, pos; + size_t j; + PyObject *key, *value; + unsigned long keys_are_strings; + Py_ssize_t nkw = PyDict_GET_SIZE(kw); + newargs = (PyObject **)PyMem_Malloc((nargs + (size_t)nkw) * sizeof(args[0])); + if (unlikely(newargs == NULL)) { + PyErr_NoMemory(); + return NULL; + } + for (j = 0; j < nargs; j++) newargs[j] = args[j]; + kwnames = PyTuple_New(nkw); + if (unlikely(kwnames == NULL)) { + PyMem_Free(newargs); + return NULL; + } + kwvalues = newargs + nargs; + pos = i = 0; + keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; + while (PyDict_Next(kw, &pos, &key, &value)) { + keys_are_strings &= Py_TYPE(key)->tp_flags; + Py_INCREF(key); + Py_INCREF(value); + PyTuple_SET_ITEM(kwnames, i, key); + kwvalues[i] = value; + i++; + } + if (unlikely(!keys_are_strings)) { + PyErr_SetString(PyExc_TypeError, "keywords must be strings"); + goto cleanup; + } + res = vc(func, newargs, 
nargs, kwnames); +cleanup: + Py_DECREF(kwnames); + for (i = 0; i < nkw; i++) + Py_DECREF(kwvalues[i]); + PyMem_Free(newargs); + return res; +} +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + if (likely(kw == NULL) || PyDict_GET_SIZE(kw) == 0) { + return vc(func, args, nargs, NULL); + } + return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw); +} +#endif + +/* CythonFunctionShared */ +#if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + if (__Pyx_CyFunction_Check(func)) { + return PyCFunction_GetFunction(((__pyx_CyFunctionObject*)func)->func) == (PyCFunction) cfunc; + } else if (PyCFunction_Check(func)) { + return PyCFunction_GetFunction(func) == (PyCFunction) cfunc; + } + return 0; +} +#else +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + return __Pyx_CyOrPyCFunction_Check(func) && __Pyx_CyOrPyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +} +#endif +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj) { +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + __Pyx_Py_XDECREF_SET( + __Pyx_CyFunction_GetClassObj(f), + ((classobj) ? __Pyx_NewRef(classobj) : NULL)); +#else + __Pyx_Py_XDECREF_SET( + ((PyCMethodObject *) (f))->mm_class, + (PyTypeObject*)((classobj) ? 
__Pyx_NewRef(classobj) : NULL)); +#endif +} +static PyObject * +__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, void *closure) +{ + CYTHON_UNUSED_VAR(closure); + if (unlikely(op->func_doc == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_doc = PyObject_GetAttrString(op->func, "__doc__"); + if (unlikely(!op->func_doc)) return NULL; +#else + if (((PyCFunctionObject*)op)->m_ml->ml_doc) { +#if PY_MAJOR_VERSION >= 3 + op->func_doc = PyUnicode_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#else + op->func_doc = PyString_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#endif + if (unlikely(op->func_doc == NULL)) + return NULL; + } else { + Py_INCREF(Py_None); + return Py_None; + } +#endif + } + Py_INCREF(op->func_doc); + return op->func_doc; +} +static int +__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (value == NULL) { + value = Py_None; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_doc, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_name == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_name = PyObject_GetAttrString(op->func, "__name__"); +#elif PY_MAJOR_VERSION >= 3 + op->func_name = PyUnicode_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#else + op->func_name = PyString_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#endif + if (unlikely(op->func_name == NULL)) + return NULL; + } + Py_INCREF(op->func_name); + return op->func_name; +} +static int +__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__name__ must be set to a string object"); 
+ return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_name, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_qualname); + return op->func_qualname; +} +static int +__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__qualname__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_qualname, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_dict == NULL)) { + op->func_dict = PyDict_New(); + if (unlikely(op->func_dict == NULL)) + return NULL; + } + Py_INCREF(op->func_dict); + return op->func_dict; +} +static int +__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(value == NULL)) { + PyErr_SetString(PyExc_TypeError, + "function's dictionary may not be deleted"); + return -1; + } + if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "setting function's dictionary to a non-dict"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_dict, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_globals); + return op->func_globals; +} +static PyObject * +__Pyx_CyFunction_get_closure(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(op); + CYTHON_UNUSED_VAR(context); + Py_INCREF(Py_None); + return Py_None; +} +static PyObject * 
+__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, void *context) +{ + PyObject* result = (op->func_code) ? op->func_code : Py_None; + CYTHON_UNUSED_VAR(context); + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) { + int result = 0; + PyObject *res = op->defaults_getter((PyObject *) op); + if (unlikely(!res)) + return -1; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + op->defaults_tuple = PyTuple_GET_ITEM(res, 0); + Py_INCREF(op->defaults_tuple); + op->defaults_kwdict = PyTuple_GET_ITEM(res, 1); + Py_INCREF(op->defaults_kwdict); + #else + op->defaults_tuple = __Pyx_PySequence_ITEM(res, 0); + if (unlikely(!op->defaults_tuple)) result = -1; + else { + op->defaults_kwdict = __Pyx_PySequence_ITEM(res, 1); + if (unlikely(!op->defaults_kwdict)) result = -1; + } + #endif + Py_DECREF(res); + return result; +} +static int +__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyTuple_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__defaults__ must be set to a tuple object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__defaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_tuple, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_tuple; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_tuple; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + 
CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__kwdefaults__ must be set to a dict object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__kwdefaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_kwdict, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_kwdict; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_kwdict; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value || value == Py_None) { + value = NULL; + } else if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__annotations__ must be set to a dict object"); + return -1; + } + Py_XINCREF(value); + __Pyx_Py_XDECREF_SET(op->func_annotations, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->func_annotations; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + result = PyDict_New(); + if (unlikely(!result)) return NULL; + op->func_annotations = result; + } + Py_INCREF(result); + return result; +} +static PyObject * +__Pyx_CyFunction_get_is_coroutine(__pyx_CyFunctionObject *op, void *context) { + int is_coroutine; + CYTHON_UNUSED_VAR(context); + if (op->func_is_coroutine) { + return __Pyx_NewRef(op->func_is_coroutine); + } + is_coroutine = op->flags & __Pyx_CYFUNCTION_COROUTINE; +#if PY_VERSION_HEX >= 0x03050000 + if (is_coroutine) { + PyObject 
*module, *fromlist, *marker = __pyx_n_s_is_coroutine; + fromlist = PyList_New(1); + if (unlikely(!fromlist)) return NULL; + Py_INCREF(marker); +#if CYTHON_ASSUME_SAFE_MACROS + PyList_SET_ITEM(fromlist, 0, marker); +#else + if (unlikely(PyList_SetItem(fromlist, 0, marker) < 0)) { + Py_DECREF(marker); + Py_DECREF(fromlist); + return NULL; + } +#endif + module = PyImport_ImportModuleLevelObject(__pyx_n_s_asyncio_coroutines, NULL, NULL, fromlist, 0); + Py_DECREF(fromlist); + if (unlikely(!module)) goto ignore; + op->func_is_coroutine = __Pyx_PyObject_GetAttrStr(module, marker); + Py_DECREF(module); + if (likely(op->func_is_coroutine)) { + return __Pyx_NewRef(op->func_is_coroutine); + } +ignore: + PyErr_Clear(); + } +#endif + op->func_is_coroutine = __Pyx_PyBool_FromLong(is_coroutine); + return __Pyx_NewRef(op->func_is_coroutine); +} +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject * +__Pyx_CyFunction_get_module(__pyx_CyFunctionObject *op, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_GetAttrString(op->func, "__module__"); +} +static int +__Pyx_CyFunction_set_module(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_SetAttrString(op->func, "__module__", value); +} +#endif +static PyGetSetDef __pyx_CyFunction_getsets[] = { + {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0}, + {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, 
(setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0}, + {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0}, + {(char *) "_is_coroutine", (getter)__Pyx_CyFunction_get_is_coroutine, 0, 0, 0}, +#if CYTHON_COMPILING_IN_LIMITED_API + {"__module__", (getter)__Pyx_CyFunction_get_module, (setter)__Pyx_CyFunction_set_module, 0, 0}, +#endif + {0, 0, 0, 0, 0} +}; +static PyMemberDef __pyx_CyFunction_members[] = { +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0}, +#endif +#if CYTHON_USE_TYPE_SPECS + {(char *) "__dictoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_dict), READONLY, 0}, +#if CYTHON_METH_FASTCALL +#if CYTHON_BACKPORT_VECTORCALL + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_vectorcall), READONLY, 0}, +#else +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0}, +#endif +#endif +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, 
func_weakreflist), READONLY, 0}, +#else + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0}, +#endif +#endif + {0, 0, 0, 0, 0} +}; +static PyObject * +__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, PyObject *args) +{ + CYTHON_UNUSED_VAR(args); +#if PY_MAJOR_VERSION >= 3 + Py_INCREF(m->func_qualname); + return m->func_qualname; +#else + return PyString_FromString(((PyCFunctionObject*)m)->m_ml->ml_name); +#endif +} +static PyMethodDef __pyx_CyFunction_methods[] = { + {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0}, + {0, 0, 0, 0} +}; +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist) +#else +#define __Pyx_CyFunction_weakreflist(cyfunc) (((PyCFunctionObject*)cyfunc)->m_weakreflist) +#endif +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { +#if !CYTHON_COMPILING_IN_LIMITED_API + PyCFunctionObject *cf = (PyCFunctionObject*) op; +#endif + if (unlikely(op == NULL)) + return NULL; +#if CYTHON_COMPILING_IN_LIMITED_API + op->func = PyCFunction_NewEx(ml, (PyObject*)op, module); + if (unlikely(!op->func)) return NULL; +#endif + op->flags = flags; + __Pyx_CyFunction_weakreflist(op) = NULL; +#if !CYTHON_COMPILING_IN_LIMITED_API + cf->m_ml = ml; + cf->m_self = (PyObject *) op; +#endif + Py_XINCREF(closure); + op->func_closure = closure; +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_XINCREF(module); + cf->m_module = module; +#endif + op->func_dict = NULL; + op->func_name = NULL; + Py_INCREF(qualname); + op->func_qualname = qualname; + op->func_doc = NULL; +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + op->func_classobj = NULL; +#else + ((PyCMethodObject*)op)->mm_class = NULL; +#endif + op->func_globals = globals; + Py_INCREF(op->func_globals); + 
Py_XINCREF(code); + op->func_code = code; + op->defaults_pyobjects = 0; + op->defaults_size = 0; + op->defaults = NULL; + op->defaults_tuple = NULL; + op->defaults_kwdict = NULL; + op->defaults_getter = NULL; + op->func_annotations = NULL; + op->func_is_coroutine = NULL; +#if CYTHON_METH_FASTCALL + switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { + case METH_NOARGS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS; + break; + case METH_O: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O; + break; + case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD; + break; + case METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS; + break; + case METH_VARARGS | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = NULL; + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + Py_DECREF(op); + return NULL; + } +#endif + return (PyObject *) op; +} +static int +__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m) +{ + Py_CLEAR(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_CLEAR(m->func); +#else + Py_CLEAR(((PyCFunctionObject*)m)->m_module); +#endif + Py_CLEAR(m->func_dict); + Py_CLEAR(m->func_name); + Py_CLEAR(m->func_qualname); + Py_CLEAR(m->func_doc); + Py_CLEAR(m->func_globals); + Py_CLEAR(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API +#if PY_VERSION_HEX < 0x030900B1 + Py_CLEAR(__Pyx_CyFunction_GetClassObj(m)); +#else + { + PyObject *cls = (PyObject*) ((PyCMethodObject *) (m))->mm_class; + ((PyCMethodObject *) (m))->mm_class = NULL; + Py_XDECREF(cls); + } +#endif +#endif + Py_CLEAR(m->defaults_tuple); + Py_CLEAR(m->defaults_kwdict); + Py_CLEAR(m->func_annotations); + Py_CLEAR(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = 
__Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_XDECREF(pydefaults[i]); + PyObject_Free(m->defaults); + m->defaults = NULL; + } + return 0; +} +static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + if (__Pyx_CyFunction_weakreflist(m) != NULL) + PyObject_ClearWeakRefs((PyObject *) m); + __Pyx_CyFunction_clear(m); + __Pyx_PyHeapTypeObject_GC_Del(m); +} +static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + PyObject_GC_UnTrack(m); + __Pyx__CyFunction_dealloc(m); +} +static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg) +{ + Py_VISIT(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(m->func); +#else + Py_VISIT(((PyCFunctionObject*)m)->m_module); +#endif + Py_VISIT(m->func_dict); + Py_VISIT(m->func_name); + Py_VISIT(m->func_qualname); + Py_VISIT(m->func_doc); + Py_VISIT(m->func_globals); + Py_VISIT(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(__Pyx_CyFunction_GetClassObj(m)); +#endif + Py_VISIT(m->defaults_tuple); + Py_VISIT(m->defaults_kwdict); + Py_VISIT(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_VISIT(pydefaults[i]); + } + return 0; +} +static PyObject* +__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op) +{ +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_FromFormat("", + op->func_qualname, (void *)op); +#else + return PyString_FromFormat("", + PyString_AsString(op->func_qualname), (void *)op); +#endif +} +static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *f = ((__pyx_CyFunctionObject*)func)->func; + PyObject *py_name = NULL; + PyCFunction meth; + int flags; + meth = PyCFunction_GetFunction(f); + if (unlikely(!meth)) return NULL; + flags = PyCFunction_GetFlags(f); + if (unlikely(flags 
< 0)) return NULL; +#else + PyCFunctionObject* f = (PyCFunctionObject*)func; + PyCFunction meth = f->m_ml->ml_meth; + int flags = f->m_ml->ml_flags; +#endif + Py_ssize_t size; + switch (flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) { + case METH_VARARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) + return (*meth)(self, arg); + break; + case METH_VARARGS | METH_KEYWORDS: + return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw); + case METH_NOARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 0)) + return (*meth)(self, NULL); +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + case METH_O: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 1)) { + PyObject *result, *arg0; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + arg0 = PyTuple_GET_ITEM(arg, 0); + #else + arg0 = __Pyx_PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL; + #endif + result = (*meth)(self, arg0); + #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(arg0); + #endif + return result; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes exactly one 
argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + return NULL; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, "%.200S() takes no keyword arguments", + py_name); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", + f->m_ml->ml_name); +#endif + return NULL; +} +static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *self, *result; +#if CYTHON_COMPILING_IN_LIMITED_API + self = PyCFunction_GetSelf(((__pyx_CyFunctionObject*)func)->func); + if (unlikely(!self) && PyErr_Occurred()) return NULL; +#else + self = ((PyCFunctionObject*)func)->m_self; +#endif + result = __Pyx_CyFunction_CallMethod(func, self, arg, kw); + return result; +} +static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) { + PyObject *result; + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func; +#if CYTHON_METH_FASTCALL + __pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc); + if (vc) { +#if CYTHON_ASSUME_SAFE_MACROS + return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), (size_t)PyTuple_GET_SIZE(args), kw); +#else + (void) &__Pyx_PyVectorcall_FastCallDict; + return PyVectorcall_Call(func, args, kw); +#endif + } +#endif + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + Py_ssize_t argc; + PyObject *new_args; + PyObject *self; +#if CYTHON_ASSUME_SAFE_MACROS + argc = PyTuple_GET_SIZE(args); +#else + argc = PyTuple_Size(args); + if 
(unlikely(!argc) < 0) return NULL; +#endif + new_args = PyTuple_GetSlice(args, 1, argc); + if (unlikely(!new_args)) + return NULL; + self = PyTuple_GetItem(args, 0); + if (unlikely(!self)) { + Py_DECREF(new_args); +#if PY_MAJOR_VERSION > 2 + PyErr_Format(PyExc_TypeError, + "unbound method %.200S() needs an argument", + cyfunc->func_qualname); +#else + PyErr_SetString(PyExc_TypeError, + "unbound method needs an argument"); +#endif + return NULL; + } + result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw); + Py_DECREF(new_args); + } else { + result = __Pyx_CyFunction_Call(func, args, kw); + } + return result; +} +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames) +{ + int ret = 0; + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + if (unlikely(nargs < 1)) { + PyErr_Format(PyExc_TypeError, "%.200s() needs an argument", + ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + ret = 1; + } + if (unlikely(kwnames) && unlikely(PyTuple_GET_SIZE(kwnames))) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes no keyword arguments", ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + return ret; +} +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 0)) { + 
PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, nargs); + return NULL; + } + return def->ml_meth(self, NULL); +} +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 1)) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, nargs); + return NULL; + } + return def->ml_meth(self, args[0]); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = 
(__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; + PyTypeObject *cls = (PyTypeObject *) __Pyx_CyFunction_GetClassObj(cyfunc); +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCMethod)(void(*)(void))def->ml_meth)(self, cls, args, (size_t)nargs, kwnames); +} +#endif +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_CyFunctionType_slots[] = { + {Py_tp_dealloc, (void *)__Pyx_CyFunction_dealloc}, + {Py_tp_repr, (void *)__Pyx_CyFunction_repr}, + {Py_tp_call, (void *)__Pyx_CyFunction_CallAsMethod}, + {Py_tp_traverse, (void *)__Pyx_CyFunction_traverse}, + {Py_tp_clear, (void *)__Pyx_CyFunction_clear}, + {Py_tp_methods, (void *)__pyx_CyFunction_methods}, + {Py_tp_members, (void *)__pyx_CyFunction_members}, + {Py_tp_getset, (void *)__pyx_CyFunction_getsets}, + {Py_tp_descr_get, (void *)__Pyx_PyMethod_New}, + {0, 0}, +}; +static PyType_Spec __pyx_CyFunctionType_spec = { + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if (defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL) + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + __pyx_CyFunctionType_slots +}; +#else +static PyTypeObject __pyx_CyFunctionType_type = { + PyVarObject_HEAD_INIT(0, 0) + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, + (destructor) __Pyx_CyFunction_dealloc, +#if !CYTHON_METH_FASTCALL + 0, +#elif CYTHON_BACKPORT_VECTORCALL + (printfunc)offsetof(__pyx_CyFunctionObject, 
func_vectorcall), +#else + offsetof(PyCFunctionObject, vectorcall), +#endif + 0, + 0, +#if PY_MAJOR_VERSION < 3 + 0, +#else + 0, +#endif + (reprfunc) __Pyx_CyFunction_repr, + 0, + 0, + 0, + 0, + __Pyx_CyFunction_CallAsMethod, + 0, + 0, + 0, + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + 0, + (traverseproc) __Pyx_CyFunction_traverse, + (inquiry) __Pyx_CyFunction_clear, + 0, +#if PY_VERSION_HEX < 0x030500A0 + offsetof(__pyx_CyFunctionObject, func_weakreflist), +#else + offsetof(PyCFunctionObject, m_weakreflist), +#endif + 0, + 0, + __pyx_CyFunction_methods, + __pyx_CyFunction_members, + __pyx_CyFunction_getsets, + 0, + 0, + __Pyx_PyMethod_New, + 0, + offsetof(__pyx_CyFunctionObject, func_dict), + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +#if PY_VERSION_HEX >= 0x030400a1 + 0, +#endif +#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, +#endif +#if __PYX_NEED_TP_PRINT_SLOT + 0, +#endif +#if PY_VERSION_HEX >= 0x030C0000 + 0, +#endif +#if PY_VERSION_HEX >= 0x030d00A4 + 0, +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, +#endif +}; +#endif +static int __pyx_CyFunction_init(PyObject *module) { +#if CYTHON_USE_TYPE_SPECS + __pyx_CyFunctionType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_CyFunctionType_spec, NULL); +#else + CYTHON_UNUSED_VAR(module); + __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type); +#endif + if (unlikely(__pyx_CyFunctionType == NULL)) { + return -1; + } + return 0; +} +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults = PyObject_Malloc(size); + if (unlikely(!m->defaults)) 
+ return PyErr_NoMemory(); + memset(m->defaults, 0, size); + m->defaults_pyobjects = pyobjects; + m->defaults_size = size; + return m->defaults; +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_tuple = tuple; + Py_INCREF(tuple); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_kwdict = dict; + Py_INCREF(dict); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->func_annotations = dict; + Py_INCREF(dict); +} + +/* CythonFunction */ +static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { + PyObject *op = __Pyx_CyFunction_Init( + PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType), + ml, flags, qualname, closure, module, globals, code + ); + if (likely(op)) { + PyObject_GC_Track(op); + } + return op; +} + +/* GetTopmostException */ +#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE +static _PyErr_StackItem * +__Pyx_PyErr_GetTopmostException(PyThreadState *tstate) +{ + _PyErr_StackItem *exc_info = tstate->exc_info; + while ((exc_info->exc_value == NULL || exc_info->exc_value == Py_None) && + exc_info->previous_item != NULL) + { + exc_info = exc_info->previous_item; + } + return exc_info; +} +#endif + +/* SaveResetException */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + PyObject *exc_value = exc_info->exc_value; + if (exc_value == NULL || 
exc_value == Py_None) { + *value = NULL; + *type = NULL; + *tb = NULL; + } else { + *value = exc_value; + Py_INCREF(*value); + *type = (PyObject*) Py_TYPE(exc_value); + Py_INCREF(*type); + *tb = PyException_GetTraceback(exc_value); + } + #elif CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + *type = exc_info->exc_type; + *value = exc_info->exc_value; + *tb = exc_info->exc_traceback; + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); + #else + *type = tstate->exc_type; + *value = tstate->exc_value; + *tb = tstate->exc_traceback; + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); + #endif +} +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = tstate->exc_info; + PyObject *tmp_value = exc_info->exc_value; + exc_info->exc_value = value; + Py_XDECREF(tmp_value); + Py_XDECREF(type); + Py_XDECREF(tb); + #else + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = type; + exc_info->exc_value = value; + exc_info->exc_traceback = tb; + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = type; + tstate->exc_value = value; + tstate->exc_traceback = tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); + #endif +} +#endif + +/* FastTypeChecks */ +#if CYTHON_COMPILING_IN_CPYTHON +static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { + while (a) { + a = __Pyx_PyType_GetSlot(a, tp_base, PyTypeObject*); + if (a == b) + return 1; + } + return b == &PyBaseObject_Type; +} +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { + 
PyObject *mro; + if (a == b) return 1; + mro = a->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(a, b); +} +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (cls == a || cls == b) return 1; + mro = cls->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + PyObject *base = PyTuple_GET_ITEM(mro, i); + if (base == (PyObject *)a || base == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(cls, a) || __Pyx_InBases(cls, b); +} +#if PY_MAJOR_VERSION == 2 +static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { + PyObject *exception, *value, *tb; + int res; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&exception, &value, &tb); + res = exc_type1 ? 
PyObject_IsSubclass(err, exc_type1) : 0; + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + if (!res) { + res = PyObject_IsSubclass(err, exc_type2); + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + } + __Pyx_ErrRestore(exception, value, tb); + return res; +} +#else +static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { + if (exc_type1) { + return __Pyx_IsAnySubtype2((PyTypeObject*)err, (PyTypeObject*)exc_type1, (PyTypeObject*)exc_type2); + } else { + return __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); + } +} +#endif +static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + assert(PyExceptionClass_Check(exc_type)); + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030C00A6 + PyException_SetTraceback(value, tb); + #elif CYTHON_FAST_THREAD_STATE + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject* tmp_tb = tstate->curexc_traceback; + if (tb != tmp_tb) { + Py_INCREF(tb); + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_tb); + } +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#endif + } +bad: + Py_XDECREF(owned_instance); + return; +} +#endif + +/* PyObjectCall */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = Py_TYPE(func)->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = (*call)(func, arg, kw); + Py_LeaveRecursiveCall(); + if 
(unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* UnpackUnboundCMethod */ +static PyObject *__Pyx_SelflessCall(PyObject *method, PyObject *args, PyObject *kwargs) { + PyObject *result; + PyObject *selfless_args = PyTuple_GetSlice(args, 1, PyTuple_Size(args)); + if (unlikely(!selfless_args)) return NULL; + result = PyObject_Call(method, selfless_args, kwargs); + Py_DECREF(selfless_args); + return result; +} +static PyMethodDef __Pyx_UnboundCMethod_Def = { + "CythonUnboundCMethod", + __PYX_REINTERPRET_FUNCION(PyCFunction, __Pyx_SelflessCall), + METH_VARARGS | METH_KEYWORDS, + NULL +}; +static int __Pyx_TryUnpackUnboundCMethod(__Pyx_CachedCFunction* target) { + PyObject *method; + method = __Pyx_PyObject_GetAttrStr(target->type, *target->method_name); + if (unlikely(!method)) + return -1; + target->method = method; +#if CYTHON_COMPILING_IN_CPYTHON + #if PY_MAJOR_VERSION >= 3 + if (likely(__Pyx_TypeCheck(method, &PyMethodDescr_Type))) + #else + if (likely(!__Pyx_CyOrPyCFunction_Check(method))) + #endif + { + PyMethodDescrObject *descr = (PyMethodDescrObject*) method; + target->func = descr->d_method->ml_meth; + target->flag = descr->d_method->ml_flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_STACKLESS); + } else +#endif +#if CYTHON_COMPILING_IN_PYPY +#else + if (PyCFunction_Check(method)) +#endif + { + PyObject *self; + int self_found; +#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY + self = PyObject_GetAttrString(method, "__self__"); + if (!self) { + PyErr_Clear(); + } +#else + self = PyCFunction_GET_SELF(method); +#endif + self_found = (self && self != Py_None); +#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY + Py_XDECREF(self); +#endif + if (self_found) { + PyObject *unbound_method = PyCFunction_New(&__Pyx_UnboundCMethod_Def, method); + if (unlikely(!unbound_method)) return -1; + 
Py_DECREF(method); + target->method = unbound_method; + } + } + return 0; +} + +/* CallUnboundCMethod1 */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg) { + if (likely(cfunc->func)) { + int flag = cfunc->flag; + if (flag == METH_O) { + return (*(cfunc->func))(self, arg); + } else if ((PY_VERSION_HEX >= 0x030600B1) && flag == METH_FASTCALL) { + #if PY_VERSION_HEX >= 0x030700A0 + return (*(__Pyx_PyCFunctionFast)(void*)(PyCFunction)cfunc->func)(self, &arg, 1); + #else + return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL); + #endif + } else if ((PY_VERSION_HEX >= 0x030700A0) && flag == (METH_FASTCALL | METH_KEYWORDS)) { + return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL); + } + } + return __Pyx__CallUnboundCMethod1(cfunc, self, arg); +} +#endif +static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg){ + PyObject *args, *result = NULL; + if (unlikely(!cfunc->func && !cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL; +#if CYTHON_COMPILING_IN_CPYTHON + if (cfunc->func && (cfunc->flag & METH_VARARGS)) { + args = PyTuple_New(1); + if (unlikely(!args)) goto bad; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + if (cfunc->flag & METH_KEYWORDS) + result = (*(PyCFunctionWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, NULL); + else + result = (*cfunc->func)(self, args); + } else { + args = PyTuple_New(2); + if (unlikely(!args)) goto bad; + Py_INCREF(self); + PyTuple_SET_ITEM(args, 0, self); + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 1, arg); + result = __Pyx_PyObject_Call(cfunc->method, args, NULL); + } +#else + args = PyTuple_Pack(2, self, arg); + if (unlikely(!args)) goto bad; + result = __Pyx_PyObject_Call(cfunc->method, args, NULL); +#endif +bad: + Py_XDECREF(args); + return result; 
+} + +/* RaiseUnexpectedTypeError */ +static int +__Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj) +{ + __Pyx_TypeName obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, "Expected %s, got " __Pyx_FMT_TYPENAME, + expected, obj_type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return 0; +} + +/* decode_c_bytes */ +static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes( + const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop, + const char* encoding, const char* errors, + PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) { + if (unlikely((start < 0) | (stop < 0))) { + if (start < 0) { + start += length; + if (start < 0) + start = 0; + } + if (stop < 0) + stop += length; + } + if (stop > length) + stop = length; + if (unlikely(stop <= start)) + return __Pyx_NewRef(__pyx_empty_unicode); + length = stop - start; + cstring += start; + if (decode_func) { + return decode_func(cstring, length, errors); + } else { + return PyUnicode_Decode(cstring, length, encoding, errors); + } +} + +/* ArgTypeTest */ +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) +{ + __Pyx_TypeName type_name; + __Pyx_TypeName obj_type_name; + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + else if (exact) { + #if PY_MAJOR_VERSION == 2 + if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(__Pyx_TypeCheck(obj, type))) return 1; + } + type_name = __Pyx_PyType_GetName(type); + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected " __Pyx_FMT_TYPENAME + ", got " __Pyx_FMT_TYPENAME ")", name, type_name, obj_type_name); + __Pyx_DECREF_TypeName(type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return 0; +} + +/* PyFunctionFastCall */ +#if CYTHON_FAST_PYCALL && 
!CYTHON_VECTORCALL +static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, + PyObject *globals) { + PyFrameObject *f; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject **fastlocals; + Py_ssize_t i; + PyObject *result; + assert(globals != NULL); + /* XXX Perhaps we should create a specialized + PyFrame_New() that doesn't take locals, but does + take builtins without sanity checking them. + */ + assert(tstate != NULL); + f = PyFrame_New(tstate, co, globals, NULL); + if (f == NULL) { + return NULL; + } + fastlocals = __Pyx_PyFrame_GetLocalsplus(f); + for (i = 0; i < na; i++) { + Py_INCREF(*args); + fastlocals[i] = *args++; + } + result = PyEval_EvalFrameEx(f,0); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return result; +} +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + PyObject *closure; +#if PY_MAJOR_VERSION >= 3 + PyObject *kwdefs; +#endif + PyObject *kwtuple, **k; + PyObject **d; + Py_ssize_t nd; + Py_ssize_t nk; + PyObject *result; + assert(kwargs == NULL || PyDict_Check(kwargs)); + nk = kwargs ? 
PyDict_Size(kwargs) : 0; + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) { + return NULL; + } + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) { + return NULL; + } + #endif + if ( +#if PY_MAJOR_VERSION >= 3 + co->co_kwonlyargcount == 0 && +#endif + likely(kwargs == NULL || nk == 0) && + co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { + if (argdefs == NULL && co->co_argcount == nargs) { + result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); + goto done; + } + else if (nargs == 0 && argdefs != NULL + && co->co_argcount == Py_SIZE(argdefs)) { + /* function called with no arguments, but all parameters have + a default value: use default values as arguments .*/ + args = &PyTuple_GET_ITEM(argdefs, 0); + result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); + goto done; + } + } + if (kwargs != NULL) { + Py_ssize_t pos, i; + kwtuple = PyTuple_New(2 * nk); + if (kwtuple == NULL) { + result = NULL; + goto done; + } + k = &PyTuple_GET_ITEM(kwtuple, 0); + pos = i = 0; + while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { + Py_INCREF(k[i]); + Py_INCREF(k[i+1]); + i += 2; + } + nk = i / 2; + } + else { + kwtuple = NULL; + k = NULL; + } + closure = PyFunction_GET_CLOSURE(func); +#if PY_MAJOR_VERSION >= 3 + kwdefs = PyFunction_GET_KW_DEFAULTS(func); +#endif + if (argdefs != NULL) { + d = &PyTuple_GET_ITEM(argdefs, 0); + nd = Py_SIZE(argdefs); + } + else { + d = NULL; + nd = 0; + } +#if PY_MAJOR_VERSION >= 3 + result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, kwdefs, closure); +#else + result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, closure); +#endif + Py_XDECREF(kwtuple); +done: + Py_LeaveRecursiveCall(); + return result; +} +#endif + +/* PyObjectCallMethO */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE 
PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = __Pyx_CyOrPyCFunction_GET_FUNCTION(func); + self = __Pyx_CyOrPyCFunction_GET_SELF(func); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectFastCall */ +#if PY_VERSION_HEX < 0x03090000 || CYTHON_COMPILING_IN_LIMITED_API +static PyObject* __Pyx_PyObject_FastCall_fallback(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs) { + PyObject *argstuple; + PyObject *result = 0; + size_t i; + argstuple = PyTuple_New((Py_ssize_t)nargs); + if (unlikely(!argstuple)) return NULL; + for (i = 0; i < nargs; i++) { + Py_INCREF(args[i]); + if (__Pyx_PyTuple_SET_ITEM(argstuple, (Py_ssize_t)i, args[i]) < 0) goto bad; + } + result = __Pyx_PyObject_Call(func, argstuple, kwargs); + bad: + Py_DECREF(argstuple); + return result; +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t _nargs, PyObject *kwargs) { + Py_ssize_t nargs = __Pyx_PyVectorcall_NARGS(_nargs); +#if CYTHON_COMPILING_IN_CPYTHON + if (nargs == 0 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_NOARGS)) + return __Pyx_PyObject_CallMethO(func, NULL); + } + else if (nargs == 1 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_O)) + return __Pyx_PyObject_CallMethO(func, args[0]); + } +#endif + #if PY_VERSION_HEX < 0x030800B1 + #if CYTHON_FAST_PYCCALL + if (PyCFunction_Check(func)) { + 
if (kwargs) { + return _PyCFunction_FastCallDict(func, args, nargs, kwargs); + } else { + return _PyCFunction_FastCallKeywords(func, args, nargs, NULL); + } + } + #if PY_VERSION_HEX >= 0x030700A1 + if (!kwargs && __Pyx_IS_TYPE(func, &PyMethodDescr_Type)) { + return _PyMethodDescr_FastCallKeywords(func, args, nargs, NULL); + } + #endif + #endif + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs); + } + #endif + #endif + if (kwargs == NULL) { + #if CYTHON_VECTORCALL + #if PY_VERSION_HEX < 0x03090000 + vectorcallfunc f = _PyVectorcall_Function(func); + #else + vectorcallfunc f = PyVectorcall_Function(func); + #endif + if (f) { + return f(func, args, (size_t)nargs, NULL); + } + #elif defined(__Pyx_CyFunction_USED) && CYTHON_BACKPORT_VECTORCALL + if (__Pyx_CyFunction_CheckExact(func)) { + __pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func); + if (f) return f(func, args, (size_t)nargs, NULL); + } + #endif + } + if (nargs == 0) { + return __Pyx_PyObject_Call(func, __pyx_empty_tuple, kwargs); + } + #if PY_VERSION_HEX >= 0x03090000 && !CYTHON_COMPILING_IN_LIMITED_API + return PyObject_VectorcallDict(func, args, (size_t)nargs, kwargs); + #else + return __Pyx_PyObject_FastCall_fallback(func, args, (size_t)nargs, kwargs); + #endif +} + +/* PyObjectCallOneArg */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *args[2] = {NULL, arg}; + return __Pyx_PyObject_FastCall(func, args+1, 1 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); +} + +/* SliceObject */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice(PyObject* obj, + Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, + int has_cstart, int has_cstop, int wraparound) { + __Pyx_TypeName obj_type_name; +#if CYTHON_USE_TYPE_SLOTS + PyMappingMethods* mp; +#if PY_MAJOR_VERSION < 3 + PySequenceMethods* ms = Py_TYPE(obj)->tp_as_sequence; + if 
(likely(ms && ms->sq_slice)) { + if (!has_cstart) { + if (_py_start && (*_py_start != Py_None)) { + cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); + if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstart = 0; + } + if (!has_cstop) { + if (_py_stop && (*_py_stop != Py_None)) { + cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); + if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstop = PY_SSIZE_T_MAX; + } + if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { + Py_ssize_t l = ms->sq_length(obj); + if (likely(l >= 0)) { + if (cstop < 0) { + cstop += l; + if (cstop < 0) cstop = 0; + } + if (cstart < 0) { + cstart += l; + if (cstart < 0) cstart = 0; + } + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + goto bad; + PyErr_Clear(); + } + } + return ms->sq_slice(obj, cstart, cstop); + } +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + mp = Py_TYPE(obj)->tp_as_mapping; + if (likely(mp && mp->mp_subscript)) +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + { + PyObject* result; + PyObject *py_slice, *py_start, *py_stop; + if (_py_slice) { + py_slice = *_py_slice; + } else { + PyObject* owned_start = NULL; + PyObject* owned_stop = NULL; + if (_py_start) { + py_start = *_py_start; + } else { + if (has_cstart) { + owned_start = py_start = PyInt_FromSsize_t(cstart); + if (unlikely(!py_start)) goto bad; + } else + py_start = Py_None; + } + if (_py_stop) { + py_stop = *_py_stop; + } else { + if (has_cstop) { + owned_stop = py_stop = PyInt_FromSsize_t(cstop); + if (unlikely(!py_stop)) { + Py_XDECREF(owned_start); + goto bad; + } + } else + py_stop = Py_None; + } + py_slice = PySlice_New(py_start, py_stop, Py_None); + Py_XDECREF(owned_start); + Py_XDECREF(owned_stop); + if (unlikely(!py_slice)) goto bad; + } +#if CYTHON_USE_TYPE_SLOTS + result = mp->mp_subscript(obj, py_slice); +#else + result = PyObject_GetItem(obj, py_slice); +#endif + if (!_py_slice) { + Py_DECREF(py_slice); + } + return 
result; + } + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' object is unsliceable", obj_type_name); + __Pyx_DECREF_TypeName(obj_type_name); +bad: + return NULL; +} + +/* GetAttr */ +static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *o, PyObject *n) { +#if CYTHON_USE_TYPE_SLOTS +#if PY_MAJOR_VERSION >= 3 + if (likely(PyUnicode_Check(n))) +#else + if (likely(PyString_Check(n))) +#endif + return __Pyx_PyObject_GetAttrStr(o, n); +#endif + return PyObject_GetAttr(o, n); +} + +/* SetItemInt */ +static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) { + int r; + if (unlikely(!j)) return -1; + r = PyObject_SetItem(o, j, v); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list, + CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? 
i : i + PyList_GET_SIZE(o)); + if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o)))) { + PyObject* old = PyList_GET_ITEM(o, n); + Py_INCREF(v); + PyList_SET_ITEM(o, n, v); + Py_DECREF(old); + return 1; + } + } else { + PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; + PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; + if (mm && mm->mp_ass_subscript) { + int r; + PyObject *key = PyInt_FromSsize_t(i); + if (unlikely(!key)) return -1; + r = mm->mp_ass_subscript(o, key, v); + Py_DECREF(key); + return r; + } + if (likely(sm && sm->sq_ass_item)) { + if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { + Py_ssize_t l = sm->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + return -1; + PyErr_Clear(); + } + } + return sm->sq_ass_item(o, i, v); + } + } +#else + if (is_list || !PyMapping_Check(o)) + { + return PySequence_SetItem(o, i, v); + } +#endif + return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v); +} + +/* HasAttr */ +static CYTHON_INLINE int __Pyx_HasAttr(PyObject *o, PyObject *n) { + PyObject *r; + if (unlikely(!__Pyx_PyBaseString_Check(n))) { + PyErr_SetString(PyExc_TypeError, + "hasattr(): attribute name must be string"); + return -1; + } + r = __Pyx_GetAttr(o, n); + if (!r) { + PyErr_Clear(); + return 0; + } else { + Py_DECREF(r); + return 1; + } +} + +/* RaiseUnboundLocalError */ +static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname) { + PyErr_Format(PyExc_UnboundLocalError, "local variable '%s' referenced before assignment", varname); +} + +/* SliceObject */ +static CYTHON_INLINE int __Pyx_PyObject_SetSlice(PyObject* obj, PyObject* value, + Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, + int has_cstart, int has_cstop, int wraparound) { + __Pyx_TypeName obj_type_name; +#if CYTHON_USE_TYPE_SLOTS + PyMappingMethods* mp; +#if PY_MAJOR_VERSION < 3 + PySequenceMethods* ms = 
Py_TYPE(obj)->tp_as_sequence; + if (likely(ms && ms->sq_ass_slice)) { + if (!has_cstart) { + if (_py_start && (*_py_start != Py_None)) { + cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); + if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstart = 0; + } + if (!has_cstop) { + if (_py_stop && (*_py_stop != Py_None)) { + cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); + if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstop = PY_SSIZE_T_MAX; + } + if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { + Py_ssize_t l = ms->sq_length(obj); + if (likely(l >= 0)) { + if (cstop < 0) { + cstop += l; + if (cstop < 0) cstop = 0; + } + if (cstart < 0) { + cstart += l; + if (cstart < 0) cstart = 0; + } + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + goto bad; + PyErr_Clear(); + } + } + return ms->sq_ass_slice(obj, cstart, cstop, value); + } +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + mp = Py_TYPE(obj)->tp_as_mapping; + if (likely(mp && mp->mp_ass_subscript)) +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + { + int result; + PyObject *py_slice, *py_start, *py_stop; + if (_py_slice) { + py_slice = *_py_slice; + } else { + PyObject* owned_start = NULL; + PyObject* owned_stop = NULL; + if (_py_start) { + py_start = *_py_start; + } else { + if (has_cstart) { + owned_start = py_start = PyInt_FromSsize_t(cstart); + if (unlikely(!py_start)) goto bad; + } else + py_start = Py_None; + } + if (_py_stop) { + py_stop = *_py_stop; + } else { + if (has_cstop) { + owned_stop = py_stop = PyInt_FromSsize_t(cstop); + if (unlikely(!py_stop)) { + Py_XDECREF(owned_start); + goto bad; + } + } else + py_stop = Py_None; + } + py_slice = PySlice_New(py_start, py_stop, Py_None); + Py_XDECREF(owned_start); + Py_XDECREF(owned_stop); + if (unlikely(!py_slice)) goto bad; + } +#if CYTHON_USE_TYPE_SLOTS + result = mp->mp_ass_subscript(obj, py_slice, value); +#else + result = value ? 
PyObject_SetItem(obj, py_slice, value) : PyObject_DelItem(obj, py_slice); +#endif + if (!_py_slice) { + Py_DECREF(py_slice); + } + return result; + } + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' object does not support slice %.10s", + obj_type_name, value ? "assignment" : "deletion"); + __Pyx_DECREF_TypeName(obj_type_name); +bad: + return -1; +} + +/* PyObjectCall2Args */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2) { + PyObject *args[3] = {NULL, arg1, arg2}; + return __Pyx_PyObject_FastCall(function, args+1, 2 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); +} + +/* PyObjectGetMethod */ +static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) { + PyObject *attr; +#if CYTHON_UNPACK_METHODS && CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_PYTYPE_LOOKUP + __Pyx_TypeName type_name; + PyTypeObject *tp = Py_TYPE(obj); + PyObject *descr; + descrgetfunc f = NULL; + PyObject **dictptr, *dict; + int meth_found = 0; + assert (*method == NULL); + if (unlikely(tp->tp_getattro != PyObject_GenericGetAttr)) { + attr = __Pyx_PyObject_GetAttrStr(obj, name); + goto try_unpack; + } + if (unlikely(tp->tp_dict == NULL) && unlikely(PyType_Ready(tp) < 0)) { + return 0; + } + descr = _PyType_Lookup(tp, name); + if (likely(descr != NULL)) { + Py_INCREF(descr); +#if defined(Py_TPFLAGS_METHOD_DESCRIPTOR) && Py_TPFLAGS_METHOD_DESCRIPTOR + if (__Pyx_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR)) +#elif PY_MAJOR_VERSION >= 3 + #ifdef __Pyx_CyFunction_USED + if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type) || __Pyx_CyFunction_Check(descr))) + #else + if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type))) + #endif +#else + #ifdef __Pyx_CyFunction_USED + if (likely(PyFunction_Check(descr) || __Pyx_CyFunction_Check(descr))) + #else + if (likely(PyFunction_Check(descr))) + 
#endif +#endif + { + meth_found = 1; + } else { + f = Py_TYPE(descr)->tp_descr_get; + if (f != NULL && PyDescr_IsData(descr)) { + attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); + Py_DECREF(descr); + goto try_unpack; + } + } + } + dictptr = _PyObject_GetDictPtr(obj); + if (dictptr != NULL && (dict = *dictptr) != NULL) { + Py_INCREF(dict); + attr = __Pyx_PyDict_GetItemStr(dict, name); + if (attr != NULL) { + Py_INCREF(attr); + Py_DECREF(dict); + Py_XDECREF(descr); + goto try_unpack; + } + Py_DECREF(dict); + } + if (meth_found) { + *method = descr; + return 1; + } + if (f != NULL) { + attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); + Py_DECREF(descr); + goto try_unpack; + } + if (likely(descr != NULL)) { + *method = descr; + return 0; + } + type_name = __Pyx_PyType_GetName(tp); + PyErr_Format(PyExc_AttributeError, +#if PY_MAJOR_VERSION >= 3 + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", + type_name, name); +#else + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", + type_name, PyString_AS_STRING(name)); +#endif + __Pyx_DECREF_TypeName(type_name); + return 0; +#else + attr = __Pyx_PyObject_GetAttrStr(obj, name); + goto try_unpack; +#endif +try_unpack: +#if CYTHON_UNPACK_METHODS + if (likely(attr) && PyMethod_Check(attr) && likely(PyMethod_GET_SELF(attr) == obj)) { + PyObject *function = PyMethod_GET_FUNCTION(attr); + Py_INCREF(function); + Py_DECREF(attr); + *method = function; + return 1; + } +#endif + *method = attr; + return 0; +} + +/* PyObjectCallMethod1 */ +#if !(CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C00A2) +static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) { + PyObject *result = __Pyx_PyObject_CallOneArg(method, arg); + Py_DECREF(method); + return result; +} +#endif +static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) { +#if CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C00A2 + PyObject *args[2] = {obj, arg}; + (void) 
__Pyx_PyObject_GetMethod; + (void) __Pyx_PyObject_CallOneArg; + (void) __Pyx_PyObject_Call2Args; + return PyObject_VectorcallMethod(method_name, args, 2 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); +#else + PyObject *method = NULL, *result; + int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); + if (likely(is_method)) { + result = __Pyx_PyObject_Call2Args(method, obj, arg); + Py_DECREF(method); + return result; + } + if (unlikely(!method)) return NULL; + return __Pyx__PyObject_CallMethod1(method, arg); +#endif +} + +/* StringJoin */ +static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values) { + (void) __Pyx_PyObject_CallMethod1; +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION < 3 + return _PyString_Join(sep, values); +#elif CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 + return _PyBytes_Join(sep, values); +#else + return __Pyx_PyObject_CallMethod1(sep, __pyx_n_s_join, values); +#endif +} + +/* PyObjectSetAttrStr */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_setattro)) + return tp->tp_setattro(obj, attr_name, value); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_setattr)) + return tp->tp_setattr(obj, PyString_AS_STRING(attr_name), value); +#endif + return PyObject_SetAttr(obj, attr_name, value); +} +#endif + +/* PyObjectCallNoArg */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { + PyObject *arg[2] = {NULL, NULL}; + return __Pyx_PyObject_FastCall(func, arg + 1, 0 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); +} + +/* PyObjectCallMethod0 */ +static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) { + PyObject *method = NULL, *result = NULL; + int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); + if (likely(is_method)) { + result = __Pyx_PyObject_CallOneArg(method, obj); + Py_DECREF(method); + return 
result; + } + if (unlikely(!method)) goto bad; + result = __Pyx_PyObject_CallNoArg(method); + Py_DECREF(method); +bad: + return result; +} + +/* ValidateBasesTuple */ +#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS +static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases) { + Py_ssize_t i, n; +#if CYTHON_ASSUME_SAFE_MACROS + n = PyTuple_GET_SIZE(bases); +#else + n = PyTuple_Size(bases); + if (n < 0) return -1; +#endif + for (i = 1; i < n; i++) + { +#if CYTHON_AVOID_BORROWED_REFS + PyObject *b0 = PySequence_GetItem(bases, i); + if (!b0) return -1; +#elif CYTHON_ASSUME_SAFE_MACROS + PyObject *b0 = PyTuple_GET_ITEM(bases, i); +#else + PyObject *b0 = PyTuple_GetItem(bases, i); + if (!b0) return -1; +#endif + PyTypeObject *b; +#if PY_MAJOR_VERSION < 3 + if (PyClass_Check(b0)) + { + PyErr_Format(PyExc_TypeError, "base class '%.200s' is an old-style class", + PyString_AS_STRING(((PyClassObject*)b0)->cl_name)); +#if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + return -1; + } +#endif + b = (PyTypeObject*) b0; + if (!__Pyx_PyType_HasFeature(b, Py_TPFLAGS_HEAPTYPE)) + { + __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); + PyErr_Format(PyExc_TypeError, + "base class '" __Pyx_FMT_TYPENAME "' is not a heap type", b_name); + __Pyx_DECREF_TypeName(b_name); +#if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + return -1; + } + if (dictoffset == 0) + { + Py_ssize_t b_dictoffset = 0; +#if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + b_dictoffset = b->tp_dictoffset; +#else + PyObject *py_b_dictoffset = PyObject_GetAttrString((PyObject*)b, "__dictoffset__"); + if (!py_b_dictoffset) goto dictoffset_return; + b_dictoffset = PyLong_AsSsize_t(py_b_dictoffset); + Py_DECREF(py_b_dictoffset); + if (b_dictoffset == -1 && PyErr_Occurred()) goto dictoffset_return; +#endif + if (b_dictoffset) { + { + __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); + PyErr_Format(PyExc_TypeError, + 
"extension type '%.200s' has no __dict__ slot, " + "but base type '" __Pyx_FMT_TYPENAME "' has: " + "either add 'cdef dict __dict__' to the extension type " + "or add '__slots__ = [...]' to the base type", + type_name, b_name); + __Pyx_DECREF_TypeName(b_name); + } +#if !(CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY) + dictoffset_return: +#endif +#if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + return -1; + } + } +#if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + } + return 0; +} +#endif + +/* PyType_Ready */ +static int __Pyx_PyType_Ready(PyTypeObject *t) { +#if CYTHON_USE_TYPE_SPECS || !(CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API) || defined(PYSTON_MAJOR_VERSION) + (void)__Pyx_PyObject_CallMethod0; +#if CYTHON_USE_TYPE_SPECS + (void)__Pyx_validate_bases_tuple; +#endif + return PyType_Ready(t); +#else + int r; + PyObject *bases = __Pyx_PyType_GetSlot(t, tp_bases, PyObject*); + if (bases && unlikely(__Pyx_validate_bases_tuple(t->tp_name, t->tp_dictoffset, bases) == -1)) + return -1; +#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) + { + int gc_was_enabled; + #if PY_VERSION_HEX >= 0x030A00b1 + gc_was_enabled = PyGC_Disable(); + (void)__Pyx_PyObject_CallMethod0; + #else + PyObject *ret, *py_status; + PyObject *gc = NULL; + #if PY_VERSION_HEX >= 0x030700a1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM+0 >= 0x07030400) + gc = PyImport_GetModule(__pyx_kp_u_gc); + #endif + if (unlikely(!gc)) gc = PyImport_Import(__pyx_kp_u_gc); + if (unlikely(!gc)) return -1; + py_status = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_isenabled); + if (unlikely(!py_status)) { + Py_DECREF(gc); + return -1; + } + gc_was_enabled = __Pyx_PyObject_IsTrue(py_status); + Py_DECREF(py_status); + if (gc_was_enabled > 0) { + ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_disable); + if (unlikely(!ret)) { + Py_DECREF(gc); + return -1; + } + Py_DECREF(ret); + } else if (unlikely(gc_was_enabled == -1)) { + Py_DECREF(gc); + return -1; + } + 
#endif + t->tp_flags |= Py_TPFLAGS_HEAPTYPE; +#if PY_VERSION_HEX >= 0x030A0000 + t->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; +#endif +#else + (void)__Pyx_PyObject_CallMethod0; +#endif + r = PyType_Ready(t); +#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) + t->tp_flags &= ~Py_TPFLAGS_HEAPTYPE; + #if PY_VERSION_HEX >= 0x030A00b1 + if (gc_was_enabled) + PyGC_Enable(); + #else + if (gc_was_enabled) { + PyObject *tp, *v, *tb; + PyErr_Fetch(&tp, &v, &tb); + ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_enable); + if (likely(ret || r == -1)) { + Py_XDECREF(ret); + PyErr_Restore(tp, v, tb); + } else { + Py_XDECREF(tp); + Py_XDECREF(v); + Py_XDECREF(tb); + r = -1; + } + } + Py_DECREF(gc); + #endif + } +#endif + return r; +#endif +} + +/* PyObject_GenericGetAttrNoDict */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static PyObject *__Pyx_RaiseGenericGetAttributeError(PyTypeObject *tp, PyObject *attr_name) { + __Pyx_TypeName type_name = __Pyx_PyType_GetName(tp); + PyErr_Format(PyExc_AttributeError, +#if PY_MAJOR_VERSION >= 3 + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", + type_name, attr_name); +#else + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", + type_name, PyString_AS_STRING(attr_name)); +#endif + __Pyx_DECREF_TypeName(type_name); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name) { + PyObject *descr; + PyTypeObject *tp = Py_TYPE(obj); + if (unlikely(!PyString_Check(attr_name))) { + return PyObject_GenericGetAttr(obj, attr_name); + } + assert(!tp->tp_dictoffset); + descr = _PyType_Lookup(tp, attr_name); + if (unlikely(!descr)) { + return __Pyx_RaiseGenericGetAttributeError(tp, attr_name); + } + Py_INCREF(descr); + #if PY_MAJOR_VERSION < 3 + if (likely(PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_HAVE_CLASS))) + #endif + { + descrgetfunc f = Py_TYPE(descr)->tp_descr_get; + if (unlikely(f)) { + PyObject *res = 
f(descr, obj, (PyObject *)tp); + Py_DECREF(descr); + return res; + } + } + return descr; +} +#endif + +/* PyObject_GenericGetAttr */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name) { + if (unlikely(Py_TYPE(obj)->tp_dictoffset)) { + return PyObject_GenericGetAttr(obj, attr_name); + } + return __Pyx_PyObject_GenericGetAttrNoDict(obj, attr_name); +} +#endif + +/* SetupReduce */ +#if !CYTHON_COMPILING_IN_LIMITED_API +static int __Pyx_setup_reduce_is_named(PyObject* meth, PyObject* name) { + int ret; + PyObject *name_attr; + name_attr = __Pyx_PyObject_GetAttrStrNoError(meth, __pyx_n_s_name); + if (likely(name_attr)) { + ret = PyObject_RichCompareBool(name_attr, name, Py_EQ); + } else { + ret = -1; + } + if (unlikely(ret < 0)) { + PyErr_Clear(); + ret = 0; + } + Py_XDECREF(name_attr); + return ret; +} +static int __Pyx_setup_reduce(PyObject* type_obj) { + int ret = 0; + PyObject *object_reduce = NULL; + PyObject *object_getstate = NULL; + PyObject *object_reduce_ex = NULL; + PyObject *reduce = NULL; + PyObject *reduce_ex = NULL; + PyObject *reduce_cython = NULL; + PyObject *setstate = NULL; + PyObject *setstate_cython = NULL; + PyObject *getstate = NULL; +#if CYTHON_USE_PYTYPE_LOOKUP + getstate = _PyType_Lookup((PyTypeObject*)type_obj, __pyx_n_s_getstate); +#else + getstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_getstate); + if (!getstate && PyErr_Occurred()) { + goto __PYX_BAD; + } +#endif + if (getstate) { +#if CYTHON_USE_PYTYPE_LOOKUP + object_getstate = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_getstate); +#else + object_getstate = __Pyx_PyObject_GetAttrStrNoError((PyObject*)&PyBaseObject_Type, __pyx_n_s_getstate); + if (!object_getstate && PyErr_Occurred()) { + goto __PYX_BAD; + } +#endif + if (object_getstate != getstate) { + goto __PYX_GOOD; + } + } +#if CYTHON_USE_PYTYPE_LOOKUP + object_reduce_ex = 
_PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; +#else + object_reduce_ex = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; +#endif + reduce_ex = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce_ex); if (unlikely(!reduce_ex)) goto __PYX_BAD; + if (reduce_ex == object_reduce_ex) { +#if CYTHON_USE_PYTYPE_LOOKUP + object_reduce = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; +#else + object_reduce = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; +#endif + reduce = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce); if (unlikely(!reduce)) goto __PYX_BAD; + if (reduce == object_reduce || __Pyx_setup_reduce_is_named(reduce, __pyx_n_s_reduce_cython)) { + reduce_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_reduce_cython); + if (likely(reduce_cython)) { + ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce, reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; + ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; + } else if (reduce == object_reduce || PyErr_Occurred()) { + goto __PYX_BAD; + } + setstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate); + if (!setstate) PyErr_Clear(); + if (!setstate || __Pyx_setup_reduce_is_named(setstate, __pyx_n_s_setstate_cython)) { + setstate_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate_cython); + if (likely(setstate_cython)) { + ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate, setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD; + ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD; + } else if (!setstate || PyErr_Occurred()) { + goto __PYX_BAD; + } + } + 
PyType_Modified((PyTypeObject*)type_obj); + } + } + goto __PYX_GOOD; +__PYX_BAD: + if (!PyErr_Occurred()) { + __Pyx_TypeName type_obj_name = + __Pyx_PyType_GetName((PyTypeObject*)type_obj); + PyErr_Format(PyExc_RuntimeError, + "Unable to initialize pickling for " __Pyx_FMT_TYPENAME, type_obj_name); + __Pyx_DECREF_TypeName(type_obj_name); + } + ret = -1; +__PYX_GOOD: +#if !CYTHON_USE_PYTYPE_LOOKUP + Py_XDECREF(object_reduce); + Py_XDECREF(object_reduce_ex); + Py_XDECREF(object_getstate); + Py_XDECREF(getstate); +#endif + Py_XDECREF(reduce); + Py_XDECREF(reduce_ex); + Py_XDECREF(reduce_cython); + Py_XDECREF(setstate); + Py_XDECREF(setstate_cython); + return ret; +} +#endif + +/* Import */ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *module = 0; + PyObject *empty_dict = 0; + PyObject *empty_list = 0; + #if PY_MAJOR_VERSION < 3 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (unlikely(!py_import)) + goto bad; + if (!from_list) { + empty_list = PyList_New(0); + if (unlikely(!empty_list)) + goto bad; + from_list = empty_list; + } + #endif + empty_dict = PyDict_New(); + if (unlikely(!empty_dict)) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if (strchr(__Pyx_MODULE_NAME, '.') != NULL) { + module = PyImport_ImportModuleLevelObject( + name, __pyx_d, empty_dict, from_list, 1); + if (unlikely(!module)) { + if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_MAJOR_VERSION < 3 + PyObject *py_level = PyInt_FromLong(level); + if (unlikely(!py_level)) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, __pyx_d, empty_dict, from_list, py_level, (PyObject *)NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, __pyx_d, empty_dict, from_list, level); + #endif + } + } +bad: + Py_XDECREF(empty_dict); + 
Py_XDECREF(empty_list); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_import); + #endif + return module; +} + +/* ImportDottedModule */ +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Error(PyObject *name, PyObject *parts_tuple, Py_ssize_t count) { + PyObject *partial_name = NULL, *slice = NULL, *sep = NULL; + if (unlikely(PyErr_Occurred())) { + PyErr_Clear(); + } + if (likely(PyTuple_GET_SIZE(parts_tuple) == count)) { + partial_name = name; + } else { + slice = PySequence_GetSlice(parts_tuple, 0, count); + if (unlikely(!slice)) + goto bad; + sep = PyUnicode_FromStringAndSize(".", 1); + if (unlikely(!sep)) + goto bad; + partial_name = PyUnicode_Join(sep, slice); + } + PyErr_Format( +#if PY_MAJOR_VERSION < 3 + PyExc_ImportError, + "No module named '%s'", PyString_AS_STRING(partial_name)); +#else +#if PY_VERSION_HEX >= 0x030600B1 + PyExc_ModuleNotFoundError, +#else + PyExc_ImportError, +#endif + "No module named '%U'", partial_name); +#endif +bad: + Py_XDECREF(sep); + Py_XDECREF(slice); + Py_XDECREF(partial_name); + return NULL; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Lookup(PyObject *name) { + PyObject *imported_module; +#if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) + PyObject *modules = PyImport_GetModuleDict(); + if (unlikely(!modules)) + return NULL; + imported_module = __Pyx_PyDict_GetItemStr(modules, name); + Py_XINCREF(imported_module); +#else + imported_module = PyImport_GetModule(name); +#endif + return imported_module; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple) { + Py_ssize_t i, nparts; + nparts = PyTuple_GET_SIZE(parts_tuple); + for (i=1; i < nparts && module; i++) { + PyObject *part, *submodule; +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + part = PyTuple_GET_ITEM(parts_tuple, i); +#else + part = PySequence_ITEM(parts_tuple, 
i); +#endif + submodule = __Pyx_PyObject_GetAttrStrNoError(module, part); +#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(part); +#endif + Py_DECREF(module); + module = submodule; + } + if (unlikely(!module)) { + return __Pyx__ImportDottedModule_Error(name, parts_tuple, i); + } + return module; +} +#endif +static PyObject *__Pyx__ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if PY_MAJOR_VERSION < 3 + PyObject *module, *from_list, *star = __pyx_n_s__6; + CYTHON_UNUSED_VAR(parts_tuple); + from_list = PyList_New(1); + if (unlikely(!from_list)) + return NULL; + Py_INCREF(star); + PyList_SET_ITEM(from_list, 0, star); + module = __Pyx_Import(name, from_list, 0); + Py_DECREF(from_list); + return module; +#else + PyObject *imported_module; + PyObject *module = __Pyx_Import(name, NULL, 0); + if (!parts_tuple || unlikely(!module)) + return module; + imported_module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(imported_module)) { + Py_DECREF(module); + return imported_module; + } + PyErr_Clear(); + return __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); +#endif +} +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030400B1 + PyObject *module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(module)) { + PyObject *spec = __Pyx_PyObject_GetAttrStrNoError(module, __pyx_n_s_spec); + if (likely(spec)) { + PyObject *unsafe = __Pyx_PyObject_GetAttrStrNoError(spec, __pyx_n_s_initializing); + if (likely(!unsafe || !__Pyx_PyObject_IsTrue(unsafe))) { + Py_DECREF(spec); + spec = NULL; + } + Py_XDECREF(unsafe); + } + if (likely(!spec)) { + PyErr_Clear(); + return module; + } + Py_DECREF(spec); + Py_DECREF(module); + } else if (PyErr_Occurred()) { + PyErr_Clear(); + } +#endif + return __Pyx__ImportDottedModule(name, parts_tuple); +} + +/* ImportDottedModuleRelFirst */ +static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject 
*name, PyObject *parts_tuple) { + PyObject *module; + PyObject *from_list = NULL; +#if PY_MAJOR_VERSION < 3 + PyObject *star = __pyx_n_s__6; + from_list = PyList_New(1); + if (unlikely(!from_list)) + return NULL; + Py_INCREF(star); + PyList_SET_ITEM(from_list, 0, star); +#endif + module = __Pyx_Import(name, from_list, -1); + Py_XDECREF(from_list); + if (module) { + #if PY_MAJOR_VERSION >= 3 + if (parts_tuple) { + module = __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); + } + #endif + return module; + } + if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) + return NULL; + PyErr_Clear(); + return __Pyx_ImportDottedModule(name, parts_tuple); +} + +/* PyDictVersioning */ +#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0; +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { + PyObject **dictptr = NULL; + Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; + if (offset) { +#if CYTHON_COMPILING_IN_CPYTHON + dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); +#else + dictptr = _PyObject_GetDictPtr(obj); +#endif + } + return (dictptr && *dictptr) ? 
__PYX_GET_DICT_VERSION(*dictptr) : 0; +} +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) + return 0; + return obj_dict_version == __Pyx_get_object_dict_version(obj); +} +#endif + +/* CLineInTraceback */ +#ifndef CYTHON_CLINE_IN_TRACEBACK +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) { + PyObject *use_cline; + PyObject *ptype, *pvalue, *ptraceback; +#if CYTHON_COMPILING_IN_CPYTHON + PyObject **cython_runtime_dict; +#endif + CYTHON_MAYBE_UNUSED_VAR(tstate); + if (unlikely(!__pyx_cython_runtime)) { + return c_line; + } + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); +#if CYTHON_COMPILING_IN_CPYTHON + cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); + if (likely(cython_runtime_dict)) { + __PYX_PY_DICT_LOOKUP_IF_MODIFIED( + use_cline, *cython_runtime_dict, + __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) + } else +#endif + { + PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStrNoError(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); + if (use_cline_obj) { + use_cline = PyObject_Not(use_cline_obj) ? 
Py_False : Py_True; + Py_DECREF(use_cline_obj); + } else { + PyErr_Clear(); + use_cline = NULL; + } + } + if (!use_cline) { + c_line = 0; + (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); + } + else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { + c_line = 0; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + return c_line; +} +#endif + +/* CodeObjectCache */ +#if !CYTHON_COMPILING_IN_LIMITED_API +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + 
__pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} +#endif + +/* AddTraceback */ +#include "compile.h" +#include "frameobject.h" +#include "traceback.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyCode_Replace_For_AddTraceback(PyObject *code, PyObject *scratch_dict, + PyObject *firstlineno, PyObject *name) { + PyObject *replace = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_firstlineno", firstlineno))) return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_name", name))) return NULL; + replace = PyObject_GetAttrString(code, "replace"); + if (likely(replace)) { + PyObject *result; + result = PyObject_Call(replace, __pyx_empty_tuple, scratch_dict); + Py_DECREF(replace); + return result; + } + PyErr_Clear(); + #if __PYX_LIMITED_VERSION_HEX < 0x030780000 + { 
+ PyObject *compiled = NULL, *result = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "code", code))) return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "type", (PyObject*)(&PyType_Type)))) return NULL; + compiled = Py_CompileString( + "out = type(code)(\n" + " code.co_argcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize,\n" + " code.co_flags, code.co_code, code.co_consts, code.co_names,\n" + " code.co_varnames, code.co_filename, co_name, co_firstlineno,\n" + " code.co_lnotab)\n", "", Py_file_input); + if (!compiled) return NULL; + result = PyEval_EvalCode(compiled, scratch_dict, scratch_dict); + Py_DECREF(compiled); + if (!result) PyErr_Print(); + Py_DECREF(result); + result = PyDict_GetItemString(scratch_dict, "out"); + if (result) Py_INCREF(result); + return result; + } + #else + return NULL; + #endif +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyObject *code_object = NULL, *py_py_line = NULL, *py_funcname = NULL, *dict = NULL; + PyObject *replace = NULL, *getframe = NULL, *frame = NULL; + PyObject *exc_type, *exc_value, *exc_traceback; + int success = 0; + if (c_line) { + (void) __pyx_cfilenm; + (void) __Pyx_CLineForTraceback(__Pyx_PyThreadState_Current, c_line); + } + PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); + code_object = Py_CompileString("_getframe()", filename, Py_eval_input); + if (unlikely(!code_object)) goto bad; + py_py_line = PyLong_FromLong(py_line); + if (unlikely(!py_py_line)) goto bad; + py_funcname = PyUnicode_FromString(funcname); + if (unlikely(!py_funcname)) goto bad; + dict = PyDict_New(); + if (unlikely(!dict)) goto bad; + { + PyObject *old_code_object = code_object; + code_object = __Pyx_PyCode_Replace_For_AddTraceback(code_object, dict, py_py_line, py_funcname); + Py_DECREF(old_code_object); + } + if (unlikely(!code_object)) goto bad; + getframe = PySys_GetObject("_getframe"); + if (unlikely(!getframe)) goto bad; + if 
(unlikely(PyDict_SetItemString(dict, "_getframe", getframe))) goto bad; + frame = PyEval_EvalCode(code_object, dict, dict); + if (unlikely(!frame) || frame == Py_None) goto bad; + success = 1; + bad: + PyErr_Restore(exc_type, exc_value, exc_traceback); + Py_XDECREF(code_object); + Py_XDECREF(py_py_line); + Py_XDECREF(py_funcname); + Py_XDECREF(dict); + Py_XDECREF(replace); + if (success) { + PyTraceBack_Here( + (struct _frame*)frame); + } + Py_XDECREF(frame); +} +#else +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = NULL; + PyObject *py_funcname = NULL; + #if PY_MAJOR_VERSION < 3 + PyObject *py_srcfile = NULL; + py_srcfile = PyString_FromString(filename); + if (!py_srcfile) goto bad; + #endif + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + funcname = PyUnicode_AsUTF8(py_funcname); + if (!funcname) goto bad; + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + if (!py_funcname) goto bad; + #endif + } + #if PY_MAJOR_VERSION < 3 + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + #else + py_code = PyCode_NewEmpty(filename, funcname, py_line); + #endif + Py_XDECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_funcname); + #if PY_MAJOR_VERSION < 3 + 
Py_XDECREF(py_srcfile); + #endif + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject *ptype, *pvalue, *ptraceback; + if (c_line) { + c_line = __Pyx_CLineForTraceback(tstate, c_line); + } + py_code = __pyx_find_code_object(c_line ? -c_line : py_line); + if (!py_code) { + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) { + /* If the code object creation fails, then we should clear the + fetched exception references and propagate the new exception */ + Py_XDECREF(ptype); + Py_XDECREF(pvalue); + Py_XDECREF(ptraceback); + goto bad; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + __pyx_insert_code_object(c_line ? -c_line : py_line, py_code); + } + py_frame = PyFrame_New( + tstate, /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + __Pyx_PyFrame_SetLineNumber(py_frame, py_line); + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} +#endif + +/* CIntFromPyVerify */ +#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < 
zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + +/* CIntFromPy */ +static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const size_t neg_one = (size_t) -1, const_zero = (size_t) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(size_t) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(size_t, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (size_t) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + size_t val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (size_t) -1; + val = __Pyx_PyInt_As_size_t(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(size_t, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(size_t) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) >= 2 * PyLong_SHIFT)) { + return (size_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(size_t) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((unsigned long)digits[2]) 
<< PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) >= 3 * PyLong_SHIFT)) { + return (size_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(size_t) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) >= 4 * PyLong_SHIFT)) { + return (size_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (size_t) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(size_t) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(size_t, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(size_t) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(size_t, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(size_t, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(size_t) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, long, -(long) 
(((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { + return (size_t) (((size_t)-1)*(((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(size_t) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { + return (size_t) ((((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { + return (size_t) (((size_t)-1)*(((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(size_t) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { + return (size_t) ((((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 
* sizeof(size_t) - 1 > 4 * PyLong_SHIFT)) { + return (size_t) (((size_t)-1)*(((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(size_t) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) - 1 > 4 * PyLong_SHIFT)) { + return (size_t) ((((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(size_t) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(size_t, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(size_t) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(size_t, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + size_t val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (size_t) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (size_t) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (size_t) -1; + } else { + stepval = v; + } + v = NULL; + val = (size_t) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(size_t) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((size_t) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + 
Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(size_t) * 8) - bits - (is_unsigned ? 0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((size_t) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((size_t) 1) << (sizeof(size_t) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (size_t) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to size_t"); + return (size_t) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to size_t"); + return (size_t) -1; +} + +/* CIntToPy */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return 
PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(long)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? "little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* CIntToPy */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(int) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + return 
PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(int) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(int), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(int)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? 
"little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* CIntFromPy */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(int) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | 
(unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 2 * PyLong_SHIFT)) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 3 * PyLong_SHIFT)) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 4 * PyLong_SHIFT)) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(int) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + 
const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(int) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + 
__PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(int) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + int val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (int) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (int) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (int) -1; + } else { + stepval = v; + } + v = NULL; + val = (int) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(int) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((int) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; 
+ { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(int) * 8) - bits - (is_unsigned ? 0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((int) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((int) 1) << (sizeof(int) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (int) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +/* CIntFromPy */ +static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const char neg_one = (char) -1, const_zero = (char) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(char) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(char, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (char) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + char val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (char) -1; + val = __Pyx_PyInt_As_char(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(char, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + 
assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(char) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) >= 2 * PyLong_SHIFT)) { + return (char) (((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(char) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) >= 3 * PyLong_SHIFT)) { + return (char) (((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(char) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) >= 4 * PyLong_SHIFT)) { + return (char) (((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (char) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(char) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(char, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(char) <= 
sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(char, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(char, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(char) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { + return (char) (((char)-1)*(((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(char) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { + return (char) ((((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { + return (char) (((char)-1)*(((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(char) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned 
long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { + return (char) ((((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 4 * PyLong_SHIFT)) { + return (char) (((char)-1)*(((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(char) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 4 * PyLong_SHIFT)) { + return (char) ((((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(char) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(char, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(char) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(char, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + char val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (char) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (char) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (char) -1; + } else { + stepval = v; + } + v = NULL; + val = (char) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(char) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((char) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = 
NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(char) * 8) - bits - (is_unsigned ? 0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((char) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((char) 1) << (sizeof(char) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (char) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to char"); + return (char) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to char"); + return (char) -1; +} + +/* CIntToPy */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_char(char value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const char neg_one = (char) -1, const_zero = (char) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(char) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(char) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(char) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(char) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(char) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return PyLong_FromUnsignedNativeBytes(bytes, 
sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(char), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(char)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? "little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* FormatTypeName */ +#if CYTHON_COMPILING_IN_LIMITED_API +static __Pyx_TypeName +__Pyx_PyType_GetName(PyTypeObject* tp) +{ + PyObject *name = __Pyx_PyObject_GetAttrStr((PyObject *)tp, + __pyx_n_s_name); + if (unlikely(name == NULL) || unlikely(!PyUnicode_Check(name))) { + PyErr_Clear(); + Py_XDECREF(name); + name = __Pyx_NewRef(__pyx_n_s__13); + } + return name; +} +#endif + +/* CIntFromPy */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned 
= neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(long) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + long val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 2 * PyLong_SHIFT)) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 3 * PyLong_SHIFT)) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned 
long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 4 * PyLong_SHIFT)) { + return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(long) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(long) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return 
(long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * 
PyLong_SHIFT)) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(long) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + long val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 
30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (long) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (long) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (long) -1; + } else { + stepval = v; + } + v = NULL; + val = (long) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(long) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((long) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(long) * 8) - bits - (is_unsigned ? 
0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((long) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((long) 1) << (sizeof(long) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (long) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +/* SwapException */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_value = exc_info->exc_value; + exc_info->exc_value = *value; + if (tmp_value == NULL || tmp_value == Py_None) { + Py_XDECREF(tmp_value); + tmp_value = NULL; + tmp_type = NULL; + tmp_tb = NULL; + } else { + tmp_type = (PyObject*) Py_TYPE(tmp_value); + Py_INCREF(tmp_type); + #if CYTHON_COMPILING_IN_CPYTHON + tmp_tb = ((PyBaseExceptionObject*) tmp_value)->traceback; + Py_XINCREF(tmp_tb); + #else + tmp_tb = PyException_GetTraceback(tmp_value); + #endif + } + #elif CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = *type; + exc_info->exc_value = *value; + exc_info->exc_traceback = *tb; + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = *type; + tstate->exc_value = *value; + tstate->exc_traceback = *tb; + #endif + *type = tmp_type; + *value = tmp_value; + *tb = tmp_tb; +} 
+#else +static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_GetExcInfo(&tmp_type, &tmp_value, &tmp_tb); + PyErr_SetExcInfo(*type, *value, *tb); + *type = tmp_type; + *value = tmp_value; + *tb = tmp_tb; +} +#endif + +/* CoroutineBase */ +#include +#if PY_VERSION_HEX >= 0x030b00a6 + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif +#define __Pyx_Coroutine_Undelegate(gen) Py_CLEAR((gen)->yieldfrom) +static int __Pyx_PyGen__FetchStopIterationValue(PyThreadState *__pyx_tstate, PyObject **pvalue) { + PyObject *et, *ev, *tb; + PyObject *value = NULL; + CYTHON_UNUSED_VAR(__pyx_tstate); + __Pyx_ErrFetch(&et, &ev, &tb); + if (!et) { + Py_XDECREF(tb); + Py_XDECREF(ev); + Py_INCREF(Py_None); + *pvalue = Py_None; + return 0; + } + if (likely(et == PyExc_StopIteration)) { + if (!ev) { + Py_INCREF(Py_None); + value = Py_None; + } +#if PY_VERSION_HEX >= 0x030300A0 + else if (likely(__Pyx_IS_TYPE(ev, (PyTypeObject*)PyExc_StopIteration))) { + value = ((PyStopIterationObject *)ev)->value; + Py_INCREF(value); + Py_DECREF(ev); + } +#endif + else if (unlikely(PyTuple_Check(ev))) { + if (PyTuple_GET_SIZE(ev) >= 1) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + value = PyTuple_GET_ITEM(ev, 0); + Py_INCREF(value); +#else + value = PySequence_ITEM(ev, 0); +#endif + } else { + Py_INCREF(Py_None); + value = Py_None; + } + Py_DECREF(ev); + } + else if (!__Pyx_TypeCheck(ev, (PyTypeObject*)PyExc_StopIteration)) { + value = ev; + } + if (likely(value)) { + Py_XDECREF(tb); + Py_DECREF(et); + *pvalue = value; + return 0; + } + } else if (!__Pyx_PyErr_GivenExceptionMatches(et, PyExc_StopIteration)) { + __Pyx_ErrRestore(et, ev, tb); + return -1; + } + PyErr_NormalizeException(&et, &ev, &tb); + if (unlikely(!PyObject_TypeCheck(ev, (PyTypeObject*)PyExc_StopIteration))) { + __Pyx_ErrRestore(et, ev, tb); + return -1; + } + Py_XDECREF(tb); 
+ Py_DECREF(et); +#if PY_VERSION_HEX >= 0x030300A0 + value = ((PyStopIterationObject *)ev)->value; + Py_INCREF(value); + Py_DECREF(ev); +#else + { + PyObject* args = __Pyx_PyObject_GetAttrStr(ev, __pyx_n_s_args); + Py_DECREF(ev); + if (likely(args)) { + value = PySequence_GetItem(args, 0); + Py_DECREF(args); + } + if (unlikely(!value)) { + __Pyx_ErrRestore(NULL, NULL, NULL); + Py_INCREF(Py_None); + value = Py_None; + } + } +#endif + *pvalue = value; + return 0; +} +static CYTHON_INLINE +void __Pyx_Coroutine_ExceptionClear(__Pyx_ExcInfoStruct *exc_state) { +#if PY_VERSION_HEX >= 0x030B00a4 + Py_CLEAR(exc_state->exc_value); +#else + PyObject *t, *v, *tb; + t = exc_state->exc_type; + v = exc_state->exc_value; + tb = exc_state->exc_traceback; + exc_state->exc_type = NULL; + exc_state->exc_value = NULL; + exc_state->exc_traceback = NULL; + Py_XDECREF(t); + Py_XDECREF(v); + Py_XDECREF(tb); +#endif +} +#define __Pyx_Coroutine_AlreadyRunningError(gen) (__Pyx__Coroutine_AlreadyRunningError(gen), (PyObject*)NULL) +static void __Pyx__Coroutine_AlreadyRunningError(__pyx_CoroutineObject *gen) { + const char *msg; + CYTHON_MAYBE_UNUSED_VAR(gen); + if ((0)) { + #ifdef __Pyx_Coroutine_USED + } else if (__Pyx_Coroutine_Check((PyObject*)gen)) { + msg = "coroutine already executing"; + #endif + #ifdef __Pyx_AsyncGen_USED + } else if (__Pyx_AsyncGen_CheckExact((PyObject*)gen)) { + msg = "async generator already executing"; + #endif + } else { + msg = "generator already executing"; + } + PyErr_SetString(PyExc_ValueError, msg); +} +#define __Pyx_Coroutine_NotStartedError(gen) (__Pyx__Coroutine_NotStartedError(gen), (PyObject*)NULL) +static void __Pyx__Coroutine_NotStartedError(PyObject *gen) { + const char *msg; + CYTHON_MAYBE_UNUSED_VAR(gen); + if ((0)) { + #ifdef __Pyx_Coroutine_USED + } else if (__Pyx_Coroutine_Check(gen)) { + msg = "can't send non-None value to a just-started coroutine"; + #endif + #ifdef __Pyx_AsyncGen_USED + } else if (__Pyx_AsyncGen_CheckExact(gen)) { + msg = 
"can't send non-None value to a just-started async generator"; + #endif + } else { + msg = "can't send non-None value to a just-started generator"; + } + PyErr_SetString(PyExc_TypeError, msg); +} +#define __Pyx_Coroutine_AlreadyTerminatedError(gen, value, closing) (__Pyx__Coroutine_AlreadyTerminatedError(gen, value, closing), (PyObject*)NULL) +static void __Pyx__Coroutine_AlreadyTerminatedError(PyObject *gen, PyObject *value, int closing) { + CYTHON_MAYBE_UNUSED_VAR(gen); + CYTHON_MAYBE_UNUSED_VAR(closing); + #ifdef __Pyx_Coroutine_USED + if (!closing && __Pyx_Coroutine_Check(gen)) { + PyErr_SetString(PyExc_RuntimeError, "cannot reuse already awaited coroutine"); + } else + #endif + if (value) { + #ifdef __Pyx_AsyncGen_USED + if (__Pyx_AsyncGen_CheckExact(gen)) + PyErr_SetNone(__Pyx_PyExc_StopAsyncIteration); + else + #endif + PyErr_SetNone(PyExc_StopIteration); + } +} +static +PyObject *__Pyx_Coroutine_SendEx(__pyx_CoroutineObject *self, PyObject *value, int closing) { + __Pyx_PyThreadState_declare + PyThreadState *tstate; + __Pyx_ExcInfoStruct *exc_state; + PyObject *retval; + assert(!self->is_running); + if (unlikely(self->resume_label == 0)) { + if (unlikely(value && value != Py_None)) { + return __Pyx_Coroutine_NotStartedError((PyObject*)self); + } + } + if (unlikely(self->resume_label == -1)) { + return __Pyx_Coroutine_AlreadyTerminatedError((PyObject*)self, value, closing); + } +#if CYTHON_FAST_THREAD_STATE + __Pyx_PyThreadState_assign + tstate = __pyx_tstate; +#else + tstate = __Pyx_PyThreadState_Current; +#endif + exc_state = &self->gi_exc_state; + if (exc_state->exc_value) { + #if CYTHON_COMPILING_IN_PYPY + #else + PyObject *exc_tb; + #if PY_VERSION_HEX >= 0x030B00a4 && !CYTHON_COMPILING_IN_CPYTHON + exc_tb = PyException_GetTraceback(exc_state->exc_value); + #elif PY_VERSION_HEX >= 0x030B00a4 + exc_tb = ((PyBaseExceptionObject*) exc_state->exc_value)->traceback; + #else + exc_tb = exc_state->exc_traceback; + #endif + if (exc_tb) { + PyTracebackObject *tb 
= (PyTracebackObject *) exc_tb; + PyFrameObject *f = tb->tb_frame; + assert(f->f_back == NULL); + #if PY_VERSION_HEX >= 0x030B00A1 + f->f_back = PyThreadState_GetFrame(tstate); + #else + Py_XINCREF(tstate->frame); + f->f_back = tstate->frame; + #endif + #if PY_VERSION_HEX >= 0x030B00a4 && !CYTHON_COMPILING_IN_CPYTHON + Py_DECREF(exc_tb); + #endif + } + #endif + } +#if CYTHON_USE_EXC_INFO_STACK + exc_state->previous_item = tstate->exc_info; + tstate->exc_info = exc_state; +#else + if (exc_state->exc_type) { + __Pyx_ExceptionSwap(&exc_state->exc_type, &exc_state->exc_value, &exc_state->exc_traceback); + } else { + __Pyx_Coroutine_ExceptionClear(exc_state); + __Pyx_ExceptionSave(&exc_state->exc_type, &exc_state->exc_value, &exc_state->exc_traceback); + } +#endif + self->is_running = 1; + retval = self->body(self, tstate, value); + self->is_running = 0; +#if CYTHON_USE_EXC_INFO_STACK + exc_state = &self->gi_exc_state; + tstate->exc_info = exc_state->previous_item; + exc_state->previous_item = NULL; + __Pyx_Coroutine_ResetFrameBackpointer(exc_state); +#endif + return retval; +} +static CYTHON_INLINE void __Pyx_Coroutine_ResetFrameBackpointer(__Pyx_ExcInfoStruct *exc_state) { +#if CYTHON_COMPILING_IN_PYPY + CYTHON_UNUSED_VAR(exc_state); +#else + PyObject *exc_tb; + #if PY_VERSION_HEX >= 0x030B00a4 + if (!exc_state->exc_value) return; + exc_tb = PyException_GetTraceback(exc_state->exc_value); + #else + exc_tb = exc_state->exc_traceback; + #endif + if (likely(exc_tb)) { + PyTracebackObject *tb = (PyTracebackObject *) exc_tb; + PyFrameObject *f = tb->tb_frame; + Py_CLEAR(f->f_back); + #if PY_VERSION_HEX >= 0x030B00a4 + Py_DECREF(exc_tb); + #endif + } +#endif +} +static CYTHON_INLINE +PyObject *__Pyx_Coroutine_MethodReturn(PyObject* gen, PyObject *retval) { + CYTHON_MAYBE_UNUSED_VAR(gen); + if (unlikely(!retval)) { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + if (!__Pyx_PyErr_Occurred()) { + PyObject *exc = PyExc_StopIteration; + #ifdef __Pyx_AsyncGen_USED + 
if (__Pyx_AsyncGen_CheckExact(gen)) + exc = __Pyx_PyExc_StopAsyncIteration; + #endif + __Pyx_PyErr_SetNone(exc); + } + } + return retval; +} +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) +static CYTHON_INLINE +PyObject *__Pyx_PyGen_Send(PyGenObject *gen, PyObject *arg) { +#if PY_VERSION_HEX <= 0x030A00A1 + return _PyGen_Send(gen, arg); +#else + PyObject *result; + if (PyIter_Send((PyObject*)gen, arg ? arg : Py_None, &result) == PYGEN_RETURN) { + if (PyAsyncGen_CheckExact(gen)) { + assert(result == Py_None); + PyErr_SetNone(PyExc_StopAsyncIteration); + } + else if (result == Py_None) { + PyErr_SetNone(PyExc_StopIteration); + } + else { +#if PY_VERSION_HEX < 0x030d00A1 + _PyGen_SetStopIterationValue(result); +#else + if (!PyTuple_Check(result) && !PyExceptionInstance_Check(result)) { + PyErr_SetObject(PyExc_StopIteration, result); + } else { + PyObject *exc = __Pyx_PyObject_CallOneArg(PyExc_StopIteration, result); + if (likely(exc != NULL)) { + PyErr_SetObject(PyExc_StopIteration, exc); + Py_DECREF(exc); + } + } +#endif + } + Py_DECREF(result); + result = NULL; + } + return result; +#endif +} +#endif +static CYTHON_INLINE +PyObject *__Pyx_Coroutine_FinishDelegation(__pyx_CoroutineObject *gen) { + PyObject *ret; + PyObject *val = NULL; + __Pyx_Coroutine_Undelegate(gen); + __Pyx_PyGen__FetchStopIterationValue(__Pyx_PyThreadState_Current, &val); + ret = __Pyx_Coroutine_SendEx(gen, val, 0); + Py_XDECREF(val); + return ret; +} +static PyObject *__Pyx_Coroutine_Send(PyObject *self, PyObject *value) { + PyObject *retval; + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject*) self; + PyObject *yf = gen->yieldfrom; + if (unlikely(gen->is_running)) + return __Pyx_Coroutine_AlreadyRunningError(gen); + if (yf) { + PyObject *ret; + gen->is_running = 1; + #ifdef __Pyx_Generator_USED + if (__Pyx_Generator_CheckExact(yf)) { + ret = __Pyx_Coroutine_Send(yf, value); + } else + #endif + #ifdef 
__Pyx_Coroutine_USED + if (__Pyx_Coroutine_Check(yf)) { + ret = __Pyx_Coroutine_Send(yf, value); + } else + #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_PyAsyncGenASend_CheckExact(yf)) { + ret = __Pyx_async_gen_asend_send(yf, value); + } else + #endif + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) + if (PyGen_CheckExact(yf)) { + ret = __Pyx_PyGen_Send((PyGenObject*)yf, value == Py_None ? NULL : value); + } else + #endif + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03050000 && defined(PyCoro_CheckExact) && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) + if (PyCoro_CheckExact(yf)) { + ret = __Pyx_PyGen_Send((PyGenObject*)yf, value == Py_None ? NULL : value); + } else + #endif + { + if (value == Py_None) + ret = __Pyx_PyObject_GetIterNextFunc(yf)(yf); + else + ret = __Pyx_PyObject_CallMethod1(yf, __pyx_n_s_send, value); + } + gen->is_running = 0; + if (likely(ret)) { + return ret; + } + retval = __Pyx_Coroutine_FinishDelegation(gen); + } else { + retval = __Pyx_Coroutine_SendEx(gen, value, 0); + } + return __Pyx_Coroutine_MethodReturn(self, retval); +} +static int __Pyx_Coroutine_CloseIter(__pyx_CoroutineObject *gen, PyObject *yf) { + PyObject *retval = NULL; + int err = 0; + #ifdef __Pyx_Generator_USED + if (__Pyx_Generator_CheckExact(yf)) { + retval = __Pyx_Coroutine_Close(yf); + if (!retval) + return -1; + } else + #endif + #ifdef __Pyx_Coroutine_USED + if (__Pyx_Coroutine_Check(yf)) { + retval = __Pyx_Coroutine_Close(yf); + if (!retval) + return -1; + } else + if (__Pyx_CoroutineAwait_CheckExact(yf)) { + retval = __Pyx_CoroutineAwait_Close((__pyx_CoroutineAwaitObject*)yf, NULL); + if (!retval) + return -1; + } else + #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_PyAsyncGenASend_CheckExact(yf)) { + retval = __Pyx_async_gen_asend_close(yf, NULL); + } else + if (__pyx_PyAsyncGenAThrow_CheckExact(yf)) { + retval = __Pyx_async_gen_athrow_close(yf, NULL); + } else + #endif 
+ { + PyObject *meth; + gen->is_running = 1; + meth = __Pyx_PyObject_GetAttrStrNoError(yf, __pyx_n_s_close); + if (unlikely(!meth)) { + if (unlikely(PyErr_Occurred())) { + PyErr_WriteUnraisable(yf); + } + } else { + retval = __Pyx_PyObject_CallNoArg(meth); + Py_DECREF(meth); + if (unlikely(!retval)) + err = -1; + } + gen->is_running = 0; + } + Py_XDECREF(retval); + return err; +} +static PyObject *__Pyx_Generator_Next(PyObject *self) { + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject*) self; + PyObject *yf = gen->yieldfrom; + if (unlikely(gen->is_running)) + return __Pyx_Coroutine_AlreadyRunningError(gen); + if (yf) { + PyObject *ret; + gen->is_running = 1; + #ifdef __Pyx_Generator_USED + if (__Pyx_Generator_CheckExact(yf)) { + ret = __Pyx_Generator_Next(yf); + } else + #endif + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) + if (PyGen_CheckExact(yf)) { + ret = __Pyx_PyGen_Send((PyGenObject*)yf, NULL); + } else + #endif + #ifdef __Pyx_Coroutine_USED + if (__Pyx_Coroutine_Check(yf)) { + ret = __Pyx_Coroutine_Send(yf, Py_None); + } else + #endif + ret = __Pyx_PyObject_GetIterNextFunc(yf)(yf); + gen->is_running = 0; + if (likely(ret)) { + return ret; + } + return __Pyx_Coroutine_FinishDelegation(gen); + } + return __Pyx_Coroutine_SendEx(gen, Py_None, 0); +} +static PyObject *__Pyx_Coroutine_Close_Method(PyObject *self, PyObject *arg) { + CYTHON_UNUSED_VAR(arg); + return __Pyx_Coroutine_Close(self); +} +static PyObject *__Pyx_Coroutine_Close(PyObject *self) { + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; + PyObject *retval, *raised_exception; + PyObject *yf = gen->yieldfrom; + int err = 0; + if (unlikely(gen->is_running)) + return __Pyx_Coroutine_AlreadyRunningError(gen); + if (yf) { + Py_INCREF(yf); + err = __Pyx_Coroutine_CloseIter(gen, yf); + __Pyx_Coroutine_Undelegate(gen); + Py_DECREF(yf); + } + if (err == 0) + PyErr_SetNone(PyExc_GeneratorExit); + retval = 
__Pyx_Coroutine_SendEx(gen, NULL, 1); + if (unlikely(retval)) { + const char *msg; + Py_DECREF(retval); + if ((0)) { + #ifdef __Pyx_Coroutine_USED + } else if (__Pyx_Coroutine_Check(self)) { + msg = "coroutine ignored GeneratorExit"; + #endif + #ifdef __Pyx_AsyncGen_USED + } else if (__Pyx_AsyncGen_CheckExact(self)) { +#if PY_VERSION_HEX < 0x03060000 + msg = "async generator ignored GeneratorExit - might require Python 3.6+ finalisation (PEP 525)"; +#else + msg = "async generator ignored GeneratorExit"; +#endif + #endif + } else { + msg = "generator ignored GeneratorExit"; + } + PyErr_SetString(PyExc_RuntimeError, msg); + return NULL; + } + raised_exception = PyErr_Occurred(); + if (likely(!raised_exception || __Pyx_PyErr_GivenExceptionMatches2(raised_exception, PyExc_GeneratorExit, PyExc_StopIteration))) { + if (raised_exception) PyErr_Clear(); + Py_INCREF(Py_None); + return Py_None; + } + return NULL; +} +static PyObject *__Pyx__Coroutine_Throw(PyObject *self, PyObject *typ, PyObject *val, PyObject *tb, + PyObject *args, int close_on_genexit) { + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; + PyObject *yf = gen->yieldfrom; + if (unlikely(gen->is_running)) + return __Pyx_Coroutine_AlreadyRunningError(gen); + if (yf) { + PyObject *ret; + Py_INCREF(yf); + if (__Pyx_PyErr_GivenExceptionMatches(typ, PyExc_GeneratorExit) && close_on_genexit) { + int err = __Pyx_Coroutine_CloseIter(gen, yf); + Py_DECREF(yf); + __Pyx_Coroutine_Undelegate(gen); + if (err < 0) + return __Pyx_Coroutine_MethodReturn(self, __Pyx_Coroutine_SendEx(gen, NULL, 0)); + goto throw_here; + } + gen->is_running = 1; + if (0 + #ifdef __Pyx_Generator_USED + || __Pyx_Generator_CheckExact(yf) + #endif + #ifdef __Pyx_Coroutine_USED + || __Pyx_Coroutine_Check(yf) + #endif + ) { + ret = __Pyx__Coroutine_Throw(yf, typ, val, tb, args, close_on_genexit); + #ifdef __Pyx_Coroutine_USED + } else if (__Pyx_CoroutineAwait_CheckExact(yf)) { + ret = 
__Pyx__Coroutine_Throw(((__pyx_CoroutineAwaitObject*)yf)->coroutine, typ, val, tb, args, close_on_genexit); + #endif + } else { + PyObject *meth = __Pyx_PyObject_GetAttrStrNoError(yf, __pyx_n_s_throw); + if (unlikely(!meth)) { + Py_DECREF(yf); + if (unlikely(PyErr_Occurred())) { + gen->is_running = 0; + return NULL; + } + __Pyx_Coroutine_Undelegate(gen); + gen->is_running = 0; + goto throw_here; + } + if (likely(args)) { + ret = __Pyx_PyObject_Call(meth, args, NULL); + } else { + PyObject *cargs[4] = {NULL, typ, val, tb}; + ret = __Pyx_PyObject_FastCall(meth, cargs+1, 3 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); + } + Py_DECREF(meth); + } + gen->is_running = 0; + Py_DECREF(yf); + if (!ret) { + ret = __Pyx_Coroutine_FinishDelegation(gen); + } + return __Pyx_Coroutine_MethodReturn(self, ret); + } +throw_here: + __Pyx_Raise(typ, val, tb, NULL); + return __Pyx_Coroutine_MethodReturn(self, __Pyx_Coroutine_SendEx(gen, NULL, 0)); +} +static PyObject *__Pyx_Coroutine_Throw(PyObject *self, PyObject *args) { + PyObject *typ; + PyObject *val = NULL; + PyObject *tb = NULL; + if (unlikely(!PyArg_UnpackTuple(args, (char *)"throw", 1, 3, &typ, &val, &tb))) + return NULL; + return __Pyx__Coroutine_Throw(self, typ, val, tb, args, 1); +} +static CYTHON_INLINE int __Pyx_Coroutine_traverse_excstate(__Pyx_ExcInfoStruct *exc_state, visitproc visit, void *arg) { +#if PY_VERSION_HEX >= 0x030B00a4 + Py_VISIT(exc_state->exc_value); +#else + Py_VISIT(exc_state->exc_type); + Py_VISIT(exc_state->exc_value); + Py_VISIT(exc_state->exc_traceback); +#endif + return 0; +} +static int __Pyx_Coroutine_traverse(__pyx_CoroutineObject *gen, visitproc visit, void *arg) { + Py_VISIT(gen->closure); + Py_VISIT(gen->classobj); + Py_VISIT(gen->yieldfrom); + return __Pyx_Coroutine_traverse_excstate(&gen->gi_exc_state, visit, arg); +} +static int __Pyx_Coroutine_clear(PyObject *self) { + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; + Py_CLEAR(gen->closure); + Py_CLEAR(gen->classobj); + 
Py_CLEAR(gen->yieldfrom); + __Pyx_Coroutine_ExceptionClear(&gen->gi_exc_state); +#ifdef __Pyx_AsyncGen_USED + if (__Pyx_AsyncGen_CheckExact(self)) { + Py_CLEAR(((__pyx_PyAsyncGenObject*)gen)->ag_finalizer); + } +#endif + Py_CLEAR(gen->gi_code); + Py_CLEAR(gen->gi_frame); + Py_CLEAR(gen->gi_name); + Py_CLEAR(gen->gi_qualname); + Py_CLEAR(gen->gi_modulename); + return 0; +} +static void __Pyx_Coroutine_dealloc(PyObject *self) { + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; + PyObject_GC_UnTrack(gen); + if (gen->gi_weakreflist != NULL) + PyObject_ClearWeakRefs(self); + if (gen->resume_label >= 0) { + PyObject_GC_Track(self); +#if PY_VERSION_HEX >= 0x030400a1 && CYTHON_USE_TP_FINALIZE + if (unlikely(PyObject_CallFinalizerFromDealloc(self))) +#else + Py_TYPE(gen)->tp_del(self); + if (unlikely(Py_REFCNT(self) > 0)) +#endif + { + return; + } + PyObject_GC_UnTrack(self); + } +#ifdef __Pyx_AsyncGen_USED + if (__Pyx_AsyncGen_CheckExact(self)) { + /* We have to handle this case for asynchronous generators + right here, because this code has to be between UNTRACK + and GC_Del. 
*/ + Py_CLEAR(((__pyx_PyAsyncGenObject*)self)->ag_finalizer); + } +#endif + __Pyx_Coroutine_clear(self); + __Pyx_PyHeapTypeObject_GC_Del(gen); +} +static void __Pyx_Coroutine_del(PyObject *self) { + PyObject *error_type, *error_value, *error_traceback; + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; + __Pyx_PyThreadState_declare + if (gen->resume_label < 0) { + return; + } +#if !CYTHON_USE_TP_FINALIZE + assert(self->ob_refcnt == 0); + __Pyx_SET_REFCNT(self, 1); +#endif + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&error_type, &error_value, &error_traceback); +#ifdef __Pyx_AsyncGen_USED + if (__Pyx_AsyncGen_CheckExact(self)) { + __pyx_PyAsyncGenObject *agen = (__pyx_PyAsyncGenObject*)self; + PyObject *finalizer = agen->ag_finalizer; + if (finalizer && !agen->ag_closed) { + PyObject *res = __Pyx_PyObject_CallOneArg(finalizer, self); + if (unlikely(!res)) { + PyErr_WriteUnraisable(self); + } else { + Py_DECREF(res); + } + __Pyx_ErrRestore(error_type, error_value, error_traceback); + return; + } + } +#endif + if (unlikely(gen->resume_label == 0 && !error_value)) { +#ifdef __Pyx_Coroutine_USED +#ifdef __Pyx_Generator_USED + if (!__Pyx_Generator_CheckExact(self)) +#endif + { + PyObject_GC_UnTrack(self); +#if PY_MAJOR_VERSION >= 3 || defined(PyErr_WarnFormat) + if (unlikely(PyErr_WarnFormat(PyExc_RuntimeWarning, 1, "coroutine '%.50S' was never awaited", gen->gi_qualname) < 0)) + PyErr_WriteUnraisable(self); +#else + {PyObject *msg; + char *cmsg; + #if CYTHON_COMPILING_IN_PYPY + msg = NULL; + cmsg = (char*) "coroutine was never awaited"; + #else + char *cname; + PyObject *qualname; + qualname = gen->gi_qualname; + cname = PyString_AS_STRING(qualname); + msg = PyString_FromFormat("coroutine '%.50s' was never awaited", cname); + if (unlikely(!msg)) { + PyErr_Clear(); + cmsg = (char*) "coroutine was never awaited"; + } else { + cmsg = PyString_AS_STRING(msg); + } + #endif + if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, cmsg, 1) < 0)) + 
PyErr_WriteUnraisable(self); + Py_XDECREF(msg);} +#endif + PyObject_GC_Track(self); + } +#endif + } else { + PyObject *res = __Pyx_Coroutine_Close(self); + if (unlikely(!res)) { + if (PyErr_Occurred()) + PyErr_WriteUnraisable(self); + } else { + Py_DECREF(res); + } + } + __Pyx_ErrRestore(error_type, error_value, error_traceback); +#if !CYTHON_USE_TP_FINALIZE + assert(Py_REFCNT(self) > 0); + if (likely(--self->ob_refcnt == 0)) { + return; + } + { + Py_ssize_t refcnt = Py_REFCNT(self); + _Py_NewReference(self); + __Pyx_SET_REFCNT(self, refcnt); + } +#if CYTHON_COMPILING_IN_CPYTHON + assert(PyType_IS_GC(Py_TYPE(self)) && + _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED); + _Py_DEC_REFTOTAL; +#endif +#ifdef COUNT_ALLOCS + --Py_TYPE(self)->tp_frees; + --Py_TYPE(self)->tp_allocs; +#endif +#endif +} +static PyObject * +__Pyx_Coroutine_get_name(__pyx_CoroutineObject *self, void *context) +{ + PyObject *name = self->gi_name; + CYTHON_UNUSED_VAR(context); + if (unlikely(!name)) name = Py_None; + Py_INCREF(name); + return name; +} +static int +__Pyx_Coroutine_set_name(__pyx_CoroutineObject *self, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__name__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(self->gi_name, value); + return 0; +} +static PyObject * +__Pyx_Coroutine_get_qualname(__pyx_CoroutineObject *self, void *context) +{ + PyObject *name = self->gi_qualname; + CYTHON_UNUSED_VAR(context); + if (unlikely(!name)) name = Py_None; + Py_INCREF(name); + return name; +} +static int +__Pyx_Coroutine_set_qualname(__pyx_CoroutineObject *self, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if 
(unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__qualname__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(self->gi_qualname, value); + return 0; +} +static PyObject * +__Pyx_Coroutine_get_frame(__pyx_CoroutineObject *self, void *context) +{ + PyObject *frame = self->gi_frame; + CYTHON_UNUSED_VAR(context); + if (!frame) { + if (unlikely(!self->gi_code)) { + Py_RETURN_NONE; + } + frame = (PyObject *) PyFrame_New( + PyThreadState_Get(), /*PyThreadState *tstate,*/ + (PyCodeObject*) self->gi_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (unlikely(!frame)) + return NULL; + self->gi_frame = frame; + } + Py_INCREF(frame); + return frame; +} +static __pyx_CoroutineObject *__Pyx__Coroutine_New( + PyTypeObject* type, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, + PyObject *name, PyObject *qualname, PyObject *module_name) { + __pyx_CoroutineObject *gen = PyObject_GC_New(__pyx_CoroutineObject, type); + if (unlikely(!gen)) + return NULL; + return __Pyx__Coroutine_NewInit(gen, body, code, closure, name, qualname, module_name); +} +static __pyx_CoroutineObject *__Pyx__Coroutine_NewInit( + __pyx_CoroutineObject *gen, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, + PyObject *name, PyObject *qualname, PyObject *module_name) { + gen->body = body; + gen->closure = closure; + Py_XINCREF(closure); + gen->is_running = 0; + gen->resume_label = 0; + gen->classobj = NULL; + gen->yieldfrom = NULL; + #if PY_VERSION_HEX >= 0x030B00a4 + gen->gi_exc_state.exc_value = NULL; + #else + gen->gi_exc_state.exc_type = NULL; + gen->gi_exc_state.exc_value = NULL; + gen->gi_exc_state.exc_traceback = NULL; + #endif +#if CYTHON_USE_EXC_INFO_STACK + gen->gi_exc_state.previous_item = NULL; +#endif + gen->gi_weakreflist = NULL; + Py_XINCREF(qualname); + gen->gi_qualname = qualname; + Py_XINCREF(name); + gen->gi_name = 
name; + Py_XINCREF(module_name); + gen->gi_modulename = module_name; + Py_XINCREF(code); + gen->gi_code = code; + gen->gi_frame = NULL; + PyObject_GC_Track(gen); + return gen; +} + +/* PatchModuleWithCoroutine */ +static PyObject* __Pyx_Coroutine_patch_module(PyObject* module, const char* py_code) { +#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + int result; + PyObject *globals, *result_obj; + globals = PyDict_New(); if (unlikely(!globals)) goto ignore; + result = PyDict_SetItemString(globals, "_cython_coroutine_type", + #ifdef __Pyx_Coroutine_USED + (PyObject*)__pyx_CoroutineType); + #else + Py_None); + #endif + if (unlikely(result < 0)) goto ignore; + result = PyDict_SetItemString(globals, "_cython_generator_type", + #ifdef __Pyx_Generator_USED + (PyObject*)__pyx_GeneratorType); + #else + Py_None); + #endif + if (unlikely(result < 0)) goto ignore; + if (unlikely(PyDict_SetItemString(globals, "_module", module) < 0)) goto ignore; + if (unlikely(PyDict_SetItemString(globals, "__builtins__", __pyx_b) < 0)) goto ignore; + result_obj = PyRun_String(py_code, Py_file_input, globals, globals); + if (unlikely(!result_obj)) goto ignore; + Py_DECREF(result_obj); + Py_DECREF(globals); + return module; +ignore: + Py_XDECREF(globals); + PyErr_WriteUnraisable(module); + if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, "Cython module failed to patch module with custom type", 1) < 0)) { + Py_DECREF(module); + module = NULL; + } +#else + py_code++; +#endif + return module; +} + +/* PatchGeneratorABC */ +#ifndef CYTHON_REGISTER_ABCS +#define CYTHON_REGISTER_ABCS 1 +#endif +#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) +static PyObject* __Pyx_patch_abc_module(PyObject *module); +static PyObject* __Pyx_patch_abc_module(PyObject *module) { + module = __Pyx_Coroutine_patch_module( + module, "" +"if _cython_generator_type is not None:\n" +" try: Generator = _module.Generator\n" +" except AttributeError: pass\n" +" else: 
Generator.register(_cython_generator_type)\n" +"if _cython_coroutine_type is not None:\n" +" try: Coroutine = _module.Coroutine\n" +" except AttributeError: pass\n" +" else: Coroutine.register(_cython_coroutine_type)\n" + ); + return module; +} +#endif +static int __Pyx_patch_abc(void) { +#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + static int abc_patched = 0; + if (CYTHON_REGISTER_ABCS && !abc_patched) { + PyObject *module; + module = PyImport_ImportModule((PY_MAJOR_VERSION >= 3) ? "collections.abc" : "collections"); + if (unlikely(!module)) { + PyErr_WriteUnraisable(NULL); + if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, + ((PY_MAJOR_VERSION >= 3) ? + "Cython module failed to register with collections.abc module" : + "Cython module failed to register with collections module"), 1) < 0)) { + return -1; + } + } else { + module = __Pyx_patch_abc_module(module); + abc_patched = 1; + if (unlikely(!module)) + return -1; + Py_DECREF(module); + } + module = PyImport_ImportModule("backports_abc"); + if (module) { + module = __Pyx_patch_abc_module(module); + Py_XDECREF(module); + } + if (!module) { + PyErr_Clear(); + } + } +#else + if ((0)) __Pyx_Coroutine_patch_module(NULL, NULL); +#endif + return 0; +} + +/* Generator */ +static PyMethodDef __pyx_Generator_methods[] = { + {"send", (PyCFunction) __Pyx_Coroutine_Send, METH_O, + (char*) PyDoc_STR("send(arg) -> send 'arg' into generator,\nreturn next yielded value or raise StopIteration.")}, + {"throw", (PyCFunction) __Pyx_Coroutine_Throw, METH_VARARGS, + (char*) PyDoc_STR("throw(typ[,val[,tb]]) -> raise exception in generator,\nreturn next yielded value or raise StopIteration.")}, + {"close", (PyCFunction) __Pyx_Coroutine_Close_Method, METH_NOARGS, + (char*) PyDoc_STR("close() -> raise GeneratorExit inside generator.")}, + {0, 0, 0, 0} +}; +static PyMemberDef __pyx_Generator_memberlist[] = { + {(char *) "gi_running", T_BOOL, offsetof(__pyx_CoroutineObject, is_running), READONLY, NULL}, + {(char*) 
"gi_yieldfrom", T_OBJECT, offsetof(__pyx_CoroutineObject, yieldfrom), READONLY, + (char*) PyDoc_STR("object being iterated by 'yield from', or None")}, + {(char*) "gi_code", T_OBJECT, offsetof(__pyx_CoroutineObject, gi_code), READONLY, NULL}, + {(char *) "__module__", T_OBJECT, offsetof(__pyx_CoroutineObject, gi_modulename), 0, 0}, +#if CYTHON_USE_TYPE_SPECS + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CoroutineObject, gi_weakreflist), READONLY, 0}, +#endif + {0, 0, 0, 0, 0} +}; +static PyGetSetDef __pyx_Generator_getsets[] = { + {(char *) "__name__", (getter)__Pyx_Coroutine_get_name, (setter)__Pyx_Coroutine_set_name, + (char*) PyDoc_STR("name of the generator"), 0}, + {(char *) "__qualname__", (getter)__Pyx_Coroutine_get_qualname, (setter)__Pyx_Coroutine_set_qualname, + (char*) PyDoc_STR("qualified name of the generator"), 0}, + {(char *) "gi_frame", (getter)__Pyx_Coroutine_get_frame, NULL, + (char*) PyDoc_STR("Frame of the generator"), 0}, + {0, 0, 0, 0, 0} +}; +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_GeneratorType_slots[] = { + {Py_tp_dealloc, (void *)__Pyx_Coroutine_dealloc}, + {Py_tp_traverse, (void *)__Pyx_Coroutine_traverse}, + {Py_tp_iter, (void *)PyObject_SelfIter}, + {Py_tp_iternext, (void *)__Pyx_Generator_Next}, + {Py_tp_methods, (void *)__pyx_Generator_methods}, + {Py_tp_members, (void *)__pyx_Generator_memberlist}, + {Py_tp_getset, (void *)__pyx_Generator_getsets}, + {Py_tp_getattro, (void *) __Pyx_PyObject_GenericGetAttrNoDict}, +#if CYTHON_USE_TP_FINALIZE + {Py_tp_finalize, (void *)__Pyx_Coroutine_del}, +#endif + {0, 0}, +}; +static PyType_Spec __pyx_GeneratorType_spec = { + __PYX_TYPE_MODULE_PREFIX "generator", + sizeof(__pyx_CoroutineObject), + 0, + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, + __pyx_GeneratorType_slots +}; +#else +static PyTypeObject __pyx_GeneratorType_type = { + PyVarObject_HEAD_INIT(0, 0) + __PYX_TYPE_MODULE_PREFIX "generator", + sizeof(__pyx_CoroutineObject), + 0, + 
(destructor) __Pyx_Coroutine_dealloc, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, + 0, + (traverseproc) __Pyx_Coroutine_traverse, + 0, + 0, + offsetof(__pyx_CoroutineObject, gi_weakreflist), + 0, + (iternextfunc) __Pyx_Generator_Next, + __pyx_Generator_methods, + __pyx_Generator_memberlist, + __pyx_Generator_getsets, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +#if CYTHON_USE_TP_FINALIZE + 0, +#else + __Pyx_Coroutine_del, +#endif + 0, +#if CYTHON_USE_TP_FINALIZE + __Pyx_Coroutine_del, +#elif PY_VERSION_HEX >= 0x030400a1 + 0, +#endif +#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, +#endif +#if __PYX_NEED_TP_PRINT_SLOT + 0, +#endif +#if PY_VERSION_HEX >= 0x030C0000 + 0, +#endif +#if PY_VERSION_HEX >= 0x030d00A4 + 0, +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, +#endif +}; +#endif +static int __pyx_Generator_init(PyObject *module) { +#if CYTHON_USE_TYPE_SPECS + __pyx_GeneratorType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_GeneratorType_spec, NULL); +#else + CYTHON_UNUSED_VAR(module); + __pyx_GeneratorType_type.tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; + __pyx_GeneratorType_type.tp_iter = PyObject_SelfIter; + __pyx_GeneratorType = __Pyx_FetchCommonType(&__pyx_GeneratorType_type); +#endif + if (unlikely(!__pyx_GeneratorType)) { + return -1; + } + return 0; +} + +/* CheckBinaryVersion */ +static unsigned long __Pyx_get_runtime_version(void) { +#if __PYX_LIMITED_VERSION_HEX >= 0x030B00A4 + return Py_Version & ~0xFFUL; +#else + const char* rt_version = Py_GetVersion(); + unsigned long version = 0; + unsigned long factor = 0x01000000UL; + unsigned int digit = 0; + int i = 0; + while (factor) { + while ('0' <= rt_version[i] && rt_version[i] <= '9') { + digit = digit * 10 + (unsigned int) (rt_version[i] - '0'); + ++i; + } 
+ version += factor * digit; + if (rt_version[i] != '.') + break; + digit = 0; + factor >>= 8; + ++i; + } + return version; +#endif +} +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer) { + const unsigned long MAJOR_MINOR = 0xFFFF0000UL; + if ((rt_version & MAJOR_MINOR) == (ct_version & MAJOR_MINOR)) + return 0; + if (likely(allow_newer && (rt_version & MAJOR_MINOR) > (ct_version & MAJOR_MINOR))) + return 1; + { + char message[200]; + PyOS_snprintf(message, sizeof(message), + "compile time Python version %d.%d " + "of module '%.100s' " + "%s " + "runtime version %d.%d", + (int) (ct_version >> 24), (int) ((ct_version >> 16) & 0xFF), + __Pyx_MODULE_NAME, + (allow_newer) ? "was newer than" : "does not match", + (int) (rt_version >> 24), (int) ((rt_version >> 16) & 0xFF) + ); + return PyErr_WarnEx(NULL, message, 1); + } +} + +/* InitStrings */ +#if PY_MAJOR_VERSION >= 3 +static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) { + if (t.is_unicode | t.is_str) { + if (t.intern) { + *str = PyUnicode_InternFromString(t.s); + } else if (t.encoding) { + *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL); + } else { + *str = PyUnicode_FromStringAndSize(t.s, t.n - 1); + } + } else { + *str = PyBytes_FromStringAndSize(t.s, t.n - 1); + } + if (!*str) + return -1; + if (PyObject_Hash(*str) == -1) + return -1; + return 0; +} +#endif +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION >= 3 + __Pyx_InitString(*t, t->p); + #else + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + if (!*t->p) + return -1; + if (PyObject_Hash(*t->p) == -1) + return -1; + #endif + ++t; + } + return 0; +} + +#include +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) { + size_t len = strlen(s); + if 
(unlikely(len > (size_t) PY_SSIZE_T_MAX)) { + PyErr_SetString(PyExc_OverflowError, "byte string is too long"); + return -1; + } + return (Py_ssize_t) len; +} +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return __Pyx_PyUnicode_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return PyByteArray_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#if !CYTHON_PEP393_ENABLED +static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +} +#else +static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (likely(PyUnicode_IS_ASCII(o))) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +} +#endif +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if 
__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { + return __Pyx_PyUnicode_AsStringAndSize(o, length); + } else +#endif +#if (!CYTHON_COMPILING_IN_PYPY && !CYTHON_COMPILING_IN_LIMITED_API) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r = PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { + int retval; + if (unlikely(!x)) return -1; + retval = __Pyx_PyObject_IsTrue(x); + Py_DECREF(x); + return retval; +} +static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { + __Pyx_TypeName result_type_name = __Pyx_PyType_GetName(Py_TYPE(result)); +#if PY_MAJOR_VERSION >= 3 + if (PyLong_Check(result)) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "__int__ returned non-int (type " __Pyx_FMT_TYPENAME "). 
" + "The ability to return an instance of a strict subclass of int is deprecated, " + "and may be removed in a future version of Python.", + result_type_name)) { + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; + } + __Pyx_DECREF_TypeName(result_type_name); + return result; + } +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type " __Pyx_FMT_TYPENAME ")", + type_name, type_name, result_type_name); + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { +#if CYTHON_USE_TYPE_SLOTS + PyNumberMethods *m; +#endif + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x) || PyLong_Check(x))) +#else + if (likely(PyLong_Check(x))) +#endif + return __Pyx_NewRef(x); +#if CYTHON_USE_TYPE_SLOTS + m = Py_TYPE(x)->tp_as_number; + #if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = m->nb_int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = m->nb_long(x); + } + #else + if (likely(m && m->nb_int)) { + name = "int"; + res = m->nb_int(x); + } + #endif +#else + if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { + res = PyNumber_Int(x); + } +#endif + if (likely(res)) { +#if PY_MAJOR_VERSION < 3 + if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { +#else + if (unlikely(!PyLong_CheckExact(res))) { +#endif + return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) { + if (sizeof(Py_ssize_t) >= sizeof(long)) + return PyInt_AS_LONG(b); + else + return PyInt_AsSsize_t(b); + } +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_USE_PYLONG_INTERNALS + if 
(likely(__Pyx_PyLong_IsCompact(b))) { + return __Pyx_PyLong_CompactValue(b); + } else { + const digit* digits = __Pyx_PyLong_Digits(b); + const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(b); + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { + if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { + return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o); +#if PY_MAJOR_VERSION < 3 + } else if (likely(PyInt_CheckExact(o))) { + return PyInt_AS_LONG(o); +#endif + } else { + Py_ssize_t ival; + PyObject *x; + x = PyNumber_Index(o); + if (!x) return -1; + ival = PyInt_AsLong(x); + Py_DECREF(x); + return ival; + } 
+} +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { + return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +/* #### Code section: utility_code_pragmas_end ### */ +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + + + +/* #### Code section: end ### */ +#endif /* Py_PYTHON_H */ diff --git a/jcvi/formats/cblast.pyx b/src/jcvi/formats/cblast.pyx similarity index 100% rename from jcvi/formats/cblast.pyx rename to src/jcvi/formats/cblast.pyx diff --git a/jcvi/formats/cdt.py b/src/jcvi/formats/cdt.py similarity index 100% rename from jcvi/formats/cdt.py rename to src/jcvi/formats/cdt.py diff --git a/jcvi/formats/chain.py b/src/jcvi/formats/chain.py similarity index 100% rename from jcvi/formats/chain.py rename to src/jcvi/formats/chain.py diff --git a/jcvi/formats/contig.py b/src/jcvi/formats/contig.py similarity index 100% rename from jcvi/formats/contig.py rename to src/jcvi/formats/contig.py diff --git a/jcvi/formats/coords.py b/src/jcvi/formats/coords.py similarity index 100% rename from jcvi/formats/coords.py rename to src/jcvi/formats/coords.py diff --git a/jcvi/formats/excel.py b/src/jcvi/formats/excel.py similarity index 100% rename from jcvi/formats/excel.py rename to src/jcvi/formats/excel.py diff --git a/jcvi/formats/fasta.py b/src/jcvi/formats/fasta.py similarity index 100% rename from jcvi/formats/fasta.py rename to src/jcvi/formats/fasta.py diff --git a/jcvi/formats/fastq.py b/src/jcvi/formats/fastq.py similarity index 100% rename from jcvi/formats/fastq.py rename to src/jcvi/formats/fastq.py diff --git a/jcvi/formats/genbank.py b/src/jcvi/formats/genbank.py similarity index 100% rename from jcvi/formats/genbank.py rename to src/jcvi/formats/genbank.py diff --git a/jcvi/formats/gff.py b/src/jcvi/formats/gff.py similarity index 100% rename from jcvi/formats/gff.py rename to src/jcvi/formats/gff.py diff --git 
a/jcvi/formats/html.py b/src/jcvi/formats/html.py similarity index 100% rename from jcvi/formats/html.py rename to src/jcvi/formats/html.py diff --git a/jcvi/formats/maf.py b/src/jcvi/formats/maf.py similarity index 100% rename from jcvi/formats/maf.py rename to src/jcvi/formats/maf.py diff --git a/jcvi/formats/obo.py b/src/jcvi/formats/obo.py similarity index 100% rename from jcvi/formats/obo.py rename to src/jcvi/formats/obo.py diff --git a/jcvi/formats/paf.py b/src/jcvi/formats/paf.py similarity index 100% rename from jcvi/formats/paf.py rename to src/jcvi/formats/paf.py diff --git a/jcvi/formats/pdf.py b/src/jcvi/formats/pdf.py similarity index 100% rename from jcvi/formats/pdf.py rename to src/jcvi/formats/pdf.py diff --git a/jcvi/formats/psl.py b/src/jcvi/formats/psl.py similarity index 100% rename from jcvi/formats/psl.py rename to src/jcvi/formats/psl.py diff --git a/jcvi/formats/pyblast.py b/src/jcvi/formats/pyblast.py similarity index 100% rename from jcvi/formats/pyblast.py rename to src/jcvi/formats/pyblast.py diff --git a/jcvi/formats/sam.py b/src/jcvi/formats/sam.py similarity index 100% rename from jcvi/formats/sam.py rename to src/jcvi/formats/sam.py diff --git a/jcvi/formats/sizes.py b/src/jcvi/formats/sizes.py similarity index 100% rename from jcvi/formats/sizes.py rename to src/jcvi/formats/sizes.py diff --git a/jcvi/formats/vcf.py b/src/jcvi/formats/vcf.py similarity index 100% rename from jcvi/formats/vcf.py rename to src/jcvi/formats/vcf.py diff --git a/jcvi/graphics/__init__.py b/src/jcvi/graphics/__init__.py similarity index 100% rename from jcvi/graphics/__init__.py rename to src/jcvi/graphics/__init__.py diff --git a/jcvi/graphics/__main__.py b/src/jcvi/graphics/__main__.py similarity index 100% rename from jcvi/graphics/__main__.py rename to src/jcvi/graphics/__main__.py diff --git a/jcvi/graphics/align.py b/src/jcvi/graphics/align.py similarity index 100% rename from jcvi/graphics/align.py rename to src/jcvi/graphics/align.py diff --git 
a/jcvi/graphics/assembly.py b/src/jcvi/graphics/assembly.py similarity index 100% rename from jcvi/graphics/assembly.py rename to src/jcvi/graphics/assembly.py diff --git a/jcvi/graphics/base.py b/src/jcvi/graphics/base.py similarity index 100% rename from jcvi/graphics/base.py rename to src/jcvi/graphics/base.py diff --git a/jcvi/graphics/blastplot.py b/src/jcvi/graphics/blastplot.py similarity index 100% rename from jcvi/graphics/blastplot.py rename to src/jcvi/graphics/blastplot.py diff --git a/jcvi/graphics/chromosome.py b/src/jcvi/graphics/chromosome.py similarity index 100% rename from jcvi/graphics/chromosome.py rename to src/jcvi/graphics/chromosome.py diff --git a/jcvi/graphics/coverage.py b/src/jcvi/graphics/coverage.py similarity index 100% rename from jcvi/graphics/coverage.py rename to src/jcvi/graphics/coverage.py diff --git a/jcvi/graphics/dotplot.py b/src/jcvi/graphics/dotplot.py similarity index 100% rename from jcvi/graphics/dotplot.py rename to src/jcvi/graphics/dotplot.py diff --git a/jcvi/graphics/glyph.py b/src/jcvi/graphics/glyph.py similarity index 100% rename from jcvi/graphics/glyph.py rename to src/jcvi/graphics/glyph.py diff --git a/jcvi/graphics/grabseeds.py b/src/jcvi/graphics/grabseeds.py similarity index 100% rename from jcvi/graphics/grabseeds.py rename to src/jcvi/graphics/grabseeds.py diff --git a/jcvi/graphics/heatmap.py b/src/jcvi/graphics/heatmap.py similarity index 100% rename from jcvi/graphics/heatmap.py rename to src/jcvi/graphics/heatmap.py diff --git a/jcvi/graphics/histogram.py b/src/jcvi/graphics/histogram.py similarity index 100% rename from jcvi/graphics/histogram.py rename to src/jcvi/graphics/histogram.py diff --git a/jcvi/graphics/karyotype.py b/src/jcvi/graphics/karyotype.py similarity index 100% rename from jcvi/graphics/karyotype.py rename to src/jcvi/graphics/karyotype.py diff --git a/jcvi/graphics/landscape.py b/src/jcvi/graphics/landscape.py similarity index 100% rename from jcvi/graphics/landscape.py rename 
to src/jcvi/graphics/landscape.py diff --git a/jcvi/graphics/mummerplot.py b/src/jcvi/graphics/mummerplot.py similarity index 100% rename from jcvi/graphics/mummerplot.py rename to src/jcvi/graphics/mummerplot.py diff --git a/jcvi/graphics/synteny.py b/src/jcvi/graphics/synteny.py similarity index 100% rename from jcvi/graphics/synteny.py rename to src/jcvi/graphics/synteny.py diff --git a/jcvi/graphics/table.py b/src/jcvi/graphics/table.py similarity index 100% rename from jcvi/graphics/table.py rename to src/jcvi/graphics/table.py diff --git a/jcvi/graphics/tree.py b/src/jcvi/graphics/tree.py similarity index 100% rename from jcvi/graphics/tree.py rename to src/jcvi/graphics/tree.py diff --git a/jcvi/graphics/wheel.py b/src/jcvi/graphics/wheel.py similarity index 100% rename from jcvi/graphics/wheel.py rename to src/jcvi/graphics/wheel.py diff --git a/jcvi/projects/__init__.py b/src/jcvi/projects/__init__.py similarity index 100% rename from jcvi/projects/__init__.py rename to src/jcvi/projects/__init__.py diff --git a/jcvi/projects/__main__.py b/src/jcvi/projects/__main__.py similarity index 100% rename from jcvi/projects/__main__.py rename to src/jcvi/projects/__main__.py diff --git a/jcvi/projects/age.py b/src/jcvi/projects/age.py similarity index 100% rename from jcvi/projects/age.py rename to src/jcvi/projects/age.py diff --git a/jcvi/projects/allmaps.py b/src/jcvi/projects/allmaps.py similarity index 100% rename from jcvi/projects/allmaps.py rename to src/jcvi/projects/allmaps.py diff --git a/jcvi/projects/bites.py b/src/jcvi/projects/bites.py similarity index 100% rename from jcvi/projects/bites.py rename to src/jcvi/projects/bites.py diff --git a/jcvi/projects/ies.py b/src/jcvi/projects/ies.py similarity index 100% rename from jcvi/projects/ies.py rename to src/jcvi/projects/ies.py diff --git a/jcvi/projects/jcvi.py b/src/jcvi/projects/jcvi.py similarity index 100% rename from jcvi/projects/jcvi.py rename to src/jcvi/projects/jcvi.py diff --git 
a/jcvi/projects/misc.py b/src/jcvi/projects/misc.py similarity index 100% rename from jcvi/projects/misc.py rename to src/jcvi/projects/misc.py diff --git a/jcvi/projects/napus.py b/src/jcvi/projects/napus.py similarity index 100% rename from jcvi/projects/napus.py rename to src/jcvi/projects/napus.py diff --git a/jcvi/projects/pineapple.py b/src/jcvi/projects/pineapple.py similarity index 100% rename from jcvi/projects/pineapple.py rename to src/jcvi/projects/pineapple.py diff --git a/jcvi/projects/str.py b/src/jcvi/projects/str.py similarity index 100% rename from jcvi/projects/str.py rename to src/jcvi/projects/str.py diff --git a/jcvi/projects/sugarcane.py b/src/jcvi/projects/sugarcane.py similarity index 100% rename from jcvi/projects/sugarcane.py rename to src/jcvi/projects/sugarcane.py diff --git a/jcvi/projects/synfind.py b/src/jcvi/projects/synfind.py similarity index 100% rename from jcvi/projects/synfind.py rename to src/jcvi/projects/synfind.py diff --git a/jcvi/projects/tgbs.py b/src/jcvi/projects/tgbs.py similarity index 100% rename from jcvi/projects/tgbs.py rename to src/jcvi/projects/tgbs.py diff --git a/jcvi/projects/vanilla.py b/src/jcvi/projects/vanilla.py similarity index 100% rename from jcvi/projects/vanilla.py rename to src/jcvi/projects/vanilla.py diff --git a/jcvi/utils/__init__.py b/src/jcvi/utils/__init__.py similarity index 100% rename from jcvi/utils/__init__.py rename to src/jcvi/utils/__init__.py diff --git a/jcvi/utils/__main__.py b/src/jcvi/utils/__main__.py similarity index 100% rename from jcvi/utils/__main__.py rename to src/jcvi/utils/__main__.py diff --git a/jcvi/utils/aws.py b/src/jcvi/utils/aws.py similarity index 100% rename from jcvi/utils/aws.py rename to src/jcvi/utils/aws.py diff --git a/jcvi/utils/cbook.py b/src/jcvi/utils/cbook.py similarity index 100% rename from jcvi/utils/cbook.py rename to src/jcvi/utils/cbook.py diff --git a/jcvi/utils/console.py b/src/jcvi/utils/console.py similarity index 100% rename from 
jcvi/utils/console.py rename to src/jcvi/utils/console.py diff --git a/jcvi/utils/data/Airswing.ttf b/src/jcvi/utils/data/Airswing.ttf similarity index 100% rename from jcvi/utils/data/Airswing.ttf rename to src/jcvi/utils/data/Airswing.ttf diff --git a/jcvi/utils/data/Collegia.ttf b/src/jcvi/utils/data/Collegia.ttf similarity index 100% rename from jcvi/utils/data/Collegia.ttf rename to src/jcvi/utils/data/Collegia.ttf diff --git a/jcvi/utils/data/HookedUp.ttf b/src/jcvi/utils/data/HookedUp.ttf similarity index 100% rename from jcvi/utils/data/HookedUp.ttf rename to src/jcvi/utils/data/HookedUp.ttf diff --git a/jcvi/utils/data/Humor-Sans.ttf b/src/jcvi/utils/data/Humor-Sans.ttf similarity index 100% rename from jcvi/utils/data/Humor-Sans.ttf rename to src/jcvi/utils/data/Humor-Sans.ttf diff --git a/jcvi/utils/data/TREDs.meta.csv b/src/jcvi/utils/data/TREDs.meta.csv similarity index 100% rename from jcvi/utils/data/TREDs.meta.csv rename to src/jcvi/utils/data/TREDs.meta.csv diff --git a/jcvi/utils/data/__init__.py b/src/jcvi/utils/data/__init__.py similarity index 100% rename from jcvi/utils/data/__init__.py rename to src/jcvi/utils/data/__init__.py diff --git a/jcvi/utils/data/adapters.fasta b/src/jcvi/utils/data/adapters.fasta similarity index 100% rename from jcvi/utils/data/adapters.fasta rename to src/jcvi/utils/data/adapters.fasta diff --git a/jcvi/utils/data/blosum80.mat b/src/jcvi/utils/data/blosum80.mat similarity index 100% rename from jcvi/utils/data/blosum80.mat rename to src/jcvi/utils/data/blosum80.mat diff --git a/jcvi/utils/data/chrY.hg38.unique_ccn.gc b/src/jcvi/utils/data/chrY.hg38.unique_ccn.gc similarity index 100% rename from jcvi/utils/data/chrY.hg38.unique_ccn.gc rename to src/jcvi/utils/data/chrY.hg38.unique_ccn.gc diff --git a/jcvi/utils/data/colorchecker.txt b/src/jcvi/utils/data/colorchecker.txt similarity index 100% rename from jcvi/utils/data/colorchecker.txt rename to src/jcvi/utils/data/colorchecker.txt diff --git 
a/jcvi/utils/data/hg38.band.txt b/src/jcvi/utils/data/hg38.band.txt similarity index 100% rename from jcvi/utils/data/hg38.band.txt rename to src/jcvi/utils/data/hg38.band.txt diff --git a/jcvi/utils/data/hg38.chrom.sizes b/src/jcvi/utils/data/hg38.chrom.sizes similarity index 100% rename from jcvi/utils/data/hg38.chrom.sizes rename to src/jcvi/utils/data/hg38.chrom.sizes diff --git a/jcvi/utils/data/instance.json b/src/jcvi/utils/data/instance.json similarity index 100% rename from jcvi/utils/data/instance.json rename to src/jcvi/utils/data/instance.json diff --git a/jcvi/utils/db.py b/src/jcvi/utils/db.py similarity index 100% rename from jcvi/utils/db.py rename to src/jcvi/utils/db.py diff --git a/jcvi/utils/ez_setup.py b/src/jcvi/utils/ez_setup.py similarity index 100% rename from jcvi/utils/ez_setup.py rename to src/jcvi/utils/ez_setup.py diff --git a/jcvi/utils/grouper.py b/src/jcvi/utils/grouper.py similarity index 100% rename from jcvi/utils/grouper.py rename to src/jcvi/utils/grouper.py diff --git a/jcvi/utils/orderedcollections.py b/src/jcvi/utils/orderedcollections.py similarity index 100% rename from jcvi/utils/orderedcollections.py rename to src/jcvi/utils/orderedcollections.py diff --git a/jcvi/utils/range.py b/src/jcvi/utils/range.py similarity index 100% rename from jcvi/utils/range.py rename to src/jcvi/utils/range.py diff --git a/jcvi/utils/table.py b/src/jcvi/utils/table.py similarity index 100% rename from jcvi/utils/table.py rename to src/jcvi/utils/table.py diff --git a/jcvi/utils/taxonomy.py b/src/jcvi/utils/taxonomy.py similarity index 100% rename from jcvi/utils/taxonomy.py rename to src/jcvi/utils/taxonomy.py diff --git a/jcvi/utils/validator.py b/src/jcvi/utils/validator.py similarity index 100% rename from jcvi/utils/validator.py rename to src/jcvi/utils/validator.py diff --git a/jcvi/utils/webcolors.py b/src/jcvi/utils/webcolors.py similarity index 100% rename from jcvi/utils/webcolors.py rename to src/jcvi/utils/webcolors.py diff --git 
a/jcvi/variation/__init__.py b/src/jcvi/variation/__init__.py similarity index 100% rename from jcvi/variation/__init__.py rename to src/jcvi/variation/__init__.py diff --git a/jcvi/variation/__main__.py b/src/jcvi/variation/__main__.py similarity index 100% rename from jcvi/variation/__main__.py rename to src/jcvi/variation/__main__.py diff --git a/jcvi/variation/cnv.py b/src/jcvi/variation/cnv.py similarity index 100% rename from jcvi/variation/cnv.py rename to src/jcvi/variation/cnv.py diff --git a/jcvi/variation/deconvolute.py b/src/jcvi/variation/deconvolute.py similarity index 100% rename from jcvi/variation/deconvolute.py rename to src/jcvi/variation/deconvolute.py diff --git a/jcvi/variation/delly.py b/src/jcvi/variation/delly.py similarity index 100% rename from jcvi/variation/delly.py rename to src/jcvi/variation/delly.py diff --git a/jcvi/variation/impute.py b/src/jcvi/variation/impute.py similarity index 100% rename from jcvi/variation/impute.py rename to src/jcvi/variation/impute.py diff --git a/jcvi/variation/phase.py b/src/jcvi/variation/phase.py similarity index 100% rename from jcvi/variation/phase.py rename to src/jcvi/variation/phase.py diff --git a/jcvi/variation/snp.py b/src/jcvi/variation/snp.py similarity index 100% rename from jcvi/variation/snp.py rename to src/jcvi/variation/snp.py diff --git a/jcvi/variation/str.py b/src/jcvi/variation/str.py similarity index 100% rename from jcvi/variation/str.py rename to src/jcvi/variation/str.py diff --git a/src/jcvi/version.py b/src/jcvi/version.py new file mode 100644 index 00000000..0a6d76f8 --- /dev/null +++ b/src/jcvi/version.py @@ -0,0 +1,16 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple, Union + VERSION_TUPLE = Tuple[Union[int, str], ...] 
+else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = '1.4.24.dev13+gdf0cdc2d.d20241114' +__version_tuple__ = version_tuple = (1, 4, 24, 'dev13', 'gdf0cdc2d.d20241114') From 60ec604b060a69b225dad12bc7c388425667128e Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 15 Nov 2024 16:38:44 +1100 Subject: [PATCH 13/43] ignore version --- .gitignore | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 9d8bcab7..94cfaadd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,11 +2,11 @@ .DS_Store # Versioning -jcvi/version.py +src/jcvi/version.py # Ignore Cython generated C files -jcvi/assembly/chic.c -jcvi/formats/cblast.c +src/jcvi/assembly/chic.c +src/jcvi/formats/cblast.c ### Python ### # Byte-compiled / optimized / DLL files @@ -62,4 +62,5 @@ coverage.xml docs/_build/ # gffutils temp DB file -*.db \ No newline at end of file +*.db +src/jcvi/version.py From 98752a84ea199fe425749c10c686033a64c34648 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 15 Nov 2024 16:59:01 +1100 Subject: [PATCH 14/43] init env yml --- env_osx64.yml | 11 +++++++++++ environment.yml | 10 ++++++++++ 2 files changed, 21 insertions(+) create mode 100644 env_osx64.yml create mode 100644 environment.yml diff --git a/env_osx64.yml b/env_osx64.yml new file mode 100644 index 00000000..758acfc1 --- /dev/null +++ b/env_osx64.yml @@ -0,0 +1,11 @@ +name: jcvi-osx64 +channels: + - conda-forge/osx-64 + - bioconda/osx-64 +dependencies: + - python 3.12 + - ImageMagick + - pip + - pip: + - hatch + - pytest diff --git a/environment.yml b/environment.yml new file mode 100644 index 00000000..30f35514 --- /dev/null +++ b/environment.yml @@ -0,0 +1,10 @@ +name: jcvi +channels: + - conda-forge + - bioconda +dependencies: + - python >=3.12 + - pip + - pip: + - pytest + - hatch From 011d4679e417b5ecb392fd65b9ead173f6cfcdc7 Mon Sep 17 00:00:00 2001 From: Adam 
Taranto Date: Thu, 26 Dec 2024 10:07:32 +1100 Subject: [PATCH 15/43] rm version, switch to dynamic vcs version --- src/jcvi/version.py | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 src/jcvi/version.py diff --git a/src/jcvi/version.py b/src/jcvi/version.py deleted file mode 100644 index 0a6d76f8..00000000 --- a/src/jcvi/version.py +++ /dev/null @@ -1,16 +0,0 @@ -# file generated by setuptools_scm -# don't change, don't track in version control -TYPE_CHECKING = False -if TYPE_CHECKING: - from typing import Tuple, Union - VERSION_TUPLE = Tuple[Union[int, str], ...] -else: - VERSION_TUPLE = object - -version: str -__version__: str -__version_tuple__: VERSION_TUPLE -version_tuple: VERSION_TUPLE - -__version__ = version = '1.4.24.dev13+gdf0cdc2d.d20241114' -__version_tuple__ = version_tuple = (1, 4, 24, 'dev13', 'gdf0cdc2d.d20241114') From ed97aa1fc314151f2e183d062ba33195a8fe9abb Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Thu, 26 Dec 2024 10:08:23 +1100 Subject: [PATCH 16/43] WIP: migrating to hatch --- pyproject.toml | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2ee5ec64..c56ca412 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,8 @@ requires = [ "hatchling", # Build backend "hatch-vcs", # Version control system plugin for dynamic versioning - "Cython", # Cython for compiling C extensions + "hatch-cython", # Cython plugin for compiling C extensions + "cython", # Cython for compiling C extensions "numpy", # NumPy for numerical operations and C extension includes ] @@ -66,7 +67,7 @@ dynamic = ["version"] # Optional dependencies for testing [project.optional-dependencies] -test = [ +tests = [ "PyYAML", "pytest", "pytest-cov", @@ -84,8 +85,9 @@ allow-direct-references = true # Hatch build configuration [tool.hatch.build] +source = "src" # Source directory include = [ - "jcvi/**", # Include all files in the jcvi directory + "src/jcvi/**", # 
Include all files in the jcvi directory "README.md", ] exclude = [ @@ -101,28 +103,28 @@ source = "vcs" # Use version control system for versioning # Version file location for VCS [tool.hatch.build.hooks.vcs] -version-file = "jcvi/version.py" # Path to write the version information +version-file = "src/jcvi/version.py" # Path to write the version information # Version control system (VCS) versioning [tool.hatch.version.vcs] tag-pattern = "v*" # Git tags starting with 'v' will be used for versioning fallback-version = "0.0.0" -# Hatch build targets for source distribution +# Cython build configuration +[tool.hatch.build.hooks.cython] +extensions = [ + {name = "jcvi_assembly_chic", sources = ["src/jcvi/assembly/chic.pyx"], include-dirs = ["{numpy_include}"], extra-compile-args = ["-O3"]}, + {name = "jcvi_formats_cblast", sources = ["src/jcvi/formats/cblast.pyx"], extra-compile-args = ["-O3"]} +] + [tool.hatch.build.targets.sdist] include = [ - "jcvi/**", + "src/**", "README.md", ] -# Hatch build targets for wheel distribution [tool.hatch.build.targets.wheel] include = [ - "jcvi/**", + "src/**", "README.md", -] - -# Cython extensions to be built -[tool.hatch.build.targets.wheel.ext_modules] -jcvi.assembly.chic = {sources = ["jcvi/assembly/chic.pyx"], include-dirs = ["{numpy.get_include()}"], extra-compile-args = ["-O3"]} -jcvi.formats.cblast = {sources = ["jcvi/formats/cblast.pyx"], extra-compile-args = ["-O3"]} \ No newline at end of file +] \ No newline at end of file From 267897004b3483f3435708c444b7cc91bb0fb07f Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 3 Jan 2025 14:06:46 +1100 Subject: [PATCH 17/43] rm cblast.c from tracking --- src/jcvi/formats/cblast.c | 16859 ------------------------------------ 1 file changed, 16859 deletions(-) delete mode 100644 src/jcvi/formats/cblast.c diff --git a/src/jcvi/formats/cblast.c b/src/jcvi/formats/cblast.c deleted file mode 100644 index 1be00a77..00000000 --- a/src/jcvi/formats/cblast.c +++ /dev/null @@ -1,16859 
+0,0 @@ -/* Generated by Cython 3.0.11 */ - -/* BEGIN: Cython Metadata -{ - "distutils": { - "depends": [], - "name": "jcvi.formats.cblast", - "sources": [ - "/Users/adamtaranto/Documents/Adam/jcvi-dev/jcvi/jcvi/formats/cblast.pyx" - ] - }, - "module_name": "jcvi.formats.cblast" -} -END: Cython Metadata */ - -#ifndef PY_SSIZE_T_CLEAN -#define PY_SSIZE_T_CLEAN -#endif /* PY_SSIZE_T_CLEAN */ -#if defined(CYTHON_LIMITED_API) && 0 - #ifndef Py_LIMITED_API - #if CYTHON_LIMITED_API+0 > 0x03030000 - #define Py_LIMITED_API CYTHON_LIMITED_API - #else - #define Py_LIMITED_API 0x03030000 - #endif - #endif -#endif - -#include "Python.h" -#ifndef Py_PYTHON_H - #error Python headers needed to compile C extensions, please install development version of Python. -#elif PY_VERSION_HEX < 0x02070000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) - #error Cython requires Python 2.7+ or Python 3.3+. -#else -#if defined(CYTHON_LIMITED_API) && CYTHON_LIMITED_API -#define __PYX_EXTRA_ABI_MODULE_NAME "limited" -#else -#define __PYX_EXTRA_ABI_MODULE_NAME "" -#endif -#define CYTHON_ABI "3_0_11" __PYX_EXTRA_ABI_MODULE_NAME -#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI -#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "." 
-#define CYTHON_HEX_VERSION 0x03000BF0 -#define CYTHON_FUTURE_DIVISION 0 -#include -#ifndef offsetof - #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) -#endif -#if !defined(_WIN32) && !defined(WIN32) && !defined(MS_WINDOWS) - #ifndef __stdcall - #define __stdcall - #endif - #ifndef __cdecl - #define __cdecl - #endif - #ifndef __fastcall - #define __fastcall - #endif -#endif -#ifndef DL_IMPORT - #define DL_IMPORT(t) t -#endif -#ifndef DL_EXPORT - #define DL_EXPORT(t) t -#endif -#define __PYX_COMMA , -#ifndef HAVE_LONG_LONG - #define HAVE_LONG_LONG -#endif -#ifndef PY_LONG_LONG - #define PY_LONG_LONG LONG_LONG -#endif -#ifndef Py_HUGE_VAL - #define Py_HUGE_VAL HUGE_VAL -#endif -#define __PYX_LIMITED_VERSION_HEX PY_VERSION_HEX -#if defined(GRAALVM_PYTHON) - /* For very preliminary testing purposes. Most variables are set the same as PyPy. - The existence of this section does not imply that anything works or is even tested */ - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 1 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #undef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 0 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #undef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 1 - #undef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 0 - 
#undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) - #endif - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #undef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif - #undef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 -#elif defined(PYPY_VERSION) - #define CYTHON_COMPILING_IN_PYPY 1 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #ifndef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #endif - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 0 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #undef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 1 - #undef CYTHON_ASSUME_SAFE_MACROS - #define 
CYTHON_ASSUME_SAFE_MACROS 0 - #undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) - #endif - #if PY_VERSION_HEX < 0x03090000 - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #undef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00) - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif - #undef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 -#elif defined(CYTHON_LIMITED_API) - #ifdef Py_LIMITED_API - #undef __PYX_LIMITED_VERSION_HEX - #define __PYX_LIMITED_VERSION_HEX Py_LIMITED_API - #endif - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 1 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_CLINE_IN_TRACEBACK - #define CYTHON_CLINE_IN_TRACEBACK 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #undef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 1 - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define 
CYTHON_USE_UNICODE_INTERNALS 0 - #ifndef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #endif - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #undef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 0 - #undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS 1 - #endif - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #undef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 1 - #ifndef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #endif - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif - #undef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 -#elif defined(Py_GIL_DISABLED) || defined(Py_NOGIL) - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 1 - #ifndef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 1 - #endif - #ifndef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #endif - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #ifndef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #ifndef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #ifndef 
CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 1 - #endif - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #ifndef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 1 - #endif - #ifndef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 1 - #endif - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #ifndef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 1 - #endif - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS 1 - #endif - #ifndef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #ifndef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #endif - #ifndef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 1 - #endif - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 - #endif - #ifndef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 - #endif -#else - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 1 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #ifndef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 1 - #endif - #ifndef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #endif - #ifndef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 1 - #endif - #if PY_MAJOR_VERSION < 3 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #ifndef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 1 - #endif - 
#ifndef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 1 - #endif - #ifndef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 1 - #endif - #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #elif !defined(CYTHON_USE_UNICODE_WRITER) - #define CYTHON_USE_UNICODE_WRITER 1 - #endif - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #ifndef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 1 - #endif - #ifndef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 1 - #endif - #ifndef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 1 - #endif - #ifndef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL (PY_MAJOR_VERSION < 3 || PY_VERSION_HEX >= 0x03060000 && PY_VERSION_HEX < 0x030C00A6) - #endif - #ifndef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL (PY_VERSION_HEX >= 0x030700A1) - #endif - #ifndef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 1 - #endif - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS 1 - #endif - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #ifndef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #endif - #if PY_VERSION_HEX < 0x030400a1 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #elif !defined(CYTHON_USE_TP_FINALIZE) - #define CYTHON_USE_TP_FINALIZE 1 - #endif - #if PY_VERSION_HEX < 0x030600B1 - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #elif !defined(CYTHON_USE_DICT_VERSIONS) - #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX < 0x030C00A5) - #endif - #if PY_VERSION_HEX < 0x030700A3 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #elif !defined(CYTHON_USE_EXC_INFO_STACK) - #define 
CYTHON_USE_EXC_INFO_STACK 1 - #endif - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 - #endif - #ifndef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 1 - #endif -#endif -#if !defined(CYTHON_FAST_PYCCALL) -#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) -#endif -#if !defined(CYTHON_VECTORCALL) -#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1) -#endif -#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1) -#if CYTHON_USE_PYLONG_INTERNALS - #if PY_MAJOR_VERSION < 3 - #include "longintrepr.h" - #endif - #undef SHIFT - #undef BASE - #undef MASK - #ifdef SIZEOF_VOID_P - enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; - #endif -#endif -#ifndef __has_attribute - #define __has_attribute(x) 0 -#endif -#ifndef __has_cpp_attribute - #define __has_cpp_attribute(x) 0 -#endif -#ifndef CYTHON_RESTRICT - #if defined(__GNUC__) - #define CYTHON_RESTRICT __restrict__ - #elif defined(_MSC_VER) && _MSC_VER >= 1400 - #define CYTHON_RESTRICT __restrict - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define CYTHON_RESTRICT restrict - #else - #define CYTHON_RESTRICT - #endif -#endif -#ifndef CYTHON_UNUSED - #if defined(__cplusplus) - /* for clang __has_cpp_attribute(maybe_unused) is true even before C++17 - * but leads to warnings with -pedantic, since it is a C++17 feature */ - #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) - #if __has_cpp_attribute(maybe_unused) - #define CYTHON_UNUSED [[maybe_unused]] - #endif - #endif - #endif -#endif -#ifndef CYTHON_UNUSED -# if defined(__GNUC__) -# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) -# define CYTHON_UNUSED __attribute__ ((__unused__)) -# else -# define CYTHON_UNUSED -# endif -# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) -# define CYTHON_UNUSED 
__attribute__ ((__unused__)) -# else -# define CYTHON_UNUSED -# endif -#endif -#ifndef CYTHON_UNUSED_VAR -# if defined(__cplusplus) - template void CYTHON_UNUSED_VAR( const T& ) { } -# else -# define CYTHON_UNUSED_VAR(x) (void)(x) -# endif -#endif -#ifndef CYTHON_MAYBE_UNUSED_VAR - #define CYTHON_MAYBE_UNUSED_VAR(x) CYTHON_UNUSED_VAR(x) -#endif -#ifndef CYTHON_NCP_UNUSED -# if CYTHON_COMPILING_IN_CPYTHON -# define CYTHON_NCP_UNUSED -# else -# define CYTHON_NCP_UNUSED CYTHON_UNUSED -# endif -#endif -#ifndef CYTHON_USE_CPP_STD_MOVE - #if defined(__cplusplus) && (\ - __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1600)) - #define CYTHON_USE_CPP_STD_MOVE 1 - #else - #define CYTHON_USE_CPP_STD_MOVE 0 - #endif -#endif -#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) -#ifdef _MSC_VER - #ifndef _MSC_STDINT_H_ - #if _MSC_VER < 1300 - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - #else - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; - #endif - #endif - #if _MSC_VER < 1300 - #ifdef _WIN64 - typedef unsigned long long __pyx_uintptr_t; - #else - typedef unsigned int __pyx_uintptr_t; - #endif - #else - #ifdef _WIN64 - typedef unsigned __int64 __pyx_uintptr_t; - #else - typedef unsigned __int32 __pyx_uintptr_t; - #endif - #endif -#else - #include - typedef uintptr_t __pyx_uintptr_t; -#endif -#ifndef CYTHON_FALLTHROUGH - #if defined(__cplusplus) - /* for clang __has_cpp_attribute(fallthrough) is true even before C++17 - * but leads to warnings with -pedantic, since it is a C++17 feature */ - #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) - #if __has_cpp_attribute(fallthrough) - #define CYTHON_FALLTHROUGH [[fallthrough]] - #endif - #endif - #ifndef CYTHON_FALLTHROUGH - #if __has_cpp_attribute(clang::fallthrough) - #define CYTHON_FALLTHROUGH [[clang::fallthrough]] - #elif 
__has_cpp_attribute(gnu::fallthrough) - #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] - #endif - #endif - #endif - #ifndef CYTHON_FALLTHROUGH - #if __has_attribute(fallthrough) - #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) - #else - #define CYTHON_FALLTHROUGH - #endif - #endif - #if defined(__clang__) && defined(__apple_build_version__) - #if __apple_build_version__ < 7000000 - #undef CYTHON_FALLTHROUGH - #define CYTHON_FALLTHROUGH - #endif - #endif -#endif -#ifdef __cplusplus - template - struct __PYX_IS_UNSIGNED_IMPL {static const bool value = T(0) < T(-1);}; - #define __PYX_IS_UNSIGNED(type) (__PYX_IS_UNSIGNED_IMPL::value) -#else - #define __PYX_IS_UNSIGNED(type) (((type)-1) > 0) -#endif -#if CYTHON_COMPILING_IN_PYPY == 1 - #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x030A0000) -#else - #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000) -#endif -#define __PYX_REINTERPRET_FUNCION(func_pointer, other_pointer) ((func_pointer)(void(*)(void))(other_pointer)) - -#ifndef CYTHON_INLINE - #if defined(__clang__) - #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) - #elif defined(__GNUC__) - #define CYTHON_INLINE __inline__ - #elif defined(_MSC_VER) - #define CYTHON_INLINE __inline - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define CYTHON_INLINE inline - #else - #define CYTHON_INLINE - #endif -#endif - -#define __PYX_BUILD_PY_SSIZE_T "n" -#define CYTHON_FORMAT_SSIZE_T "z" -#if PY_MAJOR_VERSION < 3 - #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" - #define __Pyx_DefaultClassType PyClass_Type - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#else - #define __Pyx_BUILTIN_MODULE_NAME "builtins" - #define __Pyx_DefaultClassType PyType_Type -#if CYTHON_COMPILING_IN_LIMITED_API - static CYTHON_INLINE PyObject* 
__Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, - PyObject *code, PyObject *c, PyObject* n, PyObject *v, - PyObject *fv, PyObject *cell, PyObject* fn, - PyObject *name, int fline, PyObject *lnos) { - PyObject *exception_table = NULL; - PyObject *types_module=NULL, *code_type=NULL, *result=NULL; - #if __PYX_LIMITED_VERSION_HEX < 0x030B0000 - PyObject *version_info; - PyObject *py_minor_version = NULL; - #endif - long minor_version = 0; - PyObject *type, *value, *traceback; - PyErr_Fetch(&type, &value, &traceback); - #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000 - minor_version = 11; - #else - if (!(version_info = PySys_GetObject("version_info"))) goto end; - if (!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end; - minor_version = PyLong_AsLong(py_minor_version); - Py_DECREF(py_minor_version); - if (minor_version == -1 && PyErr_Occurred()) goto end; - #endif - if (!(types_module = PyImport_ImportModule("types"))) goto end; - if (!(code_type = PyObject_GetAttrString(types_module, "CodeType"))) goto end; - if (minor_version <= 7) { - (void)p; - result = PyObject_CallFunction(code_type, "iiiiiOOOOOOiOO", a, k, l, s, f, code, - c, n, v, fn, name, fline, lnos, fv, cell); - } else if (minor_version <= 10) { - result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOiOO", a,p, k, l, s, f, code, - c, n, v, fn, name, fline, lnos, fv, cell); - } else { - if (!(exception_table = PyBytes_FromStringAndSize(NULL, 0))) goto end; - result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOOiOO", a,p, k, l, s, f, code, - c, n, v, fn, name, name, fline, lnos, exception_table, fv, cell); - } - end: - Py_XDECREF(code_type); - Py_XDECREF(exception_table); - Py_XDECREF(types_module); - if (type) { - PyErr_Restore(type, value, traceback); - } - return result; - } - #ifndef CO_OPTIMIZED - #define CO_OPTIMIZED 0x0001 - #endif - #ifndef CO_NEWLOCALS - #define CO_NEWLOCALS 0x0002 - #endif - #ifndef CO_VARARGS - #define CO_VARARGS 0x0004 - #endif - #ifndef CO_VARKEYWORDS - 
#define CO_VARKEYWORDS 0x0008 - #endif - #ifndef CO_ASYNC_GENERATOR - #define CO_ASYNC_GENERATOR 0x0200 - #endif - #ifndef CO_GENERATOR - #define CO_GENERATOR 0x0020 - #endif - #ifndef CO_COROUTINE - #define CO_COROUTINE 0x0080 - #endif -#elif PY_VERSION_HEX >= 0x030B0000 - static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, - PyObject *code, PyObject *c, PyObject* n, PyObject *v, - PyObject *fv, PyObject *cell, PyObject* fn, - PyObject *name, int fline, PyObject *lnos) { - PyCodeObject *result; - PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); - if (!empty_bytes) return NULL; - result = - #if PY_VERSION_HEX >= 0x030C0000 - PyUnstable_Code_NewWithPosOnlyArgs - #else - PyCode_NewWithPosOnlyArgs - #endif - (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, empty_bytes); - Py_DECREF(empty_bytes); - return result; - } -#elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_NewWithPosOnlyArgs(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#else - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#endif -#endif -#if PY_VERSION_HEX >= 0x030900A4 || defined(Py_IS_TYPE) - #define __Pyx_IS_TYPE(ob, type) Py_IS_TYPE(ob, type) -#else - #define __Pyx_IS_TYPE(ob, type) (((const PyObject*)ob)->ob_type == (type)) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_Is) - #define __Pyx_Py_Is(x, y) Py_Is(x, y) -#else - #define __Pyx_Py_Is(x, y) ((x) == (y)) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsNone) - #define __Pyx_Py_IsNone(ob) Py_IsNone(ob) -#else - #define __Pyx_Py_IsNone(ob) __Pyx_Py_Is((ob), Py_None) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsTrue) - #define __Pyx_Py_IsTrue(ob) Py_IsTrue(ob) -#else - #define 
__Pyx_Py_IsTrue(ob) __Pyx_Py_Is((ob), Py_True) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsFalse) - #define __Pyx_Py_IsFalse(ob) Py_IsFalse(ob) -#else - #define __Pyx_Py_IsFalse(ob) __Pyx_Py_Is((ob), Py_False) -#endif -#define __Pyx_NoneAsNull(obj) (__Pyx_Py_IsNone(obj) ? NULL : (obj)) -#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o) -#else - #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o) -#endif -#ifndef CO_COROUTINE - #define CO_COROUTINE 0x80 -#endif -#ifndef CO_ASYNC_GENERATOR - #define CO_ASYNC_GENERATOR 0x200 -#endif -#ifndef Py_TPFLAGS_CHECKTYPES - #define Py_TPFLAGS_CHECKTYPES 0 -#endif -#ifndef Py_TPFLAGS_HAVE_INDEX - #define Py_TPFLAGS_HAVE_INDEX 0 -#endif -#ifndef Py_TPFLAGS_HAVE_NEWBUFFER - #define Py_TPFLAGS_HAVE_NEWBUFFER 0 -#endif -#ifndef Py_TPFLAGS_HAVE_FINALIZE - #define Py_TPFLAGS_HAVE_FINALIZE 0 -#endif -#ifndef Py_TPFLAGS_SEQUENCE - #define Py_TPFLAGS_SEQUENCE 0 -#endif -#ifndef Py_TPFLAGS_MAPPING - #define Py_TPFLAGS_MAPPING 0 -#endif -#ifndef METH_STACKLESS - #define METH_STACKLESS 0 -#endif -#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) - #ifndef METH_FASTCALL - #define METH_FASTCALL 0x80 - #endif - typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); - typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, - Py_ssize_t nargs, PyObject *kwnames); -#else - #if PY_VERSION_HEX >= 0x030d00A4 - # define __Pyx_PyCFunctionFast PyCFunctionFast - # define __Pyx_PyCFunctionFastWithKeywords PyCFunctionFastWithKeywords - #else - # define __Pyx_PyCFunctionFast _PyCFunctionFast - # define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords - #endif -#endif -#if CYTHON_METH_FASTCALL - #define __Pyx_METH_FASTCALL METH_FASTCALL - #define __Pyx_PyCFunction_FastCall __Pyx_PyCFunctionFast - #define __Pyx_PyCFunction_FastCallWithKeywords 
__Pyx_PyCFunctionFastWithKeywords -#else - #define __Pyx_METH_FASTCALL METH_VARARGS - #define __Pyx_PyCFunction_FastCall PyCFunction - #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords -#endif -#if CYTHON_VECTORCALL - #define __pyx_vectorcallfunc vectorcallfunc - #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET - #define __Pyx_PyVectorcall_NARGS(n) PyVectorcall_NARGS((size_t)(n)) -#elif CYTHON_BACKPORT_VECTORCALL - typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args, - size_t nargsf, PyObject *kwnames); - #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1)) - #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(((size_t)(n)) & ~__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)) -#else - #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET 0 - #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(n)) -#endif -#if PY_MAJOR_VERSION >= 0x030900B1 -#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_CheckExact(func) -#else -#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_Check(func) -#endif -#define __Pyx_CyOrPyCFunction_Check(func) PyCFunction_Check(func) -#if CYTHON_COMPILING_IN_CPYTHON -#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) (((PyCFunctionObject*)(func))->m_ml->ml_meth) -#elif !CYTHON_COMPILING_IN_LIMITED_API -#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) PyCFunction_GET_FUNCTION(func) -#endif -#if CYTHON_COMPILING_IN_CPYTHON -#define __Pyx_CyOrPyCFunction_GET_FLAGS(func) (((PyCFunctionObject*)(func))->m_ml->ml_flags) -static CYTHON_INLINE PyObject* __Pyx_CyOrPyCFunction_GET_SELF(PyObject *func) { - return (__Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_STATIC) ? 
NULL : ((PyCFunctionObject*)func)->m_self; -} -#endif -static CYTHON_INLINE int __Pyx__IsSameCFunction(PyObject *func, void *cfunc) { -#if CYTHON_COMPILING_IN_LIMITED_API - return PyCFunction_Check(func) && PyCFunction_GetFunction(func) == (PyCFunction) cfunc; -#else - return PyCFunction_Check(func) && PyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; -#endif -} -#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCFunction(func, cfunc) -#if __PYX_LIMITED_VERSION_HEX < 0x030900B1 - #define __Pyx_PyType_FromModuleAndSpec(m, s, b) ((void)m, PyType_FromSpecWithBases(s, b)) - typedef PyObject *(*__Pyx_PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, size_t, PyObject *); -#else - #define __Pyx_PyType_FromModuleAndSpec(m, s, b) PyType_FromModuleAndSpec(m, s, b) - #define __Pyx_PyCMethod PyCMethod -#endif -#ifndef METH_METHOD - #define METH_METHOD 0x200 -#endif -#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) - #define PyObject_Malloc(s) PyMem_Malloc(s) - #define PyObject_Free(p) PyMem_Free(p) - #define PyObject_Realloc(p) PyMem_Realloc(p) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) - #define __Pyx_PyFrame_SetLineNumber(frame, lineno) -#else - #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) - #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyThreadState_Current PyThreadState_Get() -#elif !CYTHON_FAST_THREAD_STATE - #define __Pyx_PyThreadState_Current PyThreadState_GET() -#elif PY_VERSION_HEX >= 0x030d00A1 - #define __Pyx_PyThreadState_Current PyThreadState_GetUnchecked() -#elif PY_VERSION_HEX >= 0x03060000 - #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() -#elif PY_VERSION_HEX >= 0x03000000 - #define __Pyx_PyThreadState_Current PyThreadState_GET() -#else - #define __Pyx_PyThreadState_Current _PyThreadState_Current -#endif -#if 
CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_INLINE void *__Pyx_PyModule_GetState(PyObject *op) -{ - void *result; - result = PyModule_GetState(op); - if (!result) - Py_FatalError("Couldn't find the module state"); - return result; -} -#endif -#define __Pyx_PyObject_GetSlot(obj, name, func_ctype) __Pyx_PyType_GetSlot(Py_TYPE(obj), name, func_ctype) -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((func_ctype) PyType_GetSlot((type), Py_##name)) -#else - #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((type)->name) -#endif -#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) -#include "pythread.h" -#define Py_tss_NEEDS_INIT 0 -typedef int Py_tss_t; -static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { - *key = PyThread_create_key(); - return 0; -} -static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { - Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); - *key = Py_tss_NEEDS_INIT; - return key; -} -static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { - PyObject_Free(key); -} -static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { - return *key != Py_tss_NEEDS_INIT; -} -static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { - PyThread_delete_key(*key); - *key = Py_tss_NEEDS_INIT; -} -static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { - return PyThread_set_key_value(*key, value); -} -static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { - return PyThread_get_key_value(*key); -} -#endif -#if PY_MAJOR_VERSION < 3 - #if CYTHON_COMPILING_IN_PYPY - #if PYPY_VERSION_NUM < 0x07030600 - #if defined(__cplusplus) && __cplusplus >= 201402L - [[deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")]] - #elif defined(__GNUC__) || defined(__clang__) - __attribute__ ((__deprecated__("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6"))) - 
#elif defined(_MSC_VER) - __declspec(deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")) - #endif - static CYTHON_INLINE int PyGILState_Check(void) { - return 0; - } - #else // PYPY_VERSION_NUM < 0x07030600 - #endif // PYPY_VERSION_NUM < 0x07030600 - #else - static CYTHON_INLINE int PyGILState_Check(void) { - PyThreadState * tstate = _PyThreadState_Current; - return tstate && (tstate == PyGILState_GetThisThreadState()); - } - #endif -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 || defined(_PyDict_NewPresized) -#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) -#else -#define __Pyx_PyDict_NewPresized(n) PyDict_New() -#endif -#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION - #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) -#else - #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX > 0x030600B4 && PY_VERSION_HEX < 0x030d0000 && CYTHON_USE_UNICODE_INTERNALS -#define __Pyx_PyDict_GetItemStrWithError(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) -static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStr(PyObject *dict, PyObject *name) { - PyObject *res = __Pyx_PyDict_GetItemStrWithError(dict, name); - if (res == NULL) PyErr_Clear(); - return res; -} -#elif PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000) -#define __Pyx_PyDict_GetItemStrWithError PyDict_GetItemWithError -#define __Pyx_PyDict_GetItemStr PyDict_GetItem -#else -static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStrWithError(PyObject *dict, PyObject *name) { -#if CYTHON_COMPILING_IN_PYPY - return PyDict_GetItem(dict, name); -#else - PyDictEntry *ep; - PyDictObject *mp = (PyDictObject*) dict; - long hash = 
((PyStringObject *) name)->ob_shash; - assert(hash != -1); - ep = (mp->ma_lookup)(mp, name, hash); - if (ep == NULL) { - return NULL; - } - return ep->me_value; -#endif -} -#define __Pyx_PyDict_GetItemStr PyDict_GetItem -#endif -#if CYTHON_USE_TYPE_SLOTS - #define __Pyx_PyType_GetFlags(tp) (((PyTypeObject *)tp)->tp_flags) - #define __Pyx_PyType_HasFeature(type, feature) ((__Pyx_PyType_GetFlags(type) & (feature)) != 0) - #define __Pyx_PyObject_GetIterNextFunc(obj) (Py_TYPE(obj)->tp_iternext) -#else - #define __Pyx_PyType_GetFlags(tp) (PyType_GetFlags((PyTypeObject *)tp)) - #define __Pyx_PyType_HasFeature(type, feature) PyType_HasFeature(type, feature) - #define __Pyx_PyObject_GetIterNextFunc(obj) PyIter_Next -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_SetItemOnTypeDict(tp, k, v) PyObject_GenericSetAttr((PyObject*)tp, k, v) -#else - #define __Pyx_SetItemOnTypeDict(tp, k, v) PyDict_SetItem(tp->tp_dict, k, v) -#endif -#if CYTHON_USE_TYPE_SPECS && PY_VERSION_HEX >= 0x03080000 -#define __Pyx_PyHeapTypeObject_GC_Del(obj) {\ - PyTypeObject *type = Py_TYPE((PyObject*)obj);\ - assert(__Pyx_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE));\ - PyObject_GC_Del(obj);\ - Py_DECREF(type);\ -} -#else -#define __Pyx_PyHeapTypeObject_GC_Del(obj) PyObject_GC_Del(obj) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define CYTHON_PEP393_ENABLED 1 - #define __Pyx_PyUnicode_READY(op) (0) - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GetLength(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_ReadChar(u, i) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((void)u, 1114111U) - #define __Pyx_PyUnicode_KIND(u) ((void)u, (0)) - #define __Pyx_PyUnicode_DATA(u) ((void*)u) - #define __Pyx_PyUnicode_READ(k, d, i) ((void)k, PyUnicode_ReadChar((PyObject*)(d), i)) - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GetLength(u)) -#elif PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) - #define CYTHON_PEP393_ENABLED 1 - #if PY_VERSION_HEX >= 0x030C0000 - #define 
__Pyx_PyUnicode_READY(op) (0) - #else - #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ - 0 : _PyUnicode_Ready((PyObject *)(op))) - #endif - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) - #define __Pyx_PyUnicode_KIND(u) ((int)PyUnicode_KIND(u)) - #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) - #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) - #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, (Py_UCS4) ch) - #if PY_VERSION_HEX >= 0x030C0000 - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) - #else - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) - #else - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) - #endif - #endif -#else - #define CYTHON_PEP393_ENABLED 0 - #define PyUnicode_1BYTE_KIND 1 - #define PyUnicode_2BYTE_KIND 2 - #define PyUnicode_4BYTE_KIND 4 - #define __Pyx_PyUnicode_READY(op) (0) - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 
65535U : 1114111U) - #define __Pyx_PyUnicode_KIND(u) ((int)sizeof(Py_UNICODE)) - #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) - #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) - #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = (Py_UNICODE) ch) - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) -#endif -#if CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) - #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) -#else - #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) - #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ - PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) -#endif -#if CYTHON_COMPILING_IN_PYPY - #if !defined(PyUnicode_DecodeUnicodeEscape) - #define PyUnicode_DecodeUnicodeEscape(s, size, errors) PyUnicode_Decode(s, size, "unicode_escape", errors) - #endif - #if !defined(PyUnicode_Contains) || (PY_MAJOR_VERSION == 2 && PYPY_VERSION_NUM < 0x07030500) - #undef PyUnicode_Contains - #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) - #endif - #if !defined(PyByteArray_Check) - #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) - #endif - #if !defined(PyObject_Format) - #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) - #endif -#endif -#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) -#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? 
PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) -#if PY_MAJOR_VERSION >= 3 - #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) -#else - #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) -#endif -#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) - #define PyObject_ASCII(o) PyObject_Repr(o) -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyBaseString_Type PyUnicode_Type - #define PyStringObject PyUnicodeObject - #define PyString_Type PyUnicode_Type - #define PyString_Check PyUnicode_Check - #define PyString_CheckExact PyUnicode_CheckExact -#ifndef PyObject_Unicode - #define PyObject_Unicode PyObject_Str -#endif -#endif -#if PY_MAJOR_VERSION >= 3 - #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) - #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) -#else - #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) - #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) -#endif -#if CYTHON_COMPILING_IN_CPYTHON - #define __Pyx_PySequence_ListKeepNew(obj)\ - (likely(PyList_CheckExact(obj) && Py_REFCNT(obj) == 1) ? 
__Pyx_NewRef(obj) : PySequence_List(obj)) -#else - #define __Pyx_PySequence_ListKeepNew(obj) PySequence_List(obj) -#endif -#ifndef PySet_CheckExact - #define PySet_CheckExact(obj) __Pyx_IS_TYPE(obj, &PySet_Type) -#endif -#if PY_VERSION_HEX >= 0x030900A4 - #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) - #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) -#else - #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) - #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) -#endif -#if CYTHON_ASSUME_SAFE_MACROS - #define __Pyx_PySequence_ITEM(o, i) PySequence_ITEM(o, i) - #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) - #define __Pyx_PyTuple_SET_ITEM(o, i, v) (PyTuple_SET_ITEM(o, i, v), (0)) - #define __Pyx_PyList_SET_ITEM(o, i, v) (PyList_SET_ITEM(o, i, v), (0)) - #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_GET_SIZE(o) - #define __Pyx_PyList_GET_SIZE(o) PyList_GET_SIZE(o) - #define __Pyx_PySet_GET_SIZE(o) PySet_GET_SIZE(o) - #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_GET_SIZE(o) - #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_GET_SIZE(o) -#else - #define __Pyx_PySequence_ITEM(o, i) PySequence_GetItem(o, i) - #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) - #define __Pyx_PyTuple_SET_ITEM(o, i, v) PyTuple_SetItem(o, i, v) - #define __Pyx_PyList_SET_ITEM(o, i, v) PyList_SetItem(o, i, v) - #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_Size(o) - #define __Pyx_PyList_GET_SIZE(o) PyList_Size(o) - #define __Pyx_PySet_GET_SIZE(o) PySet_Size(o) - #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_Size(o) - #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_Size(o) -#endif -#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 - #define __Pyx_PyImport_AddModuleRef(name) PyImport_AddModuleRef(name) -#else - static CYTHON_INLINE PyObject *__Pyx_PyImport_AddModuleRef(const char *name) { - PyObject *module = PyImport_AddModule(name); - Py_XINCREF(module); - return module; - } -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyIntObject PyLongObject - #define 
PyInt_Type PyLong_Type - #define PyInt_Check(op) PyLong_Check(op) - #define PyInt_CheckExact(op) PyLong_CheckExact(op) - #define __Pyx_Py3Int_Check(op) PyLong_Check(op) - #define __Pyx_Py3Int_CheckExact(op) PyLong_CheckExact(op) - #define PyInt_FromString PyLong_FromString - #define PyInt_FromUnicode PyLong_FromUnicode - #define PyInt_FromLong PyLong_FromLong - #define PyInt_FromSize_t PyLong_FromSize_t - #define PyInt_FromSsize_t PyLong_FromSsize_t - #define PyInt_AsLong PyLong_AsLong - #define PyInt_AS_LONG PyLong_AS_LONG - #define PyInt_AsSsize_t PyLong_AsSsize_t - #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask - #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask - #define PyNumber_Int PyNumber_Long -#else - #define __Pyx_Py3Int_Check(op) (PyLong_Check(op) || PyInt_Check(op)) - #define __Pyx_Py3Int_CheckExact(op) (PyLong_CheckExact(op) || PyInt_CheckExact(op)) -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyBoolObject PyLongObject -#endif -#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY - #ifndef PyUnicode_InternFromString - #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) - #endif -#endif -#if PY_VERSION_HEX < 0x030200A4 - typedef long Py_hash_t; - #define __Pyx_PyInt_FromHash_t PyInt_FromLong - #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t -#else - #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t - #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t -#endif -#if CYTHON_USE_ASYNC_SLOTS - #if PY_VERSION_HEX >= 0x030500B1 - #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods - #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) - #else - #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) - #endif -#else - #define __Pyx_PyType_AsAsync(obj) NULL -#endif -#ifndef __Pyx_PyAsyncMethodsStruct - typedef struct { - unaryfunc am_await; - unaryfunc am_aiter; - unaryfunc am_anext; - } __Pyx_PyAsyncMethodsStruct; -#endif - -#if defined(_WIN32) || defined(WIN32) || 
defined(MS_WINDOWS) - #if !defined(_USE_MATH_DEFINES) - #define _USE_MATH_DEFINES - #endif -#endif -#include -#ifdef NAN -#define __PYX_NAN() ((float) NAN) -#else -static CYTHON_INLINE float __PYX_NAN() { - float value; - memset(&value, 0xFF, sizeof(value)); - return value; -} -#endif -#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) -#define __Pyx_truncl trunc -#else -#define __Pyx_truncl truncl -#endif - -#define __PYX_MARK_ERR_POS(f_index, lineno) \ - { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } -#define __PYX_ERR(f_index, lineno, Ln_error) \ - { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } - -#ifdef CYTHON_EXTERN_C - #undef __PYX_EXTERN_C - #define __PYX_EXTERN_C CYTHON_EXTERN_C -#elif defined(__PYX_EXTERN_C) - #ifdef _MSC_VER - #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") - #else - #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. 
- #endif -#else - #ifdef __cplusplus - #define __PYX_EXTERN_C extern "C" - #else - #define __PYX_EXTERN_C extern - #endif -#endif - -#define __PYX_HAVE__jcvi__formats__cblast -#define __PYX_HAVE_API__jcvi__formats__cblast -/* Early includes */ -#include -#include -#ifdef _OPENMP -#include -#endif /* _OPENMP */ - -#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) -#define CYTHON_WITHOUT_ASSERTIONS -#endif - -typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; - const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; - -#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 -#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0 -#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) -#define __PYX_DEFAULT_STRING_ENCODING "" -#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString -#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize -#define __Pyx_uchar_cast(c) ((unsigned char)c) -#define __Pyx_long_cast(x) ((long)x) -#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ - (sizeof(type) < sizeof(Py_ssize_t)) ||\ - (sizeof(type) > sizeof(Py_ssize_t) &&\ - likely(v < (type)PY_SSIZE_T_MAX ||\ - v == (type)PY_SSIZE_T_MAX) &&\ - (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ - v == (type)PY_SSIZE_T_MIN))) ||\ - (sizeof(type) == sizeof(Py_ssize_t) &&\ - (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ - v == (type)PY_SSIZE_T_MAX))) ) -static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { - return (size_t) i < (size_t) limit; -} -#if defined (__cplusplus) && __cplusplus >= 201103L - #include - #define __Pyx_sst_abs(value) std::abs(value) -#elif SIZEOF_INT >= SIZEOF_SIZE_T - #define __Pyx_sst_abs(value) abs(value) -#elif SIZEOF_LONG >= SIZEOF_SIZE_T - #define __Pyx_sst_abs(value) labs(value) -#elif defined (_MSC_VER) - #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) -#elif 
defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define __Pyx_sst_abs(value) llabs(value) -#elif defined (__GNUC__) - #define __Pyx_sst_abs(value) __builtin_llabs(value) -#else - #define __Pyx_sst_abs(value) ((value<0) ? -value : value) -#endif -static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s); -static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); -static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); -static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char*); -#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) -#define __Pyx_PyBytes_FromString PyBytes_FromString -#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); -#if PY_MAJOR_VERSION < 3 - #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString - #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize -#else - #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString - #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize -#endif -#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyObject_AsWritableString(s) ((char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsWritableSString(s) ((signed char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsSString(s) ((const 
signed char*) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) -#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) -#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) -#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) -#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) -#define __Pyx_PyUnicode_FromOrdinal(o) PyUnicode_FromOrdinal((int)o) -#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode -#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) -#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) -static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); -static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); -static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); -static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); -#define __Pyx_PySequence_Tuple(obj)\ - (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) -static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); -static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); -static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); -#if CYTHON_ASSUME_SAFE_MACROS -#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) -#else -#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) -#endif -#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) -#if PY_MAJOR_VERSION >= 3 -#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) -#else -#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? 
__Pyx_NewRef(x) : PyNumber_Int(x)) -#endif -#if CYTHON_USE_PYLONG_INTERNALS - #if PY_VERSION_HEX >= 0x030C00A7 - #ifndef _PyLong_SIGN_MASK - #define _PyLong_SIGN_MASK 3 - #endif - #ifndef _PyLong_NON_SIZE_BITS - #define _PyLong_NON_SIZE_BITS 3 - #endif - #define __Pyx_PyLong_Sign(x) (((PyLongObject*)x)->long_value.lv_tag & _PyLong_SIGN_MASK) - #define __Pyx_PyLong_IsNeg(x) ((__Pyx_PyLong_Sign(x) & 2) != 0) - #define __Pyx_PyLong_IsNonNeg(x) (!__Pyx_PyLong_IsNeg(x)) - #define __Pyx_PyLong_IsZero(x) (__Pyx_PyLong_Sign(x) & 1) - #define __Pyx_PyLong_IsPos(x) (__Pyx_PyLong_Sign(x) == 0) - #define __Pyx_PyLong_CompactValueUnsigned(x) (__Pyx_PyLong_Digits(x)[0]) - #define __Pyx_PyLong_DigitCount(x) ((Py_ssize_t) (((PyLongObject*)x)->long_value.lv_tag >> _PyLong_NON_SIZE_BITS)) - #define __Pyx_PyLong_SignedDigitCount(x)\ - ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * __Pyx_PyLong_DigitCount(x)) - #if defined(PyUnstable_Long_IsCompact) && defined(PyUnstable_Long_CompactValue) - #define __Pyx_PyLong_IsCompact(x) PyUnstable_Long_IsCompact((PyLongObject*) x) - #define __Pyx_PyLong_CompactValue(x) PyUnstable_Long_CompactValue((PyLongObject*) x) - #else - #define __Pyx_PyLong_IsCompact(x) (((PyLongObject*)x)->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS)) - #define __Pyx_PyLong_CompactValue(x) ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * (Py_ssize_t) __Pyx_PyLong_Digits(x)[0]) - #endif - typedef Py_ssize_t __Pyx_compact_pylong; - typedef size_t __Pyx_compact_upylong; - #else - #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0) - #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0) - #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0) - #define __Pyx_PyLong_IsPos(x) (Py_SIZE(x) > 0) - #define __Pyx_PyLong_CompactValueUnsigned(x) ((Py_SIZE(x) == 0) ? 
0 : __Pyx_PyLong_Digits(x)[0]) - #define __Pyx_PyLong_DigitCount(x) __Pyx_sst_abs(Py_SIZE(x)) - #define __Pyx_PyLong_SignedDigitCount(x) Py_SIZE(x) - #define __Pyx_PyLong_IsCompact(x) (Py_SIZE(x) == 0 || Py_SIZE(x) == 1 || Py_SIZE(x) == -1) - #define __Pyx_PyLong_CompactValue(x)\ - ((Py_SIZE(x) == 0) ? (sdigit) 0 : ((Py_SIZE(x) < 0) ? -(sdigit)__Pyx_PyLong_Digits(x)[0] : (sdigit)__Pyx_PyLong_Digits(x)[0])) - typedef sdigit __Pyx_compact_pylong; - typedef digit __Pyx_compact_upylong; - #endif - #if PY_VERSION_HEX >= 0x030C00A5 - #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit) - #else - #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->ob_digit) - #endif -#endif -#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII -#include -static int __Pyx_sys_getdefaultencoding_not_ascii; -static int __Pyx_init_sys_getdefaultencoding_params(void) { - PyObject* sys; - PyObject* default_encoding = NULL; - PyObject* ascii_chars_u = NULL; - PyObject* ascii_chars_b = NULL; - const char* default_encoding_c; - sys = PyImport_ImportModule("sys"); - if (!sys) goto bad; - default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); - Py_DECREF(sys); - if (!default_encoding) goto bad; - default_encoding_c = PyBytes_AsString(default_encoding); - if (!default_encoding_c) goto bad; - if (strcmp(default_encoding_c, "ascii") == 0) { - __Pyx_sys_getdefaultencoding_not_ascii = 0; - } else { - char ascii_chars[128]; - int c; - for (c = 0; c < 128; c++) { - ascii_chars[c] = (char) c; - } - __Pyx_sys_getdefaultencoding_not_ascii = 1; - ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); - if (!ascii_chars_u) goto bad; - ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); - if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { - PyErr_Format( - PyExc_ValueError, - "This module compiled with c_string_encoding=ascii, but default 
encoding '%.200s' is not a superset of ascii.", - default_encoding_c); - goto bad; - } - Py_DECREF(ascii_chars_u); - Py_DECREF(ascii_chars_b); - } - Py_DECREF(default_encoding); - return 0; -bad: - Py_XDECREF(default_encoding); - Py_XDECREF(ascii_chars_u); - Py_XDECREF(ascii_chars_b); - return -1; -} -#endif -#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 -#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) -#else -#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) -#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT -#include -static char* __PYX_DEFAULT_STRING_ENCODING; -static int __Pyx_init_sys_getdefaultencoding_params(void) { - PyObject* sys; - PyObject* default_encoding = NULL; - char* default_encoding_c; - sys = PyImport_ImportModule("sys"); - if (!sys) goto bad; - default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); - Py_DECREF(sys); - if (!default_encoding) goto bad; - default_encoding_c = PyBytes_AsString(default_encoding); - if (!default_encoding_c) goto bad; - __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); - if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; - strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); - Py_DECREF(default_encoding); - return 0; -bad: - Py_XDECREF(default_encoding); - return -1; -} -#endif -#endif - - -/* Test for GCC > 2.95 */ -#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) - #define likely(x) __builtin_expect(!!(x), 1) - #define unlikely(x) __builtin_expect(!!(x), 0) -#else /* !__GNUC__ or GCC < 2.95 */ - #define likely(x) (x) - #define unlikely(x) (x) -#endif /* __GNUC__ */ -static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } - -#if !CYTHON_USE_MODULE_STATE -static PyObject *__pyx_m = NULL; -#endif -static int __pyx_lineno; -static int __pyx_clineno = 0; 
-static const char * __pyx_cfilenm = __FILE__; -static const char *__pyx_filename; - -/* #### Code section: filename_table ### */ - -static const char *__pyx_f[] = { - "cblast.pyx", - "", -}; -/* #### Code section: utility_code_proto_before_types ### */ -/* ForceInitThreads.proto */ -#ifndef __PYX_FORCE_INIT_THREADS - #define __PYX_FORCE_INIT_THREADS 0 -#endif - -/* #### Code section: numeric_typedefs ### */ -/* #### Code section: complex_type_declarations ### */ -/* #### Code section: type_declarations ### */ - -/*--- Type declarations ---*/ -struct __pyx_obj_4jcvi_7formats_6cblast_Blast; -struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine; -struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; -struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; - -/* "jcvi/formats/cblast.pyx":21 - * - * - * cdef class Blast: # <<<<<<<<<<<<<< - * cdef: - * FILE* fh - */ -struct __pyx_obj_4jcvi_7formats_6cblast_Blast { - PyObject_HEAD - FILE *fh; - PyObject *filename; -}; - - -/* "jcvi/formats/cblast.pyx":66 - * - * - * cdef class BlastLine: # <<<<<<<<<<<<<< - * """ - * Given a string of tab-delimited (-m 8) blast output, parse it and create - */ -struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine { - PyObject_HEAD - char _query[0x80]; - char _subject[0x80]; - int hitlen; - int nmismatch; - int ngaps; - int qstart; - int qstop; - int sstart; - int sstop; - float pctid; - float score; - double evalue; - PyObject *qseqid; - PyObject *sseqid; - int qi; - int si; - char orientation; -}; - - -/* "jcvi/formats/cblast.pyx":172 - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - * b = "\t".join(str(x) for x in args) # <<<<<<<<<<<<<< - * return BlastLine(b) - * - */ -struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr { - PyObject_HEAD - PyObject *__pyx_genexpr_arg_0; - PyObject *__pyx_v_x; - PyObject *__pyx_t_0; - 
Py_ssize_t __pyx_t_1; -}; - - -/* "cfunc.to_py":66 - * - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - */ -struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc { - PyObject_HEAD - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*__pyx_v_f)(char *, char *, float, int, int, int, int, int, int, int, float, float); -}; - -/* #### Code section: utility_code_proto ### */ - -/* --- Runtime support code (head) --- */ -/* Refnanny.proto */ -#ifndef CYTHON_REFNANNY - #define CYTHON_REFNANNY 0 -#endif -#if CYTHON_REFNANNY - typedef struct { - void (*INCREF)(void*, PyObject*, Py_ssize_t); - void (*DECREF)(void*, PyObject*, Py_ssize_t); - void (*GOTREF)(void*, PyObject*, Py_ssize_t); - void (*GIVEREF)(void*, PyObject*, Py_ssize_t); - void* (*SetupContext)(const char*, Py_ssize_t, const char*); - void (*FinishContext)(void**); - } __Pyx_RefNannyAPIStruct; - static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; - static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); - #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; -#ifdef WITH_THREAD - #define 
__Pyx_RefNannySetupContext(name, acquire_gil)\ - if (acquire_gil) {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ - PyGILState_Release(__pyx_gilstate_save);\ - } else {\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ - } - #define __Pyx_RefNannyFinishContextNogil() {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __Pyx_RefNannyFinishContext();\ - PyGILState_Release(__pyx_gilstate_save);\ - } -#else - #define __Pyx_RefNannySetupContext(name, acquire_gil)\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__)) - #define __Pyx_RefNannyFinishContextNogil() __Pyx_RefNannyFinishContext() -#endif - #define __Pyx_RefNannyFinishContextNogil() {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __Pyx_RefNannyFinishContext();\ - PyGILState_Release(__pyx_gilstate_save);\ - } - #define __Pyx_RefNannyFinishContext()\ - __Pyx_RefNanny->FinishContext(&__pyx_refnanny) - #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_XINCREF(r) do { if((r) == NULL); else {__Pyx_INCREF(r); }} while(0) - #define __Pyx_XDECREF(r) do { if((r) == NULL); else {__Pyx_DECREF(r); }} while(0) - #define __Pyx_XGOTREF(r) do { if((r) == NULL); else {__Pyx_GOTREF(r); }} while(0) - #define __Pyx_XGIVEREF(r) do { if((r) == NULL); else {__Pyx_GIVEREF(r);}} while(0) -#else - #define __Pyx_RefNannyDeclarations - #define __Pyx_RefNannySetupContext(name, acquire_gil) - #define __Pyx_RefNannyFinishContextNogil() - #define __Pyx_RefNannyFinishContext() - #define __Pyx_INCREF(r) Py_INCREF(r) - 
#define __Pyx_DECREF(r) Py_DECREF(r) - #define __Pyx_GOTREF(r) - #define __Pyx_GIVEREF(r) - #define __Pyx_XINCREF(r) Py_XINCREF(r) - #define __Pyx_XDECREF(r) Py_XDECREF(r) - #define __Pyx_XGOTREF(r) - #define __Pyx_XGIVEREF(r) -#endif -#define __Pyx_Py_XDECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; Py_XDECREF(tmp);\ - } while (0) -#define __Pyx_XDECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; __Pyx_XDECREF(tmp);\ - } while (0) -#define __Pyx_DECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; __Pyx_DECREF(tmp);\ - } while (0) -#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) -#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) - -/* PyErrExceptionMatches.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) -static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); -#else -#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) -#endif - -/* PyThreadStateGet.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; -#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; -#if PY_VERSION_HEX >= 0x030C00A6 -#define __Pyx_PyErr_Occurred() (__pyx_tstate->current_exception != NULL) -#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->current_exception ? 
(PyObject*) Py_TYPE(__pyx_tstate->current_exception) : (PyObject*) NULL) -#else -#define __Pyx_PyErr_Occurred() (__pyx_tstate->curexc_type != NULL) -#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->curexc_type) -#endif -#else -#define __Pyx_PyThreadState_declare -#define __Pyx_PyThreadState_assign -#define __Pyx_PyErr_Occurred() (PyErr_Occurred() != NULL) -#define __Pyx_PyErr_CurrentExceptionType() PyErr_Occurred() -#endif - -/* PyErrFetchRestore.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) -#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) -#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) -#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) -#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); -static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A6 -#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) -#else -#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) -#endif -#else -#define __Pyx_PyErr_Clear() PyErr_Clear() -#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) -#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) -#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) -#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetch(type, value, tb) 
PyErr_Fetch(type, value, tb) -#endif - -/* PyObjectGetAttrStr.proto */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); -#else -#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) -#endif - -/* PyObjectGetAttrStrNoError.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name); - -/* GetBuiltinName.proto */ -static PyObject *__Pyx_GetBuiltinName(PyObject *name); - -/* TupleAndListFromArray.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n); -static CYTHON_INLINE PyObject* __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n); -#endif - -/* IncludeStringH.proto */ -#include - -/* BytesEquals.proto */ -static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); - -/* UnicodeEquals.proto */ -static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); - -/* fastcall.proto */ -#if CYTHON_AVOID_BORROWED_REFS - #define __Pyx_Arg_VARARGS(args, i) PySequence_GetItem(args, i) -#elif CYTHON_ASSUME_SAFE_MACROS - #define __Pyx_Arg_VARARGS(args, i) PyTuple_GET_ITEM(args, i) -#else - #define __Pyx_Arg_VARARGS(args, i) PyTuple_GetItem(args, i) -#endif -#if CYTHON_AVOID_BORROWED_REFS - #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg) - #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg) -#else - #define __Pyx_Arg_NewRef_VARARGS(arg) arg - #define __Pyx_Arg_XDECREF_VARARGS(arg) -#endif -#define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds) -#define __Pyx_KwValues_VARARGS(args, nargs) NULL -#define __Pyx_GetKwValue_VARARGS(kw, kwvalues, s) __Pyx_PyDict_GetItemStrWithError(kw, s) -#define __Pyx_KwargsAsDict_VARARGS(kw, kwvalues) PyDict_Copy(kw) -#if CYTHON_METH_FASTCALL - #define __Pyx_Arg_FASTCALL(args, i) args[i] - #define __Pyx_NumKwargs_FASTCALL(kwds) PyTuple_GET_SIZE(kwds) - #define 
__Pyx_KwValues_FASTCALL(args, nargs) ((args) + (nargs)) - static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s); -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 - CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues); - #else - #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw) - #endif - #define __Pyx_Arg_NewRef_FASTCALL(arg) arg /* no-op, __Pyx_Arg_FASTCALL is direct and this needs - to have the same reference counting */ - #define __Pyx_Arg_XDECREF_FASTCALL(arg) -#else - #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS - #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS - #define __Pyx_KwValues_FASTCALL __Pyx_KwValues_VARARGS - #define __Pyx_GetKwValue_FASTCALL __Pyx_GetKwValue_VARARGS - #define __Pyx_KwargsAsDict_FASTCALL __Pyx_KwargsAsDict_VARARGS - #define __Pyx_Arg_NewRef_FASTCALL(arg) __Pyx_Arg_NewRef_VARARGS(arg) - #define __Pyx_Arg_XDECREF_FASTCALL(arg) __Pyx_Arg_XDECREF_VARARGS(arg) -#endif -#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS -#define __Pyx_ArgsSlice_VARARGS(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_VARARGS(args, start), stop - start) -#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_FASTCALL(args, start), stop - start) -#else -#define __Pyx_ArgsSlice_VARARGS(args, start, stop) PyTuple_GetSlice(args, start, stop) -#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) PyTuple_GetSlice(args, start, stop) -#endif - -/* RaiseArgTupleInvalid.proto */ -static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, - Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); - -/* RaiseDoubleKeywords.proto */ -static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); - -/* ParseKeywords.proto */ -static int __Pyx_ParseOptionalKeywords(PyObject *kwds, 
PyObject *const *kwvalues, - PyObject **argnames[], - PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, - const char* function_name); - -/* IncludeStructmemberH.proto */ -#include - -/* FixUpExtensionType.proto */ -#if CYTHON_USE_TYPE_SPECS -static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type); -#endif - -/* FetchSharedCythonModule.proto */ -static PyObject *__Pyx_FetchSharedCythonABIModule(void); - -/* FetchCommonType.proto */ -#if !CYTHON_USE_TYPE_SPECS -static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type); -#else -static PyTypeObject* __Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases); -#endif - -/* PyMethodNew.proto */ -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { - PyObject *typesModule=NULL, *methodType=NULL, *result=NULL; - CYTHON_UNUSED_VAR(typ); - if (!self) - return __Pyx_NewRef(func); - typesModule = PyImport_ImportModule("types"); - if (!typesModule) return NULL; - methodType = PyObject_GetAttrString(typesModule, "MethodType"); - Py_DECREF(typesModule); - if (!methodType) return NULL; - result = PyObject_CallFunctionObjArgs(methodType, func, self, NULL); - Py_DECREF(methodType); - return result; -} -#elif PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { - CYTHON_UNUSED_VAR(typ); - if (!self) - return __Pyx_NewRef(func); - return PyMethod_New(func, self); -} -#else - #define __Pyx_PyMethod_New PyMethod_New -#endif - -/* PyVectorcallFastCallDict.proto */ -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw); -#endif - -/* CythonFunctionShared.proto */ -#define __Pyx_CyFunction_USED -#define __Pyx_CYFUNCTION_STATICMETHOD 0x01 -#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02 -#define __Pyx_CYFUNCTION_CCLASS 0x04 
-#define __Pyx_CYFUNCTION_COROUTINE 0x08 -#define __Pyx_CyFunction_GetClosure(f)\ - (((__pyx_CyFunctionObject *) (f))->func_closure) -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_CyFunction_GetClassObj(f)\ - (((__pyx_CyFunctionObject *) (f))->func_classobj) -#else - #define __Pyx_CyFunction_GetClassObj(f)\ - ((PyObject*) ((PyCMethodObject *) (f))->mm_class) -#endif -#define __Pyx_CyFunction_SetClassObj(f, classobj)\ - __Pyx__CyFunction_SetClassObj((__pyx_CyFunctionObject *) (f), (classobj)) -#define __Pyx_CyFunction_Defaults(type, f)\ - ((type *)(((__pyx_CyFunctionObject *) (f))->defaults)) -#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\ - ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g) -typedef struct { -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject_HEAD - PyObject *func; -#elif PY_VERSION_HEX < 0x030900B1 - PyCFunctionObject func; -#else - PyCMethodObject func; -#endif -#if CYTHON_BACKPORT_VECTORCALL - __pyx_vectorcallfunc func_vectorcall; -#endif -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API - PyObject *func_weakreflist; -#endif - PyObject *func_dict; - PyObject *func_name; - PyObject *func_qualname; - PyObject *func_doc; - PyObject *func_globals; - PyObject *func_code; - PyObject *func_closure; -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - PyObject *func_classobj; -#endif - void *defaults; - int defaults_pyobjects; - size_t defaults_size; - int flags; - PyObject *defaults_tuple; - PyObject *defaults_kwdict; - PyObject *(*defaults_getter)(PyObject *); - PyObject *func_annotations; - PyObject *func_is_coroutine; -} __pyx_CyFunctionObject; -#undef __Pyx_CyOrPyCFunction_Check -#define __Pyx_CyFunction_Check(obj) __Pyx_TypeCheck(obj, __pyx_CyFunctionType) -#define __Pyx_CyOrPyCFunction_Check(obj) __Pyx_TypeCheck2(obj, __pyx_CyFunctionType, &PyCFunction_Type) -#define __Pyx_CyFunction_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_CyFunctionType) -static CYTHON_INLINE int 
__Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc); -#undef __Pyx_IsSameCFunction -#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCyOrCFunction(func, cfunc) -static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml, - int flags, PyObject* qualname, - PyObject *closure, - PyObject *module, PyObject *globals, - PyObject* code); -static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj); -static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m, - size_t size, - int pyobjects); -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m, - PyObject *tuple); -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m, - PyObject *dict); -static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m, - PyObject *dict); -static int __pyx_CyFunction_init(PyObject *module); -#if CYTHON_METH_FASTCALL -static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -#if CYTHON_BACKPORT_VECTORCALL -#define __Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall) -#else -#define __Pyx_CyFunction_func_vectorcall(f) (((PyCFunctionObject*)f)->vectorcall) -#endif -#endif - -/* CythonFunction.proto */ -static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, - int flags, PyObject* qualname, - PyObject *closure, - PyObject *module, PyObject *globals, - PyObject* code); - -/* GetTopmostException.proto */ -#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE 
-static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate); -#endif - -/* SaveResetException.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); -#else -#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) -#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) -#endif - -/* FastTypeChecks.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) -#define __Pyx_TypeCheck2(obj, type1, type2) __Pyx_IsAnySubtype2(Py_TYPE(obj), (PyTypeObject *)type1, (PyTypeObject *)type2) -static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); -static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b); -static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); -static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); -#else -#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) -#define __Pyx_TypeCheck2(obj, type1, type2) (PyObject_TypeCheck(obj, (PyTypeObject *)type1) || PyObject_TypeCheck(obj, (PyTypeObject *)type2)) -#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) -#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) -#endif -#define __Pyx_PyErr_ExceptionMatches2(err1, err2) 
__Pyx_PyErr_GivenExceptionMatches2(__Pyx_PyErr_CurrentExceptionType(), err1, err2) -#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) - -/* KeywordStringCheck.proto */ -static int __Pyx_CheckKeywordStrings(PyObject *kw, const char* function_name, int kw_allowed); - -/* RaiseException.proto */ -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); - -/* PyObjectCall.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); -#else -#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) -#endif - -/* UnpackUnboundCMethod.proto */ -typedef struct { - PyObject *type; - PyObject **method_name; - PyCFunction func; - PyObject *method; - int flag; -} __Pyx_CachedCFunction; - -/* CallUnboundCMethod1.proto */ -static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg); -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg); -#else -#define __Pyx_CallUnboundCMethod1(cfunc, self, arg) __Pyx__CallUnboundCMethod1(cfunc, self, arg) -#endif - -/* RaiseUnexpectedTypeError.proto */ -static int __Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj); - -/* decode_c_string_utf16.proto */ -static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16(const char *s, Py_ssize_t size, const char *errors) { - int byteorder = 0; - return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); -} -static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16LE(const char *s, Py_ssize_t size, const char *errors) { - int byteorder = -1; - return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); -} -static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16BE(const char *s, Py_ssize_t size, const char *errors) { - int byteorder = 1; - return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); -} 
- -/* decode_c_bytes.proto */ -static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes( - const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop, - const char* encoding, const char* errors, - PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)); - -/* decode_bytes.proto */ -static CYTHON_INLINE PyObject* __Pyx_decode_bytes( - PyObject* string, Py_ssize_t start, Py_ssize_t stop, - const char* encoding, const char* errors, - PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) { - char* as_c_string; - Py_ssize_t size; -#if CYTHON_ASSUME_SAFE_MACROS - as_c_string = PyBytes_AS_STRING(string); - size = PyBytes_GET_SIZE(string); -#else - if (PyBytes_AsStringAndSize(string, &as_c_string, &size) < 0) { - return NULL; - } -#endif - return __Pyx_decode_c_bytes( - as_c_string, size, - start, stop, encoding, errors, decode_func); -} - -/* ArgTypeTest.proto */ -#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ - ((likely(__Pyx_IS_TYPE(obj, type) | (none_allowed && (obj == Py_None)))) ? 
1 :\ - __Pyx__ArgTypeTest(obj, type, name, exact)) -static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); - -/* PyFunctionFastCall.proto */ -#if CYTHON_FAST_PYCALL -#if !CYTHON_VECTORCALL -#define __Pyx_PyFunction_FastCall(func, args, nargs)\ - __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) -static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); -#endif -#define __Pyx_BUILD_ASSERT_EXPR(cond)\ - (sizeof(char [1 - 2*!(cond)]) - 1) -#ifndef Py_MEMBER_SIZE -#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) -#endif -#if !CYTHON_VECTORCALL -#if PY_VERSION_HEX >= 0x03080000 - #include "frameobject.h" -#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API - #ifndef Py_BUILD_CORE - #define Py_BUILD_CORE 1 - #endif - #include "internal/pycore_frame.h" -#endif - #define __Pxy_PyFrame_Initialize_Offsets() - #define __Pyx_PyFrame_GetLocalsplus(frame) ((frame)->f_localsplus) -#else - static size_t __pyx_pyframe_localsplus_offset = 0; - #include "frameobject.h" - #define __Pxy_PyFrame_Initialize_Offsets()\ - ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ - (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) - #define __Pyx_PyFrame_GetLocalsplus(frame)\ - (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) -#endif -#endif -#endif - -/* PyObjectCallMethO.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); -#endif - -/* PyObjectFastCall.proto */ -#define __Pyx_PyObject_FastCall(func, args, nargs) __Pyx_PyObject_FastCallDict(func, args, (size_t)(nargs), NULL) -static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, 
PyObject **args, size_t nargs, PyObject *kwargs); - -/* PyObjectCallOneArg.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); - -/* SliceObject.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice( - PyObject* obj, Py_ssize_t cstart, Py_ssize_t cstop, - PyObject** py_start, PyObject** py_stop, PyObject** py_slice, - int has_cstart, int has_cstop, int wraparound); - -/* ListCompAppend.proto */ -#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS -static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) { - PyListObject* L = (PyListObject*) list; - Py_ssize_t len = Py_SIZE(list); - if (likely(L->allocated > len)) { - Py_INCREF(x); - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 - L->ob_item[len] = x; - #else - PyList_SET_ITEM(list, len, x); - #endif - __Pyx_SET_SIZE(list, len + 1); - return 0; - } - return PyList_Append(list, x); -} -#else -#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x) -#endif - -/* GetAttr.proto */ -static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *, PyObject *); - -/* SetItemInt.proto */ -#define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) :\ - (is_list ? 
(PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) :\ - __Pyx_SetItemInt_Generic(o, to_py_func(i), v))) -static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v); -static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, - int is_list, int wraparound, int boundscheck); - -/* HasAttr.proto */ -static CYTHON_INLINE int __Pyx_HasAttr(PyObject *, PyObject *); - -/* RaiseUnboundLocalError.proto */ -static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname); - -/* PyObject_Str.proto */ -#define __Pyx_PyObject_Str(obj)\ - (likely(PyString_CheckExact(obj)) ? __Pyx_NewRef(obj) : PyObject_Str(obj)) - -/* SliceObject.proto */ -#define __Pyx_PyObject_DelSlice(obj, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound)\ - __Pyx_PyObject_SetSlice(obj, (PyObject*)NULL, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound) -static CYTHON_INLINE int __Pyx_PyObject_SetSlice( - PyObject* obj, PyObject* value, Py_ssize_t cstart, Py_ssize_t cstop, - PyObject** py_start, PyObject** py_stop, PyObject** py_slice, - int has_cstart, int has_cstop, int wraparound); - -/* PyObjectCall2Args.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2); - -/* PyObjectGetMethod.proto */ -static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method); - -/* PyObjectCallMethod1.proto */ -static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg); - -/* StringJoin.proto */ -#if PY_MAJOR_VERSION < 3 -#define __Pyx_PyString_Join __Pyx_PyBytes_Join -#define __Pyx_PyBaseString_Join(s, v) (PyUnicode_CheckExact(s) ? 
PyUnicode_Join(s, v) : __Pyx_PyBytes_Join(s, v)) -#else -#define __Pyx_PyString_Join PyUnicode_Join -#define __Pyx_PyBaseString_Join PyUnicode_Join -#endif -static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values); - -/* PyObjectSetAttrStr.proto */ -#if CYTHON_USE_TYPE_SLOTS -#define __Pyx_PyObject_DelAttrStr(o,n) __Pyx_PyObject_SetAttrStr(o, n, NULL) -static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value); -#else -#define __Pyx_PyObject_DelAttrStr(o,n) PyObject_DelAttr(o,n) -#define __Pyx_PyObject_SetAttrStr(o,n,v) PyObject_SetAttr(o,n,v) -#endif - -/* PyObjectCallNoArg.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); - -/* PyObjectCallMethod0.proto */ -static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name); - -/* ValidateBasesTuple.proto */ -#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS -static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases); -#endif - -/* PyType_Ready.proto */ -CYTHON_UNUSED static int __Pyx_PyType_Ready(PyTypeObject *t); - -/* PyObject_GenericGetAttrNoDict.proto */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name); -#else -#define __Pyx_PyObject_GenericGetAttrNoDict PyObject_GenericGetAttr -#endif - -/* PyObject_GenericGetAttr.proto */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name); -#else -#define __Pyx_PyObject_GenericGetAttr PyObject_GenericGetAttr -#endif - -/* SetupReduce.proto */ -#if !CYTHON_COMPILING_IN_LIMITED_API -static int __Pyx_setup_reduce(PyObject* type_obj); -#endif - -/* Import.proto */ -static PyObject *__Pyx_Import(PyObject *name, PyObject 
*from_list, int level); - -/* ImportDottedModule.proto */ -static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple); -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple); -#endif - -/* ImportDottedModuleRelFirst.proto */ -static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject *name, PyObject *parts_tuple); - -/* PyDictVersioning.proto */ -#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS -#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) -#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) -#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ - (version_var) = __PYX_GET_DICT_VERSION(dict);\ - (cache_var) = (value); -#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ - static PY_UINT64_T __pyx_dict_version = 0;\ - static PyObject *__pyx_dict_cached_value = NULL;\ - if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ - (VAR) = __pyx_dict_cached_value;\ - } else {\ - (VAR) = __pyx_dict_cached_value = (LOOKUP);\ - __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ - }\ -} -static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); -static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); -static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); -#else -#define __PYX_GET_DICT_VERSION(dict) (0) -#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) -#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); -#endif - -/* CLineInTraceback.proto */ -#ifdef CYTHON_CLINE_IN_TRACEBACK -#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? 
c_line : 0) -#else -static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); -#endif - -/* CodeObjectCache.proto */ -#if !CYTHON_COMPILING_IN_LIMITED_API -typedef struct { - PyCodeObject* code_object; - int code_line; -} __Pyx_CodeObjectCacheEntry; -struct __Pyx_CodeObjectCache { - int count; - int max_count; - __Pyx_CodeObjectCacheEntry* entries; -}; -static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; -static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); -static PyCodeObject *__pyx_find_code_object(int code_line); -static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); -#endif - -/* AddTraceback.proto */ -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename); - -/* GCCDiagnostics.proto */ -#if !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) -#define __Pyx_HAS_GCC_DIAGNOSTIC -#endif - -/* CIntFromPy.proto */ -static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); - -/* CIntFromPy.proto */ -static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); - -/* CIntFromPy.proto */ -static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_char(char value); - -/* FormatTypeName.proto */ -#if CYTHON_COMPILING_IN_LIMITED_API -typedef PyObject *__Pyx_TypeName; -#define __Pyx_FMT_TYPENAME "%U" -static __Pyx_TypeName __Pyx_PyType_GetName(PyTypeObject* tp); -#define __Pyx_DECREF_TypeName(obj) Py_XDECREF(obj) -#else -typedef const char *__Pyx_TypeName; -#define __Pyx_FMT_TYPENAME "%.200s" -#define __Pyx_PyType_GetName(tp) ((tp)->tp_name) -#define __Pyx_DECREF_TypeName(obj) -#endif - -/* CIntFromPy.proto */ -static 
CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); - -/* SwapException.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_ExceptionSwap(type, value, tb) __Pyx__ExceptionSwap(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#else -static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb); -#endif - -/* CoroutineBase.proto */ -struct __pyx_CoroutineObject; -typedef PyObject *(*__pyx_coroutine_body_t)(struct __pyx_CoroutineObject *, PyThreadState *, PyObject *); -#if CYTHON_USE_EXC_INFO_STACK -#define __Pyx_ExcInfoStruct _PyErr_StackItem -#else -typedef struct { - PyObject *exc_type; - PyObject *exc_value; - PyObject *exc_traceback; -} __Pyx_ExcInfoStruct; -#endif -typedef struct __pyx_CoroutineObject { - PyObject_HEAD - __pyx_coroutine_body_t body; - PyObject *closure; - __Pyx_ExcInfoStruct gi_exc_state; - PyObject *gi_weakreflist; - PyObject *classobj; - PyObject *yieldfrom; - PyObject *gi_name; - PyObject *gi_qualname; - PyObject *gi_modulename; - PyObject *gi_code; - PyObject *gi_frame; - int resume_label; - char is_running; -} __pyx_CoroutineObject; -static __pyx_CoroutineObject *__Pyx__Coroutine_New( - PyTypeObject *type, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name); -static __pyx_CoroutineObject *__Pyx__Coroutine_NewInit( - __pyx_CoroutineObject *gen, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name); -static CYTHON_INLINE void __Pyx_Coroutine_ExceptionClear(__Pyx_ExcInfoStruct *self); -static int __Pyx_Coroutine_clear(PyObject *self); -static PyObject *__Pyx_Coroutine_Send(PyObject *self, PyObject *value); -static PyObject *__Pyx_Coroutine_Close(PyObject *self); -static PyObject *__Pyx_Coroutine_Throw(PyObject *gen, PyObject *args); -#if CYTHON_USE_EXC_INFO_STACK 
-#define __Pyx_Coroutine_SwapException(self) -#define __Pyx_Coroutine_ResetAndClearException(self) __Pyx_Coroutine_ExceptionClear(&(self)->gi_exc_state) -#else -#define __Pyx_Coroutine_SwapException(self) {\ - __Pyx_ExceptionSwap(&(self)->gi_exc_state.exc_type, &(self)->gi_exc_state.exc_value, &(self)->gi_exc_state.exc_traceback);\ - __Pyx_Coroutine_ResetFrameBackpointer(&(self)->gi_exc_state);\ - } -#define __Pyx_Coroutine_ResetAndClearException(self) {\ - __Pyx_ExceptionReset((self)->gi_exc_state.exc_type, (self)->gi_exc_state.exc_value, (self)->gi_exc_state.exc_traceback);\ - (self)->gi_exc_state.exc_type = (self)->gi_exc_state.exc_value = (self)->gi_exc_state.exc_traceback = NULL;\ - } -#endif -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyGen_FetchStopIterationValue(pvalue)\ - __Pyx_PyGen__FetchStopIterationValue(__pyx_tstate, pvalue) -#else -#define __Pyx_PyGen_FetchStopIterationValue(pvalue)\ - __Pyx_PyGen__FetchStopIterationValue(__Pyx_PyThreadState_Current, pvalue) -#endif -static int __Pyx_PyGen__FetchStopIterationValue(PyThreadState *tstate, PyObject **pvalue); -static CYTHON_INLINE void __Pyx_Coroutine_ResetFrameBackpointer(__Pyx_ExcInfoStruct *exc_state); - -/* PatchModuleWithCoroutine.proto */ -static PyObject* __Pyx_Coroutine_patch_module(PyObject* module, const char* py_code); - -/* PatchGeneratorABC.proto */ -static int __Pyx_patch_abc(void); - -/* Generator.proto */ -#define __Pyx_Generator_USED -#define __Pyx_Generator_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_GeneratorType) -#define __Pyx_Generator_New(body, code, closure, name, qualname, module_name)\ - __Pyx__Coroutine_New(__pyx_GeneratorType, body, code, closure, name, qualname, module_name) -static PyObject *__Pyx_Generator_Next(PyObject *self); -static int __pyx_Generator_init(PyObject *module); - -/* CheckBinaryVersion.proto */ -static unsigned long __Pyx_get_runtime_version(void); -static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer); - 
-/* InitStrings.proto */ -static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); - -/* #### Code section: module_declarations ### */ - -/* Module declarations from "libc.string" */ - -/* Module declarations from "libc.stdio" */ - -/* Module declarations from "jcvi.formats.cblast" */ -static char const *__pyx_v_4jcvi_7formats_6cblast_blast_format; -static char const *__pyx_v_4jcvi_7formats_6cblast_blast_format_line; -static char const *__pyx_v_4jcvi_7formats_6cblast_blast_output; -static char const *__pyx_v_4jcvi_7formats_6cblast_bed_output; -static PyObject *__pyx_f_4jcvi_7formats_6cblast_c_str(PyObject *); /*proto*/ -static PyObject *__pyx_f_4jcvi_7formats_6cblast_py_str(PyObject *); /*proto*/ -static struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_f_4jcvi_7formats_6cblast_create_blast_line(char *, char *, float, int, int, int, int, int, int, int, float, float); /*proto*/ -static PyObject *__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*)(char *, char *, float, int, int, int, int, int, int, int, float, float)); /*proto*/ -static int __Pyx_carray_from_py_char(PyObject *, char *, Py_ssize_t); /*proto*/ -/* #### Code section: typeinfo ### */ -/* #### Code section: before_global_var ### */ -#define __Pyx_MODULE_NAME "jcvi.formats.cblast" -extern int __pyx_module_is_main_jcvi__formats__cblast; -int __pyx_module_is_main_jcvi__formats__cblast = 0; - -/* Implementation of "jcvi.formats.cblast" */ -/* #### Code section: global_var ### */ -static PyObject *__pyx_builtin_StopIteration; -static PyObject *__pyx_builtin_TypeError; -static PyObject *__pyx_builtin_id; -static PyObject *__pyx_builtin_OverflowError; -static PyObject *__pyx_builtin_enumerate; -static PyObject *__pyx_builtin_IndexError; -/* #### Code section: string_decls ### */ -static const char __pyx_k_s[] = "s"; -static const char __pyx_k__5[] = "\t"; -static 
const char __pyx_k__6[] = "*"; -static const char __pyx_k_gc[] = "gc"; -static const char __pyx_k_id[] = "id"; -static const char __pyx_k_qi[] = "qi"; -static const char __pyx_k_si[] = "si"; -static const char __pyx_k__13[] = "?"; -static const char __pyx_k_sys[] = "sys"; -static const char __pyx_k_args[] = "args"; -static const char __pyx_k_join[] = "join"; -static const char __pyx_k_main[] = "__main__"; -static const char __pyx_k_name[] = "__name__"; -static const char __pyx_k_self[] = "self"; -static const char __pyx_k_send[] = "send"; -static const char __pyx_k_spec[] = "__spec__"; -static const char __pyx_k_test[] = "__test__"; -static const char __pyx_k_wrap[] = "wrap"; -static const char __pyx_k_Blast[] = "Blast"; -static const char __pyx_k_UTF_8[] = "UTF-8"; -static const char __pyx_k_close[] = "close"; -static const char __pyx_k_ngaps[] = "ngaps"; -static const char __pyx_k_pctid[] = "pctid"; -static const char __pyx_k_qstop[] = "qstop"; -static const char __pyx_k_query[] = "query"; -static const char __pyx_k_score[] = "score"; -static const char __pyx_k_slots[] = "__slots__"; -static const char __pyx_k_sstop[] = "sstop"; -static const char __pyx_k_throw[] = "throw"; -static const char __pyx_k_enable[] = "enable"; -static const char __pyx_k_encode[] = "encode"; -static const char __pyx_k_evalue[] = "evalue"; -static const char __pyx_k_hitlen[] = "hitlen"; -static const char __pyx_k_import[] = "__import__"; -static const char __pyx_k_qseqid[] = "qseqid"; -static const char __pyx_k_qstart[] = "qstart"; -static const char __pyx_k_reduce[] = "__reduce__"; -static const char __pyx_k_sseqid[] = "sseqid"; -static const char __pyx_k_sstart[] = "sstart"; -static const char __pyx_k_Blast_s[] = "Blast('%s')"; -static const char __pyx_k_disable[] = "disable"; -static const char __pyx_k_genexpr[] = "genexpr"; -static const char __pyx_k_richcmp[] = "__richcmp__"; -static const char __pyx_k_subject[] = "subject"; -static const char __pyx_k_filename[] = "filename"; 
-static const char __pyx_k_getstate[] = "__getstate__"; -static const char __pyx_k_setstate[] = "__setstate__"; -static const char __pyx_k_BlastLine[] = "BlastLine"; -static const char __pyx_k_TypeError[] = "TypeError"; -static const char __pyx_k_enumerate[] = "enumerate"; -static const char __pyx_k_isenabled[] = "isenabled"; -static const char __pyx_k_nmismatch[] = "nmismatch"; -static const char __pyx_k_pyx_state[] = "__pyx_state"; -static const char __pyx_k_reduce_ex[] = "__reduce_ex__"; -static const char __pyx_k_IndexError[] = "IndexError"; -static const char __pyx_k_cblast_pyx[] = "cblast.pyx"; -static const char __pyx_k_cfunc_to_py[] = "cfunc.to_py"; -static const char __pyx_k_orientation[] = "orientation"; -static const char __pyx_k_initializing[] = "_initializing"; -static const char __pyx_k_is_coroutine[] = "_is_coroutine"; -static const char __pyx_k_stringsource[] = ""; -static const char __pyx_k_OverflowError[] = "OverflowError"; -static const char __pyx_k_StopIteration[] = "StopIteration"; -static const char __pyx_k_reduce_cython[] = "__reduce_cython__"; -static const char __pyx_k_setstate_cython[] = "__setstate_cython__"; -static const char __pyx_k_BlastLine___reduce[] = "BlastLine.__reduce__"; -static const char __pyx_k_asyncio_coroutines[] = "asyncio.coroutines"; -static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; -static const char __pyx_k_jcvi_formats_cblast[] = "jcvi.formats.cblast"; -static const char __pyx_k_Blast___reduce_cython[] = "Blast.__reduce_cython__"; -static const char __pyx_k_Blast___setstate_cython[] = "Blast.__setstate_cython__"; -static const char __pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma[] = "__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc..wrap"; -static const char __pyx_k_Cythonized_fast_version_of_Blas[] = "\nCythonized (fast) version of BlastLine\n\nStolen from brentp's biostuff (thanks):\n\n"; -static const char 
__pyx_k_that_comparison_not_implemented[] = "that comparison not implemented"; -static const char __pyx_k_BlastLine___get___locals_genexpr[] = "BlastLine.__get__..genexpr"; -static const char __pyx_k_BlastLine_s_to_s_eval_3f_score_1[] = "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)"; -static const char __pyx_k_no_default___reduce___due_to_non[] = "no default __reduce__ due to non-trivial __cinit__"; -/* #### Code section: decls ### */ -static PyObject *__pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(PyObject *__pyx_self, char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, char *__pyx_v_filename); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ -static void __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, CYTHON_UNUSED PyObject 
*__pyx_v___pyx_state); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_s); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_other, size_t __pyx_v_op); /* proto */ -static Py_hash_t __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_genexpr_arg_0); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine 
*__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ 
-static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject 
*__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject 
*__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_Blast(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static __Pyx_CachedCFunction __pyx_umethod_PyString_Type_encode = {0, 0, 0, 0, 0}; -/* #### Code section: late_includes ### */ -/* #### Code section: module_state ### */ -typedef struct { - PyObject *__pyx_d; - PyObject *__pyx_b; - PyObject *__pyx_cython_runtime; - PyObject *__pyx_empty_tuple; - PyObject *__pyx_empty_bytes; - PyObject *__pyx_empty_unicode; - #ifdef __Pyx_CyFunction_USED - PyTypeObject *__pyx_CyFunctionType; - #endif - #ifdef __Pyx_FusedFunction_USED - PyTypeObject *__pyx_FusedFunctionType; - #endif - #ifdef __Pyx_Generator_USED - PyTypeObject *__pyx_GeneratorType; - #endif - #ifdef __Pyx_IterableCoroutine_USED - PyTypeObject *__pyx_IterableCoroutineType; - #endif - #ifdef __Pyx_Coroutine_USED - PyTypeObject *__pyx_CoroutineAwaitType; - #endif - #ifdef __Pyx_Coroutine_USED - PyTypeObject *__pyx_CoroutineType; - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - PyObject *__pyx_type_4jcvi_7formats_6cblast_Blast; - PyObject 
*__pyx_type_4jcvi_7formats_6cblast_BlastLine; - PyObject *__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; - PyObject *__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; - #endif - PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast_Blast; - PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast_BlastLine; - PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; - PyTypeObject *__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; - PyObject *__pyx_n_s_Blast; - PyObject *__pyx_n_s_BlastLine; - PyObject *__pyx_n_s_BlastLine___get___locals_genexpr; - PyObject *__pyx_n_s_BlastLine___reduce; - PyObject *__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1; - PyObject *__pyx_n_s_Blast___reduce_cython; - PyObject *__pyx_n_s_Blast___setstate_cython; - PyObject *__pyx_kp_s_Blast_s; - PyObject *__pyx_n_s_IndexError; - PyObject *__pyx_n_s_OverflowError; - PyObject *__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma; - PyObject *__pyx_n_s_StopIteration; - PyObject *__pyx_n_s_TypeError; - PyObject *__pyx_kp_s_UTF_8; - PyObject *__pyx_n_s__13; - PyObject *__pyx_kp_s__5; - PyObject *__pyx_n_s__6; - PyObject *__pyx_n_s_args; - PyObject *__pyx_n_s_asyncio_coroutines; - PyObject *__pyx_kp_s_cblast_pyx; - PyObject *__pyx_n_s_cfunc_to_py; - PyObject *__pyx_n_s_cline_in_traceback; - PyObject *__pyx_n_s_close; - PyObject *__pyx_kp_u_disable; - PyObject *__pyx_kp_u_enable; - PyObject *__pyx_n_s_encode; - PyObject *__pyx_n_s_enumerate; - PyObject *__pyx_n_s_evalue; - PyObject *__pyx_n_s_filename; - PyObject *__pyx_kp_u_gc; - PyObject *__pyx_n_s_genexpr; - PyObject *__pyx_n_s_getstate; - PyObject *__pyx_n_s_hitlen; - PyObject *__pyx_n_s_id; - PyObject *__pyx_n_s_import; - PyObject *__pyx_n_s_initializing; - PyObject *__pyx_n_s_is_coroutine; - PyObject 
*__pyx_kp_u_isenabled; - PyObject *__pyx_n_s_jcvi_formats_cblast; - PyObject *__pyx_n_s_join; - PyObject *__pyx_n_s_main; - PyObject *__pyx_n_s_name; - PyObject *__pyx_n_s_ngaps; - PyObject *__pyx_n_s_nmismatch; - PyObject *__pyx_kp_s_no_default___reduce___due_to_non; - PyObject *__pyx_n_s_orientation; - PyObject *__pyx_n_s_pctid; - PyObject *__pyx_n_s_pyx_state; - PyObject *__pyx_n_s_qi; - PyObject *__pyx_n_s_qseqid; - PyObject *__pyx_n_s_qstart; - PyObject *__pyx_n_s_qstop; - PyObject *__pyx_n_s_query; - PyObject *__pyx_n_s_reduce; - PyObject *__pyx_n_s_reduce_cython; - PyObject *__pyx_n_s_reduce_ex; - PyObject *__pyx_n_s_richcmp; - PyObject *__pyx_n_s_s; - PyObject *__pyx_n_s_score; - PyObject *__pyx_n_s_self; - PyObject *__pyx_n_s_send; - PyObject *__pyx_n_s_setstate; - PyObject *__pyx_n_s_setstate_cython; - PyObject *__pyx_n_s_si; - PyObject *__pyx_n_s_slots; - PyObject *__pyx_n_s_spec; - PyObject *__pyx_n_s_sseqid; - PyObject *__pyx_n_s_sstart; - PyObject *__pyx_n_s_sstop; - PyObject *__pyx_kp_s_stringsource; - PyObject *__pyx_n_s_subject; - PyObject *__pyx_n_s_sys; - PyObject *__pyx_n_s_test; - PyObject *__pyx_kp_s_that_comparison_not_implemented; - PyObject *__pyx_n_s_throw; - PyObject *__pyx_n_s_wrap; - PyObject *__pyx_int_2; - PyObject *__pyx_int_12; - PyObject *__pyx_tuple_; - PyObject *__pyx_slice__4; - PyObject *__pyx_tuple__3; - PyObject *__pyx_tuple__7; - PyObject *__pyx_tuple__9; - PyObject *__pyx_tuple__11; - PyObject *__pyx_codeobj__2; - PyObject *__pyx_codeobj__8; - PyObject *__pyx_codeobj__10; - PyObject *__pyx_codeobj__12; -} __pyx_mstate; - -#if CYTHON_USE_MODULE_STATE -#ifdef __cplusplus -namespace { - extern struct PyModuleDef __pyx_moduledef; -} /* anonymous namespace */ -#else -static struct PyModuleDef __pyx_moduledef; -#endif - -#define __pyx_mstate(o) ((__pyx_mstate *)__Pyx_PyModule_GetState(o)) - -#define __pyx_mstate_global (__pyx_mstate(PyState_FindModule(&__pyx_moduledef))) - -#define __pyx_m (PyState_FindModule(&__pyx_moduledef)) 
-#else -static __pyx_mstate __pyx_mstate_global_static = -#ifdef __cplusplus - {}; -#else - {0}; -#endif -static __pyx_mstate *__pyx_mstate_global = &__pyx_mstate_global_static; -#endif -/* #### Code section: module_state_clear ### */ -#if CYTHON_USE_MODULE_STATE -static int __pyx_m_clear(PyObject *m) { - __pyx_mstate *clear_module_state = __pyx_mstate(m); - if (!clear_module_state) return 0; - Py_CLEAR(clear_module_state->__pyx_d); - Py_CLEAR(clear_module_state->__pyx_b); - Py_CLEAR(clear_module_state->__pyx_cython_runtime); - Py_CLEAR(clear_module_state->__pyx_empty_tuple); - Py_CLEAR(clear_module_state->__pyx_empty_bytes); - Py_CLEAR(clear_module_state->__pyx_empty_unicode); - #ifdef __Pyx_CyFunction_USED - Py_CLEAR(clear_module_state->__pyx_CyFunctionType); - #endif - #ifdef __Pyx_FusedFunction_USED - Py_CLEAR(clear_module_state->__pyx_FusedFunctionType); - #endif - Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast_Blast); - Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast_Blast); - Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); - Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast_BlastLine); - Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); - Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); - Py_CLEAR(clear_module_state->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); - Py_CLEAR(clear_module_state->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); - Py_CLEAR(clear_module_state->__pyx_n_s_Blast); - Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine); - Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine___get___locals_genexpr); - 
Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine___reduce); - Py_CLEAR(clear_module_state->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1); - Py_CLEAR(clear_module_state->__pyx_n_s_Blast___reduce_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_Blast___setstate_cython); - Py_CLEAR(clear_module_state->__pyx_kp_s_Blast_s); - Py_CLEAR(clear_module_state->__pyx_n_s_IndexError); - Py_CLEAR(clear_module_state->__pyx_n_s_OverflowError); - Py_CLEAR(clear_module_state->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma); - Py_CLEAR(clear_module_state->__pyx_n_s_StopIteration); - Py_CLEAR(clear_module_state->__pyx_n_s_TypeError); - Py_CLEAR(clear_module_state->__pyx_kp_s_UTF_8); - Py_CLEAR(clear_module_state->__pyx_n_s__13); - Py_CLEAR(clear_module_state->__pyx_kp_s__5); - Py_CLEAR(clear_module_state->__pyx_n_s__6); - Py_CLEAR(clear_module_state->__pyx_n_s_args); - Py_CLEAR(clear_module_state->__pyx_n_s_asyncio_coroutines); - Py_CLEAR(clear_module_state->__pyx_kp_s_cblast_pyx); - Py_CLEAR(clear_module_state->__pyx_n_s_cfunc_to_py); - Py_CLEAR(clear_module_state->__pyx_n_s_cline_in_traceback); - Py_CLEAR(clear_module_state->__pyx_n_s_close); - Py_CLEAR(clear_module_state->__pyx_kp_u_disable); - Py_CLEAR(clear_module_state->__pyx_kp_u_enable); - Py_CLEAR(clear_module_state->__pyx_n_s_encode); - Py_CLEAR(clear_module_state->__pyx_n_s_enumerate); - Py_CLEAR(clear_module_state->__pyx_n_s_evalue); - Py_CLEAR(clear_module_state->__pyx_n_s_filename); - Py_CLEAR(clear_module_state->__pyx_kp_u_gc); - Py_CLEAR(clear_module_state->__pyx_n_s_genexpr); - Py_CLEAR(clear_module_state->__pyx_n_s_getstate); - Py_CLEAR(clear_module_state->__pyx_n_s_hitlen); - Py_CLEAR(clear_module_state->__pyx_n_s_id); - Py_CLEAR(clear_module_state->__pyx_n_s_import); - Py_CLEAR(clear_module_state->__pyx_n_s_initializing); - Py_CLEAR(clear_module_state->__pyx_n_s_is_coroutine); - Py_CLEAR(clear_module_state->__pyx_kp_u_isenabled); - Py_CLEAR(clear_module_state->__pyx_n_s_jcvi_formats_cblast); - 
Py_CLEAR(clear_module_state->__pyx_n_s_join); - Py_CLEAR(clear_module_state->__pyx_n_s_main); - Py_CLEAR(clear_module_state->__pyx_n_s_name); - Py_CLEAR(clear_module_state->__pyx_n_s_ngaps); - Py_CLEAR(clear_module_state->__pyx_n_s_nmismatch); - Py_CLEAR(clear_module_state->__pyx_kp_s_no_default___reduce___due_to_non); - Py_CLEAR(clear_module_state->__pyx_n_s_orientation); - Py_CLEAR(clear_module_state->__pyx_n_s_pctid); - Py_CLEAR(clear_module_state->__pyx_n_s_pyx_state); - Py_CLEAR(clear_module_state->__pyx_n_s_qi); - Py_CLEAR(clear_module_state->__pyx_n_s_qseqid); - Py_CLEAR(clear_module_state->__pyx_n_s_qstart); - Py_CLEAR(clear_module_state->__pyx_n_s_qstop); - Py_CLEAR(clear_module_state->__pyx_n_s_query); - Py_CLEAR(clear_module_state->__pyx_n_s_reduce); - Py_CLEAR(clear_module_state->__pyx_n_s_reduce_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_reduce_ex); - Py_CLEAR(clear_module_state->__pyx_n_s_richcmp); - Py_CLEAR(clear_module_state->__pyx_n_s_s); - Py_CLEAR(clear_module_state->__pyx_n_s_score); - Py_CLEAR(clear_module_state->__pyx_n_s_self); - Py_CLEAR(clear_module_state->__pyx_n_s_send); - Py_CLEAR(clear_module_state->__pyx_n_s_setstate); - Py_CLEAR(clear_module_state->__pyx_n_s_setstate_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_si); - Py_CLEAR(clear_module_state->__pyx_n_s_slots); - Py_CLEAR(clear_module_state->__pyx_n_s_spec); - Py_CLEAR(clear_module_state->__pyx_n_s_sseqid); - Py_CLEAR(clear_module_state->__pyx_n_s_sstart); - Py_CLEAR(clear_module_state->__pyx_n_s_sstop); - Py_CLEAR(clear_module_state->__pyx_kp_s_stringsource); - Py_CLEAR(clear_module_state->__pyx_n_s_subject); - Py_CLEAR(clear_module_state->__pyx_n_s_sys); - Py_CLEAR(clear_module_state->__pyx_n_s_test); - Py_CLEAR(clear_module_state->__pyx_kp_s_that_comparison_not_implemented); - Py_CLEAR(clear_module_state->__pyx_n_s_throw); - Py_CLEAR(clear_module_state->__pyx_n_s_wrap); - Py_CLEAR(clear_module_state->__pyx_int_2); - Py_CLEAR(clear_module_state->__pyx_int_12); - 
Py_CLEAR(clear_module_state->__pyx_tuple_); - Py_CLEAR(clear_module_state->__pyx_slice__4); - Py_CLEAR(clear_module_state->__pyx_tuple__3); - Py_CLEAR(clear_module_state->__pyx_tuple__7); - Py_CLEAR(clear_module_state->__pyx_tuple__9); - Py_CLEAR(clear_module_state->__pyx_tuple__11); - Py_CLEAR(clear_module_state->__pyx_codeobj__2); - Py_CLEAR(clear_module_state->__pyx_codeobj__8); - Py_CLEAR(clear_module_state->__pyx_codeobj__10); - Py_CLEAR(clear_module_state->__pyx_codeobj__12); - return 0; -} -#endif -/* #### Code section: module_state_traverse ### */ -#if CYTHON_USE_MODULE_STATE -static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) { - __pyx_mstate *traverse_module_state = __pyx_mstate(m); - if (!traverse_module_state) return 0; - Py_VISIT(traverse_module_state->__pyx_d); - Py_VISIT(traverse_module_state->__pyx_b); - Py_VISIT(traverse_module_state->__pyx_cython_runtime); - Py_VISIT(traverse_module_state->__pyx_empty_tuple); - Py_VISIT(traverse_module_state->__pyx_empty_bytes); - Py_VISIT(traverse_module_state->__pyx_empty_unicode); - #ifdef __Pyx_CyFunction_USED - Py_VISIT(traverse_module_state->__pyx_CyFunctionType); - #endif - #ifdef __Pyx_FusedFunction_USED - Py_VISIT(traverse_module_state->__pyx_FusedFunctionType); - #endif - Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast_Blast); - Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast_Blast); - Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); - Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast_BlastLine); - Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); - Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); - Py_VISIT(traverse_module_state->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); - 
Py_VISIT(traverse_module_state->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); - Py_VISIT(traverse_module_state->__pyx_n_s_Blast); - Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine); - Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine___get___locals_genexpr); - Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine___reduce); - Py_VISIT(traverse_module_state->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1); - Py_VISIT(traverse_module_state->__pyx_n_s_Blast___reduce_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_Blast___setstate_cython); - Py_VISIT(traverse_module_state->__pyx_kp_s_Blast_s); - Py_VISIT(traverse_module_state->__pyx_n_s_IndexError); - Py_VISIT(traverse_module_state->__pyx_n_s_OverflowError); - Py_VISIT(traverse_module_state->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma); - Py_VISIT(traverse_module_state->__pyx_n_s_StopIteration); - Py_VISIT(traverse_module_state->__pyx_n_s_TypeError); - Py_VISIT(traverse_module_state->__pyx_kp_s_UTF_8); - Py_VISIT(traverse_module_state->__pyx_n_s__13); - Py_VISIT(traverse_module_state->__pyx_kp_s__5); - Py_VISIT(traverse_module_state->__pyx_n_s__6); - Py_VISIT(traverse_module_state->__pyx_n_s_args); - Py_VISIT(traverse_module_state->__pyx_n_s_asyncio_coroutines); - Py_VISIT(traverse_module_state->__pyx_kp_s_cblast_pyx); - Py_VISIT(traverse_module_state->__pyx_n_s_cfunc_to_py); - Py_VISIT(traverse_module_state->__pyx_n_s_cline_in_traceback); - Py_VISIT(traverse_module_state->__pyx_n_s_close); - Py_VISIT(traverse_module_state->__pyx_kp_u_disable); - Py_VISIT(traverse_module_state->__pyx_kp_u_enable); - Py_VISIT(traverse_module_state->__pyx_n_s_encode); - Py_VISIT(traverse_module_state->__pyx_n_s_enumerate); - Py_VISIT(traverse_module_state->__pyx_n_s_evalue); - Py_VISIT(traverse_module_state->__pyx_n_s_filename); - Py_VISIT(traverse_module_state->__pyx_kp_u_gc); - 
Py_VISIT(traverse_module_state->__pyx_n_s_genexpr); - Py_VISIT(traverse_module_state->__pyx_n_s_getstate); - Py_VISIT(traverse_module_state->__pyx_n_s_hitlen); - Py_VISIT(traverse_module_state->__pyx_n_s_id); - Py_VISIT(traverse_module_state->__pyx_n_s_import); - Py_VISIT(traverse_module_state->__pyx_n_s_initializing); - Py_VISIT(traverse_module_state->__pyx_n_s_is_coroutine); - Py_VISIT(traverse_module_state->__pyx_kp_u_isenabled); - Py_VISIT(traverse_module_state->__pyx_n_s_jcvi_formats_cblast); - Py_VISIT(traverse_module_state->__pyx_n_s_join); - Py_VISIT(traverse_module_state->__pyx_n_s_main); - Py_VISIT(traverse_module_state->__pyx_n_s_name); - Py_VISIT(traverse_module_state->__pyx_n_s_ngaps); - Py_VISIT(traverse_module_state->__pyx_n_s_nmismatch); - Py_VISIT(traverse_module_state->__pyx_kp_s_no_default___reduce___due_to_non); - Py_VISIT(traverse_module_state->__pyx_n_s_orientation); - Py_VISIT(traverse_module_state->__pyx_n_s_pctid); - Py_VISIT(traverse_module_state->__pyx_n_s_pyx_state); - Py_VISIT(traverse_module_state->__pyx_n_s_qi); - Py_VISIT(traverse_module_state->__pyx_n_s_qseqid); - Py_VISIT(traverse_module_state->__pyx_n_s_qstart); - Py_VISIT(traverse_module_state->__pyx_n_s_qstop); - Py_VISIT(traverse_module_state->__pyx_n_s_query); - Py_VISIT(traverse_module_state->__pyx_n_s_reduce); - Py_VISIT(traverse_module_state->__pyx_n_s_reduce_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_reduce_ex); - Py_VISIT(traverse_module_state->__pyx_n_s_richcmp); - Py_VISIT(traverse_module_state->__pyx_n_s_s); - Py_VISIT(traverse_module_state->__pyx_n_s_score); - Py_VISIT(traverse_module_state->__pyx_n_s_self); - Py_VISIT(traverse_module_state->__pyx_n_s_send); - Py_VISIT(traverse_module_state->__pyx_n_s_setstate); - Py_VISIT(traverse_module_state->__pyx_n_s_setstate_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_si); - Py_VISIT(traverse_module_state->__pyx_n_s_slots); - Py_VISIT(traverse_module_state->__pyx_n_s_spec); - 
Py_VISIT(traverse_module_state->__pyx_n_s_sseqid); - Py_VISIT(traverse_module_state->__pyx_n_s_sstart); - Py_VISIT(traverse_module_state->__pyx_n_s_sstop); - Py_VISIT(traverse_module_state->__pyx_kp_s_stringsource); - Py_VISIT(traverse_module_state->__pyx_n_s_subject); - Py_VISIT(traverse_module_state->__pyx_n_s_sys); - Py_VISIT(traverse_module_state->__pyx_n_s_test); - Py_VISIT(traverse_module_state->__pyx_kp_s_that_comparison_not_implemented); - Py_VISIT(traverse_module_state->__pyx_n_s_throw); - Py_VISIT(traverse_module_state->__pyx_n_s_wrap); - Py_VISIT(traverse_module_state->__pyx_int_2); - Py_VISIT(traverse_module_state->__pyx_int_12); - Py_VISIT(traverse_module_state->__pyx_tuple_); - Py_VISIT(traverse_module_state->__pyx_slice__4); - Py_VISIT(traverse_module_state->__pyx_tuple__3); - Py_VISIT(traverse_module_state->__pyx_tuple__7); - Py_VISIT(traverse_module_state->__pyx_tuple__9); - Py_VISIT(traverse_module_state->__pyx_tuple__11); - Py_VISIT(traverse_module_state->__pyx_codeobj__2); - Py_VISIT(traverse_module_state->__pyx_codeobj__8); - Py_VISIT(traverse_module_state->__pyx_codeobj__10); - Py_VISIT(traverse_module_state->__pyx_codeobj__12); - return 0; -} -#endif -/* #### Code section: module_state_defines ### */ -#define __pyx_d __pyx_mstate_global->__pyx_d -#define __pyx_b __pyx_mstate_global->__pyx_b -#define __pyx_cython_runtime __pyx_mstate_global->__pyx_cython_runtime -#define __pyx_empty_tuple __pyx_mstate_global->__pyx_empty_tuple -#define __pyx_empty_bytes __pyx_mstate_global->__pyx_empty_bytes -#define __pyx_empty_unicode __pyx_mstate_global->__pyx_empty_unicode -#ifdef __Pyx_CyFunction_USED -#define __pyx_CyFunctionType __pyx_mstate_global->__pyx_CyFunctionType -#endif -#ifdef __Pyx_FusedFunction_USED -#define __pyx_FusedFunctionType __pyx_mstate_global->__pyx_FusedFunctionType -#endif -#ifdef __Pyx_Generator_USED -#define __pyx_GeneratorType __pyx_mstate_global->__pyx_GeneratorType -#endif -#ifdef __Pyx_IterableCoroutine_USED -#define 
__pyx_IterableCoroutineType __pyx_mstate_global->__pyx_IterableCoroutineType -#endif -#ifdef __Pyx_Coroutine_USED -#define __pyx_CoroutineAwaitType __pyx_mstate_global->__pyx_CoroutineAwaitType -#endif -#ifdef __Pyx_Coroutine_USED -#define __pyx_CoroutineType __pyx_mstate_global->__pyx_CoroutineType -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#define __pyx_type_4jcvi_7formats_6cblast_Blast __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast_Blast -#define __pyx_type_4jcvi_7formats_6cblast_BlastLine __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast_BlastLine -#define __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr -#define __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc __pyx_mstate_global->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc -#endif -#define __pyx_ptype_4jcvi_7formats_6cblast_Blast __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast_Blast -#define __pyx_ptype_4jcvi_7formats_6cblast_BlastLine __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine -#define __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr -#define __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc __pyx_mstate_global->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc -#define __pyx_n_s_Blast __pyx_mstate_global->__pyx_n_s_Blast -#define __pyx_n_s_BlastLine 
__pyx_mstate_global->__pyx_n_s_BlastLine -#define __pyx_n_s_BlastLine___get___locals_genexpr __pyx_mstate_global->__pyx_n_s_BlastLine___get___locals_genexpr -#define __pyx_n_s_BlastLine___reduce __pyx_mstate_global->__pyx_n_s_BlastLine___reduce -#define __pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1 __pyx_mstate_global->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1 -#define __pyx_n_s_Blast___reduce_cython __pyx_mstate_global->__pyx_n_s_Blast___reduce_cython -#define __pyx_n_s_Blast___setstate_cython __pyx_mstate_global->__pyx_n_s_Blast___setstate_cython -#define __pyx_kp_s_Blast_s __pyx_mstate_global->__pyx_kp_s_Blast_s -#define __pyx_n_s_IndexError __pyx_mstate_global->__pyx_n_s_IndexError -#define __pyx_n_s_OverflowError __pyx_mstate_global->__pyx_n_s_OverflowError -#define __pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma __pyx_mstate_global->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma -#define __pyx_n_s_StopIteration __pyx_mstate_global->__pyx_n_s_StopIteration -#define __pyx_n_s_TypeError __pyx_mstate_global->__pyx_n_s_TypeError -#define __pyx_kp_s_UTF_8 __pyx_mstate_global->__pyx_kp_s_UTF_8 -#define __pyx_n_s__13 __pyx_mstate_global->__pyx_n_s__13 -#define __pyx_kp_s__5 __pyx_mstate_global->__pyx_kp_s__5 -#define __pyx_n_s__6 __pyx_mstate_global->__pyx_n_s__6 -#define __pyx_n_s_args __pyx_mstate_global->__pyx_n_s_args -#define __pyx_n_s_asyncio_coroutines __pyx_mstate_global->__pyx_n_s_asyncio_coroutines -#define __pyx_kp_s_cblast_pyx __pyx_mstate_global->__pyx_kp_s_cblast_pyx -#define __pyx_n_s_cfunc_to_py __pyx_mstate_global->__pyx_n_s_cfunc_to_py -#define __pyx_n_s_cline_in_traceback __pyx_mstate_global->__pyx_n_s_cline_in_traceback -#define __pyx_n_s_close __pyx_mstate_global->__pyx_n_s_close -#define __pyx_kp_u_disable __pyx_mstate_global->__pyx_kp_u_disable -#define __pyx_kp_u_enable __pyx_mstate_global->__pyx_kp_u_enable -#define __pyx_n_s_encode __pyx_mstate_global->__pyx_n_s_encode -#define __pyx_n_s_enumerate __pyx_mstate_global->__pyx_n_s_enumerate -#define 
__pyx_n_s_evalue __pyx_mstate_global->__pyx_n_s_evalue -#define __pyx_n_s_filename __pyx_mstate_global->__pyx_n_s_filename -#define __pyx_kp_u_gc __pyx_mstate_global->__pyx_kp_u_gc -#define __pyx_n_s_genexpr __pyx_mstate_global->__pyx_n_s_genexpr -#define __pyx_n_s_getstate __pyx_mstate_global->__pyx_n_s_getstate -#define __pyx_n_s_hitlen __pyx_mstate_global->__pyx_n_s_hitlen -#define __pyx_n_s_id __pyx_mstate_global->__pyx_n_s_id -#define __pyx_n_s_import __pyx_mstate_global->__pyx_n_s_import -#define __pyx_n_s_initializing __pyx_mstate_global->__pyx_n_s_initializing -#define __pyx_n_s_is_coroutine __pyx_mstate_global->__pyx_n_s_is_coroutine -#define __pyx_kp_u_isenabled __pyx_mstate_global->__pyx_kp_u_isenabled -#define __pyx_n_s_jcvi_formats_cblast __pyx_mstate_global->__pyx_n_s_jcvi_formats_cblast -#define __pyx_n_s_join __pyx_mstate_global->__pyx_n_s_join -#define __pyx_n_s_main __pyx_mstate_global->__pyx_n_s_main -#define __pyx_n_s_name __pyx_mstate_global->__pyx_n_s_name -#define __pyx_n_s_ngaps __pyx_mstate_global->__pyx_n_s_ngaps -#define __pyx_n_s_nmismatch __pyx_mstate_global->__pyx_n_s_nmismatch -#define __pyx_kp_s_no_default___reduce___due_to_non __pyx_mstate_global->__pyx_kp_s_no_default___reduce___due_to_non -#define __pyx_n_s_orientation __pyx_mstate_global->__pyx_n_s_orientation -#define __pyx_n_s_pctid __pyx_mstate_global->__pyx_n_s_pctid -#define __pyx_n_s_pyx_state __pyx_mstate_global->__pyx_n_s_pyx_state -#define __pyx_n_s_qi __pyx_mstate_global->__pyx_n_s_qi -#define __pyx_n_s_qseqid __pyx_mstate_global->__pyx_n_s_qseqid -#define __pyx_n_s_qstart __pyx_mstate_global->__pyx_n_s_qstart -#define __pyx_n_s_qstop __pyx_mstate_global->__pyx_n_s_qstop -#define __pyx_n_s_query __pyx_mstate_global->__pyx_n_s_query -#define __pyx_n_s_reduce __pyx_mstate_global->__pyx_n_s_reduce -#define __pyx_n_s_reduce_cython __pyx_mstate_global->__pyx_n_s_reduce_cython -#define __pyx_n_s_reduce_ex __pyx_mstate_global->__pyx_n_s_reduce_ex -#define __pyx_n_s_richcmp 
__pyx_mstate_global->__pyx_n_s_richcmp -#define __pyx_n_s_s __pyx_mstate_global->__pyx_n_s_s -#define __pyx_n_s_score __pyx_mstate_global->__pyx_n_s_score -#define __pyx_n_s_self __pyx_mstate_global->__pyx_n_s_self -#define __pyx_n_s_send __pyx_mstate_global->__pyx_n_s_send -#define __pyx_n_s_setstate __pyx_mstate_global->__pyx_n_s_setstate -#define __pyx_n_s_setstate_cython __pyx_mstate_global->__pyx_n_s_setstate_cython -#define __pyx_n_s_si __pyx_mstate_global->__pyx_n_s_si -#define __pyx_n_s_slots __pyx_mstate_global->__pyx_n_s_slots -#define __pyx_n_s_spec __pyx_mstate_global->__pyx_n_s_spec -#define __pyx_n_s_sseqid __pyx_mstate_global->__pyx_n_s_sseqid -#define __pyx_n_s_sstart __pyx_mstate_global->__pyx_n_s_sstart -#define __pyx_n_s_sstop __pyx_mstate_global->__pyx_n_s_sstop -#define __pyx_kp_s_stringsource __pyx_mstate_global->__pyx_kp_s_stringsource -#define __pyx_n_s_subject __pyx_mstate_global->__pyx_n_s_subject -#define __pyx_n_s_sys __pyx_mstate_global->__pyx_n_s_sys -#define __pyx_n_s_test __pyx_mstate_global->__pyx_n_s_test -#define __pyx_kp_s_that_comparison_not_implemented __pyx_mstate_global->__pyx_kp_s_that_comparison_not_implemented -#define __pyx_n_s_throw __pyx_mstate_global->__pyx_n_s_throw -#define __pyx_n_s_wrap __pyx_mstate_global->__pyx_n_s_wrap -#define __pyx_int_2 __pyx_mstate_global->__pyx_int_2 -#define __pyx_int_12 __pyx_mstate_global->__pyx_int_12 -#define __pyx_tuple_ __pyx_mstate_global->__pyx_tuple_ -#define __pyx_slice__4 __pyx_mstate_global->__pyx_slice__4 -#define __pyx_tuple__3 __pyx_mstate_global->__pyx_tuple__3 -#define __pyx_tuple__7 __pyx_mstate_global->__pyx_tuple__7 -#define __pyx_tuple__9 __pyx_mstate_global->__pyx_tuple__9 -#define __pyx_tuple__11 __pyx_mstate_global->__pyx_tuple__11 -#define __pyx_codeobj__2 __pyx_mstate_global->__pyx_codeobj__2 -#define __pyx_codeobj__8 __pyx_mstate_global->__pyx_codeobj__8 -#define __pyx_codeobj__10 __pyx_mstate_global->__pyx_codeobj__10 -#define __pyx_codeobj__12 
__pyx_mstate_global->__pyx_codeobj__12 -/* #### Code section: module_code ### */ - -/* "cfunc.to_py":67 - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap(PyObject *__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap, "wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'"); -static PyMethodDef 
__pyx_mdef_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap = {"wrap", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap}; -static PyObject *__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap(PyObject *__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - char *__pyx_v_query; - char *__pyx_v_subject; - float __pyx_v_pctid; - int __pyx_v_hitlen; - int __pyx_v_nmismatch; - int __pyx_v_ngaps; - int __pyx_v_qstart; - int __pyx_v_qstop; - int __pyx_v_sstart; - int __pyx_v_sstop; - float __pyx_v_evalue; - float __pyx_v_score; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[12] = {0,0,0,0,0,0,0,0,0,0,0,0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("wrap (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = 
{&__pyx_n_s_query,&__pyx_n_s_subject,&__pyx_n_s_pctid,&__pyx_n_s_hitlen,&__pyx_n_s_nmismatch,&__pyx_n_s_ngaps,&__pyx_n_s_qstart,&__pyx_n_s_qstop,&__pyx_n_s_sstart,&__pyx_n_s_sstop,&__pyx_n_s_evalue,&__pyx_n_s_score,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 12: values[11] = __Pyx_Arg_FASTCALL(__pyx_args, 11); - CYTHON_FALLTHROUGH; - case 11: values[10] = __Pyx_Arg_FASTCALL(__pyx_args, 10); - CYTHON_FALLTHROUGH; - case 10: values[9] = __Pyx_Arg_FASTCALL(__pyx_args, 9); - CYTHON_FALLTHROUGH; - case 9: values[8] = __Pyx_Arg_FASTCALL(__pyx_args, 8); - CYTHON_FALLTHROUGH; - case 8: values[7] = __Pyx_Arg_FASTCALL(__pyx_args, 7); - CYTHON_FALLTHROUGH; - case 7: values[6] = __Pyx_Arg_FASTCALL(__pyx_args, 6); - CYTHON_FALLTHROUGH; - case 6: values[5] = __Pyx_Arg_FASTCALL(__pyx_args, 5); - CYTHON_FALLTHROUGH; - case 5: values[4] = __Pyx_Arg_FASTCALL(__pyx_args, 4); - CYTHON_FALLTHROUGH; - case 4: values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3); - CYTHON_FALLTHROUGH; - case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_query)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_subject)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[1]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 
1); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 2: - if (likely((values[2] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pctid)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[2]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 2); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 3: - if (likely((values[3] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_hitlen)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[3]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 3); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 4: - if (likely((values[4] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_nmismatch)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[4]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 4); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 5: - if (likely((values[5] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_ngaps)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[5]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 5); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 6: - if (likely((values[6] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_qstart)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[6]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 6); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 7: - if (likely((values[7] = 
__Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_qstop)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[7]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 7); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 8: - if (likely((values[8] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_sstart)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[8]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 8); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 9: - if (likely((values[9] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_sstop)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[9]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 9); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 10: - if (likely((values[10] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_evalue)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[10]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 10); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 11: - if (likely((values[11] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_score)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[11]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 11); __PYX_ERR(1, 67, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, 
kwd_pos_args, "wrap") < 0)) __PYX_ERR(1, 67, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 12)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3); - values[4] = __Pyx_Arg_FASTCALL(__pyx_args, 4); - values[5] = __Pyx_Arg_FASTCALL(__pyx_args, 5); - values[6] = __Pyx_Arg_FASTCALL(__pyx_args, 6); - values[7] = __Pyx_Arg_FASTCALL(__pyx_args, 7); - values[8] = __Pyx_Arg_FASTCALL(__pyx_args, 8); - values[9] = __Pyx_Arg_FASTCALL(__pyx_args, 9); - values[10] = __Pyx_Arg_FASTCALL(__pyx_args, 10); - values[11] = __Pyx_Arg_FASTCALL(__pyx_args, 11); - } - __pyx_v_query = __Pyx_PyObject_AsWritableString(values[0]); if (unlikely((!__pyx_v_query) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_subject = __Pyx_PyObject_AsWritableString(values[1]); if (unlikely((!__pyx_v_subject) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_pctid = __pyx_PyFloat_AsFloat(values[2]); if (unlikely((__pyx_v_pctid == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_hitlen = __Pyx_PyInt_As_int(values[3]); if (unlikely((__pyx_v_hitlen == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_nmismatch = __Pyx_PyInt_As_int(values[4]); if (unlikely((__pyx_v_nmismatch == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_ngaps = __Pyx_PyInt_As_int(values[5]); if (unlikely((__pyx_v_ngaps == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_qstart = __Pyx_PyInt_As_int(values[6]); if (unlikely((__pyx_v_qstart == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_qstop = __Pyx_PyInt_As_int(values[7]); if (unlikely((__pyx_v_qstop == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_sstart = __Pyx_PyInt_As_int(values[8]); if 
(unlikely((__pyx_v_sstart == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_sstop = __Pyx_PyInt_As_int(values[9]); if (unlikely((__pyx_v_sstop == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_evalue = __pyx_PyFloat_AsFloat(values[10]); if (unlikely((__pyx_v_evalue == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_score = __pyx_PyFloat_AsFloat(values[11]); if (unlikely((__pyx_v_score == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - } - goto __pyx_L6_skip; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, __pyx_nargs); __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_L6_skip:; - goto __pyx_L4_argument_unpacking_done; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc.wrap", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(__pyx_self, __pyx_v_query, __pyx_v_subject, __pyx_v_pctid, __pyx_v_hitlen, __pyx_v_nmismatch, __pyx_v_ngaps, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_score); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject 
*__pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(PyObject *__pyx_self, char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score) { - struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_cur_scope; - struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_outer_scope; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("wrap", 1); - __pyx_outer_scope = (struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *) __Pyx_CyFunction_GetClosure(__pyx_self); - __pyx_cur_scope = __pyx_outer_scope; - - /* "cfunc.to_py":69 - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) # <<<<<<<<<<<<<< - * return wrap - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = ((PyObject *)__pyx_cur_scope->__pyx_v_f(__pyx_v_query, 
__pyx_v_subject, __pyx_v_pctid, __pyx_v_hitlen, __pyx_v_nmismatch, __pyx_v_ngaps, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_score)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 69, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "cfunc.to_py":67 - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc.wrap", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "cfunc.to_py":66 - * - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object 
__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - */ - -static PyObject *__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*__pyx_v_f)(char *, char *, float, int, int, int, int, int, int, int, float, float)) { - struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_cur_scope; - PyObject *__pyx_v_wrap = 0; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", 0); - __pyx_cur_scope = (struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc 
*)__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, __pyx_empty_tuple, NULL); - if (unlikely(!__pyx_cur_scope)) { - __pyx_cur_scope = ((struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *)Py_None); - __Pyx_INCREF(Py_None); - __PYX_ERR(1, 66, __pyx_L1_error) - } else { - __Pyx_GOTREF((PyObject *)__pyx_cur_scope); - } - __pyx_cur_scope->__pyx_v_f = __pyx_v_f; - - /* "cfunc.to_py":67 - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) - */ - __pyx_t_1 = __Pyx_CyFunction_New(&__pyx_mdef_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap, 0, __pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma, 
((PyObject*)__pyx_cur_scope), __pyx_n_s_cfunc_to_py, __pyx_d, ((PyObject *)__pyx_codeobj__2)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 67, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_wrap = __pyx_t_1; - __pyx_t_1 = 0; - - /* "cfunc.to_py":70 - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) - * return wrap # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_wrap); - __pyx_r = __pyx_v_wrap; - goto __pyx_L0; - - /* "cfunc.to_py":66 - * - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_wrap); - __Pyx_DECREF((PyObject *)__pyx_cur_scope); - __Pyx_XGIVEREF(__pyx_r); - 
__Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "carray.from_py":79 - * - * @cname("__Pyx_carray_from_py_char") - * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: # <<<<<<<<<<<<<< - * cdef Py_ssize_t i = length - * try: - */ - -static int __Pyx_carray_from_py_char(PyObject *__pyx_v_o, char *__pyx_v_v, Py_ssize_t __pyx_v_length) { - Py_ssize_t __pyx_v_i; - PyObject *__pyx_v_item = NULL; - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - Py_ssize_t __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - PyObject *__pyx_t_7 = NULL; - Py_ssize_t __pyx_t_8; - PyObject *(*__pyx_t_9)(PyObject *); - PyObject *__pyx_t_10 = NULL; - char __pyx_t_11; - char const *__pyx_t_12; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__Pyx_carray_from_py_char", 1); - - /* "carray.from_py":80 - * @cname("__Pyx_carray_from_py_char") - * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: - * cdef Py_ssize_t i = length # <<<<<<<<<<<<<< - * try: - * i = len(o) - */ - __pyx_v_i = __pyx_v_length; - - /* "carray.from_py":81 - * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: - * cdef Py_ssize_t i = length - * try: # <<<<<<<<<<<<<< - * i = len(o) - * except (TypeError, OverflowError): - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_1); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_3); - /*try:*/ { - - /* "carray.from_py":82 - * cdef Py_ssize_t i = length - * try: - * i = len(o) # <<<<<<<<<<<<<< - * except (TypeError, OverflowError): - * pass - */ - __pyx_t_4 = PyObject_Length(__pyx_v_o); if (unlikely(__pyx_t_4 == ((Py_ssize_t)-1))) __PYX_ERR(1, 82, __pyx_L3_error) - __pyx_v_i = __pyx_t_4; - - /* "carray.from_py":81 - * 
cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: - * cdef Py_ssize_t i = length - * try: # <<<<<<<<<<<<<< - * i = len(o) - * except (TypeError, OverflowError): - */ - } - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - goto __pyx_L8_try_end; - __pyx_L3_error:; - - /* "carray.from_py":83 - * try: - * i = len(o) - * except (TypeError, OverflowError): # <<<<<<<<<<<<<< - * pass - * if i == length: - */ - __pyx_t_5 = __Pyx_PyErr_ExceptionMatches2(__pyx_builtin_TypeError, __pyx_builtin_OverflowError); - if (__pyx_t_5) { - __Pyx_ErrRestore(0,0,0); - goto __pyx_L4_exception_handled; - } - goto __pyx_L5_except_error; - - /* "carray.from_py":81 - * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: - * cdef Py_ssize_t i = length - * try: # <<<<<<<<<<<<<< - * i = len(o) - * except (TypeError, OverflowError): - */ - __pyx_L5_except_error:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - goto __pyx_L1_error; - __pyx_L4_exception_handled:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - __pyx_L8_try_end:; - } - - /* "carray.from_py":85 - * except (TypeError, OverflowError): - * pass - * if i == length: # <<<<<<<<<<<<<< - * for i, item in enumerate(o): - * if i >= length: - */ - __pyx_t_6 = (__pyx_v_i == __pyx_v_length); - if (__pyx_t_6) { - - /* "carray.from_py":86 - * pass - * if i == length: - * for i, item in enumerate(o): # <<<<<<<<<<<<<< - * if i >= length: - * break - */ - __pyx_t_4 = 0; - if (likely(PyList_CheckExact(__pyx_v_o)) || PyTuple_CheckExact(__pyx_v_o)) { - __pyx_t_7 = __pyx_v_o; __Pyx_INCREF(__pyx_t_7); - __pyx_t_8 = 0; - __pyx_t_9 = NULL; - } else { - __pyx_t_8 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_v_o); if 
(unlikely(!__pyx_t_7)) __PYX_ERR(1, 86, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_9 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 86, __pyx_L1_error) - } - for (;;) { - if (likely(!__pyx_t_9)) { - if (likely(PyList_CheckExact(__pyx_t_7))) { - { - Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_7); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(1, 86, __pyx_L1_error) - #endif - if (__pyx_t_8 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_10 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_8); __Pyx_INCREF(__pyx_t_10); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(1, 86, __pyx_L1_error) - #else - __pyx_t_10 = __Pyx_PySequence_ITEM(__pyx_t_7, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 86, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_10); - #endif - } else { - { - Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_7); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(1, 86, __pyx_L1_error) - #endif - if (__pyx_t_8 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_8); __Pyx_INCREF(__pyx_t_10); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(1, 86, __pyx_L1_error) - #else - __pyx_t_10 = __Pyx_PySequence_ITEM(__pyx_t_7, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 86, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_10); - #endif - } - } else { - __pyx_t_10 = __pyx_t_9(__pyx_t_7); - if (unlikely(!__pyx_t_10)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(1, 86, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_10); - } - __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_10); - __pyx_t_10 = 0; - __pyx_v_i = __pyx_t_4; - __pyx_t_4 = (__pyx_t_4 + 1); - - /* "carray.from_py":87 - * if i == 
length: - * for i, item in enumerate(o): - * if i >= length: # <<<<<<<<<<<<<< - * break - * v[i] = item - */ - __pyx_t_6 = (__pyx_v_i >= __pyx_v_length); - if (__pyx_t_6) { - - /* "carray.from_py":88 - * for i, item in enumerate(o): - * if i >= length: - * break # <<<<<<<<<<<<<< - * v[i] = item - * else: - */ - goto __pyx_L11_break; - - /* "carray.from_py":87 - * if i == length: - * for i, item in enumerate(o): - * if i >= length: # <<<<<<<<<<<<<< - * break - * v[i] = item - */ - } - - /* "carray.from_py":89 - * if i >= length: - * break - * v[i] = item # <<<<<<<<<<<<<< - * else: - * i += 1 # convert index to length - */ - __pyx_t_11 = __Pyx_PyInt_As_char(__pyx_v_item); if (unlikely((__pyx_t_11 == (char)-1) && PyErr_Occurred())) __PYX_ERR(1, 89, __pyx_L1_error) - (__pyx_v_v[__pyx_v_i]) = __pyx_t_11; - - /* "carray.from_py":86 - * pass - * if i == length: - * for i, item in enumerate(o): # <<<<<<<<<<<<<< - * if i >= length: - * break - */ - } - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - goto __pyx_L13_for_else; - __pyx_L11_break:; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - goto __pyx_L14_for_end; - /*else*/ { - __pyx_L13_for_else:; - - /* "carray.from_py":91 - * v[i] = item - * else: - * i += 1 # convert index to length # <<<<<<<<<<<<<< - * if i == length: - * return 0 - */ - __pyx_v_i = (__pyx_v_i + 1); - - /* "carray.from_py":92 - * else: - * i += 1 # convert index to length - * if i == length: # <<<<<<<<<<<<<< - * return 0 - * - */ - __pyx_t_6 = (__pyx_v_i == __pyx_v_length); - if (__pyx_t_6) { - - /* "carray.from_py":93 - * i += 1 # convert index to length - * if i == length: - * return 0 # <<<<<<<<<<<<<< - * - * PyErr_Format( - */ - __pyx_r = 0; - goto __pyx_L0; - - /* "carray.from_py":92 - * else: - * i += 1 # convert index to length - * if i == length: # <<<<<<<<<<<<<< - * return 0 - * - */ - } - } - __pyx_L14_for_end:; - - /* "carray.from_py":85 - * except (TypeError, OverflowError): - * pass - * if i == length: # <<<<<<<<<<<<<< - * for i, item in 
enumerate(o): - * if i >= length: - */ - } - - /* "carray.from_py":98 - * IndexError, - * ("too many values found during array assignment, expected %zd" - * if i >= length else # <<<<<<<<<<<<<< - * "not enough values found during array assignment, expected %zd, got %zd"), - * length, i) - */ - __pyx_t_6 = (__pyx_v_i >= __pyx_v_length); - if (__pyx_t_6) { - __pyx_t_12 = ((char const *)"too many values found during array assignment, expected %zd"); - } else { - __pyx_t_12 = ((char const *)"not enough values found during array assignment, expected %zd, got %zd"); - } - - /* "carray.from_py":95 - * return 0 - * - * PyErr_Format( # <<<<<<<<<<<<<< - * IndexError, - * ("too many values found during array assignment, expected %zd" - */ - __pyx_t_7 = PyErr_Format(__pyx_builtin_IndexError, __pyx_t_12, __pyx_v_length, __pyx_v_i); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 95, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "carray.from_py":79 - * - * @cname("__Pyx_carray_from_py_char") - * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: # <<<<<<<<<<<<<< - * cdef Py_ssize_t i = length - * try: - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_10); - __Pyx_AddTraceback("carray.from_py.__Pyx_carray_from_py_char", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_item); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":26 - * object filename - * - * def __cinit__(self, char* filename): # <<<<<<<<<<<<<< - * self.fh = fopen(filename, 'r') - * self.filename = filename - */ - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(PyObject *__pyx_v_self, PyObject 
*__pyx_args, PyObject *__pyx_kwds) { - char *__pyx_v_filename; - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return -1; - #endif - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_filename,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_filename)) != 0)) { - (void)__Pyx_Arg_NewRef_VARARGS(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 26, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__cinit__") < 0)) __PYX_ERR(0, 26, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - } - __pyx_v_filename = __Pyx_PyObject_AsWritableString(values[0]); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) __PYX_ERR(0, 26, __pyx_L3_error) - } - goto __pyx_L6_skip; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 26, __pyx_L3_error) - __pyx_L6_skip:; - goto 
__pyx_L4_argument_unpacking_done; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return -1; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self), __pyx_v_filename); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, char *__pyx_v_filename) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__cinit__", 1); - - /* "jcvi/formats/cblast.pyx":27 - * - * def __cinit__(self, char* filename): - * self.fh = fopen(filename, 'r') # <<<<<<<<<<<<<< - * self.filename = filename - * - */ - __pyx_v_self->fh = fopen(__pyx_v_filename, ((char const *)"r")); - - /* "jcvi/formats/cblast.pyx":28 - * def __cinit__(self, char* filename): - * self.fh = fopen(filename, 'r') - * self.filename = filename # <<<<<<<<<<<<<< - * - * def __iter__(self): - */ - __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 28, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __Pyx_GOTREF(__pyx_v_self->filename); - __Pyx_DECREF(__pyx_v_self->filename); - __pyx_v_self->filename = __pyx_t_1; - __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":26 - * object filename - * - * def 
__cinit__(self, char* filename): # <<<<<<<<<<<<<< - * self.fh = fopen(filename, 'r') - * self.filename = filename - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":30 - * self.filename = filename - * - * def __iter__(self): # <<<<<<<<<<<<<< - * rewind(self.fh) - * return self - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__iter__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__iter__", 1); - - /* "jcvi/formats/cblast.pyx":31 - * - * def __iter__(self): - * rewind(self.fh) # <<<<<<<<<<<<<< - * return self - * - */ - rewind(__pyx_v_self->fh); - - /* "jcvi/formats/cblast.pyx":32 - * def __iter__(self): - * rewind(self.fh) - * return self # <<<<<<<<<<<<<< - * - * def __next__(self): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_self); - __pyx_r = ((PyObject *)__pyx_v_self); - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":30 - * self.filename = filename - * - * def __iter__(self): # <<<<<<<<<<<<<< - * 
rewind(self.fh) - * return self - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":34 - * return self - * - * def __next__(self): # <<<<<<<<<<<<<< - * cdef: - * float pct = 0.0, evalue = 0.0, bit = 0.0 - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__next__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { - float __pyx_v_pct; - float __pyx_v_evalue; - float __pyx_v_bit; - char __pyx_v_qname[0x80]; - char __pyx_v_sname[0x80]; - int __pyx_v_hlen; - int __pyx_v_nmiss; - int __pyx_v_ngap; - int __pyx_v_qstart; - int __pyx_v_qstop; - int __pyx_v_sstart; - int __pyx_v_sstop; - int __pyx_v_success; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_error_without_exception = 0; /* StopIteration */ - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__next__", 1); - - /* "jcvi/formats/cblast.pyx":36 - * def __next__(self): - * cdef: - * float pct = 0.0, evalue = 0.0, bit = 0.0 # <<<<<<<<<<<<<< - * char qname[128] - * char sname[128] - */ - __pyx_v_pct = 0.0; - __pyx_v_evalue = 0.0; - __pyx_v_bit = 0.0; - - /* "jcvi/formats/cblast.pyx":43 - * int success - * - * 
success = fscanf(self.fh, blast_format_line, qname, sname, \ # <<<<<<<<<<<<<< - * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ - * &sstart, &sstop, &evalue, &bit ) - */ - __pyx_v_success = fscanf(__pyx_v_self->fh, __pyx_v_4jcvi_7formats_6cblast_blast_format_line, __pyx_v_qname, __pyx_v_sname, (&__pyx_v_pct), (&__pyx_v_hlen), (&__pyx_v_nmiss), (&__pyx_v_ngap), (&__pyx_v_qstart), (&__pyx_v_qstop), (&__pyx_v_sstart), (&__pyx_v_sstop), (&__pyx_v_evalue), (&__pyx_v_bit)); - - /* "jcvi/formats/cblast.pyx":46 - * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ - * &sstart, &sstop, &evalue, &bit ) - * if success == EOF: # <<<<<<<<<<<<<< - * raise StopIteration - * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, - */ - __pyx_t_1 = (__pyx_v_success == EOF); - if (unlikely(__pyx_t_1)) { - - /* "jcvi/formats/cblast.pyx":47 - * &sstart, &sstop, &evalue, &bit ) - * if success == EOF: - * raise StopIteration # <<<<<<<<<<<<<< - * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, - * qstart, qstop, sstart, sstop, evalue, bit) - */ - __pyx_error_without_exception = 1; - goto __pyx_L1_error;; - - /* "jcvi/formats/cblast.pyx":46 - * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ - * &sstart, &sstop, &evalue, &bit ) - * if success == EOF: # <<<<<<<<<<<<<< - * raise StopIteration - * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, - */ - } - - /* "jcvi/formats/cblast.pyx":48 - * if success == EOF: - * raise StopIteration - * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, # <<<<<<<<<<<<<< - * qstart, qstop, sstart, sstop, evalue, bit) - * - */ - __Pyx_XDECREF(__pyx_r); - - /* "jcvi/formats/cblast.pyx":49 - * raise StopIteration - * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, - * qstart, qstop, sstart, sstop, evalue, bit) # <<<<<<<<<<<<<< - * - * def __dealloc__(self): - */ - __pyx_t_2 = ((PyObject *)__pyx_f_4jcvi_7formats_6cblast_create_blast_line(__pyx_v_qname, __pyx_v_sname, __pyx_v_pct, __pyx_v_hlen, 
__pyx_v_nmiss, __pyx_v_ngap, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_bit)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 48, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":34 - * return self - * - * def __next__(self): # <<<<<<<<<<<<<< - * cdef: - * float pct = 0.0, evalue = 0.0, bit = 0.0 - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - if (!__pyx_error_without_exception) { - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__next__", __pyx_clineno, __pyx_lineno, __pyx_filename); - } - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":51 - * qstart, qstop, sstart, sstop, evalue, bit) - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * fclose(self.fh) - * - */ - -/* Python wrapper */ -static void __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(PyObject *__pyx_v_self); /*proto*/ -static void __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -static void __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { - - /* "jcvi/formats/cblast.pyx":52 - * - * def __dealloc__(self): - * fclose(self.fh) # <<<<<<<<<<<<<< - * - * def __repr__(self): - */ - (void)(fclose(__pyx_v_self->fh)); - - /* "jcvi/formats/cblast.pyx":51 - * qstart, qstop, sstart, sstop, evalue, bit) - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * fclose(self.fh) - * - */ - - /* 
function exit code */ -} - -/* "jcvi/formats/cblast.pyx":54 - * fclose(self.fh) - * - * def __repr__(self): # <<<<<<<<<<<<<< - * return "Blast('%s')" % (self.filename, ) - * - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__repr__", 1); - - /* "jcvi/formats/cblast.pyx":55 - * - * def __repr__(self): - * return "Blast('%s')" % (self.filename, ) # <<<<<<<<<<<<<< - * - * # Python 2 and 3 differ in str and unicode handling - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 55, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_self->filename); - __Pyx_GIVEREF(__pyx_v_self->filename); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_self->filename)) __PYX_ERR(0, 55, __pyx_L1_error); - __pyx_t_2 = __Pyx_PyString_Format(__pyx_kp_s_Blast_s, __pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 55, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* 
"jcvi/formats/cblast.pyx":54 - * fclose(self.fh) - * - * def __repr__(self): # <<<<<<<<<<<<<< - * return "Blast('%s')" % (self.filename, ) - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__ = {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; - #endif - #endif - __pyx_kwvalues = 
__Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce_cython__", 1); - - /* "(tree fragment)":2 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 2, __pyx_L1_error) - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no 
default __reduce__ due to non-trivial __cinit__" - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__ = {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = 
__Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v___pyx_state = values[0]; - } - goto __pyx_L6_skip; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) - __pyx_L6_skip:; - goto __pyx_L4_argument_unpacking_done; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self), __pyx_v___pyx_state); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_r = NULL; - 
__Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setstate_cython__", 1); - - /* "(tree fragment)":4 - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 4, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":59 - * # Python 2 and 3 differ in str and unicode handling - * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython - * cdef bytes c_str(str s): # <<<<<<<<<<<<<< - * return s.encode("UTF-8") - * - */ - -static PyObject *__pyx_f_4jcvi_7formats_6cblast_c_str(PyObject *__pyx_v_s) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("c_str", 1); - - /* "jcvi/formats/cblast.pyx":60 - * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython - * cdef bytes c_str(str s): - * return s.encode("UTF-8") # <<<<<<<<<<<<<< - * - * cdef str py_str(bytes s): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PyString_Type_encode, __pyx_v_s, __pyx_kp_s_UTF_8); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(0, 60, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(PyBytes_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("bytes", __pyx_t_1))) __PYX_ERR(0, 60, __pyx_L1_error) - __pyx_r = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":59 - * # Python 2 and 3 differ in str and unicode handling - * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython - * cdef bytes c_str(str s): # <<<<<<<<<<<<<< - * return s.encode("UTF-8") - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.c_str", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":62 - * return s.encode("UTF-8") - * - * cdef str py_str(bytes s): # <<<<<<<<<<<<<< - * return s.decode("UTF-8", "replace") - * - */ - -static PyObject *__pyx_f_4jcvi_7formats_6cblast_py_str(PyObject *__pyx_v_s) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("py_str", 1); - - /* "jcvi/formats/cblast.pyx":63 - * - * cdef str py_str(bytes s): - * return s.decode("UTF-8", "replace") # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - if (unlikely(__pyx_v_s == Py_None)) { - PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "decode"); - __PYX_ERR(0, 63, __pyx_L1_error) - } - __pyx_t_1 = __Pyx_decode_bytes(__pyx_v_s, 0, PY_SSIZE_T_MAX, NULL, ((char const *)"replace"), PyUnicode_DecodeUTF8); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 63, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(PyString_CheckExact(__pyx_t_1)) || __Pyx_RaiseUnexpectedTypeError("str", __pyx_t_1))) __PYX_ERR(0, 63, __pyx_L1_error) - __pyx_r = 
((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":62 - * return s.encode("UTF-8") - * - * cdef str py_str(bytes s): # <<<<<<<<<<<<<< - * return s.decode("UTF-8", "replace") - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.py_str", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":95 - * - * property query: - * def __get__(self): # <<<<<<<<<<<<<< - * return py_str(self._query) - * def __set__(self, val: str): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - - /* "jcvi/formats/cblast.pyx":96 - * property query: - * def __get__(self): - * return py_str(self._query) # <<<<<<<<<<<<<< - * def __set__(self, val: str): - * strcpy(self._query, c_str(val)) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = 
__Pyx_PyObject_FromString(__pyx_v_self->_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 96, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 96, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":95 - * - * property query: - * def __get__(self): # <<<<<<<<<<<<<< - * return py_str(self._query) - * def __set__(self, val: str): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.query.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":97 - * def __get__(self): - * return py_str(self._query) - * def __set__(self, val: str): # <<<<<<<<<<<<<< - * strcpy(self._query, c_str(val)) - * - */ - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_val), (&PyString_Type), 0, "val", 1))) __PYX_ERR(0, 97, __pyx_L1_error) - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject*)__pyx_v_val)); - - /* function exit code */ - goto __pyx_L0; - 
__pyx_L1_error:; - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - char const *__pyx_t_2; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__set__", 1); - - /* "jcvi/formats/cblast.pyx":98 - * return py_str(self._query) - * def __set__(self, val: str): - * strcpy(self._query, c_str(val)) # <<<<<<<<<<<<<< - * - * property subject: - */ - __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(__pyx_v_val); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 98, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (unlikely(__pyx_t_1 == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(0, 98, __pyx_L1_error) - } - __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_t_1); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 98, __pyx_L1_error) - (void)(strcpy(__pyx_v_self->_query, __pyx_t_2)); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":97 - * def __get__(self): - * return py_str(self._query) - * def __set__(self, val: str): # <<<<<<<<<<<<<< - * strcpy(self._query, c_str(val)) - * - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.query.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":101 - * - * property subject: - * def __get__(self): # <<<<<<<<<<<<<< - * return py_str(self._subject) - * def __set__(self, val: str): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(PyObject *__pyx_v_self); /*proto*/ 
-static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - - /* "jcvi/formats/cblast.pyx":102 - * property subject: - * def __get__(self): - * return py_str(self._subject) # <<<<<<<<<<<<<< - * def __set__(self, val: str): - * strcpy(self._subject, c_str(val)) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 102, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":101 - * - * property subject: - * def __get__(self): # <<<<<<<<<<<<<< - * return py_str(self._subject) - * def __set__(self, val: str): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.subject.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - 
__pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":103 - * def __get__(self): - * return py_str(self._subject) - * def __set__(self, val: str): # <<<<<<<<<<<<<< - * strcpy(self._subject, c_str(val)) - * - */ - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_val), (&PyString_Type), 0, "val", 1))) __PYX_ERR(0, 103, __pyx_L1_error) - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject*)__pyx_v_val)); - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - char const *__pyx_t_2; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__set__", 1); - - /* "jcvi/formats/cblast.pyx":104 - * return py_str(self._subject) - * def __set__(self, val: str): - * strcpy(self._subject, c_str(val)) # <<<<<<<<<<<<<< - * - * def __init__(self, s): - */ - __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(__pyx_v_val); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(0, 104, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (unlikely(__pyx_t_1 == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(0, 104, __pyx_L1_error) - } - __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_t_1); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 104, __pyx_L1_error) - (void)(strcpy(__pyx_v_self->_subject, __pyx_t_2)); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":103 - * def __get__(self): - * return py_str(self._subject) - * def __set__(self, val: str): # <<<<<<<<<<<<<< - * strcpy(self._subject, c_str(val)) - * - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.subject.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":106 - * strcpy(self._subject, c_str(val)) - * - * def __init__(self, s): # <<<<<<<<<<<<<< - * sline = c_str(s) - * sscanf(sline, blast_format, self._query, self._subject, - */ - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_s = 0; - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__init__ (wrapper)", 0); - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return -1; - #endif - __pyx_kwvalues = 
__Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_s,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_s)) != 0)) { - (void)__Pyx_Arg_NewRef_VARARGS(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 106, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__init__") < 0)) __PYX_ERR(0, 106, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - } - __pyx_v_s = values[0]; - } - goto __pyx_L6_skip; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 106, __pyx_L3_error) - __pyx_L6_skip:; - goto __pyx_L4_argument_unpacking_done; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return -1; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), __pyx_v_s); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - 
__Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_s) { - PyObject *__pyx_v_sline = NULL; - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - char const *__pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__init__", 1); - - /* "jcvi/formats/cblast.pyx":107 - * - * def __init__(self, s): - * sline = c_str(s) # <<<<<<<<<<<<<< - * sscanf(sline, blast_format, self._query, self._subject, - * &self.pctid, &self.hitlen, &self.nmismatch, &self.ngaps, - */ - if (!(likely(PyString_CheckExact(__pyx_v_s))||((__pyx_v_s) == Py_None) || __Pyx_RaiseUnexpectedTypeError("str", __pyx_v_s))) __PYX_ERR(0, 107, __pyx_L1_error) - __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(((PyObject*)__pyx_v_s)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 107, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_sline = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":108 - * def __init__(self, s): - * sline = c_str(s) - * sscanf(sline, blast_format, self._query, self._subject, # <<<<<<<<<<<<<< - * &self.pctid, &self.hitlen, &self.nmismatch, &self.ngaps, - * &self.qstart, &self.qstop, - */ - if (unlikely(__pyx_v_sline == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(0, 108, __pyx_L1_error) - } - __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_v_sline); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 108, __pyx_L1_error) - - /* "jcvi/formats/cblast.pyx":112 - * &self.qstart, &self.qstop, - * &self.sstart, &self.sstop, - * &self.evalue, &self.score) # <<<<<<<<<<<<<< - * - * self.orientation = '+' - */ - (void)(sscanf(__pyx_t_2, __pyx_v_4jcvi_7formats_6cblast_blast_format, 
__pyx_v_self->_query, __pyx_v_self->_subject, (&__pyx_v_self->pctid), (&__pyx_v_self->hitlen), (&__pyx_v_self->nmismatch), (&__pyx_v_self->ngaps), (&__pyx_v_self->qstart), (&__pyx_v_self->qstop), (&__pyx_v_self->sstart), (&__pyx_v_self->sstop), (&__pyx_v_self->evalue), (&__pyx_v_self->score))); - - /* "jcvi/formats/cblast.pyx":114 - * &self.evalue, &self.score) - * - * self.orientation = '+' # <<<<<<<<<<<<<< - * if self.qstart > self.qstop: - * self.qstart, self.qstop = self.qstop, self.qstart - */ - __pyx_v_self->orientation = '+'; - - /* "jcvi/formats/cblast.pyx":115 - * - * self.orientation = '+' - * if self.qstart > self.qstop: # <<<<<<<<<<<<<< - * self.qstart, self.qstop = self.qstop, self.qstart - * self.orientation = '-' - */ - __pyx_t_3 = (__pyx_v_self->qstart > __pyx_v_self->qstop); - if (__pyx_t_3) { - - /* "jcvi/formats/cblast.pyx":116 - * self.orientation = '+' - * if self.qstart > self.qstop: - * self.qstart, self.qstop = self.qstop, self.qstart # <<<<<<<<<<<<<< - * self.orientation = '-' - * if self.sstart > self.sstop: - */ - __pyx_t_4 = __pyx_v_self->qstop; - __pyx_t_5 = __pyx_v_self->qstart; - __pyx_v_self->qstart = __pyx_t_4; - __pyx_v_self->qstop = __pyx_t_5; - - /* "jcvi/formats/cblast.pyx":117 - * if self.qstart > self.qstop: - * self.qstart, self.qstop = self.qstop, self.qstart - * self.orientation = '-' # <<<<<<<<<<<<<< - * if self.sstart > self.sstop: - * self.sstart, self.sstop = self.sstop, self.sstart - */ - __pyx_v_self->orientation = '-'; - - /* "jcvi/formats/cblast.pyx":115 - * - * self.orientation = '+' - * if self.qstart > self.qstop: # <<<<<<<<<<<<<< - * self.qstart, self.qstop = self.qstop, self.qstart - * self.orientation = '-' - */ - } - - /* "jcvi/formats/cblast.pyx":118 - * self.qstart, self.qstop = self.qstop, self.qstart - * self.orientation = '-' - * if self.sstart > self.sstop: # <<<<<<<<<<<<<< - * self.sstart, self.sstop = self.sstop, self.sstart - * self.orientation = '-' - */ - __pyx_t_3 = (__pyx_v_self->sstart > 
__pyx_v_self->sstop); - if (__pyx_t_3) { - - /* "jcvi/formats/cblast.pyx":119 - * self.orientation = '-' - * if self.sstart > self.sstop: - * self.sstart, self.sstop = self.sstop, self.sstart # <<<<<<<<<<<<<< - * self.orientation = '-' - * - */ - __pyx_t_5 = __pyx_v_self->sstop; - __pyx_t_4 = __pyx_v_self->sstart; - __pyx_v_self->sstart = __pyx_t_5; - __pyx_v_self->sstop = __pyx_t_4; - - /* "jcvi/formats/cblast.pyx":120 - * if self.sstart > self.sstop: - * self.sstart, self.sstop = self.sstop, self.sstart - * self.orientation = '-' # <<<<<<<<<<<<<< - * - * def __richcmp__(BlastLine self, BlastLine other, size_t op): - */ - __pyx_v_self->orientation = '-'; - - /* "jcvi/formats/cblast.pyx":118 - * self.qstart, self.qstop = self.qstop, self.qstart - * self.orientation = '-' - * if self.sstart > self.sstop: # <<<<<<<<<<<<<< - * self.sstart, self.sstop = self.sstop, self.sstart - * self.orientation = '-' - */ - } - - /* "jcvi/formats/cblast.pyx":106 - * strcpy(self._subject, c_str(val)) - * - * def __init__(self, s): # <<<<<<<<<<<<<< - * sline = c_str(s) - * sscanf(sline, blast_format, self._query, self._subject, - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_sline); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":122 - * self.orientation = '-' - * - * def __richcmp__(BlastLine self, BlastLine other, size_t op): # <<<<<<<<<<<<<< - * if op == 2: # == - * if self.query != other.query and self.qstart != other.qstart: - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__(PyObject *__pyx_v_self, PyObject *__pyx_v_other, int __pyx_arg_op); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_other, int __pyx_arg_op) { - size_t __pyx_v_op; - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__richcmp__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_v_op = __pyx_arg_op; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_other), __pyx_ptype_4jcvi_7formats_6cblast_BlastLine, 1, "other", 0))) __PYX_ERR(0, 122, __pyx_L1_error) - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_other), ((size_t)__pyx_v_op)); - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = NULL; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_other, size_t __pyx_v_op) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - int __pyx_t_5; - PyObject *__pyx_t_6 = NULL; - unsigned int __pyx_t_7; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__richcmp__", 1); - - /* "jcvi/formats/cblast.pyx":123 - * - * def __richcmp__(BlastLine self, BlastLine other, size_t op): - * if op == 2: # == # <<<<<<<<<<<<<< - * if self.query != other.query and self.qstart != other.qstart: - * return False - */ - switch (__pyx_v_op) { - case 2: - - /* "jcvi/formats/cblast.pyx":124 - * def __richcmp__(BlastLine self, BlastLine other, size_t op): - * if op == 2: # == - * if self.query != other.query and self.qstart != other.qstart: # 
<<<<<<<<<<<<<< - * return False - * return self.subject == other.subject and \ - */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 124, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_other), __pyx_n_s_query); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 124, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_t_2, __pyx_t_3, Py_NE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 124, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 124, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (__pyx_t_5) { - } else { - __pyx_t_1 = __pyx_t_5; - goto __pyx_L4_bool_binop_done; - } - __pyx_t_5 = (__pyx_v_self->qstart != __pyx_v_other->qstart); - __pyx_t_1 = __pyx_t_5; - __pyx_L4_bool_binop_done:; - if (__pyx_t_1) { - - /* "jcvi/formats/cblast.pyx":125 - * if op == 2: # == - * if self.query != other.query and self.qstart != other.qstart: - * return False # <<<<<<<<<<<<<< - * return self.subject == other.subject and \ - * self.qstop == other.qstop and \ - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(Py_False); - __pyx_r = Py_False; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":124 - * def __richcmp__(BlastLine self, BlastLine other, size_t op): - * if op == 2: # == - * if self.query != other.query and self.qstart != other.qstart: # <<<<<<<<<<<<<< - * return False - * return self.subject == other.subject and \ - */ - } - - /* "jcvi/formats/cblast.pyx":126 - * if self.query != other.query and self.qstart != other.qstart: - * return False - * return self.subject == other.subject and \ # <<<<<<<<<<<<<< - * self.qstop == other.qstop and \ - * self.sstop == other.sstop and \ - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = 
__Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 126, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_other), __pyx_n_s_subject); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 126, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_6 = PyObject_RichCompare(__pyx_t_3, __pyx_t_2, Py_EQ); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 126, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 126, __pyx_L1_error) - if (__pyx_t_1) { - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - } else { - __Pyx_INCREF(__pyx_t_6); - __pyx_t_4 = __pyx_t_6; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - goto __pyx_L6_bool_binop_done; - } - - /* "jcvi/formats/cblast.pyx":127 - * return False - * return self.subject == other.subject and \ - * self.qstop == other.qstop and \ # <<<<<<<<<<<<<< - * self.sstop == other.sstop and \ - * self.evalue == other.evalue and \ - */ - __pyx_t_1 = (__pyx_v_self->qstop == __pyx_v_other->qstop); - if (__pyx_t_1) { - } else { - __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 127, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_4 = __pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L6_bool_binop_done; - } - - /* "jcvi/formats/cblast.pyx":128 - * return self.subject == other.subject and \ - * self.qstop == other.qstop and \ - * self.sstop == other.sstop and \ # <<<<<<<<<<<<<< - * self.evalue == other.evalue and \ - * self.hitlen == other.hitlen - */ - __pyx_t_1 = (__pyx_v_self->sstop == __pyx_v_other->sstop); - if (__pyx_t_1) { - } else { - __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 128, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_4 = __pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L6_bool_binop_done; - } - - /* 
"jcvi/formats/cblast.pyx":129 - * self.qstop == other.qstop and \ - * self.sstop == other.sstop and \ - * self.evalue == other.evalue and \ # <<<<<<<<<<<<<< - * self.hitlen == other.hitlen - * - */ - __pyx_t_1 = (__pyx_v_self->evalue == __pyx_v_other->evalue); - if (__pyx_t_1) { - } else { - __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 129, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_4 = __pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L6_bool_binop_done; - } - - /* "jcvi/formats/cblast.pyx":130 - * self.sstop == other.sstop and \ - * self.evalue == other.evalue and \ - * self.hitlen == other.hitlen # <<<<<<<<<<<<<< - * - * elif op == 3: # != - */ - __pyx_t_1 = (__pyx_v_self->hitlen == __pyx_v_other->hitlen); - __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 130, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_4 = __pyx_t_6; - __pyx_t_6 = 0; - __pyx_L6_bool_binop_done:; - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":123 - * - * def __richcmp__(BlastLine self, BlastLine other, size_t op): - * if op == 2: # == # <<<<<<<<<<<<<< - * if self.query != other.query and self.qstart != other.qstart: - * return False - */ - break; - case 3: - - /* "jcvi/formats/cblast.pyx":133 - * - * elif op == 3: # != - * return not self.__richcmp__(other, 2) # <<<<<<<<<<<<<< - * else: - * raise Exception("that comparison not implemented") - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_richcmp); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 133, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_2 = NULL; - __pyx_t_7 = 0; - #if CYTHON_UNPACK_METHODS - if (likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_2)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_2); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); - 
__pyx_t_7 = 1; - } - } - #endif - { - PyObject *__pyx_callargs[3] = {__pyx_t_2, ((PyObject *)__pyx_v_other), __pyx_int_2}; - __pyx_t_4 = __Pyx_PyObject_FastCall(__pyx_t_6, __pyx_callargs+1-__pyx_t_7, 2+__pyx_t_7); - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 133, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - } - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 133, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyBool_FromLong((!__pyx_t_1)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 133, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":132 - * self.hitlen == other.hitlen - * - * elif op == 3: # != # <<<<<<<<<<<<<< - * return not self.__richcmp__(other, 2) - * else: - */ - break; - default: - - /* "jcvi/formats/cblast.pyx":135 - * return not self.__richcmp__(other, 2) - * else: - * raise Exception("that comparison not implemented") # <<<<<<<<<<<<<< - * - * def __hash__(self): - */ - __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])), __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 135, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_Raise(__pyx_t_4, 0, 0, 0); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __PYX_ERR(0, 135, __pyx_L1_error) - break; - } - - /* "jcvi/formats/cblast.pyx":122 - * self.orientation = '-' - * - * def __richcmp__(BlastLine self, BlastLine other, size_t op): # <<<<<<<<<<<<<< - * if op == 2: # == - * if self.query != other.query and self.qstart != other.qstart: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__richcmp__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - 
__Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":137 - * raise Exception("that comparison not implemented") - * - * def __hash__(self): # <<<<<<<<<<<<<< - * return id(self) - * - */ - -/* Python wrapper */ -static Py_hash_t __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__(PyObject *__pyx_v_self); /*proto*/ -static Py_hash_t __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - Py_hash_t __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__hash__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static Py_hash_t __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - Py_hash_t __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - Py_hash_t __pyx_t_2; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__hash__", 1); - - /* "jcvi/formats/cblast.pyx":138 - * - * def __hash__(self): - * return id(self) # <<<<<<<<<<<<<< - * - * def __repr__(self): - */ - __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_builtin_id, ((PyObject *)__pyx_v_self)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 138, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_AsHash_t(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_hash_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 138, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":137 - * raise Exception("that comparison not implemented") - * - * def __hash__(self): # <<<<<<<<<<<<<< - * return id(self) - * - */ - - /* 
function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__hash__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - if (unlikely(__pyx_r == -1) && !PyErr_Occurred()) __pyx_r = -2; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":140 - * return id(self) - * - * def __repr__(self): # <<<<<<<<<<<<<< - * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ - * (self.query, self.subject, self.evalue, self.score) - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__repr__", 1); - - /* "jcvi/formats/cblast.pyx":141 - * - * def __repr__(self): - * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ # <<<<<<<<<<<<<< - * (self.query, self.subject, self.evalue, self.score) - * - */ - __Pyx_XDECREF(__pyx_r); - - /* "jcvi/formats/cblast.pyx":142 - * def __repr__(self): 
- * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ - * (self.query, self.subject, self.evalue, self.score) # <<<<<<<<<<<<<< - * - * def __str__(self): - */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 142, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 142, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 142, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 142, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyTuple_New(4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 142, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_1)) __PYX_ERR(0, 142, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_2)) __PYX_ERR(0, 142, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_3)) __PYX_ERR(0, 142, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_4); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_4)) __PYX_ERR(0, 142, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_2 = 0; - __pyx_t_3 = 0; - __pyx_t_4 = 0; - - /* "jcvi/formats/cblast.pyx":141 - * - * def __repr__(self): - * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ # <<<<<<<<<<<<<< - * (self.query, self.subject, self.evalue, self.score) - * - */ - __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1, __pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 141, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":140 - * return 
id(self) - * - * def __repr__(self): # <<<<<<<<<<<<<< - * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ - * (self.query, self.subject, self.evalue, self.score) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":144 - * (self.query, self.subject, self.evalue, self.score) - * - * def __str__(self): # <<<<<<<<<<<<<< - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * if self.orientation == '-': - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__str__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_v_args = NULL; - char __pyx_v_result[0x200]; - PyObject *__pyx_v_attr = NULL; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - Py_ssize_t __pyx_t_4; - PyObject *(*__pyx_t_5)(PyObject *); - int __pyx_t_6; - int __pyx_lineno = 0; - const char 
*__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__str__", 1); - - /* "jcvi/formats/cblast.pyx":145 - * - * def __str__(self): - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_n_s_slots); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyObject_GetSlice(__pyx_t_2, 0, 12, NULL, NULL, &__pyx_slice__4, 0, 1, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (likely(PyList_CheckExact(__pyx_t_3)) || PyTuple_CheckExact(__pyx_t_3)) { - __pyx_t_2 = __pyx_t_3; __Pyx_INCREF(__pyx_t_2); - __pyx_t_4 = 0; - __pyx_t_5 = NULL; - } else { - __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 145, __pyx_L1_error) - } - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - for (;;) { - if (likely(!__pyx_t_5)) { - if (likely(PyList_CheckExact(__pyx_t_2))) { - { - Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 145, __pyx_L1_error) - #endif - if (__pyx_t_4 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 145, __pyx_L1_error) - #else - __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, 
__pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - } else { - { - Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 145, __pyx_L1_error) - #endif - if (__pyx_t_4 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 145, __pyx_L1_error) - #else - __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - } - } else { - __pyx_t_3 = __pyx_t_5(__pyx_t_2); - if (unlikely(!__pyx_t_3)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 145, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_3); - } - __Pyx_XDECREF_SET(__pyx_v_attr, __pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttr(((PyObject *)__pyx_v_self), __pyx_v_attr); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_3))) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_args = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":146 - * def __str__(self): - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * if self.orientation == '-': # <<<<<<<<<<<<<< - * args[8], args[9] = args[9], args[8] - * - */ - __pyx_t_6 = (__pyx_v_self->orientation == '-'); - if (__pyx_t_6) { - - /* "jcvi/formats/cblast.pyx":147 - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] # <<<<<<<<<<<<<< - * - * cdef char result[512] - */ - 
__pyx_t_1 = PyList_GET_ITEM(__pyx_v_args, 9); - __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = PyList_GET_ITEM(__pyx_v_args, 8); - __Pyx_INCREF(__pyx_t_2); - if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 8, __pyx_t_1, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 147, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 9, __pyx_t_2, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 147, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "jcvi/formats/cblast.pyx":146 - * def __str__(self): - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * if self.orientation == '-': # <<<<<<<<<<<<<< - * args[8], args[9] = args[9], args[8] - * - */ - } - - /* "jcvi/formats/cblast.pyx":150 - * - * cdef char result[512] - * sprintf(result, blast_output, self._query, self._subject, # <<<<<<<<<<<<<< - * self.pctid, self.hitlen, self.nmismatch, self.ngaps, - * self.qstart, self.qstop, - */ - (void)(sprintf(__pyx_v_result, __pyx_v_4jcvi_7formats_6cblast_blast_output, __pyx_v_self->_query, __pyx_v_self->_subject, __pyx_v_self->pctid, __pyx_v_self->hitlen, __pyx_v_self->nmismatch, __pyx_v_self->ngaps, __pyx_v_self->qstart, __pyx_v_self->qstop, __pyx_v_self->sstart, __pyx_v_self->sstop, __pyx_v_self->evalue, __pyx_v_self->score)); - - /* "jcvi/formats/cblast.pyx":156 - * self.evalue, self.score) - * - * return py_str(result) # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __Pyx_PyObject_FromString(__pyx_v_result); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 156, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_2)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 156, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":144 - * (self.query, self.subject, self.evalue, self.score) - * - 
* def __str__(self): # <<<<<<<<<<<<<< - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * if self.orientation == '-': - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__str__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_args); - __Pyx_XDECREF(__pyx_v_attr); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":158 - * return py_str(result) - * - * @property # <<<<<<<<<<<<<< - * def has_score(self): - * return hasattr(self, "score") - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - - /* "jcvi/formats/cblast.pyx":160 - * @property - * def has_score(self): - * return hasattr(self, "score") # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = 
__Pyx_HasAttr(((PyObject *)__pyx_v_self), __pyx_n_s_score); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(0, 160, __pyx_L1_error) - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 160, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":158 - * return py_str(result) - * - * @property # <<<<<<<<<<<<<< - * def has_score(self): - * return hasattr(self, "score") - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.has_score.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":162 - * return hasattr(self, "score") - * - * @property # <<<<<<<<<<<<<< - * def swapped(self): - * """ - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} -static PyObject *__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value); /* proto */ - -/* "jcvi/formats/cblast.pyx":172 - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - * b = "\t".join(str(x) for x in args) # 
<<<<<<<<<<<<<< - * return BlastLine(b) - * - */ - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_genexpr_arg_0) { - struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_cur_scope; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("genexpr", 0); - __pyx_cur_scope = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, __pyx_empty_tuple, NULL); - if (unlikely(!__pyx_cur_scope)) { - __pyx_cur_scope = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)Py_None); - __Pyx_INCREF(Py_None); - __PYX_ERR(0, 172, __pyx_L1_error) - } else { - __Pyx_GOTREF((PyObject *)__pyx_cur_scope); - } - __pyx_cur_scope->__pyx_genexpr_arg_0 = __pyx_genexpr_arg_0; - __Pyx_INCREF(__pyx_cur_scope->__pyx_genexpr_arg_0); - __Pyx_GIVEREF(__pyx_cur_scope->__pyx_genexpr_arg_0); - { - __pyx_CoroutineObject *gen = __Pyx_Generator_New((__pyx_coroutine_body_t) __pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator, NULL, (PyObject *) __pyx_cur_scope, __pyx_n_s_genexpr, __pyx_n_s_BlastLine___get___locals_genexpr, __pyx_n_s_jcvi_formats_cblast); if (unlikely(!gen)) __PYX_ERR(0, 172, __pyx_L1_error) - __Pyx_DECREF(__pyx_cur_scope); - __Pyx_RefNannyFinishContext(); - return (PyObject *) gen; - } - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.swapped.__get__.genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_DECREF((PyObject *)__pyx_cur_scope); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject 
*__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value) /* generator body */ -{ - struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_cur_scope = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)__pyx_generator->closure); - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - Py_ssize_t __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("genexpr", 0); - switch (__pyx_generator->resume_label) { - case 0: goto __pyx_L3_first_run; - case 1: goto __pyx_L6_resume_from_yield; - default: /* CPython raises the right error here */ - __Pyx_RefNannyFinishContext(); - return NULL; - } - __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 172, __pyx_L1_error) - if (unlikely(!__pyx_cur_scope->__pyx_genexpr_arg_0)) { __Pyx_RaiseUnboundLocalError(".0"); __PYX_ERR(0, 172, __pyx_L1_error) } - __pyx_t_1 = __pyx_cur_scope->__pyx_genexpr_arg_0; __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = 0; - for (;;) { - { - Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 172, __pyx_L1_error) - #endif - if (__pyx_t_2 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely((0 < 0))) __PYX_ERR(0, 172, __pyx_L1_error) - #else - __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 172, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_x); - __Pyx_XDECREF_SET(__pyx_cur_scope->__pyx_v_x, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = 
__Pyx_PyObject_Str(__pyx_cur_scope->__pyx_v_x); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 172, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - __Pyx_XGIVEREF(__pyx_t_1); - __pyx_cur_scope->__pyx_t_0 = __pyx_t_1; - __pyx_cur_scope->__pyx_t_1 = __pyx_t_2; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - /* return from generator, yielding value */ - __pyx_generator->resume_label = 1; - return __pyx_r; - __pyx_L6_resume_from_yield:; - __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; - __pyx_cur_scope->__pyx_t_0 = 0; - __Pyx_XGOTREF(__pyx_t_1); - __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 172, __pyx_L1_error) - } - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - CYTHON_MAYBE_UNUSED_VAR(__pyx_cur_scope); - - /* function exit code */ - PyErr_SetNone(PyExc_StopIteration); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_L0:; - __Pyx_XDECREF(__pyx_r); __pyx_r = 0; - #if !CYTHON_USE_EXC_INFO_STACK - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - #endif - __pyx_generator->resume_label = -1; - __Pyx_Coroutine_clear((PyObject*)__pyx_generator); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":162 - * return hasattr(self, "score") - * - * @property # <<<<<<<<<<<<<< - * def swapped(self): - * """ - */ - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_v_args = NULL; - PyObject *__pyx_v_b = NULL; - PyObject *__pyx_v_attr = NULL; - PyObject *__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator = 0; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; 
- Py_ssize_t __pyx_t_4; - PyObject *(*__pyx_t_5)(PyObject *); - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - int __pyx_t_8; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - - /* "jcvi/formats/cblast.pyx":167 - * Swap query and subject. - * """ - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< - * args[0:2] = [self.subject, self.query] - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_n_s_slots); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyObject_GetSlice(__pyx_t_2, 0, 12, NULL, NULL, &__pyx_slice__4, 0, 1, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (likely(PyList_CheckExact(__pyx_t_3)) || PyTuple_CheckExact(__pyx_t_3)) { - __pyx_t_2 = __pyx_t_3; __Pyx_INCREF(__pyx_t_2); - __pyx_t_4 = 0; - __pyx_t_5 = NULL; - } else { - __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 167, __pyx_L1_error) - } - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - for (;;) { - if (likely(!__pyx_t_5)) { - if (likely(PyList_CheckExact(__pyx_t_2))) { - { - Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 167, __pyx_L1_error) - #endif - if (__pyx_t_4 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); 
__Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 167, __pyx_L1_error) - #else - __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - } else { - { - Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 167, __pyx_L1_error) - #endif - if (__pyx_t_4 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 167, __pyx_L1_error) - #else - __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - } - } else { - __pyx_t_3 = __pyx_t_5(__pyx_t_2); - if (unlikely(!__pyx_t_3)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 167, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_3); - } - __Pyx_XDECREF_SET(__pyx_v_attr, __pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttr(((PyObject *)__pyx_v_self), __pyx_v_attr); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_3))) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_args = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":168 - * """ - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * args[0:2] = [self.subject, self.query] # <<<<<<<<<<<<<< - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - * if self.orientation == '-': - */ - __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 168, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyList_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 168, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 0, __pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 1, __pyx_t_2)) __PYX_ERR(0, 168, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_2 = 0; - if (__Pyx_PyObject_SetSlice(__pyx_v_args, __pyx_t_3, 0, 2, NULL, NULL, NULL, 1, 1, 0) < 0) __PYX_ERR(0, 168, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "jcvi/formats/cblast.pyx":169 - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * args[0:2] = [self.subject, self.query] - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] # <<<<<<<<<<<<<< - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - */ - __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = PyList_New(4); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 0, __pyx_t_3)) __PYX_ERR(0, 169, 
__pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 1, __pyx_t_2)) __PYX_ERR(0, 169, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 2, __pyx_t_1)) __PYX_ERR(0, 169, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_6); - if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 3, __pyx_t_6)) __PYX_ERR(0, 169, __pyx_L1_error); - __pyx_t_3 = 0; - __pyx_t_2 = 0; - __pyx_t_1 = 0; - __pyx_t_6 = 0; - if (__Pyx_PyObject_SetSlice(__pyx_v_args, __pyx_t_7, 6, 10, NULL, NULL, NULL, 1, 1, 0) < 0) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "jcvi/formats/cblast.pyx":170 - * args[0:2] = [self.subject, self.query] - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - * if self.orientation == '-': # <<<<<<<<<<<<<< - * args[8], args[9] = args[9], args[8] - * b = "\t".join(str(x) for x in args) - */ - __pyx_t_8 = (__pyx_v_self->orientation == '-'); - if (__pyx_t_8) { - - /* "jcvi/formats/cblast.pyx":171 - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] # <<<<<<<<<<<<<< - * b = "\t".join(str(x) for x in args) - * return BlastLine(b) - */ - __pyx_t_7 = PyList_GET_ITEM(__pyx_v_args, 9); - __Pyx_INCREF(__pyx_t_7); - __pyx_t_6 = PyList_GET_ITEM(__pyx_v_args, 8); - __Pyx_INCREF(__pyx_t_6); - if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 8, __pyx_t_7, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 171, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 9, __pyx_t_6, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 171, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - - /* "jcvi/formats/cblast.pyx":170 - * args[0:2] = [self.subject, self.query] - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - * if self.orientation == '-': # <<<<<<<<<<<<<< - * args[8], args[9] = args[9], args[8] - * b = 
"\t".join(str(x) for x in args) - */ - } - - /* "jcvi/formats/cblast.pyx":172 - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - * b = "\t".join(str(x) for x in args) # <<<<<<<<<<<<<< - * return BlastLine(b) - * - */ - __pyx_t_6 = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(NULL, __pyx_v_args); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 172, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyString_Join(__pyx_kp_s__5, __pyx_t_6); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 172, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_v_b = ((PyObject*)__pyx_t_7); - __pyx_t_7 = 0; - - /* "jcvi/formats/cblast.pyx":173 - * args[8], args[9] = args[9], args[8] - * b = "\t".join(str(x) for x in args) - * return BlastLine(b) # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyObject_CallOneArg(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_v_b); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 173, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_r = __pyx_t_7; - __pyx_t_7 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":162 - * return hasattr(self, "score") - * - * @property # <<<<<<<<<<<<<< - * def swapped(self): - * """ - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.swapped.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_args); - __Pyx_XDECREF(__pyx_v_b); - __Pyx_XDECREF(__pyx_v_attr); - __Pyx_XDECREF(__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":175 - * return BlastLine(b) - * - * @property # <<<<<<<<<<<<<< - * def bedline(self): - * 
cdef char result[512] - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - char __pyx_v_result[0x200]; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - - /* "jcvi/formats/cblast.pyx":178 - * def bedline(self): - * cdef char result[512] - * sprintf(result, bed_output, # <<<<<<<<<<<<<< - * self._subject, self.sstart - 1, self.sstop, - * self._query, self.qstart, self.qstop, - */ - (void)(sprintf(__pyx_v_result, __pyx_v_4jcvi_7formats_6cblast_bed_output, __pyx_v_self->_subject, (__pyx_v_self->sstart - 1), __pyx_v_self->sstop, __pyx_v_self->_query, __pyx_v_self->qstart, __pyx_v_self->qstop, __pyx_v_self->score, __pyx_v_self->orientation)); - - /* "jcvi/formats/cblast.pyx":183 - * self.score, self.orientation) - * - * return py_str(result) # <<<<<<<<<<<<<< - * - * def __reduce__(self): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_result); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 183, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = 
__pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 183, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":175 - * return BlastLine(b) - * - * @property # <<<<<<<<<<<<<< - * def bedline(self): - * cdef char result[512] - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.bedline.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":185 - * return py_str(result) - * - * def __reduce__(self): # <<<<<<<<<<<<<< - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_9BlastLine_11__reduce__ = {"__reduce__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce__ (wrapper)", 0); - 
#if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce__", 0))) return NULL; - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - PyObject *__pyx_t_9 = NULL; - PyObject *__pyx_t_10 = NULL; - PyObject *__pyx_t_11 = NULL; - PyObject *__pyx_t_12 = NULL; - PyObject *__pyx_t_13 = NULL; - PyObject *__pyx_t_14 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce__", 1); - - /* "jcvi/formats/cblast.pyx":186 - * - * def __reduce__(self): - * return create_blast_line, ( # <<<<<<<<<<<<<< - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = 
__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(__pyx_f_4jcvi_7formats_6cblast_create_blast_line); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 186, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - - /* "jcvi/formats/cblast.pyx":187 - * def __reduce__(self): - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, # <<<<<<<<<<<<<< - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, - * self.evalue, self.score) - */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyFloat_FromDouble(__pyx_v_self->pctid); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_self->hitlen); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_self->nmismatch); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - - /* "jcvi/formats/cblast.pyx":188 - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, # <<<<<<<<<<<<<< - * self.evalue, self.score) - * - */ - __pyx_t_7 = __Pyx_PyInt_From_int(__pyx_v_self->ngaps); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 188, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 188, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_9 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 188, 
__pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 188, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_11 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 188, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_11); - - /* "jcvi/formats/cblast.pyx":189 - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, - * self.evalue, self.score) # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_12 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 189, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_12); - __pyx_t_13 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 189, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - - /* "jcvi/formats/cblast.pyx":187 - * def __reduce__(self): - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, # <<<<<<<<<<<<<< - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, - * self.evalue, self.score) - */ - __pyx_t_14 = PyTuple_New(12); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_t_2)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 1, __pyx_t_3)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_4); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 2, __pyx_t_4)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_5); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 3, __pyx_t_5)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_6); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 4, __pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_7); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 5, __pyx_t_7)) __PYX_ERR(0, 187, __pyx_L1_error); - 
__Pyx_GIVEREF(__pyx_t_8); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 6, __pyx_t_8)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_9); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 7, __pyx_t_9)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_10); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 8, __pyx_t_10)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_11); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 9, __pyx_t_11)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_12); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 10, __pyx_t_12)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_13); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 11, __pyx_t_13)) __PYX_ERR(0, 187, __pyx_L1_error); - __pyx_t_2 = 0; - __pyx_t_3 = 0; - __pyx_t_4 = 0; - __pyx_t_5 = 0; - __pyx_t_6 = 0; - __pyx_t_7 = 0; - __pyx_t_8 = 0; - __pyx_t_9 = 0; - __pyx_t_10 = 0; - __pyx_t_11 = 0; - __pyx_t_12 = 0; - __pyx_t_13 = 0; - - /* "jcvi/formats/cblast.pyx":186 - * - * def __reduce__(self): - * return create_blast_line, ( # <<<<<<<<<<<<<< - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, - */ - __pyx_t_13 = PyTuple_New(2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 186, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1)) __PYX_ERR(0, 186, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_14); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_t_14)) __PYX_ERR(0, 186, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_14 = 0; - __pyx_r = __pyx_t_13; - __pyx_t_13 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":185 - * return py_str(result) - * - * def __reduce__(self): # <<<<<<<<<<<<<< - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - 
__Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_XDECREF(__pyx_t_9); - __Pyx_XDECREF(__pyx_t_10); - __Pyx_XDECREF(__pyx_t_11); - __Pyx_XDECREF(__pyx_t_12); - __Pyx_XDECREF(__pyx_t_13); - __Pyx_XDECREF(__pyx_t_14); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__reduce__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":85 - * - * cdef public: - * char _query[128] # <<<<<<<<<<<<<< - * char _subject[128] - * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 85, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto 
__pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._query.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - char __pyx_t_1[0x80]; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - if (unlikely((__Pyx_carray_from_py_char(__pyx_v_value, __pyx_t_1, 0x80) < 0))) __PYX_ERR(0, 85, __pyx_L1_error) - if (unlikely((0x80) != (0x80))) { - PyErr_Format(PyExc_ValueError, "Assignment to slice of wrong length, expected %" CYTHON_FORMAT_SSIZE_T "d, got %" CYTHON_FORMAT_SSIZE_T "d", (Py_ssize_t)(0x80), (Py_ssize_t)(0x80)); - __PYX_ERR(0, 85, __pyx_L1_error) - } - memcpy(&(__pyx_v_self->_query[0]), __pyx_t_1, sizeof(__pyx_v_self->_query[0]) * (0x80)); - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._query.__set__", __pyx_clineno, __pyx_lineno, 
__pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":86 - * cdef public: - * char _query[128] - * char _subject[128] # <<<<<<<<<<<<<< - * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop - * float pctid, score - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 86, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._subject.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(PyObject *__pyx_v_self, 
PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - char __pyx_t_1[0x80]; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - if (unlikely((__Pyx_carray_from_py_char(__pyx_v_value, __pyx_t_1, 0x80) < 0))) __PYX_ERR(0, 86, __pyx_L1_error) - if (unlikely((0x80) != (0x80))) { - PyErr_Format(PyExc_ValueError, "Assignment to slice of wrong length, expected %" CYTHON_FORMAT_SSIZE_T "d, got %" CYTHON_FORMAT_SSIZE_T "d", (Py_ssize_t)(0x80), (Py_ssize_t)(0x80)); - __PYX_ERR(0, 86, __pyx_L1_error) - } - memcpy(&(__pyx_v_self->_subject[0]), __pyx_t_1, sizeof(__pyx_v_self->_subject[0]) * (0x80)); - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._subject.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":87 - * char _query[128] - * char _subject[128] - * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop # <<<<<<<<<<<<<< - * float pctid, score - * double evalue - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject 
*__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->hitlen); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.hitlen.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = 
__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->hitlen = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.hitlen.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - 
__Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->nmismatch); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.nmismatch.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->nmismatch = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.nmismatch.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); 
- __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->ngaps); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.ngaps.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - 
__Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->ngaps = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.ngaps.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - 
int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstart.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->qstart = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - 
__Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstart.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstop.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->qstop = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstop.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(struct 
__pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstart.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - 
__pyx_v_self->sstart = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstart.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstop.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->sstop = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstop.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":88 - * char _subject[128] - * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop - * float pctid, score # <<<<<<<<<<<<<< - * double evalue - * object qseqid, sseqid - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - 
__pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->pctid); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 88, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.pctid.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, 
PyObject *__pyx_v_value) { - int __pyx_r; - float __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __pyx_PyFloat_AsFloat(__pyx_v_value); if (unlikely((__pyx_t_1 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 88, __pyx_L1_error) - __pyx_v_self->pctid = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.pctid.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 88, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.score.__get__", 
__pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - float __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __pyx_PyFloat_AsFloat(__pyx_v_value); if (unlikely((__pyx_t_1 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 88, __pyx_L1_error) - __pyx_v_self->score = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.score.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":89 - * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop - * float pctid, score - * double evalue # <<<<<<<<<<<<<< - * object qseqid, sseqid - * int qi, si - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject 
*__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 89, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.evalue.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = 
__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - double __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __pyx_PyFloat_AsDouble(__pyx_v_value); if (unlikely((__pyx_t_1 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 89, __pyx_L1_error) - __pyx_v_self->evalue = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.evalue.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":90 - * float pctid, score - * double evalue - * object qseqid, sseqid # <<<<<<<<<<<<<< - * int qi, si - * char orientation - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - 
__Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_self->qseqid); - __pyx_r = __pyx_v_self->qseqid; - goto __pyx_L0; - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__", 1); - __Pyx_INCREF(__pyx_v_value); - __Pyx_GIVEREF(__pyx_v_value); - __Pyx_GOTREF(__pyx_v_self->qseqid); - __Pyx_DECREF(__pyx_v_self->qseqid); - __pyx_v_self->qseqid = __pyx_v_value; - - /* function exit code */ - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(PyObject *__pyx_v_self); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__del__ (wrapper)", 0); - __pyx_kwvalues = 
__Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__del__", 1); - __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(Py_None); - __Pyx_GOTREF(__pyx_v_self->qseqid); - __Pyx_DECREF(__pyx_v_self->qseqid); - __pyx_v_self->qseqid = Py_None; - - /* function exit code */ - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_self->sseqid); - __pyx_r = __pyx_v_self->sseqid; - goto __pyx_L0; - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__", 1); - __Pyx_INCREF(__pyx_v_value); - __Pyx_GIVEREF(__pyx_v_value); - __Pyx_GOTREF(__pyx_v_self->sseqid); - __Pyx_DECREF(__pyx_v_self->sseqid); - __pyx_v_self->sseqid = __pyx_v_value; - - /* function exit code */ - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(PyObject *__pyx_v_self); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__del__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(struct 
__pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__del__", 1); - __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(Py_None); - __Pyx_GOTREF(__pyx_v_self->sseqid); - __Pyx_DECREF(__pyx_v_self->sseqid); - __pyx_v_self->sseqid = Py_None; - - /* function exit code */ - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":91 - * double evalue - * object qseqid, sseqid - * int qi, si # <<<<<<<<<<<<<< - * char orientation - * - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 91, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qi.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - 
__pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 91, __pyx_L1_error) - __pyx_v_self->qi = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qi.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - 
__pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->si); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 91, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.si.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - 
int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 91, __pyx_L1_error) - __pyx_v_self->si = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.si.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":92 - * object qseqid, sseqid - * int qi, si - * char orientation # <<<<<<<<<<<<<< - * - * property query: - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_char(__pyx_v_self->orientation); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 92, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function 
exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.orientation.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - char __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_char(__pyx_v_value); if (unlikely((__pyx_t_1 == (char)-1) && PyErr_Occurred())) __PYX_ERR(0, 92, __pyx_L1_error) - __pyx_v_self->orientation = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.orientation.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":192 - * - * - * cdef BlastLine create_blast_line(char *query, char *subject, float pctid, int hitlen, # <<<<<<<<<<<<<< - * int nmismatch, int ngaps, int qstart, int qstop, - * int 
sstart, int sstop, float evalue, float score): - */ - -static struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_f_4jcvi_7formats_6cblast_create_blast_line(char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score) { - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_b = 0; - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("create_blast_line", 1); - - /* "jcvi/formats/cblast.pyx":197 - * """ Factory method. - * """ - * cdef BlastLine b = BlastLine.__new__(BlastLine) # <<<<<<<<<<<<<< - * b.query = query - * b.subject = subject - */ - __pyx_t_1 = ((PyObject *)__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(((PyTypeObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_empty_tuple, NULL)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error) - __Pyx_GOTREF((PyObject *)__pyx_t_1); - __pyx_v_b = ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":198 - * """ - * cdef BlastLine b = BlastLine.__new__(BlastLine) - * b.query = query # <<<<<<<<<<<<<< - * b.subject = subject - * b.pctid = pctid - */ - __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 198, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (__Pyx_PyObject_SetAttrStr(((PyObject *)__pyx_v_b), __pyx_n_s_query, __pyx_t_1) < 0) __PYX_ERR(0, 198, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":199 - * cdef BlastLine b = BlastLine.__new__(BlastLine) - * b.query = query - * b.subject = subject # <<<<<<<<<<<<<< - * b.pctid = pctid - * b.hitlen = hitlen - */ - __pyx_t_1 = 
__Pyx_PyBytes_FromString(__pyx_v_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 199, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (__Pyx_PyObject_SetAttrStr(((PyObject *)__pyx_v_b), __pyx_n_s_subject, __pyx_t_1) < 0) __PYX_ERR(0, 199, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":200 - * b.query = query - * b.subject = subject - * b.pctid = pctid # <<<<<<<<<<<<<< - * b.hitlen = hitlen - * b.nmismatch = nmismatch - */ - __pyx_v_b->pctid = __pyx_v_pctid; - - /* "jcvi/formats/cblast.pyx":201 - * b.subject = subject - * b.pctid = pctid - * b.hitlen = hitlen # <<<<<<<<<<<<<< - * b.nmismatch = nmismatch - * b.ngaps = ngaps - */ - __pyx_v_b->hitlen = __pyx_v_hitlen; - - /* "jcvi/formats/cblast.pyx":202 - * b.pctid = pctid - * b.hitlen = hitlen - * b.nmismatch = nmismatch # <<<<<<<<<<<<<< - * b.ngaps = ngaps - * b.qstart = qstart - */ - __pyx_v_b->nmismatch = __pyx_v_nmismatch; - - /* "jcvi/formats/cblast.pyx":203 - * b.hitlen = hitlen - * b.nmismatch = nmismatch - * b.ngaps = ngaps # <<<<<<<<<<<<<< - * b.qstart = qstart - * b.qstop = qstop - */ - __pyx_v_b->ngaps = __pyx_v_ngaps; - - /* "jcvi/formats/cblast.pyx":204 - * b.nmismatch = nmismatch - * b.ngaps = ngaps - * b.qstart = qstart # <<<<<<<<<<<<<< - * b.qstop = qstop - * b.sstart = sstart - */ - __pyx_v_b->qstart = __pyx_v_qstart; - - /* "jcvi/formats/cblast.pyx":205 - * b.ngaps = ngaps - * b.qstart = qstart - * b.qstop = qstop # <<<<<<<<<<<<<< - * b.sstart = sstart - * b.sstop = sstop - */ - __pyx_v_b->qstop = __pyx_v_qstop; - - /* "jcvi/formats/cblast.pyx":206 - * b.qstart = qstart - * b.qstop = qstop - * b.sstart = sstart # <<<<<<<<<<<<<< - * b.sstop = sstop - * b.evalue = evalue - */ - __pyx_v_b->sstart = __pyx_v_sstart; - - /* "jcvi/formats/cblast.pyx":207 - * b.qstop = qstop - * b.sstart = sstart - * b.sstop = sstop # <<<<<<<<<<<<<< - * b.evalue = evalue - * b.score = score - */ - __pyx_v_b->sstop = __pyx_v_sstop; - - /* "jcvi/formats/cblast.pyx":208 - * b.sstart 
= sstart - * b.sstop = sstop - * b.evalue = evalue # <<<<<<<<<<<<<< - * b.score = score - * return b - */ - __pyx_v_b->evalue = __pyx_v_evalue; - - /* "jcvi/formats/cblast.pyx":209 - * b.sstop = sstop - * b.evalue = evalue - * b.score = score # <<<<<<<<<<<<<< - * return b - */ - __pyx_v_b->score = __pyx_v_score; - - /* "jcvi/formats/cblast.pyx":210 - * b.evalue = evalue - * b.score = score - * return b # <<<<<<<<<<<<<< - */ - __Pyx_XDECREF((PyObject *)__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_b); - __pyx_r = __pyx_v_b; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":192 - * - * - * cdef BlastLine create_blast_line(char *query, char *subject, float pctid, int hitlen, # <<<<<<<<<<<<<< - * int nmismatch, int ngaps, int qstart, int qstop, - * int sstart, int sstop, float evalue, float score): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.create_blast_line", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_b); - __Pyx_XGIVEREF((PyObject *)__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_Blast(PyTypeObject *t, PyObject *a, PyObject *k) { - struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { - o = (*t->tp_alloc)(t, 0); - } else { - o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); - } - if (unlikely(!o)) return 0; - #endif - p = ((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o); - p->filename = Py_None; Py_INCREF(Py_None); - if (unlikely(__pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(o, a, k) < 0)) goto bad; - return o; - bad: - Py_DECREF(o); o = 0; - return NULL; -} - -static void 
__pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast(PyObject *o) { - struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - { - PyObject *etype, *eval, *etb; - PyErr_Fetch(&etype, &eval, &etb); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1); - __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(o); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1); - PyErr_Restore(etype, eval, etb); - } - Py_CLEAR(p->filename); - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif -} - -static int __pyx_tp_traverse_4jcvi_7formats_6cblast_Blast(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; - if (p->filename) { - e = (*v)(p->filename, a); if (e) return e; - } - return 0; -} - -static int __pyx_tp_clear_4jcvi_7formats_6cblast_Blast(PyObject *o) { - PyObject* tmp; - struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; - tmp = ((PyObject*)p->filename); - p->filename = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - return 0; -} - -static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *self, CYTHON_UNUSED PyObject *arg) { - PyObject *res = __pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(self); - if (!res && !PyErr_Occurred()) { PyErr_SetNone(PyExc_StopIteration); } - return res; -} 
-static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *self, CYTHON_UNUSED PyObject *arg) { - return __pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(self); -} - -static PyMethodDef __pyx_methods_4jcvi_7formats_6cblast_Blast[] = { - {"__next__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__, METH_NOARGS|METH_COEXIST, 0}, - {"__repr__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__, METH_NOARGS|METH_COEXIST, 0}, - {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_4jcvi_7formats_6cblast_Blast_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast}, - {Py_tp_repr, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast_Blast}, - {Py_tp_clear, (void *)__pyx_tp_clear_4jcvi_7formats_6cblast_Blast}, - {Py_tp_iter, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__}, - {Py_tp_iternext, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__}, - {Py_tp_methods, (void *)__pyx_methods_4jcvi_7formats_6cblast_Blast}, - {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast_Blast}, - {0, 0}, -}; -static PyType_Spec __pyx_type_4jcvi_7formats_6cblast_Blast_spec = { - "jcvi.formats.cblast.Blast", - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_Blast), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, - __pyx_type_4jcvi_7formats_6cblast_Blast_slots, -}; -#else - -static PyTypeObject 
__pyx_type_4jcvi_7formats_6cblast_Blast = { - PyVarObject_HEAD_INIT(0, 0) - "jcvi.formats.cblast.""Blast", /*tp_name*/ - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_Blast), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - __pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_4jcvi_7formats_6cblast_Blast, /*tp_traverse*/ - __pyx_tp_clear_4jcvi_7formats_6cblast_Blast, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - __pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__, /*tp_iter*/ - __pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__, /*tp_iternext*/ - __pyx_methods_4jcvi_7formats_6cblast_Blast, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_4jcvi_7formats_6cblast_Blast, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || 
PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if PY_VERSION_HEX >= 0x030d00A4 - 0, /*tp_versions_used*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { - o = (*t->tp_alloc)(t, 0); - } else { - o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); - } - if (unlikely(!o)) return 0; - #endif - p = ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o); - p->qseqid = Py_None; Py_INCREF(Py_None); - p->sseqid = Py_None; Py_INCREF(Py_None); - return o; -} - -static void __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine(PyObject *o) { - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - Py_CLEAR(p->qseqid); - Py_CLEAR(p->sseqid); - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) 
tp_free(o); - } - #endif -} - -static int __pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; - if (p->qseqid) { - e = (*v)(p->qseqid, a); if (e) return e; - } - if (p->sseqid) { - e = (*v)(p->sseqid, a); if (e) return e; - } - return 0; -} - -static int __pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine(PyObject *o) { - PyObject* tmp; - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; - tmp = ((PyObject*)p->qseqid); - p->qseqid = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->sseqid); - p->sseqid = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - return 0; -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_query(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_query(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_subject(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_subject(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_has_score(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(o); -} - -static PyObject 
*__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_swapped(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(o); -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_bedline(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(o); -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__query(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__query(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__subject(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__subject(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_hitlen(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_hitlen(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch(PyObject *o, CYTHON_UNUSED void *x) { - return 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_ngaps(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_ngaps(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstart(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstart(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstop(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstop(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstart(PyObject *o, CYTHON_UNUSED void *x) { - return 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstart(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstop(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstop(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_pctid(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_pctid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_score(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_score(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_evalue(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(o); -} 
- -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_evalue(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qseqid(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qseqid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(o, v); - } - else { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(o); - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sseqid(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sseqid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(o, v); - } - else { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(o); - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qi(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qi(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_si(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_si(PyObject 
*o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_orientation(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_orientation(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *self, CYTHON_UNUSED PyObject *arg) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(self); -} - -static PyMethodDef __pyx_methods_4jcvi_7formats_6cblast_BlastLine[] = { - {"__repr__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__, METH_NOARGS|METH_COEXIST, 0}, - {"__reduce__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {0, 0, 0, 0} -}; - -static struct PyGetSetDef __pyx_getsets_4jcvi_7formats_6cblast_BlastLine[] = { - {(char *)"query", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_query, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_query, (char *)0, 0}, - {(char *)"subject", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_subject, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_subject, (char *)0, 0}, - {(char *)"has_score", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_has_score, 0, (char *)0, 0}, - {(char *)"swapped", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_swapped, 0, (char *)PyDoc_STR("\n Swap query and subject.\n "), 0}, - {(char *)"bedline", 
__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_bedline, 0, (char *)0, 0}, - {(char *)"_query", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__query, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__query, (char *)0, 0}, - {(char *)"_subject", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__subject, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__subject, (char *)0, 0}, - {(char *)"hitlen", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_hitlen, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_hitlen, (char *)0, 0}, - {(char *)"nmismatch", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch, (char *)0, 0}, - {(char *)"ngaps", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_ngaps, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_ngaps, (char *)0, 0}, - {(char *)"qstart", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstart, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstart, (char *)0, 0}, - {(char *)"qstop", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstop, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstop, (char *)0, 0}, - {(char *)"sstart", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstart, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstart, (char *)0, 0}, - {(char *)"sstop", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstop, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstop, (char *)0, 0}, - {(char *)"pctid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_pctid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_pctid, (char *)0, 0}, - {(char *)"score", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_score, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_score, (char *)0, 0}, - {(char *)"evalue", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_evalue, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_evalue, (char *)0, 0}, - {(char *)"qseqid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qseqid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qseqid, (char *)0, 0}, 
- {(char *)"sseqid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sseqid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sseqid, (char *)0, 0}, - {(char *)"qi", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qi, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qi, (char *)0, 0}, - {(char *)"si", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_si, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_si, (char *)0, 0}, - {(char *)"orientation", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_orientation, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_orientation, (char *)0, 0}, - {0, 0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_4jcvi_7formats_6cblast_BlastLine_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine}, - {Py_tp_repr, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__}, - {Py_tp_hash, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__}, - {Py_tp_str, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__}, - {Py_tp_doc, (void *)PyDoc_STR("\n Given a string of tab-delimited (-m 8) blast output, parse it and create\n an object with the usual attrs:\n\n >>> b = BlastLine(\"Os09g11510\tOs08g13650\t92.31\t39\t3\t0\t2273\t2311\t3237\t3199\t0.001\t54.0\")\n >>> b.query\n 'Os09g11510'\n >>> attrs = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', ... 
'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score')\n >>> [getattr(b, attr) for attr in attrs] # doctest: +ELLIPSIS\n ['Os09g11510', 'Os08g13650', 92.3..., 39, 3, 0, 2273, 2311, 3237, 3199, 0.001..., 54.0]\n ")}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine}, - {Py_tp_clear, (void *)__pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine}, - {Py_tp_richcompare, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__}, - {Py_tp_methods, (void *)__pyx_methods_4jcvi_7formats_6cblast_BlastLine}, - {Py_tp_getset, (void *)__pyx_getsets_4jcvi_7formats_6cblast_BlastLine}, - {Py_tp_init, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__}, - {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine}, - {0, 0}, -}; -static PyType_Spec __pyx_type_4jcvi_7formats_6cblast_BlastLine_spec = { - "jcvi.formats.cblast.BlastLine", - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, - __pyx_type_4jcvi_7formats_6cblast_BlastLine_slots, -}; -#else - -static PyTypeObject __pyx_type_4jcvi_7formats_6cblast_BlastLine = { - PyVarObject_HEAD_INIT(0, 0) - "jcvi.formats.cblast.""BlastLine", /*tp_name*/ - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__, /*tp_hash*/ - 0, /*tp_call*/ - 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - PyDoc_STR("\n Given a string of tab-delimited (-m 8) blast output, parse it and create\n an object with the usual attrs:\n\n >>> b = BlastLine(\"Os09g11510\tOs08g13650\t92.31\t39\t3\t0\t2273\t2311\t3237\t3199\t0.001\t54.0\")\n >>> b.query\n 'Os09g11510'\n >>> attrs = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', ... 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score')\n >>> [getattr(b, attr) for attr in attrs] # doctest: +ELLIPSIS\n ['Os09g11510', 'Os08g13650', 92.3..., 39, 3, 0, 2273, 2311, 3237, 3199, 0.001..., 54.0]\n "), /*tp_doc*/ - __pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine, /*tp_traverse*/ - __pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine, /*tp_clear*/ - __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods_4jcvi_7formats_6cblast_BlastLine, /*tp_methods*/ - 0, /*tp_members*/ - __pyx_getsets_4jcvi_7formats_6cblast_BlastLine, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_4jcvi_7formats_6cblast_BlastLine, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if 
__PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if PY_VERSION_HEX >= 0x030d00A4 - 0, /*tp_versions_used*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -#if CYTHON_USE_FREELISTS -static struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[8]; -static int __pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = 0; -#endif - -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - #if CYTHON_USE_FREELISTS - if (likely((int)(__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr > 0) & (int)(t->tp_basicsize == sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)))) { - o = (PyObject*)__pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[--__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr]; - memset(o, 0, sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)); - (void) PyObject_INIT(o, t); - PyObject_GC_Track(o); - } else - #endif - { - o = (*t->tp_alloc)(t, 0); - if (unlikely(!o)) return 0; - } - #endif - return o; -} - -static void __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyObject *o) { - struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *p = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && 
!__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - Py_CLEAR(p->__pyx_genexpr_arg_0); - Py_CLEAR(p->__pyx_v_x); - Py_CLEAR(p->__pyx_t_0); - #if CYTHON_USE_FREELISTS - if (((int)(__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)))) { - __pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr++] = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o); - } else - #endif - { - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif - } -} - -static int __pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *p = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o; - if (p->__pyx_genexpr_arg_0) { - e = (*v)(p->__pyx_genexpr_arg_0, a); if (e) return e; - } - if (p->__pyx_v_x) { - e = (*v)(p->__pyx_v_x, a); if (e) return e; - } - if (p->__pyx_t_0) { - e = (*v)(p->__pyx_t_0, a); if (e) return e; - } - return 0; -} -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, - {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, - {0, 0}, -}; -static PyType_Spec 
__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec = { - "jcvi.formats.cblast.__pyx_scope_struct__genexpr", - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, - __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_slots, -}; -#else - -static PyTypeObject __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = { - PyVarObject_HEAD_INIT(0, 0) - "jcvi.formats.cblast.""__pyx_scope_struct__genexpr", /*tp_name*/ - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - 0, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_new*/ - 0, 
/*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if PY_VERSION_HEX >= 0x030d00A4 - 0, /*tp_versions_used*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -#if CYTHON_USE_FREELISTS -static struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[8]; -static int __pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = 0; -#endif - -static PyObject *__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - #if CYTHON_USE_FREELISTS - if (likely((int)(__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc > 0) & 
(int)(t->tp_basicsize == sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)))) { - o = (PyObject*)__pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[--__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc]; - memset(o, 0, sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)); - (void) PyObject_INIT(o, t); - } else - #endif - { - o = (*t->tp_alloc)(t, 0); - if (unlikely(!o)) return 0; - } - #endif - return o; -} - -static void __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyObject *o) { - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && (!PyType_IS_GC(Py_TYPE(o)) || !__Pyx_PyObject_GC_IsFinalized(o))) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - #if CYTHON_USE_FREELISTS - if (((int)(__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct 
__pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)))) { - __pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc++] = ((struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *)o); - } else - #endif - { - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif - } -} -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc}, - {Py_tp_new, (void *)__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc}, - {0, 0}, -}; -static PyType_Spec __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec = { - "jcvi.formats.cblast.__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", - sizeof(struct 
__pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_FINALIZE, - __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_slots, -}; -#else - -static PyTypeObject __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = { - PyVarObject_HEAD_INIT(0, 0) - "jcvi.formats.cblast.""__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", /*tp_name*/ - sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ - 0, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, 
/*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - 0, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if PY_VERSION_HEX >= 0x030d00A4 - 0, /*tp_versions_used*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static PyMethodDef __pyx_methods[] = { - {0, 0, 0, 0} -}; -#ifndef CYTHON_SMALL_CODE -#if defined(__clang__) - #define CYTHON_SMALL_CODE -#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) - #define CYTHON_SMALL_CODE __attribute__((cold)) -#else - #define CYTHON_SMALL_CODE -#endif -#endif -/* #### Code section: pystring_table ### */ - -static int __Pyx_CreateStringTabAndInitStrings(void) { - __Pyx_StringTabEntry __pyx_string_tab[] = { - {&__pyx_n_s_Blast, __pyx_k_Blast, sizeof(__pyx_k_Blast), 0, 0, 1, 1}, - {&__pyx_n_s_BlastLine, __pyx_k_BlastLine, sizeof(__pyx_k_BlastLine), 0, 0, 1, 1}, - {&__pyx_n_s_BlastLine___get___locals_genexpr, __pyx_k_BlastLine___get___locals_genexpr, 
sizeof(__pyx_k_BlastLine___get___locals_genexpr), 0, 0, 1, 1}, - {&__pyx_n_s_BlastLine___reduce, __pyx_k_BlastLine___reduce, sizeof(__pyx_k_BlastLine___reduce), 0, 0, 1, 1}, - {&__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1, __pyx_k_BlastLine_s_to_s_eval_3f_score_1, sizeof(__pyx_k_BlastLine_s_to_s_eval_3f_score_1), 0, 0, 1, 0}, - {&__pyx_n_s_Blast___reduce_cython, __pyx_k_Blast___reduce_cython, sizeof(__pyx_k_Blast___reduce_cython), 0, 0, 1, 1}, - {&__pyx_n_s_Blast___setstate_cython, __pyx_k_Blast___setstate_cython, sizeof(__pyx_k_Blast___setstate_cython), 0, 0, 1, 1}, - {&__pyx_kp_s_Blast_s, __pyx_k_Blast_s, sizeof(__pyx_k_Blast_s), 0, 0, 1, 0}, - {&__pyx_n_s_IndexError, __pyx_k_IndexError, sizeof(__pyx_k_IndexError), 0, 0, 1, 1}, - {&__pyx_n_s_OverflowError, __pyx_k_OverflowError, sizeof(__pyx_k_OverflowError), 0, 0, 1, 1}, - {&__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma, __pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma, sizeof(__pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma), 0, 0, 1, 1}, - {&__pyx_n_s_StopIteration, __pyx_k_StopIteration, sizeof(__pyx_k_StopIteration), 0, 0, 1, 1}, - {&__pyx_n_s_TypeError, __pyx_k_TypeError, sizeof(__pyx_k_TypeError), 0, 0, 1, 1}, - {&__pyx_kp_s_UTF_8, __pyx_k_UTF_8, sizeof(__pyx_k_UTF_8), 0, 0, 1, 0}, - {&__pyx_n_s__13, __pyx_k__13, sizeof(__pyx_k__13), 0, 0, 1, 1}, - {&__pyx_kp_s__5, __pyx_k__5, sizeof(__pyx_k__5), 0, 0, 1, 0}, - {&__pyx_n_s__6, __pyx_k__6, sizeof(__pyx_k__6), 0, 0, 1, 1}, - {&__pyx_n_s_args, __pyx_k_args, sizeof(__pyx_k_args), 0, 0, 1, 1}, - {&__pyx_n_s_asyncio_coroutines, __pyx_k_asyncio_coroutines, sizeof(__pyx_k_asyncio_coroutines), 0, 0, 1, 1}, - {&__pyx_kp_s_cblast_pyx, __pyx_k_cblast_pyx, sizeof(__pyx_k_cblast_pyx), 0, 0, 1, 0}, - {&__pyx_n_s_cfunc_to_py, __pyx_k_cfunc_to_py, sizeof(__pyx_k_cfunc_to_py), 0, 0, 1, 1}, - {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, - {&__pyx_n_s_close, __pyx_k_close, sizeof(__pyx_k_close), 0, 0, 1, 1}, - {&__pyx_kp_u_disable, 
__pyx_k_disable, sizeof(__pyx_k_disable), 0, 1, 0, 0}, - {&__pyx_kp_u_enable, __pyx_k_enable, sizeof(__pyx_k_enable), 0, 1, 0, 0}, - {&__pyx_n_s_encode, __pyx_k_encode, sizeof(__pyx_k_encode), 0, 0, 1, 1}, - {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, - {&__pyx_n_s_evalue, __pyx_k_evalue, sizeof(__pyx_k_evalue), 0, 0, 1, 1}, - {&__pyx_n_s_filename, __pyx_k_filename, sizeof(__pyx_k_filename), 0, 0, 1, 1}, - {&__pyx_kp_u_gc, __pyx_k_gc, sizeof(__pyx_k_gc), 0, 1, 0, 0}, - {&__pyx_n_s_genexpr, __pyx_k_genexpr, sizeof(__pyx_k_genexpr), 0, 0, 1, 1}, - {&__pyx_n_s_getstate, __pyx_k_getstate, sizeof(__pyx_k_getstate), 0, 0, 1, 1}, - {&__pyx_n_s_hitlen, __pyx_k_hitlen, sizeof(__pyx_k_hitlen), 0, 0, 1, 1}, - {&__pyx_n_s_id, __pyx_k_id, sizeof(__pyx_k_id), 0, 0, 1, 1}, - {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, - {&__pyx_n_s_initializing, __pyx_k_initializing, sizeof(__pyx_k_initializing), 0, 0, 1, 1}, - {&__pyx_n_s_is_coroutine, __pyx_k_is_coroutine, sizeof(__pyx_k_is_coroutine), 0, 0, 1, 1}, - {&__pyx_kp_u_isenabled, __pyx_k_isenabled, sizeof(__pyx_k_isenabled), 0, 1, 0, 0}, - {&__pyx_n_s_jcvi_formats_cblast, __pyx_k_jcvi_formats_cblast, sizeof(__pyx_k_jcvi_formats_cblast), 0, 0, 1, 1}, - {&__pyx_n_s_join, __pyx_k_join, sizeof(__pyx_k_join), 0, 0, 1, 1}, - {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, - {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, - {&__pyx_n_s_ngaps, __pyx_k_ngaps, sizeof(__pyx_k_ngaps), 0, 0, 1, 1}, - {&__pyx_n_s_nmismatch, __pyx_k_nmismatch, sizeof(__pyx_k_nmismatch), 0, 0, 1, 1}, - {&__pyx_kp_s_no_default___reduce___due_to_non, __pyx_k_no_default___reduce___due_to_non, sizeof(__pyx_k_no_default___reduce___due_to_non), 0, 0, 1, 0}, - {&__pyx_n_s_orientation, __pyx_k_orientation, sizeof(__pyx_k_orientation), 0, 0, 1, 1}, - {&__pyx_n_s_pctid, __pyx_k_pctid, sizeof(__pyx_k_pctid), 0, 0, 1, 1}, - {&__pyx_n_s_pyx_state, __pyx_k_pyx_state, 
sizeof(__pyx_k_pyx_state), 0, 0, 1, 1}, - {&__pyx_n_s_qi, __pyx_k_qi, sizeof(__pyx_k_qi), 0, 0, 1, 1}, - {&__pyx_n_s_qseqid, __pyx_k_qseqid, sizeof(__pyx_k_qseqid), 0, 0, 1, 1}, - {&__pyx_n_s_qstart, __pyx_k_qstart, sizeof(__pyx_k_qstart), 0, 0, 1, 1}, - {&__pyx_n_s_qstop, __pyx_k_qstop, sizeof(__pyx_k_qstop), 0, 0, 1, 1}, - {&__pyx_n_s_query, __pyx_k_query, sizeof(__pyx_k_query), 0, 0, 1, 1}, - {&__pyx_n_s_reduce, __pyx_k_reduce, sizeof(__pyx_k_reduce), 0, 0, 1, 1}, - {&__pyx_n_s_reduce_cython, __pyx_k_reduce_cython, sizeof(__pyx_k_reduce_cython), 0, 0, 1, 1}, - {&__pyx_n_s_reduce_ex, __pyx_k_reduce_ex, sizeof(__pyx_k_reduce_ex), 0, 0, 1, 1}, - {&__pyx_n_s_richcmp, __pyx_k_richcmp, sizeof(__pyx_k_richcmp), 0, 0, 1, 1}, - {&__pyx_n_s_s, __pyx_k_s, sizeof(__pyx_k_s), 0, 0, 1, 1}, - {&__pyx_n_s_score, __pyx_k_score, sizeof(__pyx_k_score), 0, 0, 1, 1}, - {&__pyx_n_s_self, __pyx_k_self, sizeof(__pyx_k_self), 0, 0, 1, 1}, - {&__pyx_n_s_send, __pyx_k_send, sizeof(__pyx_k_send), 0, 0, 1, 1}, - {&__pyx_n_s_setstate, __pyx_k_setstate, sizeof(__pyx_k_setstate), 0, 0, 1, 1}, - {&__pyx_n_s_setstate_cython, __pyx_k_setstate_cython, sizeof(__pyx_k_setstate_cython), 0, 0, 1, 1}, - {&__pyx_n_s_si, __pyx_k_si, sizeof(__pyx_k_si), 0, 0, 1, 1}, - {&__pyx_n_s_slots, __pyx_k_slots, sizeof(__pyx_k_slots), 0, 0, 1, 1}, - {&__pyx_n_s_spec, __pyx_k_spec, sizeof(__pyx_k_spec), 0, 0, 1, 1}, - {&__pyx_n_s_sseqid, __pyx_k_sseqid, sizeof(__pyx_k_sseqid), 0, 0, 1, 1}, - {&__pyx_n_s_sstart, __pyx_k_sstart, sizeof(__pyx_k_sstart), 0, 0, 1, 1}, - {&__pyx_n_s_sstop, __pyx_k_sstop, sizeof(__pyx_k_sstop), 0, 0, 1, 1}, - {&__pyx_kp_s_stringsource, __pyx_k_stringsource, sizeof(__pyx_k_stringsource), 0, 0, 1, 0}, - {&__pyx_n_s_subject, __pyx_k_subject, sizeof(__pyx_k_subject), 0, 0, 1, 1}, - {&__pyx_n_s_sys, __pyx_k_sys, sizeof(__pyx_k_sys), 0, 0, 1, 1}, - {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, - {&__pyx_kp_s_that_comparison_not_implemented, 
__pyx_k_that_comparison_not_implemented, sizeof(__pyx_k_that_comparison_not_implemented), 0, 0, 1, 0}, - {&__pyx_n_s_throw, __pyx_k_throw, sizeof(__pyx_k_throw), 0, 0, 1, 1}, - {&__pyx_n_s_wrap, __pyx_k_wrap, sizeof(__pyx_k_wrap), 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 0, 0} - }; - return __Pyx_InitStrings(__pyx_string_tab); -} -/* #### Code section: cached_builtins ### */ -static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_StopIteration = __Pyx_GetBuiltinName(__pyx_n_s_StopIteration); if (!__pyx_builtin_StopIteration) __PYX_ERR(0, 47, __pyx_L1_error) - __pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) __PYX_ERR(1, 2, __pyx_L1_error) - __pyx_builtin_id = __Pyx_GetBuiltinName(__pyx_n_s_id); if (!__pyx_builtin_id) __PYX_ERR(0, 138, __pyx_L1_error) - __pyx_builtin_OverflowError = __Pyx_GetBuiltinName(__pyx_n_s_OverflowError); if (!__pyx_builtin_OverflowError) __PYX_ERR(1, 83, __pyx_L1_error) - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(1, 86, __pyx_L1_error) - __pyx_builtin_IndexError = __Pyx_GetBuiltinName(__pyx_n_s_IndexError); if (!__pyx_builtin_IndexError) __PYX_ERR(1, 96, __pyx_L1_error) - return 0; - __pyx_L1_error:; - return -1; -} -/* #### Code section: cached_constants ### */ - -static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - - /* "cfunc.to_py":67 - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int 
ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) - */ - __pyx_tuple_ = PyTuple_Pack(12, __pyx_n_s_query, __pyx_n_s_subject, __pyx_n_s_pctid, __pyx_n_s_hitlen, __pyx_n_s_nmismatch, __pyx_n_s_ngaps, __pyx_n_s_qstart, __pyx_n_s_qstop, __pyx_n_s_sstart, __pyx_n_s_sstop, __pyx_n_s_evalue, __pyx_n_s_score); if (unlikely(!__pyx_tuple_)) __PYX_ERR(1, 67, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple_); - __Pyx_GIVEREF(__pyx_tuple_); - __pyx_codeobj__2 = (PyObject*)__Pyx_PyCode_New(12, 0, 0, 12, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple_, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_wrap, 67, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__2)) __PYX_ERR(1, 67, __pyx_L1_error) - - /* "jcvi/formats/cblast.pyx":135 - * return not self.__richcmp__(other, 2) - * else: - * raise Exception("that comparison not implemented") # <<<<<<<<<<<<<< - * - * def __hash__(self): - */ - __pyx_tuple__3 = PyTuple_Pack(1, __pyx_kp_s_that_comparison_not_implemented); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 135, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__3); - __Pyx_GIVEREF(__pyx_tuple__3); - - /* "jcvi/formats/cblast.pyx":145 - * - * def __str__(self): - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - */ - __pyx_slice__4 = PySlice_New(Py_None, __pyx_int_12, Py_None); if (unlikely(!__pyx_slice__4)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_slice__4); - __Pyx_GIVEREF(__pyx_slice__4); - - /* "(tree fragment)":1 - * def 
__reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - __pyx_tuple__7 = PyTuple_Pack(1, __pyx_n_s_self); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__7); - __Pyx_GIVEREF(__pyx_tuple__7); - __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_reduce_cython, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(1, 1, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __pyx_tuple__9 = PyTuple_Pack(2, __pyx_n_s_self, __pyx_n_s_pyx_state); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__9); - __Pyx_GIVEREF(__pyx_tuple__9); - __pyx_codeobj__10 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__9, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_setstate_cython, 3, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__10)) __PYX_ERR(1, 3, __pyx_L1_error) - - /* "jcvi/formats/cblast.pyx":80 - * """ - * - * __slots__ = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ # <<<<<<<<<<<<<< - * 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score', \ - * 'qseqid', 'sseqid', 'qi', 'si', 'orientation') - */ - __pyx_tuple__11 = PyTuple_Pack(17, __pyx_n_s_query, __pyx_n_s_subject, __pyx_n_s_pctid, __pyx_n_s_hitlen, __pyx_n_s_nmismatch, __pyx_n_s_ngaps, __pyx_n_s_qstart, __pyx_n_s_qstop, __pyx_n_s_sstart, __pyx_n_s_sstop, 
__pyx_n_s_evalue, __pyx_n_s_score, __pyx_n_s_qseqid, __pyx_n_s_sseqid, __pyx_n_s_qi, __pyx_n_s_si, __pyx_n_s_orientation); if (unlikely(!__pyx_tuple__11)) __PYX_ERR(0, 80, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__11); - __Pyx_GIVEREF(__pyx_tuple__11); - - /* "jcvi/formats/cblast.pyx":185 - * return py_str(result) - * - * def __reduce__(self): # <<<<<<<<<<<<<< - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - */ - __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_cblast_pyx, __pyx_n_s_reduce, 185, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) __PYX_ERR(0, 185, __pyx_L1_error) - __Pyx_RefNannyFinishContext(); - return 0; - __pyx_L1_error:; - __Pyx_RefNannyFinishContext(); - return -1; -} -/* #### Code section: init_constants ### */ - -static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) { - __pyx_umethod_PyString_Type_encode.type = (PyObject*)&PyString_Type; - __pyx_umethod_PyString_Type_encode.method_name = &__pyx_n_s_encode; - if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error); - __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_int_12 = PyInt_FromLong(12); if (unlikely(!__pyx_int_12)) __PYX_ERR(0, 1, __pyx_L1_error) - return 0; - __pyx_L1_error:; - return -1; -} -/* #### Code section: init_globals ### */ - -static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { - return 0; -} -/* #### Code section: init_module ### */ - -static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ -static CYTHON_SMALL_CODE int 
__Pyx_modinit_type_import_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ - -static int __Pyx_modinit_global_init_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); - /*--- Global init code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_variable_export_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); - /*--- Variable export code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_function_export_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); - /*--- Function export code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_type_init_code(void) { - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); - /*--- Type init code ---*/ - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_4jcvi_7formats_6cblast_Blast = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast_Blast_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast_Blast)) __PYX_ERR(0, 21, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast_Blast_spec, __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) - #else - __pyx_ptype_4jcvi_7formats_6cblast_Blast = &__pyx_type_4jcvi_7formats_6cblast_Blast; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_print 
= 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_getattro = __Pyx_PyObject_GenericGetAttr; - } - #endif - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_Blast, (PyObject *) __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_setup_reduce((PyObject *) __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) - #endif - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_4jcvi_7formats_6cblast_BlastLine = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast_BlastLine_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast_BlastLine)) __PYX_ERR(0, 66, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast_BlastLine_spec, __pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) - #else - __pyx_ptype_4jcvi_7formats_6cblast_BlastLine = &__pyx_type_4jcvi_7formats_6cblast_BlastLine; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_getattro = __Pyx_PyObject_GenericGetAttr; - } - #endif - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_BlastLine, (PyObject *) 
__pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)) __PYX_ERR(0, 172, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec, __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) < 0) __PYX_ERR(0, 172, __pyx_L1_error) - #else - __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = &__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) < 0) __PYX_ERR(0, 172, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - } - #endif - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec, NULL); if 
(unlikely(!__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)) __PYX_ERR(1, 66, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec, __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) < 0) __PYX_ERR(1, 66, __pyx_L1_error) - #else - __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = &__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) < 0) __PYX_ERR(1, 66, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_dictoffset && 
__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - } - #endif - __Pyx_RefNannyFinishContext(); - return 0; - __pyx_L1_error:; - __Pyx_RefNannyFinishContext(); - return -1; -} - -static int __Pyx_modinit_type_import_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); - /*--- Type import code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_variable_import_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); - /*--- Variable import code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_function_import_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); - /*--- Function import code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - - -#if PY_MAJOR_VERSION >= 3 -#if CYTHON_PEP489_MULTI_PHASE_INIT -static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ -static int __pyx_pymod_exec_cblast(PyObject* module); /*proto*/ -static PyModuleDef_Slot __pyx_moduledef_slots[] = { - {Py_mod_create, (void*)__pyx_pymod_create}, - {Py_mod_exec, (void*)__pyx_pymod_exec_cblast}, - {0, NULL} -}; -#endif - -#ifdef __cplusplus -namespace { - struct PyModuleDef __pyx_moduledef = - #else - static struct PyModuleDef __pyx_moduledef = - #endif - { - PyModuleDef_HEAD_INIT, - "cblast", - __pyx_k_Cythonized_fast_version_of_Blas, /* m_doc */ - #if CYTHON_PEP489_MULTI_PHASE_INIT - 0, /* m_size */ - #elif CYTHON_USE_MODULE_STATE - 
sizeof(__pyx_mstate), /* m_size */ - #else - -1, /* m_size */ - #endif - __pyx_methods /* m_methods */, - #if CYTHON_PEP489_MULTI_PHASE_INIT - __pyx_moduledef_slots, /* m_slots */ - #else - NULL, /* m_reload */ - #endif - #if CYTHON_USE_MODULE_STATE - __pyx_m_traverse, /* m_traverse */ - __pyx_m_clear, /* m_clear */ - NULL /* m_free */ - #else - NULL, /* m_traverse */ - NULL, /* m_clear */ - NULL /* m_free */ - #endif - }; - #ifdef __cplusplus -} /* anonymous namespace */ -#endif -#endif - -#ifndef CYTHON_NO_PYINIT_EXPORT -#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC -#elif PY_MAJOR_VERSION < 3 -#ifdef __cplusplus -#define __Pyx_PyMODINIT_FUNC extern "C" void -#else -#define __Pyx_PyMODINIT_FUNC void -#endif -#else -#ifdef __cplusplus -#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * -#else -#define __Pyx_PyMODINIT_FUNC PyObject * -#endif -#endif - - -#if PY_MAJOR_VERSION < 3 -__Pyx_PyMODINIT_FUNC initcblast(void) CYTHON_SMALL_CODE; /*proto*/ -__Pyx_PyMODINIT_FUNC initcblast(void) -#else -__Pyx_PyMODINIT_FUNC PyInit_cblast(void) CYTHON_SMALL_CODE; /*proto*/ -__Pyx_PyMODINIT_FUNC PyInit_cblast(void) -#if CYTHON_PEP489_MULTI_PHASE_INIT -{ - return PyModuleDef_Init(&__pyx_moduledef); -} -static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) { - #if PY_VERSION_HEX >= 0x030700A1 - static PY_INT64_T main_interpreter_id = -1; - PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); - if (main_interpreter_id == -1) { - main_interpreter_id = current_id; - return (unlikely(current_id == -1)) ? 
-1 : 0; - } else if (unlikely(main_interpreter_id != current_id)) - #else - static PyInterpreterState *main_interpreter = NULL; - PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; - if (!main_interpreter) { - main_interpreter = current_interpreter; - } else if (unlikely(main_interpreter != current_interpreter)) - #endif - { - PyErr_SetString( - PyExc_ImportError, - "Interpreter change detected - this module can only be loaded into one interpreter per process."); - return -1; - } - return 0; -} -#if CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *module, const char* from_name, const char* to_name, int allow_none) -#else -static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) -#endif -{ - PyObject *value = PyObject_GetAttrString(spec, from_name); - int result = 0; - if (likely(value)) { - if (allow_none || value != Py_None) { -#if CYTHON_COMPILING_IN_LIMITED_API - result = PyModule_AddObject(module, to_name, value); -#else - result = PyDict_SetItemString(moddict, to_name, value); -#endif - } - Py_DECREF(value); - } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_Clear(); - } else { - result = -1; - } - return result; -} -static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def) { - PyObject *module = NULL, *moddict, *modname; - CYTHON_UNUSED_VAR(def); - if (__Pyx_check_single_interpreter()) - return NULL; - if (__pyx_m) - return __Pyx_NewRef(__pyx_m); - modname = PyObject_GetAttrString(spec, "name"); - if (unlikely(!modname)) goto bad; - module = PyModule_NewObject(modname); - Py_DECREF(modname); - if (unlikely(!module)) goto bad; -#if CYTHON_COMPILING_IN_LIMITED_API - moddict = module; -#else - moddict = PyModule_GetDict(module); - if (unlikely(!moddict)) goto bad; -#endif - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", 
"__loader__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; - return module; -bad: - Py_XDECREF(module); - return NULL; -} - - -static CYTHON_SMALL_CODE int __pyx_pymod_exec_cblast(PyObject *__pyx_pyinit_module) -#endif -#endif -{ - int stringtab_initialized = 0; - #if CYTHON_USE_MODULE_STATE - int pystate_addmodule_run = 0; - #endif - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannyDeclarations - #if CYTHON_PEP489_MULTI_PHASE_INIT - if (__pyx_m) { - if (__pyx_m == __pyx_pyinit_module) return 0; - PyErr_SetString(PyExc_RuntimeError, "Module 'cblast' has already been imported. Re-initialisation is not supported."); - return -1; - } - #elif PY_MAJOR_VERSION >= 3 - if (__pyx_m) return __Pyx_NewRef(__pyx_m); - #endif - /*--- Module creation code ---*/ - #if CYTHON_PEP489_MULTI_PHASE_INIT - __pyx_m = __pyx_pyinit_module; - Py_INCREF(__pyx_m); - #else - #if PY_MAJOR_VERSION < 3 - __pyx_m = Py_InitModule4("cblast", __pyx_methods, __pyx_k_Cythonized_fast_version_of_Blas, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); - if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) - #elif CYTHON_USE_MODULE_STATE - __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) - { - int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef); - __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to "cblast" pseudovariable */ - if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - pystate_addmodule_run = 1; - } - #else - __pyx_m = PyModule_Create(&__pyx_moduledef); - if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) 
- #endif - #endif - CYTHON_UNUSED_VAR(__pyx_t_1); - __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) - Py_INCREF(__pyx_d); - __pyx_b = __Pyx_PyImport_AddModuleRef(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_cython_runtime = __Pyx_PyImport_AddModuleRef((const char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) - if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #if CYTHON_REFNANNY -__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); -if (!__Pyx_RefNanny) { - PyErr_Clear(); - __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); - if (!__Pyx_RefNanny) - Py_FatalError("failed to import 'refnanny' module"); -} -#endif - __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_cblast(void)", 0); - if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, __Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #ifdef __Pxy_PyFrame_Initialize_Offsets - __Pxy_PyFrame_Initialize_Offsets(); - #endif - __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) - #ifdef __Pyx_CyFunction_USED - if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_FusedFunction_USED - if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_Coroutine_USED - if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_Generator_USED - if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_AsyncGen_USED - if 
(__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_StopAsyncIteration_USED - if (__pyx_StopAsyncIteration_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - /*--- Library function declarations ---*/ - /*--- Threads initialization code ---*/ - #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS - PyEval_InitThreads(); - #endif - /*--- Initialize various global constants etc. ---*/ - if (__Pyx_InitConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - stringtab_initialized = 1; - if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) - if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - if (__pyx_module_is_main_jcvi__formats__cblast) { - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - } - #if PY_MAJOR_VERSION >= 3 - { - PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) - if (!PyDict_GetItemString(modules, "jcvi.formats.cblast")) { - if (unlikely((PyDict_SetItemString(modules, "jcvi.formats.cblast", __pyx_m) < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - } - } - #endif - /*--- Builtin init code ---*/ - if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - /*--- Constants init code ---*/ - if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - /*--- Global type/function init code ---*/ - (void)__Pyx_modinit_global_init_code(); - (void)__Pyx_modinit_variable_export_code(); - (void)__Pyx_modinit_function_export_code(); - if (unlikely((__Pyx_modinit_type_init_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - (void)__Pyx_modinit_type_import_code(); - (void)__Pyx_modinit_variable_import_code(); - (void)__Pyx_modinit_function_import_code(); - /*--- Execution code ---*/ - 
#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) - if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - - /* "jcvi/formats/cblast.pyx":9 - * - * """ - * import sys # <<<<<<<<<<<<<< - * from libc.stdio cimport FILE, EOF, fopen, fscanf, rewind, fclose, sscanf, \ - * fgets, sprintf - */ - __pyx_t_2 = __Pyx_ImportDottedModuleRelFirst(__pyx_n_s_sys, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_sys, __pyx_t_2) < 0) __PYX_ERR(0, 9, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "jcvi/formats/cblast.pyx":15 - * - * - * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" # <<<<<<<<<<<<<< - * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" - * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" - */ - __pyx_v_4jcvi_7formats_6cblast_blast_format = ((char const *)"%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f"); - - /* "jcvi/formats/cblast.pyx":16 - * - * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" - * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" # <<<<<<<<<<<<<< - * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" - * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" - */ - __pyx_v_4jcvi_7formats_6cblast_blast_format_line = ((char const *)"%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n"); - - /* "jcvi/formats/cblast.pyx":17 - * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" - * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" - * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" # <<<<<<<<<<<<<< - * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" - * - */ - 
__pyx_v_4jcvi_7formats_6cblast_blast_output = ((char const *)"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g"); - - /* "jcvi/formats/cblast.pyx":18 - * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" - * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" - * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" # <<<<<<<<<<<<<< - * - * - */ - __pyx_v_4jcvi_7formats_6cblast_bed_output = ((char const *)"%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c"); - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Blast___reduce_cython, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__8)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_reduce_cython, __pyx_t_2) < 0) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Blast___setstate_cython, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__10)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_setstate_cython, __pyx_t_2) < 0) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "jcvi/formats/cblast.pyx":80 - * """ - * - 
* __slots__ = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ # <<<<<<<<<<<<<< - * 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score', \ - * 'qseqid', 'sseqid', 'qi', 'si', 'orientation') - */ - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine, __pyx_n_s_slots, __pyx_tuple__11) < 0) __PYX_ERR(0, 80, __pyx_L1_error) - PyType_Modified(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); - - /* "jcvi/formats/cblast.pyx":185 - * return py_str(result) - * - * def __reduce__(self): # <<<<<<<<<<<<<< - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - */ - __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_BlastLine___reduce, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__12)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 185, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine, __pyx_n_s_reduce, __pyx_t_2) < 0) __PYX_ERR(0, 185, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - PyType_Modified(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); - - /* "jcvi/formats/cblast.pyx":1 - * # cython: language_level=2, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True # <<<<<<<<<<<<<< - * - * """ - */ - __pyx_t_2 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_2) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /*--- Wrapped vars code ---*/ - - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - if (__pyx_m) { - if (__pyx_d && stringtab_initialized) { - __Pyx_AddTraceback("init jcvi.formats.cblast", __pyx_clineno, __pyx_lineno, __pyx_filename); - } - #if !CYTHON_USE_MODULE_STATE - Py_CLEAR(__pyx_m); - #else - 
Py_DECREF(__pyx_m); - if (pystate_addmodule_run) { - PyObject *tp, *value, *tb; - PyErr_Fetch(&tp, &value, &tb); - PyState_RemoveModule(&__pyx_moduledef); - PyErr_Restore(tp, value, tb); - } - #endif - } else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ImportError, "init jcvi.formats.cblast"); - } - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - #if CYTHON_PEP489_MULTI_PHASE_INIT - return (__pyx_m != NULL) ? 0 : -1; - #elif PY_MAJOR_VERSION >= 3 - return __pyx_m; - #else - return; - #endif -} -/* #### Code section: cleanup_globals ### */ -/* #### Code section: cleanup_module ### */ -/* #### Code section: main_method ### */ -/* #### Code section: utility_code_pragmas ### */ -#ifdef _MSC_VER -#pragma warning( push ) -/* Warning 4127: conditional expression is constant - * Cython uses constant conditional expressions to allow in inline functions to be optimized at - * compile-time, so this warning is not useful - */ -#pragma warning( disable : 4127 ) -#endif - - - -/* #### Code section: utility_code_def ### */ - -/* --- Runtime support code --- */ -/* Refnanny */ -#if CYTHON_REFNANNY -static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { - PyObject *m = NULL, *p = NULL; - void *r = NULL; - m = PyImport_ImportModule(modname); - if (!m) goto end; - p = PyObject_GetAttrString(m, "RefNannyAPI"); - if (!p) goto end; - r = PyLong_AsVoidPtr(p); -end: - Py_XDECREF(p); - Py_XDECREF(m); - return (__Pyx_RefNannyAPIStruct *)r; -} -#endif - -/* PyErrExceptionMatches */ -#if CYTHON_FAST_THREAD_STATE -static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(tuple); -#if PY_MAJOR_VERSION >= 3 - for (i=0; i= 0x030C00A6 - PyObject *current_exception = tstate->current_exception; - if (unlikely(!current_exception)) return 0; - exc_type = (PyObject*) Py_TYPE(current_exception); - if (exc_type == err) return 1; -#else - exc_type = tstate->curexc_type; - if (exc_type == err) return 1; - if 
(unlikely(!exc_type)) return 0; -#endif - #if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(exc_type); - #endif - if (unlikely(PyTuple_Check(err))) { - result = __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); - } else { - result = __Pyx_PyErr_GivenExceptionMatches(exc_type, err); - } - #if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(exc_type); - #endif - return result; -} -#endif - -/* PyErrFetchRestore */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { -#if PY_VERSION_HEX >= 0x030C00A6 - PyObject *tmp_value; - assert(type == NULL || (value != NULL && type == (PyObject*) Py_TYPE(value))); - if (value) { - #if CYTHON_COMPILING_IN_CPYTHON - if (unlikely(((PyBaseExceptionObject*) value)->traceback != tb)) - #endif - PyException_SetTraceback(value, tb); - } - tmp_value = tstate->current_exception; - tstate->current_exception = value; - Py_XDECREF(tmp_value); - Py_XDECREF(type); - Py_XDECREF(tb); -#else - PyObject *tmp_type, *tmp_value, *tmp_tb; - tmp_type = tstate->curexc_type; - tmp_value = tstate->curexc_value; - tmp_tb = tstate->curexc_traceback; - tstate->curexc_type = type; - tstate->curexc_value = value; - tstate->curexc_traceback = tb; - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); -#endif -} -static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { -#if PY_VERSION_HEX >= 0x030C00A6 - PyObject* exc_value; - exc_value = tstate->current_exception; - tstate->current_exception = 0; - *value = exc_value; - *type = NULL; - *tb = NULL; - if (exc_value) { - *type = (PyObject*) Py_TYPE(exc_value); - Py_INCREF(*type); - #if CYTHON_COMPILING_IN_CPYTHON - *tb = ((PyBaseExceptionObject*) exc_value)->traceback; - Py_XINCREF(*tb); - #else - *tb = PyException_GetTraceback(exc_value); - #endif - } -#else - *type = tstate->curexc_type; - *value = tstate->curexc_value; - *tb = 
tstate->curexc_traceback; - tstate->curexc_type = 0; - tstate->curexc_value = 0; - tstate->curexc_traceback = 0; -#endif -} -#endif - -/* PyObjectGetAttrStr */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { - PyTypeObject* tp = Py_TYPE(obj); - if (likely(tp->tp_getattro)) - return tp->tp_getattro(obj, attr_name); -#if PY_MAJOR_VERSION < 3 - if (likely(tp->tp_getattr)) - return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); -#endif - return PyObject_GetAttr(obj, attr_name); -} -#endif - -/* PyObjectGetAttrStrNoError */ -#if __PYX_LIMITED_VERSION_HEX < 0x030d00A1 -static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError))) - __Pyx_PyErr_Clear(); -} -#endif -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) { - PyObject *result; -#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 - (void) PyObject_GetOptionalAttr(obj, attr_name, &result); - return result; -#else -#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1 - PyTypeObject* tp = Py_TYPE(obj); - if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) { - return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1); - } -#endif - result = __Pyx_PyObject_GetAttrStr(obj, attr_name); - if (unlikely(!result)) { - __Pyx_PyObject_GetAttrStr_ClearAttributeError(); - } - return result; -#endif -} - -/* GetBuiltinName */ -static PyObject *__Pyx_GetBuiltinName(PyObject *name) { - PyObject* result = __Pyx_PyObject_GetAttrStrNoError(__pyx_b, name); - if (unlikely(!result) && !PyErr_Occurred()) { - PyErr_Format(PyExc_NameError, -#if PY_MAJOR_VERSION >= 3 - "name '%U' is not defined", name); -#else - "name '%.200s' is not defined", PyString_AS_STRING(name)); -#endif - } - return result; -} - -/* TupleAndListFromArray */ -#if 
CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE void __Pyx_copy_object_array(PyObject *const *CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) { - PyObject *v; - Py_ssize_t i; - for (i = 0; i < length; i++) { - v = dest[i] = src[i]; - Py_INCREF(v); - } -} -static CYTHON_INLINE PyObject * -__Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) -{ - PyObject *res; - if (n <= 0) { - Py_INCREF(__pyx_empty_tuple); - return __pyx_empty_tuple; - } - res = PyTuple_New(n); - if (unlikely(res == NULL)) return NULL; - __Pyx_copy_object_array(src, ((PyTupleObject*)res)->ob_item, n); - return res; -} -static CYTHON_INLINE PyObject * -__Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n) -{ - PyObject *res; - if (n <= 0) { - return PyList_New(0); - } - res = PyList_New(n); - if (unlikely(res == NULL)) return NULL; - __Pyx_copy_object_array(src, ((PyListObject*)res)->ob_item, n); - return res; -} -#endif - -/* BytesEquals */ -static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { -#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API - return PyObject_RichCompareBool(s1, s2, equals); -#else - if (s1 == s2) { - return (equals == Py_EQ); - } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { - const char *ps1, *ps2; - Py_ssize_t length = PyBytes_GET_SIZE(s1); - if (length != PyBytes_GET_SIZE(s2)) - return (equals == Py_NE); - ps1 = PyBytes_AS_STRING(s1); - ps2 = PyBytes_AS_STRING(s2); - if (ps1[0] != ps2[0]) { - return (equals == Py_NE); - } else if (length == 1) { - return (equals == Py_EQ); - } else { - int result; -#if CYTHON_USE_UNICODE_INTERNALS && (PY_VERSION_HEX < 0x030B0000) - Py_hash_t hash1, hash2; - hash1 = ((PyBytesObject*)s1)->ob_shash; - hash2 = ((PyBytesObject*)s2)->ob_shash; - if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { - return (equals == Py_NE); - } -#endif - result = memcmp(ps1, ps2, (size_t)length); - return (equals == Py_EQ) ? 
(result == 0) : (result != 0); - } - } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { - return (equals == Py_NE); - } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { - return (equals == Py_NE); - } else { - int result; - PyObject* py_result = PyObject_RichCompare(s1, s2, equals); - if (!py_result) - return -1; - result = __Pyx_PyObject_IsTrue(py_result); - Py_DECREF(py_result); - return result; - } -#endif -} - -/* UnicodeEquals */ -static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { -#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API - return PyObject_RichCompareBool(s1, s2, equals); -#else -#if PY_MAJOR_VERSION < 3 - PyObject* owned_ref = NULL; -#endif - int s1_is_unicode, s2_is_unicode; - if (s1 == s2) { - goto return_eq; - } - s1_is_unicode = PyUnicode_CheckExact(s1); - s2_is_unicode = PyUnicode_CheckExact(s2); -#if PY_MAJOR_VERSION < 3 - if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { - owned_ref = PyUnicode_FromObject(s2); - if (unlikely(!owned_ref)) - return -1; - s2 = owned_ref; - s2_is_unicode = 1; - } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) { - owned_ref = PyUnicode_FromObject(s1); - if (unlikely(!owned_ref)) - return -1; - s1 = owned_ref; - s1_is_unicode = 1; - } else if (((!s2_is_unicode) & (!s1_is_unicode))) { - return __Pyx_PyBytes_Equals(s1, s2, equals); - } -#endif - if (s1_is_unicode & s2_is_unicode) { - Py_ssize_t length; - int kind; - void *data1, *data2; - if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0)) - return -1; - length = __Pyx_PyUnicode_GET_LENGTH(s1); - if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { - goto return_ne; - } -#if CYTHON_USE_UNICODE_INTERNALS - { - Py_hash_t hash1, hash2; - #if CYTHON_PEP393_ENABLED - hash1 = ((PyASCIIObject*)s1)->hash; - hash2 = ((PyASCIIObject*)s2)->hash; - #else - hash1 = ((PyUnicodeObject*)s1)->hash; - hash2 = ((PyUnicodeObject*)s2)->hash; - #endif - if 
(hash1 != hash2 && hash1 != -1 && hash2 != -1) { - goto return_ne; - } - } -#endif - kind = __Pyx_PyUnicode_KIND(s1); - if (kind != __Pyx_PyUnicode_KIND(s2)) { - goto return_ne; - } - data1 = __Pyx_PyUnicode_DATA(s1); - data2 = __Pyx_PyUnicode_DATA(s2); - if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { - goto return_ne; - } else if (length == 1) { - goto return_eq; - } else { - int result = memcmp(data1, data2, (size_t)(length * kind)); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_EQ) ? (result == 0) : (result != 0); - } - } else if ((s1 == Py_None) & s2_is_unicode) { - goto return_ne; - } else if ((s2 == Py_None) & s1_is_unicode) { - goto return_ne; - } else { - int result; - PyObject* py_result = PyObject_RichCompare(s1, s2, equals); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - if (!py_result) - return -1; - result = __Pyx_PyObject_IsTrue(py_result); - Py_DECREF(py_result); - return result; - } -return_eq: - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_EQ); -return_ne: - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_NE); -#endif -} - -/* fastcall */ -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s) -{ - Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames); - for (i = 0; i < n; i++) - { - if (s == PyTuple_GET_ITEM(kwnames, i)) return kwvalues[i]; - } - for (i = 0; i < n; i++) - { - int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ); - if (unlikely(eq != 0)) { - if (unlikely(eq < 0)) return NULL; - return kwvalues[i]; - } - } - return NULL; -} -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 -CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues) { - Py_ssize_t i, nkwargs = PyTuple_GET_SIZE(kwnames); - PyObject *dict; - dict = 
PyDict_New(); - if (unlikely(!dict)) - return NULL; - for (i=0; i= 3 - "%s() got multiple values for keyword argument '%U'", func_name, kw_name); - #else - "%s() got multiple values for keyword argument '%s'", func_name, - PyString_AsString(kw_name)); - #endif -} - -/* ParseKeywords */ -static int __Pyx_ParseOptionalKeywords( - PyObject *kwds, - PyObject *const *kwvalues, - PyObject **argnames[], - PyObject *kwds2, - PyObject *values[], - Py_ssize_t num_pos_args, - const char* function_name) -{ - PyObject *key = 0, *value = 0; - Py_ssize_t pos = 0; - PyObject*** name; - PyObject*** first_kw_arg = argnames + num_pos_args; - int kwds_is_tuple = CYTHON_METH_FASTCALL && likely(PyTuple_Check(kwds)); - while (1) { - Py_XDECREF(key); key = NULL; - Py_XDECREF(value); value = NULL; - if (kwds_is_tuple) { - Py_ssize_t size; -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(kwds); -#else - size = PyTuple_Size(kwds); - if (size < 0) goto bad; -#endif - if (pos >= size) break; -#if CYTHON_AVOID_BORROWED_REFS - key = __Pyx_PySequence_ITEM(kwds, pos); - if (!key) goto bad; -#elif CYTHON_ASSUME_SAFE_MACROS - key = PyTuple_GET_ITEM(kwds, pos); -#else - key = PyTuple_GetItem(kwds, pos); - if (!key) goto bad; -#endif - value = kwvalues[pos]; - pos++; - } - else - { - if (!PyDict_Next(kwds, &pos, &key, &value)) break; -#if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(key); -#endif - } - name = first_kw_arg; - while (*name && (**name != key)) name++; - if (*name) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(value); - Py_DECREF(key); -#endif - key = NULL; - value = NULL; - continue; - } -#if !CYTHON_AVOID_BORROWED_REFS - Py_INCREF(key); -#endif - Py_INCREF(value); - name = first_kw_arg; - #if PY_MAJOR_VERSION < 3 - if (likely(PyString_Check(key))) { - while (*name) { - if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) - && _PyString_Eq(**name, key)) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - 
value = NULL; -#endif - break; - } - name++; - } - if (*name) continue; - else { - PyObject*** argname = argnames; - while (argname != first_kw_arg) { - if ((**argname == key) || ( - (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) - && _PyString_Eq(**argname, key))) { - goto arg_passed_twice; - } - argname++; - } - } - } else - #endif - if (likely(PyUnicode_Check(key))) { - while (*name) { - int cmp = ( - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 - (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : - #endif - PyUnicode_Compare(**name, key) - ); - if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; - if (cmp == 0) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - value = NULL; -#endif - break; - } - name++; - } - if (*name) continue; - else { - PyObject*** argname = argnames; - while (argname != first_kw_arg) { - int cmp = (**argname == key) ? 0 : - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 - (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 
1 : - #endif - PyUnicode_Compare(**argname, key); - if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; - if (cmp == 0) goto arg_passed_twice; - argname++; - } - } - } else - goto invalid_keyword_type; - if (kwds2) { - if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; - } else { - goto invalid_keyword; - } - } - Py_XDECREF(key); - Py_XDECREF(value); - return 0; -arg_passed_twice: - __Pyx_RaiseDoubleKeywordsError(function_name, key); - goto bad; -invalid_keyword_type: - PyErr_Format(PyExc_TypeError, - "%.200s() keywords must be strings", function_name); - goto bad; -invalid_keyword: - #if PY_MAJOR_VERSION < 3 - PyErr_Format(PyExc_TypeError, - "%.200s() got an unexpected keyword argument '%.200s'", - function_name, PyString_AsString(key)); - #else - PyErr_Format(PyExc_TypeError, - "%s() got an unexpected keyword argument '%U'", - function_name, key); - #endif -bad: - Py_XDECREF(key); - Py_XDECREF(value); - return -1; -} - -/* FixUpExtensionType */ -#if CYTHON_USE_TYPE_SPECS -static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type) { -#if PY_VERSION_HEX > 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - CYTHON_UNUSED_VAR(spec); - CYTHON_UNUSED_VAR(type); -#else - const PyType_Slot *slot = spec->slots; - while (slot && slot->slot && slot->slot != Py_tp_members) - slot++; - if (slot && slot->slot == Py_tp_members) { - int changed = 0; -#if !(PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON) - const -#endif - PyMemberDef *memb = (PyMemberDef*) slot->pfunc; - while (memb && memb->name) { - if (memb->name[0] == '_' && memb->name[1] == '_') { -#if PY_VERSION_HEX < 0x030900b1 - if (strcmp(memb->name, "__weaklistoffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); - type->tp_weaklistoffset = memb->offset; - changed = 1; - } - else if (strcmp(memb->name, "__dictoffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); - type->tp_dictoffset = memb->offset; - 
changed = 1; - } -#if CYTHON_METH_FASTCALL - else if (strcmp(memb->name, "__vectorcalloffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); -#if PY_VERSION_HEX >= 0x030800b4 - type->tp_vectorcall_offset = memb->offset; -#else - type->tp_print = (printfunc) memb->offset; -#endif - changed = 1; - } -#endif -#else - if ((0)); -#endif -#if PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON - else if (strcmp(memb->name, "__module__") == 0) { - PyObject *descr; - assert(memb->type == T_OBJECT); - assert(memb->flags == 0 || memb->flags == READONLY); - descr = PyDescr_NewMember(type, memb); - if (unlikely(!descr)) - return -1; - if (unlikely(PyDict_SetItem(type->tp_dict, PyDescr_NAME(descr), descr) < 0)) { - Py_DECREF(descr); - return -1; - } - Py_DECREF(descr); - changed = 1; - } -#endif - } - memb++; - } - if (changed) - PyType_Modified(type); - } -#endif - return 0; -} -#endif - -/* FetchSharedCythonModule */ -static PyObject *__Pyx_FetchSharedCythonABIModule(void) { - return __Pyx_PyImport_AddModuleRef((char*) __PYX_ABI_MODULE_NAME); -} - -/* FetchCommonType */ -static int __Pyx_VerifyCachedType(PyObject *cached_type, - const char *name, - Py_ssize_t basicsize, - Py_ssize_t expected_basicsize) { - if (!PyType_Check(cached_type)) { - PyErr_Format(PyExc_TypeError, - "Shared Cython type %.200s is not a type object", name); - return -1; - } - if (basicsize != expected_basicsize) { - PyErr_Format(PyExc_TypeError, - "Shared Cython type %.200s has the wrong size, try recompiling", - name); - return -1; - } - return 0; -} -#if !CYTHON_USE_TYPE_SPECS -static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) { - PyObject* abi_module; - const char* object_name; - PyTypeObject *cached_type = NULL; - abi_module = __Pyx_FetchSharedCythonABIModule(); - if (!abi_module) return NULL; - object_name = strrchr(type->tp_name, '.'); - object_name = object_name ? 
object_name+1 : type->tp_name; - cached_type = (PyTypeObject*) PyObject_GetAttrString(abi_module, object_name); - if (cached_type) { - if (__Pyx_VerifyCachedType( - (PyObject *)cached_type, - object_name, - cached_type->tp_basicsize, - type->tp_basicsize) < 0) { - goto bad; - } - goto done; - } - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; - PyErr_Clear(); - if (PyType_Ready(type) < 0) goto bad; - if (PyObject_SetAttrString(abi_module, object_name, (PyObject *)type) < 0) - goto bad; - Py_INCREF(type); - cached_type = type; -done: - Py_DECREF(abi_module); - return cached_type; -bad: - Py_XDECREF(cached_type); - cached_type = NULL; - goto done; -} -#else -static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) { - PyObject *abi_module, *cached_type = NULL; - const char* object_name = strrchr(spec->name, '.'); - object_name = object_name ? object_name+1 : spec->name; - abi_module = __Pyx_FetchSharedCythonABIModule(); - if (!abi_module) return NULL; - cached_type = PyObject_GetAttrString(abi_module, object_name); - if (cached_type) { - Py_ssize_t basicsize; -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject *py_basicsize; - py_basicsize = PyObject_GetAttrString(cached_type, "__basicsize__"); - if (unlikely(!py_basicsize)) goto bad; - basicsize = PyLong_AsSsize_t(py_basicsize); - Py_DECREF(py_basicsize); - py_basicsize = 0; - if (unlikely(basicsize == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; -#else - basicsize = likely(PyType_Check(cached_type)) ? 
((PyTypeObject*) cached_type)->tp_basicsize : -1; -#endif - if (__Pyx_VerifyCachedType( - cached_type, - object_name, - basicsize, - spec->basicsize) < 0) { - goto bad; - } - goto done; - } - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; - PyErr_Clear(); - CYTHON_UNUSED_VAR(module); - cached_type = __Pyx_PyType_FromModuleAndSpec(abi_module, spec, bases); - if (unlikely(!cached_type)) goto bad; - if (unlikely(__Pyx_fix_up_extension_type_from_spec(spec, (PyTypeObject *) cached_type) < 0)) goto bad; - if (PyObject_SetAttrString(abi_module, object_name, cached_type) < 0) goto bad; -done: - Py_DECREF(abi_module); - assert(cached_type == NULL || PyType_Check(cached_type)); - return (PyTypeObject *) cached_type; -bad: - Py_XDECREF(cached_type); - cached_type = NULL; - goto done; -} -#endif - -/* PyVectorcallFastCallDict */ -#if CYTHON_METH_FASTCALL -static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) -{ - PyObject *res = NULL; - PyObject *kwnames; - PyObject **newargs; - PyObject **kwvalues; - Py_ssize_t i, pos; - size_t j; - PyObject *key, *value; - unsigned long keys_are_strings; - Py_ssize_t nkw = PyDict_GET_SIZE(kw); - newargs = (PyObject **)PyMem_Malloc((nargs + (size_t)nkw) * sizeof(args[0])); - if (unlikely(newargs == NULL)) { - PyErr_NoMemory(); - return NULL; - } - for (j = 0; j < nargs; j++) newargs[j] = args[j]; - kwnames = PyTuple_New(nkw); - if (unlikely(kwnames == NULL)) { - PyMem_Free(newargs); - return NULL; - } - kwvalues = newargs + nargs; - pos = i = 0; - keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; - while (PyDict_Next(kw, &pos, &key, &value)) { - keys_are_strings &= Py_TYPE(key)->tp_flags; - Py_INCREF(key); - Py_INCREF(value); - PyTuple_SET_ITEM(kwnames, i, key); - kwvalues[i] = value; - i++; - } - if (unlikely(!keys_are_strings)) { - PyErr_SetString(PyExc_TypeError, "keywords must be strings"); - goto cleanup; - } - res = vc(func, newargs, 
nargs, kwnames); -cleanup: - Py_DECREF(kwnames); - for (i = 0; i < nkw; i++) - Py_DECREF(kwvalues[i]); - PyMem_Free(newargs); - return res; -} -static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) -{ - if (likely(kw == NULL) || PyDict_GET_SIZE(kw) == 0) { - return vc(func, args, nargs, NULL); - } - return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw); -} -#endif - -/* CythonFunctionShared */ -#if CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { - if (__Pyx_CyFunction_Check(func)) { - return PyCFunction_GetFunction(((__pyx_CyFunctionObject*)func)->func) == (PyCFunction) cfunc; - } else if (PyCFunction_Check(func)) { - return PyCFunction_GetFunction(func) == (PyCFunction) cfunc; - } - return 0; -} -#else -static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { - return __Pyx_CyOrPyCFunction_Check(func) && __Pyx_CyOrPyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; -} -#endif -static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj) { -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - __Pyx_Py_XDECREF_SET( - __Pyx_CyFunction_GetClassObj(f), - ((classobj) ? __Pyx_NewRef(classobj) : NULL)); -#else - __Pyx_Py_XDECREF_SET( - ((PyCMethodObject *) (f))->mm_class, - (PyTypeObject*)((classobj) ? 
__Pyx_NewRef(classobj) : NULL)); -#endif -} -static PyObject * -__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, void *closure) -{ - CYTHON_UNUSED_VAR(closure); - if (unlikely(op->func_doc == NULL)) { -#if CYTHON_COMPILING_IN_LIMITED_API - op->func_doc = PyObject_GetAttrString(op->func, "__doc__"); - if (unlikely(!op->func_doc)) return NULL; -#else - if (((PyCFunctionObject*)op)->m_ml->ml_doc) { -#if PY_MAJOR_VERSION >= 3 - op->func_doc = PyUnicode_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); -#else - op->func_doc = PyString_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); -#endif - if (unlikely(op->func_doc == NULL)) - return NULL; - } else { - Py_INCREF(Py_None); - return Py_None; - } -#endif - } - Py_INCREF(op->func_doc); - return op->func_doc; -} -static int -__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (value == NULL) { - value = Py_None; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_doc, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(op->func_name == NULL)) { -#if CYTHON_COMPILING_IN_LIMITED_API - op->func_name = PyObject_GetAttrString(op->func, "__name__"); -#elif PY_MAJOR_VERSION >= 3 - op->func_name = PyUnicode_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); -#else - op->func_name = PyString_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); -#endif - if (unlikely(op->func_name == NULL)) - return NULL; - } - Py_INCREF(op->func_name); - return op->func_name; -} -static int -__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__name__ must be set to a string object"); 
- return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_name, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - Py_INCREF(op->func_qualname); - return op->func_qualname; -} -static int -__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__qualname__ must be set to a string object"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_qualname, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(op->func_dict == NULL)) { - op->func_dict = PyDict_New(); - if (unlikely(op->func_dict == NULL)) - return NULL; - } - Py_INCREF(op->func_dict); - return op->func_dict; -} -static int -__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(value == NULL)) { - PyErr_SetString(PyExc_TypeError, - "function's dictionary may not be deleted"); - return -1; - } - if (unlikely(!PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "setting function's dictionary to a non-dict"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_dict, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - Py_INCREF(op->func_globals); - return op->func_globals; -} -static PyObject * -__Pyx_CyFunction_get_closure(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(op); - CYTHON_UNUSED_VAR(context); - Py_INCREF(Py_None); - return Py_None; -} -static PyObject * 
-__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, void *context) -{ - PyObject* result = (op->func_code) ? op->func_code : Py_None; - CYTHON_UNUSED_VAR(context); - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) { - int result = 0; - PyObject *res = op->defaults_getter((PyObject *) op); - if (unlikely(!res)) - return -1; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - op->defaults_tuple = PyTuple_GET_ITEM(res, 0); - Py_INCREF(op->defaults_tuple); - op->defaults_kwdict = PyTuple_GET_ITEM(res, 1); - Py_INCREF(op->defaults_kwdict); - #else - op->defaults_tuple = __Pyx_PySequence_ITEM(res, 0); - if (unlikely(!op->defaults_tuple)) result = -1; - else { - op->defaults_kwdict = __Pyx_PySequence_ITEM(res, 1); - if (unlikely(!op->defaults_kwdict)) result = -1; - } - #endif - Py_DECREF(res); - return result; -} -static int -__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - if (!value) { - value = Py_None; - } else if (unlikely(value != Py_None && !PyTuple_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__defaults__ must be set to a tuple object"); - return -1; - } - PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__defaults__ will not " - "currently affect the values used in function calls", 1); - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->defaults_tuple, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->defaults_tuple; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - if (op->defaults_getter) { - if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; - result = op->defaults_tuple; - } else { - result = Py_None; - } - } - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - 
CYTHON_UNUSED_VAR(context); - if (!value) { - value = Py_None; - } else if (unlikely(value != Py_None && !PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__kwdefaults__ must be set to a dict object"); - return -1; - } - PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__kwdefaults__ will not " - "currently affect the values used in function calls", 1); - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->defaults_kwdict, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->defaults_kwdict; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - if (op->defaults_getter) { - if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; - result = op->defaults_kwdict; - } else { - result = Py_None; - } - } - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - if (!value || value == Py_None) { - value = NULL; - } else if (unlikely(!PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__annotations__ must be set to a dict object"); - return -1; - } - Py_XINCREF(value); - __Pyx_Py_XDECREF_SET(op->func_annotations, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->func_annotations; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - result = PyDict_New(); - if (unlikely(!result)) return NULL; - op->func_annotations = result; - } - Py_INCREF(result); - return result; -} -static PyObject * -__Pyx_CyFunction_get_is_coroutine(__pyx_CyFunctionObject *op, void *context) { - int is_coroutine; - CYTHON_UNUSED_VAR(context); - if (op->func_is_coroutine) { - return __Pyx_NewRef(op->func_is_coroutine); - } - is_coroutine = op->flags & __Pyx_CYFUNCTION_COROUTINE; -#if PY_VERSION_HEX >= 0x03050000 - if (is_coroutine) { - PyObject 
*module, *fromlist, *marker = __pyx_n_s_is_coroutine; - fromlist = PyList_New(1); - if (unlikely(!fromlist)) return NULL; - Py_INCREF(marker); -#if CYTHON_ASSUME_SAFE_MACROS - PyList_SET_ITEM(fromlist, 0, marker); -#else - if (unlikely(PyList_SetItem(fromlist, 0, marker) < 0)) { - Py_DECREF(marker); - Py_DECREF(fromlist); - return NULL; - } -#endif - module = PyImport_ImportModuleLevelObject(__pyx_n_s_asyncio_coroutines, NULL, NULL, fromlist, 0); - Py_DECREF(fromlist); - if (unlikely(!module)) goto ignore; - op->func_is_coroutine = __Pyx_PyObject_GetAttrStr(module, marker); - Py_DECREF(module); - if (likely(op->func_is_coroutine)) { - return __Pyx_NewRef(op->func_is_coroutine); - } -ignore: - PyErr_Clear(); - } -#endif - op->func_is_coroutine = __Pyx_PyBool_FromLong(is_coroutine); - return __Pyx_NewRef(op->func_is_coroutine); -} -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject * -__Pyx_CyFunction_get_module(__pyx_CyFunctionObject *op, void *context) { - CYTHON_UNUSED_VAR(context); - return PyObject_GetAttrString(op->func, "__module__"); -} -static int -__Pyx_CyFunction_set_module(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - return PyObject_SetAttrString(op->func, "__module__", value); -} -#endif -static PyGetSetDef __pyx_CyFunction_getsets[] = { - {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, - {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, - {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, - {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, - {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0}, - {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, - {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, 
(setter)__Pyx_CyFunction_set_dict, 0, 0}, - {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, - {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, - {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, - {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, - {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, - {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, - {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, - {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, - {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0}, - {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0}, - {(char *) "_is_coroutine", (getter)__Pyx_CyFunction_get_is_coroutine, 0, 0, 0}, -#if CYTHON_COMPILING_IN_LIMITED_API - {"__module__", (getter)__Pyx_CyFunction_get_module, (setter)__Pyx_CyFunction_set_module, 0, 0}, -#endif - {0, 0, 0, 0, 0} -}; -static PyMemberDef __pyx_CyFunction_members[] = { -#if !CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0}, -#endif -#if CYTHON_USE_TYPE_SPECS - {(char *) "__dictoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_dict), READONLY, 0}, -#if CYTHON_METH_FASTCALL -#if CYTHON_BACKPORT_VECTORCALL - {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_vectorcall), READONLY, 0}, -#else -#if !CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0}, -#endif -#endif -#endif -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, 
func_weakreflist), READONLY, 0}, -#else - {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0}, -#endif -#endif - {0, 0, 0, 0, 0} -}; -static PyObject * -__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, PyObject *args) -{ - CYTHON_UNUSED_VAR(args); -#if PY_MAJOR_VERSION >= 3 - Py_INCREF(m->func_qualname); - return m->func_qualname; -#else - return PyString_FromString(((PyCFunctionObject*)m)->m_ml->ml_name); -#endif -} -static PyMethodDef __pyx_CyFunction_methods[] = { - {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0}, - {0, 0, 0, 0} -}; -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API -#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist) -#else -#define __Pyx_CyFunction_weakreflist(cyfunc) (((PyCFunctionObject*)cyfunc)->m_weakreflist) -#endif -static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname, - PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { -#if !CYTHON_COMPILING_IN_LIMITED_API - PyCFunctionObject *cf = (PyCFunctionObject*) op; -#endif - if (unlikely(op == NULL)) - return NULL; -#if CYTHON_COMPILING_IN_LIMITED_API - op->func = PyCFunction_NewEx(ml, (PyObject*)op, module); - if (unlikely(!op->func)) return NULL; -#endif - op->flags = flags; - __Pyx_CyFunction_weakreflist(op) = NULL; -#if !CYTHON_COMPILING_IN_LIMITED_API - cf->m_ml = ml; - cf->m_self = (PyObject *) op; -#endif - Py_XINCREF(closure); - op->func_closure = closure; -#if !CYTHON_COMPILING_IN_LIMITED_API - Py_XINCREF(module); - cf->m_module = module; -#endif - op->func_dict = NULL; - op->func_name = NULL; - Py_INCREF(qualname); - op->func_qualname = qualname; - op->func_doc = NULL; -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - op->func_classobj = NULL; -#else - ((PyCMethodObject*)op)->mm_class = NULL; -#endif - op->func_globals = globals; - Py_INCREF(op->func_globals); - 
Py_XINCREF(code); - op->func_code = code; - op->defaults_pyobjects = 0; - op->defaults_size = 0; - op->defaults = NULL; - op->defaults_tuple = NULL; - op->defaults_kwdict = NULL; - op->defaults_getter = NULL; - op->func_annotations = NULL; - op->func_is_coroutine = NULL; -#if CYTHON_METH_FASTCALL - switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { - case METH_NOARGS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS; - break; - case METH_O: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O; - break; - case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD; - break; - case METH_FASTCALL | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS; - break; - case METH_VARARGS | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = NULL; - break; - default: - PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); - Py_DECREF(op); - return NULL; - } -#endif - return (PyObject *) op; -} -static int -__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m) -{ - Py_CLEAR(m->func_closure); -#if CYTHON_COMPILING_IN_LIMITED_API - Py_CLEAR(m->func); -#else - Py_CLEAR(((PyCFunctionObject*)m)->m_module); -#endif - Py_CLEAR(m->func_dict); - Py_CLEAR(m->func_name); - Py_CLEAR(m->func_qualname); - Py_CLEAR(m->func_doc); - Py_CLEAR(m->func_globals); - Py_CLEAR(m->func_code); -#if !CYTHON_COMPILING_IN_LIMITED_API -#if PY_VERSION_HEX < 0x030900B1 - Py_CLEAR(__Pyx_CyFunction_GetClassObj(m)); -#else - { - PyObject *cls = (PyObject*) ((PyCMethodObject *) (m))->mm_class; - ((PyCMethodObject *) (m))->mm_class = NULL; - Py_XDECREF(cls); - } -#endif -#endif - Py_CLEAR(m->defaults_tuple); - Py_CLEAR(m->defaults_kwdict); - Py_CLEAR(m->func_annotations); - Py_CLEAR(m->func_is_coroutine); - if (m->defaults) { - PyObject **pydefaults = 
__Pyx_CyFunction_Defaults(PyObject *, m); - int i; - for (i = 0; i < m->defaults_pyobjects; i++) - Py_XDECREF(pydefaults[i]); - PyObject_Free(m->defaults); - m->defaults = NULL; - } - return 0; -} -static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m) -{ - if (__Pyx_CyFunction_weakreflist(m) != NULL) - PyObject_ClearWeakRefs((PyObject *) m); - __Pyx_CyFunction_clear(m); - __Pyx_PyHeapTypeObject_GC_Del(m); -} -static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m) -{ - PyObject_GC_UnTrack(m); - __Pyx__CyFunction_dealloc(m); -} -static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg) -{ - Py_VISIT(m->func_closure); -#if CYTHON_COMPILING_IN_LIMITED_API - Py_VISIT(m->func); -#else - Py_VISIT(((PyCFunctionObject*)m)->m_module); -#endif - Py_VISIT(m->func_dict); - Py_VISIT(m->func_name); - Py_VISIT(m->func_qualname); - Py_VISIT(m->func_doc); - Py_VISIT(m->func_globals); - Py_VISIT(m->func_code); -#if !CYTHON_COMPILING_IN_LIMITED_API - Py_VISIT(__Pyx_CyFunction_GetClassObj(m)); -#endif - Py_VISIT(m->defaults_tuple); - Py_VISIT(m->defaults_kwdict); - Py_VISIT(m->func_is_coroutine); - if (m->defaults) { - PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); - int i; - for (i = 0; i < m->defaults_pyobjects; i++) - Py_VISIT(pydefaults[i]); - } - return 0; -} -static PyObject* -__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op) -{ -#if PY_MAJOR_VERSION >= 3 - return PyUnicode_FromFormat("", - op->func_qualname, (void *)op); -#else - return PyString_FromFormat("", - PyString_AsString(op->func_qualname), (void *)op); -#endif -} -static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) { -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject *f = ((__pyx_CyFunctionObject*)func)->func; - PyObject *py_name = NULL; - PyCFunction meth; - int flags; - meth = PyCFunction_GetFunction(f); - if (unlikely(!meth)) return NULL; - flags = PyCFunction_GetFlags(f); - if (unlikely(flags 
< 0)) return NULL; -#else - PyCFunctionObject* f = (PyCFunctionObject*)func; - PyCFunction meth = f->m_ml->ml_meth; - int flags = f->m_ml->ml_flags; -#endif - Py_ssize_t size; - switch (flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) { - case METH_VARARGS: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) - return (*meth)(self, arg); - break; - case METH_VARARGS | METH_KEYWORDS: - return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw); - case METH_NOARGS: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) { -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(arg); -#else - size = PyTuple_Size(arg); - if (unlikely(size < 0)) return NULL; -#endif - if (likely(size == 0)) - return (*meth)(self, NULL); -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, - "%.200S() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - py_name, size); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, - "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - f->m_ml->ml_name, size); -#endif - return NULL; - } - break; - case METH_O: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) { -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(arg); -#else - size = PyTuple_Size(arg); - if (unlikely(size < 0)) return NULL; -#endif - if (likely(size == 1)) { - PyObject *result, *arg0; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - arg0 = PyTuple_GET_ITEM(arg, 0); - #else - arg0 = __Pyx_PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL; - #endif - result = (*meth)(self, arg0); - #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) - Py_DECREF(arg0); - #endif - return result; - } -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, - "%.200S() takes exactly one 
argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - py_name, size); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, - "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - f->m_ml->ml_name, size); -#endif - return NULL; - } - break; - default: - PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); - return NULL; - } -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, "%.200S() takes no keyword arguments", - py_name); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", - f->m_ml->ml_name); -#endif - return NULL; -} -static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) { - PyObject *self, *result; -#if CYTHON_COMPILING_IN_LIMITED_API - self = PyCFunction_GetSelf(((__pyx_CyFunctionObject*)func)->func); - if (unlikely(!self) && PyErr_Occurred()) return NULL; -#else - self = ((PyCFunctionObject*)func)->m_self; -#endif - result = __Pyx_CyFunction_CallMethod(func, self, arg, kw); - return result; -} -static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) { - PyObject *result; - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func; -#if CYTHON_METH_FASTCALL - __pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc); - if (vc) { -#if CYTHON_ASSUME_SAFE_MACROS - return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), (size_t)PyTuple_GET_SIZE(args), kw); -#else - (void) &__Pyx_PyVectorcall_FastCallDict; - return PyVectorcall_Call(func, args, kw); -#endif - } -#endif - if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { - Py_ssize_t argc; - PyObject *new_args; - PyObject *self; -#if CYTHON_ASSUME_SAFE_MACROS - argc = PyTuple_GET_SIZE(args); -#else - argc = PyTuple_Size(args); - if 
(unlikely(!argc) < 0) return NULL; -#endif - new_args = PyTuple_GetSlice(args, 1, argc); - if (unlikely(!new_args)) - return NULL; - self = PyTuple_GetItem(args, 0); - if (unlikely(!self)) { - Py_DECREF(new_args); -#if PY_MAJOR_VERSION > 2 - PyErr_Format(PyExc_TypeError, - "unbound method %.200S() needs an argument", - cyfunc->func_qualname); -#else - PyErr_SetString(PyExc_TypeError, - "unbound method needs an argument"); -#endif - return NULL; - } - result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw); - Py_DECREF(new_args); - } else { - result = __Pyx_CyFunction_Call(func, args, kw); - } - return result; -} -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames) -{ - int ret = 0; - if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { - if (unlikely(nargs < 1)) { - PyErr_Format(PyExc_TypeError, "%.200s() needs an argument", - ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); - return -1; - } - ret = 1; - } - if (unlikely(kwnames) && unlikely(PyTuple_GET_SIZE(kwnames))) { - PyErr_Format(PyExc_TypeError, - "%.200s() takes no keyword arguments", ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); - return -1; - } - return ret; -} -static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - if (unlikely(nargs != 0)) { - 
PyErr_Format(PyExc_TypeError, - "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - def->ml_name, nargs); - return NULL; - } - return def->ml_meth(self, NULL); -} -static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - if (unlikely(nargs != 1)) { - PyErr_Format(PyExc_TypeError, - "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - def->ml_name, nargs); - return NULL; - } - return def->ml_meth(self, args[0]); -} -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - return ((__Pyx_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames); -} -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = 
(__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; - PyTypeObject *cls = (PyTypeObject *) __Pyx_CyFunction_GetClassObj(cyfunc); -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - return ((__Pyx_PyCMethod)(void(*)(void))def->ml_meth)(self, cls, args, (size_t)nargs, kwnames); -} -#endif -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_CyFunctionType_slots[] = { - {Py_tp_dealloc, (void *)__Pyx_CyFunction_dealloc}, - {Py_tp_repr, (void *)__Pyx_CyFunction_repr}, - {Py_tp_call, (void *)__Pyx_CyFunction_CallAsMethod}, - {Py_tp_traverse, (void *)__Pyx_CyFunction_traverse}, - {Py_tp_clear, (void *)__Pyx_CyFunction_clear}, - {Py_tp_methods, (void *)__pyx_CyFunction_methods}, - {Py_tp_members, (void *)__pyx_CyFunction_members}, - {Py_tp_getset, (void *)__pyx_CyFunction_getsets}, - {Py_tp_descr_get, (void *)__Pyx_PyMethod_New}, - {0, 0}, -}; -static PyType_Spec __pyx_CyFunctionType_spec = { - __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", - sizeof(__pyx_CyFunctionObject), - 0, -#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR - Py_TPFLAGS_METHOD_DESCRIPTOR | -#endif -#if (defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL) - _Py_TPFLAGS_HAVE_VECTORCALL | -#endif - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, - __pyx_CyFunctionType_slots -}; -#else -static PyTypeObject __pyx_CyFunctionType_type = { - PyVarObject_HEAD_INIT(0, 0) - __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", - sizeof(__pyx_CyFunctionObject), - 0, - (destructor) __Pyx_CyFunction_dealloc, -#if !CYTHON_METH_FASTCALL - 0, -#elif CYTHON_BACKPORT_VECTORCALL - (printfunc)offsetof(__pyx_CyFunctionObject, 
func_vectorcall), -#else - offsetof(PyCFunctionObject, vectorcall), -#endif - 0, - 0, -#if PY_MAJOR_VERSION < 3 - 0, -#else - 0, -#endif - (reprfunc) __Pyx_CyFunction_repr, - 0, - 0, - 0, - 0, - __Pyx_CyFunction_CallAsMethod, - 0, - 0, - 0, - 0, -#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR - Py_TPFLAGS_METHOD_DESCRIPTOR | -#endif -#if defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL - _Py_TPFLAGS_HAVE_VECTORCALL | -#endif - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, - 0, - (traverseproc) __Pyx_CyFunction_traverse, - (inquiry) __Pyx_CyFunction_clear, - 0, -#if PY_VERSION_HEX < 0x030500A0 - offsetof(__pyx_CyFunctionObject, func_weakreflist), -#else - offsetof(PyCFunctionObject, m_weakreflist), -#endif - 0, - 0, - __pyx_CyFunction_methods, - __pyx_CyFunction_members, - __pyx_CyFunction_getsets, - 0, - 0, - __Pyx_PyMethod_New, - 0, - offsetof(__pyx_CyFunctionObject, func_dict), - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -#if PY_VERSION_HEX >= 0x030400a1 - 0, -#endif -#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, -#endif -#if __PYX_NEED_TP_PRINT_SLOT - 0, -#endif -#if PY_VERSION_HEX >= 0x030C0000 - 0, -#endif -#if PY_VERSION_HEX >= 0x030d00A4 - 0, -#endif -#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, -#endif -}; -#endif -static int __pyx_CyFunction_init(PyObject *module) { -#if CYTHON_USE_TYPE_SPECS - __pyx_CyFunctionType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_CyFunctionType_spec, NULL); -#else - CYTHON_UNUSED_VAR(module); - __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type); -#endif - if (unlikely(__pyx_CyFunctionType == NULL)) { - return -1; - } - return 0; -} -static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults = PyObject_Malloc(size); - if (unlikely(!m->defaults)) 
- return PyErr_NoMemory(); - memset(m->defaults, 0, size); - m->defaults_pyobjects = pyobjects; - m->defaults_size = size; - return m->defaults; -} -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults_tuple = tuple; - Py_INCREF(tuple); -} -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults_kwdict = dict; - Py_INCREF(dict); -} -static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->func_annotations = dict; - Py_INCREF(dict); -} - -/* CythonFunction */ -static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname, - PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { - PyObject *op = __Pyx_CyFunction_Init( - PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType), - ml, flags, qualname, closure, module, globals, code - ); - if (likely(op)) { - PyObject_GC_Track(op); - } - return op; -} - -/* GetTopmostException */ -#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE -static _PyErr_StackItem * -__Pyx_PyErr_GetTopmostException(PyThreadState *tstate) -{ - _PyErr_StackItem *exc_info = tstate->exc_info; - while ((exc_info->exc_value == NULL || exc_info->exc_value == Py_None) && - exc_info->previous_item != NULL) - { - exc_info = exc_info->previous_item; - } - return exc_info; -} -#endif - -/* SaveResetException */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { - #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 - _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); - PyObject *exc_value = exc_info->exc_value; - if (exc_value == NULL || 
exc_value == Py_None) { - *value = NULL; - *type = NULL; - *tb = NULL; - } else { - *value = exc_value; - Py_INCREF(*value); - *type = (PyObject*) Py_TYPE(exc_value); - Py_INCREF(*type); - *tb = PyException_GetTraceback(exc_value); - } - #elif CYTHON_USE_EXC_INFO_STACK - _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); - *type = exc_info->exc_type; - *value = exc_info->exc_value; - *tb = exc_info->exc_traceback; - Py_XINCREF(*type); - Py_XINCREF(*value); - Py_XINCREF(*tb); - #else - *type = tstate->exc_type; - *value = tstate->exc_value; - *tb = tstate->exc_traceback; - Py_XINCREF(*type); - Py_XINCREF(*value); - Py_XINCREF(*tb); - #endif -} -static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { - #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 - _PyErr_StackItem *exc_info = tstate->exc_info; - PyObject *tmp_value = exc_info->exc_value; - exc_info->exc_value = value; - Py_XDECREF(tmp_value); - Py_XDECREF(type); - Py_XDECREF(tb); - #else - PyObject *tmp_type, *tmp_value, *tmp_tb; - #if CYTHON_USE_EXC_INFO_STACK - _PyErr_StackItem *exc_info = tstate->exc_info; - tmp_type = exc_info->exc_type; - tmp_value = exc_info->exc_value; - tmp_tb = exc_info->exc_traceback; - exc_info->exc_type = type; - exc_info->exc_value = value; - exc_info->exc_traceback = tb; - #else - tmp_type = tstate->exc_type; - tmp_value = tstate->exc_value; - tmp_tb = tstate->exc_traceback; - tstate->exc_type = type; - tstate->exc_value = value; - tstate->exc_traceback = tb; - #endif - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); - #endif -} -#endif - -/* FastTypeChecks */ -#if CYTHON_COMPILING_IN_CPYTHON -static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { - while (a) { - a = __Pyx_PyType_GetSlot(a, tp_base, PyTypeObject*); - if (a == b) - return 1; - } - return b == &PyBaseObject_Type; -} -static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { - 
PyObject *mro; - if (a == b) return 1; - mro = a->tp_mro; - if (likely(mro)) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(mro); - for (i = 0; i < n; i++) { - if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) - return 1; - } - return 0; - } - return __Pyx_InBases(a, b); -} -static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b) { - PyObject *mro; - if (cls == a || cls == b) return 1; - mro = cls->tp_mro; - if (likely(mro)) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(mro); - for (i = 0; i < n; i++) { - PyObject *base = PyTuple_GET_ITEM(mro, i); - if (base == (PyObject *)a || base == (PyObject *)b) - return 1; - } - return 0; - } - return __Pyx_InBases(cls, a) || __Pyx_InBases(cls, b); -} -#if PY_MAJOR_VERSION == 2 -static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { - PyObject *exception, *value, *tb; - int res; - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ErrFetch(&exception, &value, &tb); - res = exc_type1 ? 
PyObject_IsSubclass(err, exc_type1) : 0; - if (unlikely(res == -1)) { - PyErr_WriteUnraisable(err); - res = 0; - } - if (!res) { - res = PyObject_IsSubclass(err, exc_type2); - if (unlikely(res == -1)) { - PyErr_WriteUnraisable(err); - res = 0; - } - } - __Pyx_ErrRestore(exception, value, tb); - return res; -} -#else -static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { - if (exc_type1) { - return __Pyx_IsAnySubtype2((PyTypeObject*)err, (PyTypeObject*)exc_type1, (PyTypeObject*)exc_type2); - } else { - return __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); - } -} -#endif -static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { - Py_ssize_t i, n; - assert(PyExceptionClass_Check(exc_type)); - n = PyTuple_GET_SIZE(tuple); -#if PY_MAJOR_VERSION >= 3 - for (i=0; i= 0x030C00A6 - PyException_SetTraceback(value, tb); - #elif CYTHON_FAST_THREAD_STATE - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject* tmp_tb = tstate->curexc_traceback; - if (tb != tmp_tb) { - Py_INCREF(tb); - tstate->curexc_traceback = tb; - Py_XDECREF(tmp_tb); - } -#else - PyObject *tmp_type, *tmp_value, *tmp_tb; - PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); - Py_INCREF(tb); - PyErr_Restore(tmp_type, tmp_value, tb); - Py_XDECREF(tmp_tb); -#endif - } -bad: - Py_XDECREF(owned_instance); - return; -} -#endif - -/* PyObjectCall */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { - PyObject *result; - ternaryfunc call = Py_TYPE(func)->tp_call; - if (unlikely(!call)) - return PyObject_Call(func, arg, kw); - #if PY_MAJOR_VERSION < 3 - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - #else - if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) - return NULL; - #endif - result = (*call)(func, arg, kw); - Py_LeaveRecursiveCall(); - if 
(unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -/* UnpackUnboundCMethod */ -static PyObject *__Pyx_SelflessCall(PyObject *method, PyObject *args, PyObject *kwargs) { - PyObject *result; - PyObject *selfless_args = PyTuple_GetSlice(args, 1, PyTuple_Size(args)); - if (unlikely(!selfless_args)) return NULL; - result = PyObject_Call(method, selfless_args, kwargs); - Py_DECREF(selfless_args); - return result; -} -static PyMethodDef __Pyx_UnboundCMethod_Def = { - "CythonUnboundCMethod", - __PYX_REINTERPRET_FUNCION(PyCFunction, __Pyx_SelflessCall), - METH_VARARGS | METH_KEYWORDS, - NULL -}; -static int __Pyx_TryUnpackUnboundCMethod(__Pyx_CachedCFunction* target) { - PyObject *method; - method = __Pyx_PyObject_GetAttrStr(target->type, *target->method_name); - if (unlikely(!method)) - return -1; - target->method = method; -#if CYTHON_COMPILING_IN_CPYTHON - #if PY_MAJOR_VERSION >= 3 - if (likely(__Pyx_TypeCheck(method, &PyMethodDescr_Type))) - #else - if (likely(!__Pyx_CyOrPyCFunction_Check(method))) - #endif - { - PyMethodDescrObject *descr = (PyMethodDescrObject*) method; - target->func = descr->d_method->ml_meth; - target->flag = descr->d_method->ml_flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_STACKLESS); - } else -#endif -#if CYTHON_COMPILING_IN_PYPY -#else - if (PyCFunction_Check(method)) -#endif - { - PyObject *self; - int self_found; -#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY - self = PyObject_GetAttrString(method, "__self__"); - if (!self) { - PyErr_Clear(); - } -#else - self = PyCFunction_GET_SELF(method); -#endif - self_found = (self && self != Py_None); -#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY - Py_XDECREF(self); -#endif - if (self_found) { - PyObject *unbound_method = PyCFunction_New(&__Pyx_UnboundCMethod_Def, method); - if (unlikely(!unbound_method)) return -1; - 
Py_DECREF(method); - target->method = unbound_method; - } - } - return 0; -} - -/* CallUnboundCMethod1 */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg) { - if (likely(cfunc->func)) { - int flag = cfunc->flag; - if (flag == METH_O) { - return (*(cfunc->func))(self, arg); - } else if ((PY_VERSION_HEX >= 0x030600B1) && flag == METH_FASTCALL) { - #if PY_VERSION_HEX >= 0x030700A0 - return (*(__Pyx_PyCFunctionFast)(void*)(PyCFunction)cfunc->func)(self, &arg, 1); - #else - return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL); - #endif - } else if ((PY_VERSION_HEX >= 0x030700A0) && flag == (METH_FASTCALL | METH_KEYWORDS)) { - return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL); - } - } - return __Pyx__CallUnboundCMethod1(cfunc, self, arg); -} -#endif -static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg){ - PyObject *args, *result = NULL; - if (unlikely(!cfunc->func && !cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL; -#if CYTHON_COMPILING_IN_CPYTHON - if (cfunc->func && (cfunc->flag & METH_VARARGS)) { - args = PyTuple_New(1); - if (unlikely(!args)) goto bad; - Py_INCREF(arg); - PyTuple_SET_ITEM(args, 0, arg); - if (cfunc->flag & METH_KEYWORDS) - result = (*(PyCFunctionWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, NULL); - else - result = (*cfunc->func)(self, args); - } else { - args = PyTuple_New(2); - if (unlikely(!args)) goto bad; - Py_INCREF(self); - PyTuple_SET_ITEM(args, 0, self); - Py_INCREF(arg); - PyTuple_SET_ITEM(args, 1, arg); - result = __Pyx_PyObject_Call(cfunc->method, args, NULL); - } -#else - args = PyTuple_Pack(2, self, arg); - if (unlikely(!args)) goto bad; - result = __Pyx_PyObject_Call(cfunc->method, args, NULL); -#endif -bad: - Py_XDECREF(args); - return result; 
-} - -/* RaiseUnexpectedTypeError */ -static int -__Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj) -{ - __Pyx_TypeName obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, "Expected %s, got " __Pyx_FMT_TYPENAME, - expected, obj_type_name); - __Pyx_DECREF_TypeName(obj_type_name); - return 0; -} - -/* decode_c_bytes */ -static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes( - const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop, - const char* encoding, const char* errors, - PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) { - if (unlikely((start < 0) | (stop < 0))) { - if (start < 0) { - start += length; - if (start < 0) - start = 0; - } - if (stop < 0) - stop += length; - } - if (stop > length) - stop = length; - if (unlikely(stop <= start)) - return __Pyx_NewRef(__pyx_empty_unicode); - length = stop - start; - cstring += start; - if (decode_func) { - return decode_func(cstring, length, errors); - } else { - return PyUnicode_Decode(cstring, length, encoding, errors); - } -} - -/* ArgTypeTest */ -static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) -{ - __Pyx_TypeName type_name; - __Pyx_TypeName obj_type_name; - if (unlikely(!type)) { - PyErr_SetString(PyExc_SystemError, "Missing type object"); - return 0; - } - else if (exact) { - #if PY_MAJOR_VERSION == 2 - if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; - #endif - } - else { - if (likely(__Pyx_TypeCheck(obj, type))) return 1; - } - type_name = __Pyx_PyType_GetName(type); - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "Argument '%.200s' has incorrect type (expected " __Pyx_FMT_TYPENAME - ", got " __Pyx_FMT_TYPENAME ")", name, type_name, obj_type_name); - __Pyx_DECREF_TypeName(type_name); - __Pyx_DECREF_TypeName(obj_type_name); - return 0; -} - -/* PyFunctionFastCall */ -#if CYTHON_FAST_PYCALL && 
!CYTHON_VECTORCALL -static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, - PyObject *globals) { - PyFrameObject *f; - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject **fastlocals; - Py_ssize_t i; - PyObject *result; - assert(globals != NULL); - /* XXX Perhaps we should create a specialized - PyFrame_New() that doesn't take locals, but does - take builtins without sanity checking them. - */ - assert(tstate != NULL); - f = PyFrame_New(tstate, co, globals, NULL); - if (f == NULL) { - return NULL; - } - fastlocals = __Pyx_PyFrame_GetLocalsplus(f); - for (i = 0; i < na; i++) { - Py_INCREF(*args); - fastlocals[i] = *args++; - } - result = PyEval_EvalFrameEx(f,0); - ++tstate->recursion_depth; - Py_DECREF(f); - --tstate->recursion_depth; - return result; -} -static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { - PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); - PyObject *globals = PyFunction_GET_GLOBALS(func); - PyObject *argdefs = PyFunction_GET_DEFAULTS(func); - PyObject *closure; -#if PY_MAJOR_VERSION >= 3 - PyObject *kwdefs; -#endif - PyObject *kwtuple, **k; - PyObject **d; - Py_ssize_t nd; - Py_ssize_t nk; - PyObject *result; - assert(kwargs == NULL || PyDict_Check(kwargs)); - nk = kwargs ? 
PyDict_Size(kwargs) : 0; - #if PY_MAJOR_VERSION < 3 - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) { - return NULL; - } - #else - if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) { - return NULL; - } - #endif - if ( -#if PY_MAJOR_VERSION >= 3 - co->co_kwonlyargcount == 0 && -#endif - likely(kwargs == NULL || nk == 0) && - co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { - if (argdefs == NULL && co->co_argcount == nargs) { - result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); - goto done; - } - else if (nargs == 0 && argdefs != NULL - && co->co_argcount == Py_SIZE(argdefs)) { - /* function called with no arguments, but all parameters have - a default value: use default values as arguments .*/ - args = &PyTuple_GET_ITEM(argdefs, 0); - result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); - goto done; - } - } - if (kwargs != NULL) { - Py_ssize_t pos, i; - kwtuple = PyTuple_New(2 * nk); - if (kwtuple == NULL) { - result = NULL; - goto done; - } - k = &PyTuple_GET_ITEM(kwtuple, 0); - pos = i = 0; - while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { - Py_INCREF(k[i]); - Py_INCREF(k[i+1]); - i += 2; - } - nk = i / 2; - } - else { - kwtuple = NULL; - k = NULL; - } - closure = PyFunction_GET_CLOSURE(func); -#if PY_MAJOR_VERSION >= 3 - kwdefs = PyFunction_GET_KW_DEFAULTS(func); -#endif - if (argdefs != NULL) { - d = &PyTuple_GET_ITEM(argdefs, 0); - nd = Py_SIZE(argdefs); - } - else { - d = NULL; - nd = 0; - } -#if PY_MAJOR_VERSION >= 3 - result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, - args, (int)nargs, - k, (int)nk, - d, (int)nd, kwdefs, closure); -#else - result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, - args, (int)nargs, - k, (int)nk, - d, (int)nd, closure); -#endif - Py_XDECREF(kwtuple); -done: - Py_LeaveRecursiveCall(); - return result; -} -#endif - -/* PyObjectCallMethO */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE 
PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { - PyObject *self, *result; - PyCFunction cfunc; - cfunc = __Pyx_CyOrPyCFunction_GET_FUNCTION(func); - self = __Pyx_CyOrPyCFunction_GET_SELF(func); - #if PY_MAJOR_VERSION < 3 - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - #else - if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) - return NULL; - #endif - result = cfunc(self, arg); - Py_LeaveRecursiveCall(); - if (unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -/* PyObjectFastCall */ -#if PY_VERSION_HEX < 0x03090000 || CYTHON_COMPILING_IN_LIMITED_API -static PyObject* __Pyx_PyObject_FastCall_fallback(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs) { - PyObject *argstuple; - PyObject *result = 0; - size_t i; - argstuple = PyTuple_New((Py_ssize_t)nargs); - if (unlikely(!argstuple)) return NULL; - for (i = 0; i < nargs; i++) { - Py_INCREF(args[i]); - if (__Pyx_PyTuple_SET_ITEM(argstuple, (Py_ssize_t)i, args[i]) < 0) goto bad; - } - result = __Pyx_PyObject_Call(func, argstuple, kwargs); - bad: - Py_DECREF(argstuple); - return result; -} -#endif -static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t _nargs, PyObject *kwargs) { - Py_ssize_t nargs = __Pyx_PyVectorcall_NARGS(_nargs); -#if CYTHON_COMPILING_IN_CPYTHON - if (nargs == 0 && kwargs == NULL) { - if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_NOARGS)) - return __Pyx_PyObject_CallMethO(func, NULL); - } - else if (nargs == 1 && kwargs == NULL) { - if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_O)) - return __Pyx_PyObject_CallMethO(func, args[0]); - } -#endif - #if PY_VERSION_HEX < 0x030800B1 - #if CYTHON_FAST_PYCCALL - if (PyCFunction_Check(func)) { - 
if (kwargs) { - return _PyCFunction_FastCallDict(func, args, nargs, kwargs); - } else { - return _PyCFunction_FastCallKeywords(func, args, nargs, NULL); - } - } - #if PY_VERSION_HEX >= 0x030700A1 - if (!kwargs && __Pyx_IS_TYPE(func, &PyMethodDescr_Type)) { - return _PyMethodDescr_FastCallKeywords(func, args, nargs, NULL); - } - #endif - #endif - #if CYTHON_FAST_PYCALL - if (PyFunction_Check(func)) { - return __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs); - } - #endif - #endif - if (kwargs == NULL) { - #if CYTHON_VECTORCALL - #if PY_VERSION_HEX < 0x03090000 - vectorcallfunc f = _PyVectorcall_Function(func); - #else - vectorcallfunc f = PyVectorcall_Function(func); - #endif - if (f) { - return f(func, args, (size_t)nargs, NULL); - } - #elif defined(__Pyx_CyFunction_USED) && CYTHON_BACKPORT_VECTORCALL - if (__Pyx_CyFunction_CheckExact(func)) { - __pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func); - if (f) return f(func, args, (size_t)nargs, NULL); - } - #endif - } - if (nargs == 0) { - return __Pyx_PyObject_Call(func, __pyx_empty_tuple, kwargs); - } - #if PY_VERSION_HEX >= 0x03090000 && !CYTHON_COMPILING_IN_LIMITED_API - return PyObject_VectorcallDict(func, args, (size_t)nargs, kwargs); - #else - return __Pyx_PyObject_FastCall_fallback(func, args, (size_t)nargs, kwargs); - #endif -} - -/* PyObjectCallOneArg */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject *args[2] = {NULL, arg}; - return __Pyx_PyObject_FastCall(func, args+1, 1 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); -} - -/* SliceObject */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice(PyObject* obj, - Py_ssize_t cstart, Py_ssize_t cstop, - PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, - int has_cstart, int has_cstop, int wraparound) { - __Pyx_TypeName obj_type_name; -#if CYTHON_USE_TYPE_SLOTS - PyMappingMethods* mp; -#if PY_MAJOR_VERSION < 3 - PySequenceMethods* ms = Py_TYPE(obj)->tp_as_sequence; - if 
(likely(ms && ms->sq_slice)) { - if (!has_cstart) { - if (_py_start && (*_py_start != Py_None)) { - cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); - if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; - } else - cstart = 0; - } - if (!has_cstop) { - if (_py_stop && (*_py_stop != Py_None)) { - cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); - if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; - } else - cstop = PY_SSIZE_T_MAX; - } - if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { - Py_ssize_t l = ms->sq_length(obj); - if (likely(l >= 0)) { - if (cstop < 0) { - cstop += l; - if (cstop < 0) cstop = 0; - } - if (cstart < 0) { - cstart += l; - if (cstart < 0) cstart = 0; - } - } else { - if (!PyErr_ExceptionMatches(PyExc_OverflowError)) - goto bad; - PyErr_Clear(); - } - } - return ms->sq_slice(obj, cstart, cstop); - } -#else - CYTHON_UNUSED_VAR(wraparound); -#endif - mp = Py_TYPE(obj)->tp_as_mapping; - if (likely(mp && mp->mp_subscript)) -#else - CYTHON_UNUSED_VAR(wraparound); -#endif - { - PyObject* result; - PyObject *py_slice, *py_start, *py_stop; - if (_py_slice) { - py_slice = *_py_slice; - } else { - PyObject* owned_start = NULL; - PyObject* owned_stop = NULL; - if (_py_start) { - py_start = *_py_start; - } else { - if (has_cstart) { - owned_start = py_start = PyInt_FromSsize_t(cstart); - if (unlikely(!py_start)) goto bad; - } else - py_start = Py_None; - } - if (_py_stop) { - py_stop = *_py_stop; - } else { - if (has_cstop) { - owned_stop = py_stop = PyInt_FromSsize_t(cstop); - if (unlikely(!py_stop)) { - Py_XDECREF(owned_start); - goto bad; - } - } else - py_stop = Py_None; - } - py_slice = PySlice_New(py_start, py_stop, Py_None); - Py_XDECREF(owned_start); - Py_XDECREF(owned_stop); - if (unlikely(!py_slice)) goto bad; - } -#if CYTHON_USE_TYPE_SLOTS - result = mp->mp_subscript(obj, py_slice); -#else - result = PyObject_GetItem(obj, py_slice); -#endif - if (!_py_slice) { - Py_DECREF(py_slice); - } - return 
result; - } - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "'" __Pyx_FMT_TYPENAME "' object is unsliceable", obj_type_name); - __Pyx_DECREF_TypeName(obj_type_name); -bad: - return NULL; -} - -/* GetAttr */ -static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *o, PyObject *n) { -#if CYTHON_USE_TYPE_SLOTS -#if PY_MAJOR_VERSION >= 3 - if (likely(PyUnicode_Check(n))) -#else - if (likely(PyString_Check(n))) -#endif - return __Pyx_PyObject_GetAttrStr(o, n); -#endif - return PyObject_GetAttr(o, n); -} - -/* SetItemInt */ -static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) { - int r; - if (unlikely(!j)) return -1; - r = PyObject_SetItem(o, j, v); - Py_DECREF(j); - return r; -} -static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list, - CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS - if (is_list || PyList_CheckExact(o)) { - Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? 
i : i + PyList_GET_SIZE(o)); - if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o)))) { - PyObject* old = PyList_GET_ITEM(o, n); - Py_INCREF(v); - PyList_SET_ITEM(o, n, v); - Py_DECREF(old); - return 1; - } - } else { - PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; - PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; - if (mm && mm->mp_ass_subscript) { - int r; - PyObject *key = PyInt_FromSsize_t(i); - if (unlikely(!key)) return -1; - r = mm->mp_ass_subscript(o, key, v); - Py_DECREF(key); - return r; - } - if (likely(sm && sm->sq_ass_item)) { - if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { - Py_ssize_t l = sm->sq_length(o); - if (likely(l >= 0)) { - i += l; - } else { - if (!PyErr_ExceptionMatches(PyExc_OverflowError)) - return -1; - PyErr_Clear(); - } - } - return sm->sq_ass_item(o, i, v); - } - } -#else - if (is_list || !PyMapping_Check(o)) - { - return PySequence_SetItem(o, i, v); - } -#endif - return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v); -} - -/* HasAttr */ -static CYTHON_INLINE int __Pyx_HasAttr(PyObject *o, PyObject *n) { - PyObject *r; - if (unlikely(!__Pyx_PyBaseString_Check(n))) { - PyErr_SetString(PyExc_TypeError, - "hasattr(): attribute name must be string"); - return -1; - } - r = __Pyx_GetAttr(o, n); - if (!r) { - PyErr_Clear(); - return 0; - } else { - Py_DECREF(r); - return 1; - } -} - -/* RaiseUnboundLocalError */ -static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname) { - PyErr_Format(PyExc_UnboundLocalError, "local variable '%s' referenced before assignment", varname); -} - -/* SliceObject */ -static CYTHON_INLINE int __Pyx_PyObject_SetSlice(PyObject* obj, PyObject* value, - Py_ssize_t cstart, Py_ssize_t cstop, - PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, - int has_cstart, int has_cstop, int wraparound) { - __Pyx_TypeName obj_type_name; -#if CYTHON_USE_TYPE_SLOTS - PyMappingMethods* mp; -#if PY_MAJOR_VERSION < 3 - PySequenceMethods* ms = 
Py_TYPE(obj)->tp_as_sequence; - if (likely(ms && ms->sq_ass_slice)) { - if (!has_cstart) { - if (_py_start && (*_py_start != Py_None)) { - cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); - if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; - } else - cstart = 0; - } - if (!has_cstop) { - if (_py_stop && (*_py_stop != Py_None)) { - cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); - if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; - } else - cstop = PY_SSIZE_T_MAX; - } - if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { - Py_ssize_t l = ms->sq_length(obj); - if (likely(l >= 0)) { - if (cstop < 0) { - cstop += l; - if (cstop < 0) cstop = 0; - } - if (cstart < 0) { - cstart += l; - if (cstart < 0) cstart = 0; - } - } else { - if (!PyErr_ExceptionMatches(PyExc_OverflowError)) - goto bad; - PyErr_Clear(); - } - } - return ms->sq_ass_slice(obj, cstart, cstop, value); - } -#else - CYTHON_UNUSED_VAR(wraparound); -#endif - mp = Py_TYPE(obj)->tp_as_mapping; - if (likely(mp && mp->mp_ass_subscript)) -#else - CYTHON_UNUSED_VAR(wraparound); -#endif - { - int result; - PyObject *py_slice, *py_start, *py_stop; - if (_py_slice) { - py_slice = *_py_slice; - } else { - PyObject* owned_start = NULL; - PyObject* owned_stop = NULL; - if (_py_start) { - py_start = *_py_start; - } else { - if (has_cstart) { - owned_start = py_start = PyInt_FromSsize_t(cstart); - if (unlikely(!py_start)) goto bad; - } else - py_start = Py_None; - } - if (_py_stop) { - py_stop = *_py_stop; - } else { - if (has_cstop) { - owned_stop = py_stop = PyInt_FromSsize_t(cstop); - if (unlikely(!py_stop)) { - Py_XDECREF(owned_start); - goto bad; - } - } else - py_stop = Py_None; - } - py_slice = PySlice_New(py_start, py_stop, Py_None); - Py_XDECREF(owned_start); - Py_XDECREF(owned_stop); - if (unlikely(!py_slice)) goto bad; - } -#if CYTHON_USE_TYPE_SLOTS - result = mp->mp_ass_subscript(obj, py_slice, value); -#else - result = value ? 
PyObject_SetItem(obj, py_slice, value) : PyObject_DelItem(obj, py_slice); -#endif - if (!_py_slice) { - Py_DECREF(py_slice); - } - return result; - } - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "'" __Pyx_FMT_TYPENAME "' object does not support slice %.10s", - obj_type_name, value ? "assignment" : "deletion"); - __Pyx_DECREF_TypeName(obj_type_name); -bad: - return -1; -} - -/* PyObjectCall2Args */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2) { - PyObject *args[3] = {NULL, arg1, arg2}; - return __Pyx_PyObject_FastCall(function, args+1, 2 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); -} - -/* PyObjectGetMethod */ -static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) { - PyObject *attr; -#if CYTHON_UNPACK_METHODS && CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_PYTYPE_LOOKUP - __Pyx_TypeName type_name; - PyTypeObject *tp = Py_TYPE(obj); - PyObject *descr; - descrgetfunc f = NULL; - PyObject **dictptr, *dict; - int meth_found = 0; - assert (*method == NULL); - if (unlikely(tp->tp_getattro != PyObject_GenericGetAttr)) { - attr = __Pyx_PyObject_GetAttrStr(obj, name); - goto try_unpack; - } - if (unlikely(tp->tp_dict == NULL) && unlikely(PyType_Ready(tp) < 0)) { - return 0; - } - descr = _PyType_Lookup(tp, name); - if (likely(descr != NULL)) { - Py_INCREF(descr); -#if defined(Py_TPFLAGS_METHOD_DESCRIPTOR) && Py_TPFLAGS_METHOD_DESCRIPTOR - if (__Pyx_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR)) -#elif PY_MAJOR_VERSION >= 3 - #ifdef __Pyx_CyFunction_USED - if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type) || __Pyx_CyFunction_Check(descr))) - #else - if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type))) - #endif -#else - #ifdef __Pyx_CyFunction_USED - if (likely(PyFunction_Check(descr) || __Pyx_CyFunction_Check(descr))) - #else - if (likely(PyFunction_Check(descr))) - 
#endif -#endif - { - meth_found = 1; - } else { - f = Py_TYPE(descr)->tp_descr_get; - if (f != NULL && PyDescr_IsData(descr)) { - attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); - Py_DECREF(descr); - goto try_unpack; - } - } - } - dictptr = _PyObject_GetDictPtr(obj); - if (dictptr != NULL && (dict = *dictptr) != NULL) { - Py_INCREF(dict); - attr = __Pyx_PyDict_GetItemStr(dict, name); - if (attr != NULL) { - Py_INCREF(attr); - Py_DECREF(dict); - Py_XDECREF(descr); - goto try_unpack; - } - Py_DECREF(dict); - } - if (meth_found) { - *method = descr; - return 1; - } - if (f != NULL) { - attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); - Py_DECREF(descr); - goto try_unpack; - } - if (likely(descr != NULL)) { - *method = descr; - return 0; - } - type_name = __Pyx_PyType_GetName(tp); - PyErr_Format(PyExc_AttributeError, -#if PY_MAJOR_VERSION >= 3 - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", - type_name, name); -#else - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", - type_name, PyString_AS_STRING(name)); -#endif - __Pyx_DECREF_TypeName(type_name); - return 0; -#else - attr = __Pyx_PyObject_GetAttrStr(obj, name); - goto try_unpack; -#endif -try_unpack: -#if CYTHON_UNPACK_METHODS - if (likely(attr) && PyMethod_Check(attr) && likely(PyMethod_GET_SELF(attr) == obj)) { - PyObject *function = PyMethod_GET_FUNCTION(attr); - Py_INCREF(function); - Py_DECREF(attr); - *method = function; - return 1; - } -#endif - *method = attr; - return 0; -} - -/* PyObjectCallMethod1 */ -#if !(CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C00A2) -static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) { - PyObject *result = __Pyx_PyObject_CallOneArg(method, arg); - Py_DECREF(method); - return result; -} -#endif -static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) { -#if CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C00A2 - PyObject *args[2] = {obj, arg}; - (void) 
__Pyx_PyObject_GetMethod; - (void) __Pyx_PyObject_CallOneArg; - (void) __Pyx_PyObject_Call2Args; - return PyObject_VectorcallMethod(method_name, args, 2 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); -#else - PyObject *method = NULL, *result; - int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); - if (likely(is_method)) { - result = __Pyx_PyObject_Call2Args(method, obj, arg); - Py_DECREF(method); - return result; - } - if (unlikely(!method)) return NULL; - return __Pyx__PyObject_CallMethod1(method, arg); -#endif -} - -/* StringJoin */ -static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values) { - (void) __Pyx_PyObject_CallMethod1; -#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION < 3 - return _PyString_Join(sep, values); -#elif CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 - return _PyBytes_Join(sep, values); -#else - return __Pyx_PyObject_CallMethod1(sep, __pyx_n_s_join, values); -#endif -} - -/* PyObjectSetAttrStr */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value) { - PyTypeObject* tp = Py_TYPE(obj); - if (likely(tp->tp_setattro)) - return tp->tp_setattro(obj, attr_name, value); -#if PY_MAJOR_VERSION < 3 - if (likely(tp->tp_setattr)) - return tp->tp_setattr(obj, PyString_AS_STRING(attr_name), value); -#endif - return PyObject_SetAttr(obj, attr_name, value); -} -#endif - -/* PyObjectCallNoArg */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { - PyObject *arg[2] = {NULL, NULL}; - return __Pyx_PyObject_FastCall(func, arg + 1, 0 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); -} - -/* PyObjectCallMethod0 */ -static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) { - PyObject *method = NULL, *result = NULL; - int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); - if (likely(is_method)) { - result = __Pyx_PyObject_CallOneArg(method, obj); - Py_DECREF(method); - return 
result; - } - if (unlikely(!method)) goto bad; - result = __Pyx_PyObject_CallNoArg(method); - Py_DECREF(method); -bad: - return result; -} - -/* ValidateBasesTuple */ -#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS -static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases) { - Py_ssize_t i, n; -#if CYTHON_ASSUME_SAFE_MACROS - n = PyTuple_GET_SIZE(bases); -#else - n = PyTuple_Size(bases); - if (n < 0) return -1; -#endif - for (i = 1; i < n; i++) - { -#if CYTHON_AVOID_BORROWED_REFS - PyObject *b0 = PySequence_GetItem(bases, i); - if (!b0) return -1; -#elif CYTHON_ASSUME_SAFE_MACROS - PyObject *b0 = PyTuple_GET_ITEM(bases, i); -#else - PyObject *b0 = PyTuple_GetItem(bases, i); - if (!b0) return -1; -#endif - PyTypeObject *b; -#if PY_MAJOR_VERSION < 3 - if (PyClass_Check(b0)) - { - PyErr_Format(PyExc_TypeError, "base class '%.200s' is an old-style class", - PyString_AS_STRING(((PyClassObject*)b0)->cl_name)); -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - return -1; - } -#endif - b = (PyTypeObject*) b0; - if (!__Pyx_PyType_HasFeature(b, Py_TPFLAGS_HEAPTYPE)) - { - __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); - PyErr_Format(PyExc_TypeError, - "base class '" __Pyx_FMT_TYPENAME "' is not a heap type", b_name); - __Pyx_DECREF_TypeName(b_name); -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - return -1; - } - if (dictoffset == 0) - { - Py_ssize_t b_dictoffset = 0; -#if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - b_dictoffset = b->tp_dictoffset; -#else - PyObject *py_b_dictoffset = PyObject_GetAttrString((PyObject*)b, "__dictoffset__"); - if (!py_b_dictoffset) goto dictoffset_return; - b_dictoffset = PyLong_AsSsize_t(py_b_dictoffset); - Py_DECREF(py_b_dictoffset); - if (b_dictoffset == -1 && PyErr_Occurred()) goto dictoffset_return; -#endif - if (b_dictoffset) { - { - __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); - PyErr_Format(PyExc_TypeError, - 
"extension type '%.200s' has no __dict__ slot, " - "but base type '" __Pyx_FMT_TYPENAME "' has: " - "either add 'cdef dict __dict__' to the extension type " - "or add '__slots__ = [...]' to the base type", - type_name, b_name); - __Pyx_DECREF_TypeName(b_name); - } -#if !(CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY) - dictoffset_return: -#endif -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - return -1; - } - } -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - } - return 0; -} -#endif - -/* PyType_Ready */ -static int __Pyx_PyType_Ready(PyTypeObject *t) { -#if CYTHON_USE_TYPE_SPECS || !(CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API) || defined(PYSTON_MAJOR_VERSION) - (void)__Pyx_PyObject_CallMethod0; -#if CYTHON_USE_TYPE_SPECS - (void)__Pyx_validate_bases_tuple; -#endif - return PyType_Ready(t); -#else - int r; - PyObject *bases = __Pyx_PyType_GetSlot(t, tp_bases, PyObject*); - if (bases && unlikely(__Pyx_validate_bases_tuple(t->tp_name, t->tp_dictoffset, bases) == -1)) - return -1; -#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) - { - int gc_was_enabled; - #if PY_VERSION_HEX >= 0x030A00b1 - gc_was_enabled = PyGC_Disable(); - (void)__Pyx_PyObject_CallMethod0; - #else - PyObject *ret, *py_status; - PyObject *gc = NULL; - #if PY_VERSION_HEX >= 0x030700a1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM+0 >= 0x07030400) - gc = PyImport_GetModule(__pyx_kp_u_gc); - #endif - if (unlikely(!gc)) gc = PyImport_Import(__pyx_kp_u_gc); - if (unlikely(!gc)) return -1; - py_status = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_isenabled); - if (unlikely(!py_status)) { - Py_DECREF(gc); - return -1; - } - gc_was_enabled = __Pyx_PyObject_IsTrue(py_status); - Py_DECREF(py_status); - if (gc_was_enabled > 0) { - ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_disable); - if (unlikely(!ret)) { - Py_DECREF(gc); - return -1; - } - Py_DECREF(ret); - } else if (unlikely(gc_was_enabled == -1)) { - Py_DECREF(gc); - return -1; - } - 
#endif - t->tp_flags |= Py_TPFLAGS_HEAPTYPE; -#if PY_VERSION_HEX >= 0x030A0000 - t->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; -#endif -#else - (void)__Pyx_PyObject_CallMethod0; -#endif - r = PyType_Ready(t); -#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) - t->tp_flags &= ~Py_TPFLAGS_HEAPTYPE; - #if PY_VERSION_HEX >= 0x030A00b1 - if (gc_was_enabled) - PyGC_Enable(); - #else - if (gc_was_enabled) { - PyObject *tp, *v, *tb; - PyErr_Fetch(&tp, &v, &tb); - ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_enable); - if (likely(ret || r == -1)) { - Py_XDECREF(ret); - PyErr_Restore(tp, v, tb); - } else { - Py_XDECREF(tp); - Py_XDECREF(v); - Py_XDECREF(tb); - r = -1; - } - } - Py_DECREF(gc); - #endif - } -#endif - return r; -#endif -} - -/* PyObject_GenericGetAttrNoDict */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static PyObject *__Pyx_RaiseGenericGetAttributeError(PyTypeObject *tp, PyObject *attr_name) { - __Pyx_TypeName type_name = __Pyx_PyType_GetName(tp); - PyErr_Format(PyExc_AttributeError, -#if PY_MAJOR_VERSION >= 3 - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", - type_name, attr_name); -#else - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", - type_name, PyString_AS_STRING(attr_name)); -#endif - __Pyx_DECREF_TypeName(type_name); - return NULL; -} -static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name) { - PyObject *descr; - PyTypeObject *tp = Py_TYPE(obj); - if (unlikely(!PyString_Check(attr_name))) { - return PyObject_GenericGetAttr(obj, attr_name); - } - assert(!tp->tp_dictoffset); - descr = _PyType_Lookup(tp, attr_name); - if (unlikely(!descr)) { - return __Pyx_RaiseGenericGetAttributeError(tp, attr_name); - } - Py_INCREF(descr); - #if PY_MAJOR_VERSION < 3 - if (likely(PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_HAVE_CLASS))) - #endif - { - descrgetfunc f = Py_TYPE(descr)->tp_descr_get; - if (unlikely(f)) { - PyObject *res = 
f(descr, obj, (PyObject *)tp); - Py_DECREF(descr); - return res; - } - } - return descr; -} -#endif - -/* PyObject_GenericGetAttr */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name) { - if (unlikely(Py_TYPE(obj)->tp_dictoffset)) { - return PyObject_GenericGetAttr(obj, attr_name); - } - return __Pyx_PyObject_GenericGetAttrNoDict(obj, attr_name); -} -#endif - -/* SetupReduce */ -#if !CYTHON_COMPILING_IN_LIMITED_API -static int __Pyx_setup_reduce_is_named(PyObject* meth, PyObject* name) { - int ret; - PyObject *name_attr; - name_attr = __Pyx_PyObject_GetAttrStrNoError(meth, __pyx_n_s_name); - if (likely(name_attr)) { - ret = PyObject_RichCompareBool(name_attr, name, Py_EQ); - } else { - ret = -1; - } - if (unlikely(ret < 0)) { - PyErr_Clear(); - ret = 0; - } - Py_XDECREF(name_attr); - return ret; -} -static int __Pyx_setup_reduce(PyObject* type_obj) { - int ret = 0; - PyObject *object_reduce = NULL; - PyObject *object_getstate = NULL; - PyObject *object_reduce_ex = NULL; - PyObject *reduce = NULL; - PyObject *reduce_ex = NULL; - PyObject *reduce_cython = NULL; - PyObject *setstate = NULL; - PyObject *setstate_cython = NULL; - PyObject *getstate = NULL; -#if CYTHON_USE_PYTYPE_LOOKUP - getstate = _PyType_Lookup((PyTypeObject*)type_obj, __pyx_n_s_getstate); -#else - getstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_getstate); - if (!getstate && PyErr_Occurred()) { - goto __PYX_BAD; - } -#endif - if (getstate) { -#if CYTHON_USE_PYTYPE_LOOKUP - object_getstate = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_getstate); -#else - object_getstate = __Pyx_PyObject_GetAttrStrNoError((PyObject*)&PyBaseObject_Type, __pyx_n_s_getstate); - if (!object_getstate && PyErr_Occurred()) { - goto __PYX_BAD; - } -#endif - if (object_getstate != getstate) { - goto __PYX_GOOD; - } - } -#if CYTHON_USE_PYTYPE_LOOKUP - object_reduce_ex = 
_PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; -#else - object_reduce_ex = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; -#endif - reduce_ex = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce_ex); if (unlikely(!reduce_ex)) goto __PYX_BAD; - if (reduce_ex == object_reduce_ex) { -#if CYTHON_USE_PYTYPE_LOOKUP - object_reduce = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; -#else - object_reduce = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; -#endif - reduce = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce); if (unlikely(!reduce)) goto __PYX_BAD; - if (reduce == object_reduce || __Pyx_setup_reduce_is_named(reduce, __pyx_n_s_reduce_cython)) { - reduce_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_reduce_cython); - if (likely(reduce_cython)) { - ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce, reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - } else if (reduce == object_reduce || PyErr_Occurred()) { - goto __PYX_BAD; - } - setstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate); - if (!setstate) PyErr_Clear(); - if (!setstate || __Pyx_setup_reduce_is_named(setstate, __pyx_n_s_setstate_cython)) { - setstate_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate_cython); - if (likely(setstate_cython)) { - ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate, setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - } else if (!setstate || PyErr_Occurred()) { - goto __PYX_BAD; - } - } - 
PyType_Modified((PyTypeObject*)type_obj); - } - } - goto __PYX_GOOD; -__PYX_BAD: - if (!PyErr_Occurred()) { - __Pyx_TypeName type_obj_name = - __Pyx_PyType_GetName((PyTypeObject*)type_obj); - PyErr_Format(PyExc_RuntimeError, - "Unable to initialize pickling for " __Pyx_FMT_TYPENAME, type_obj_name); - __Pyx_DECREF_TypeName(type_obj_name); - } - ret = -1; -__PYX_GOOD: -#if !CYTHON_USE_PYTYPE_LOOKUP - Py_XDECREF(object_reduce); - Py_XDECREF(object_reduce_ex); - Py_XDECREF(object_getstate); - Py_XDECREF(getstate); -#endif - Py_XDECREF(reduce); - Py_XDECREF(reduce_ex); - Py_XDECREF(reduce_cython); - Py_XDECREF(setstate); - Py_XDECREF(setstate_cython); - return ret; -} -#endif - -/* Import */ -static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { - PyObject *module = 0; - PyObject *empty_dict = 0; - PyObject *empty_list = 0; - #if PY_MAJOR_VERSION < 3 - PyObject *py_import; - py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); - if (unlikely(!py_import)) - goto bad; - if (!from_list) { - empty_list = PyList_New(0); - if (unlikely(!empty_list)) - goto bad; - from_list = empty_list; - } - #endif - empty_dict = PyDict_New(); - if (unlikely(!empty_dict)) - goto bad; - { - #if PY_MAJOR_VERSION >= 3 - if (level == -1) { - if (strchr(__Pyx_MODULE_NAME, '.') != NULL) { - module = PyImport_ImportModuleLevelObject( - name, __pyx_d, empty_dict, from_list, 1); - if (unlikely(!module)) { - if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) - goto bad; - PyErr_Clear(); - } - } - level = 0; - } - #endif - if (!module) { - #if PY_MAJOR_VERSION < 3 - PyObject *py_level = PyInt_FromLong(level); - if (unlikely(!py_level)) - goto bad; - module = PyObject_CallFunctionObjArgs(py_import, - name, __pyx_d, empty_dict, from_list, py_level, (PyObject *)NULL); - Py_DECREF(py_level); - #else - module = PyImport_ImportModuleLevelObject( - name, __pyx_d, empty_dict, from_list, level); - #endif - } - } -bad: - Py_XDECREF(empty_dict); - 
Py_XDECREF(empty_list); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(py_import); - #endif - return module; -} - -/* ImportDottedModule */ -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx__ImportDottedModule_Error(PyObject *name, PyObject *parts_tuple, Py_ssize_t count) { - PyObject *partial_name = NULL, *slice = NULL, *sep = NULL; - if (unlikely(PyErr_Occurred())) { - PyErr_Clear(); - } - if (likely(PyTuple_GET_SIZE(parts_tuple) == count)) { - partial_name = name; - } else { - slice = PySequence_GetSlice(parts_tuple, 0, count); - if (unlikely(!slice)) - goto bad; - sep = PyUnicode_FromStringAndSize(".", 1); - if (unlikely(!sep)) - goto bad; - partial_name = PyUnicode_Join(sep, slice); - } - PyErr_Format( -#if PY_MAJOR_VERSION < 3 - PyExc_ImportError, - "No module named '%s'", PyString_AS_STRING(partial_name)); -#else -#if PY_VERSION_HEX >= 0x030600B1 - PyExc_ModuleNotFoundError, -#else - PyExc_ImportError, -#endif - "No module named '%U'", partial_name); -#endif -bad: - Py_XDECREF(sep); - Py_XDECREF(slice); - Py_XDECREF(partial_name); - return NULL; -} -#endif -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx__ImportDottedModule_Lookup(PyObject *name) { - PyObject *imported_module; -#if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) - PyObject *modules = PyImport_GetModuleDict(); - if (unlikely(!modules)) - return NULL; - imported_module = __Pyx_PyDict_GetItemStr(modules, name); - Py_XINCREF(imported_module); -#else - imported_module = PyImport_GetModule(name); -#endif - return imported_module; -} -#endif -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple) { - Py_ssize_t i, nparts; - nparts = PyTuple_GET_SIZE(parts_tuple); - for (i=1; i < nparts && module; i++) { - PyObject *part, *submodule; -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - part = PyTuple_GET_ITEM(parts_tuple, i); -#else - part = PySequence_ITEM(parts_tuple, 
i); -#endif - submodule = __Pyx_PyObject_GetAttrStrNoError(module, part); -#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) - Py_DECREF(part); -#endif - Py_DECREF(module); - module = submodule; - } - if (unlikely(!module)) { - return __Pyx__ImportDottedModule_Error(name, parts_tuple, i); - } - return module; -} -#endif -static PyObject *__Pyx__ImportDottedModule(PyObject *name, PyObject *parts_tuple) { -#if PY_MAJOR_VERSION < 3 - PyObject *module, *from_list, *star = __pyx_n_s__6; - CYTHON_UNUSED_VAR(parts_tuple); - from_list = PyList_New(1); - if (unlikely(!from_list)) - return NULL; - Py_INCREF(star); - PyList_SET_ITEM(from_list, 0, star); - module = __Pyx_Import(name, from_list, 0); - Py_DECREF(from_list); - return module; -#else - PyObject *imported_module; - PyObject *module = __Pyx_Import(name, NULL, 0); - if (!parts_tuple || unlikely(!module)) - return module; - imported_module = __Pyx__ImportDottedModule_Lookup(name); - if (likely(imported_module)) { - Py_DECREF(module); - return imported_module; - } - PyErr_Clear(); - return __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); -#endif -} -static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple) { -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030400B1 - PyObject *module = __Pyx__ImportDottedModule_Lookup(name); - if (likely(module)) { - PyObject *spec = __Pyx_PyObject_GetAttrStrNoError(module, __pyx_n_s_spec); - if (likely(spec)) { - PyObject *unsafe = __Pyx_PyObject_GetAttrStrNoError(spec, __pyx_n_s_initializing); - if (likely(!unsafe || !__Pyx_PyObject_IsTrue(unsafe))) { - Py_DECREF(spec); - spec = NULL; - } - Py_XDECREF(unsafe); - } - if (likely(!spec)) { - PyErr_Clear(); - return module; - } - Py_DECREF(spec); - Py_DECREF(module); - } else if (PyErr_Occurred()) { - PyErr_Clear(); - } -#endif - return __Pyx__ImportDottedModule(name, parts_tuple); -} - -/* ImportDottedModuleRelFirst */ -static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject 
*name, PyObject *parts_tuple) { - PyObject *module; - PyObject *from_list = NULL; -#if PY_MAJOR_VERSION < 3 - PyObject *star = __pyx_n_s__6; - from_list = PyList_New(1); - if (unlikely(!from_list)) - return NULL; - Py_INCREF(star); - PyList_SET_ITEM(from_list, 0, star); -#endif - module = __Pyx_Import(name, from_list, -1); - Py_XDECREF(from_list); - if (module) { - #if PY_MAJOR_VERSION >= 3 - if (parts_tuple) { - module = __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); - } - #endif - return module; - } - if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) - return NULL; - PyErr_Clear(); - return __Pyx_ImportDottedModule(name, parts_tuple); -} - -/* PyDictVersioning */ -#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { - PyObject *dict = Py_TYPE(obj)->tp_dict; - return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0; -} -static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { - PyObject **dictptr = NULL; - Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; - if (offset) { -#if CYTHON_COMPILING_IN_CPYTHON - dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); -#else - dictptr = _PyObject_GetDictPtr(obj); -#endif - } - return (dictptr && *dictptr) ? 
__PYX_GET_DICT_VERSION(*dictptr) : 0; -} -static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { - PyObject *dict = Py_TYPE(obj)->tp_dict; - if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) - return 0; - return obj_dict_version == __Pyx_get_object_dict_version(obj); -} -#endif - -/* CLineInTraceback */ -#ifndef CYTHON_CLINE_IN_TRACEBACK -static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) { - PyObject *use_cline; - PyObject *ptype, *pvalue, *ptraceback; -#if CYTHON_COMPILING_IN_CPYTHON - PyObject **cython_runtime_dict; -#endif - CYTHON_MAYBE_UNUSED_VAR(tstate); - if (unlikely(!__pyx_cython_runtime)) { - return c_line; - } - __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); -#if CYTHON_COMPILING_IN_CPYTHON - cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); - if (likely(cython_runtime_dict)) { - __PYX_PY_DICT_LOOKUP_IF_MODIFIED( - use_cline, *cython_runtime_dict, - __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) - } else -#endif - { - PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStrNoError(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); - if (use_cline_obj) { - use_cline = PyObject_Not(use_cline_obj) ? 
Py_False : Py_True; - Py_DECREF(use_cline_obj); - } else { - PyErr_Clear(); - use_cline = NULL; - } - } - if (!use_cline) { - c_line = 0; - (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); - } - else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { - c_line = 0; - } - __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); - return c_line; -} -#endif - -/* CodeObjectCache */ -#if !CYTHON_COMPILING_IN_LIMITED_API -static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { - int start = 0, mid = 0, end = count - 1; - if (end >= 0 && code_line > entries[end].code_line) { - return count; - } - while (start < end) { - mid = start + (end - start) / 2; - if (code_line < entries[mid].code_line) { - end = mid; - } else if (code_line > entries[mid].code_line) { - start = mid + 1; - } else { - return mid; - } - } - if (code_line <= entries[mid].code_line) { - return mid; - } else { - return mid + 1; - } -} -static PyCodeObject *__pyx_find_code_object(int code_line) { - PyCodeObject* code_object; - int pos; - if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { - return NULL; - } - pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); - if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { - return NULL; - } - code_object = __pyx_code_cache.entries[pos].code_object; - Py_INCREF(code_object); - return code_object; -} -static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { - int pos, i; - __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; - if (unlikely(!code_line)) { - return; - } - if (unlikely(!entries)) { - entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); - if (likely(entries)) { - __pyx_code_cache.entries = entries; - __pyx_code_cache.max_count = 64; - 
__pyx_code_cache.count = 1; - entries[0].code_line = code_line; - entries[0].code_object = code_object; - Py_INCREF(code_object); - } - return; - } - pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); - if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { - PyCodeObject* tmp = entries[pos].code_object; - entries[pos].code_object = code_object; - Py_DECREF(tmp); - return; - } - if (__pyx_code_cache.count == __pyx_code_cache.max_count) { - int new_max = __pyx_code_cache.max_count + 64; - entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( - __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); - if (unlikely(!entries)) { - return; - } - __pyx_code_cache.entries = entries; - __pyx_code_cache.max_count = new_max; - } - for (i=__pyx_code_cache.count; i>pos; i--) { - entries[i] = entries[i-1]; - } - entries[pos].code_line = code_line; - entries[pos].code_object = code_object; - __pyx_code_cache.count++; - Py_INCREF(code_object); -} -#endif - -/* AddTraceback */ -#include "compile.h" -#include "frameobject.h" -#include "traceback.h" -#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API - #ifndef Py_BUILD_CORE - #define Py_BUILD_CORE 1 - #endif - #include "internal/pycore_frame.h" -#endif -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject *__Pyx_PyCode_Replace_For_AddTraceback(PyObject *code, PyObject *scratch_dict, - PyObject *firstlineno, PyObject *name) { - PyObject *replace = NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "co_firstlineno", firstlineno))) return NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "co_name", name))) return NULL; - replace = PyObject_GetAttrString(code, "replace"); - if (likely(replace)) { - PyObject *result; - result = PyObject_Call(replace, __pyx_empty_tuple, scratch_dict); - Py_DECREF(replace); - return result; - } - PyErr_Clear(); - #if __PYX_LIMITED_VERSION_HEX < 0x030780000 - { 
- PyObject *compiled = NULL, *result = NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "code", code))) return NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "type", (PyObject*)(&PyType_Type)))) return NULL; - compiled = Py_CompileString( - "out = type(code)(\n" - " code.co_argcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize,\n" - " code.co_flags, code.co_code, code.co_consts, code.co_names,\n" - " code.co_varnames, code.co_filename, co_name, co_firstlineno,\n" - " code.co_lnotab)\n", "", Py_file_input); - if (!compiled) return NULL; - result = PyEval_EvalCode(compiled, scratch_dict, scratch_dict); - Py_DECREF(compiled); - if (!result) PyErr_Print(); - Py_DECREF(result); - result = PyDict_GetItemString(scratch_dict, "out"); - if (result) Py_INCREF(result); - return result; - } - #else - return NULL; - #endif -} -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename) { - PyObject *code_object = NULL, *py_py_line = NULL, *py_funcname = NULL, *dict = NULL; - PyObject *replace = NULL, *getframe = NULL, *frame = NULL; - PyObject *exc_type, *exc_value, *exc_traceback; - int success = 0; - if (c_line) { - (void) __pyx_cfilenm; - (void) __Pyx_CLineForTraceback(__Pyx_PyThreadState_Current, c_line); - } - PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); - code_object = Py_CompileString("_getframe()", filename, Py_eval_input); - if (unlikely(!code_object)) goto bad; - py_py_line = PyLong_FromLong(py_line); - if (unlikely(!py_py_line)) goto bad; - py_funcname = PyUnicode_FromString(funcname); - if (unlikely(!py_funcname)) goto bad; - dict = PyDict_New(); - if (unlikely(!dict)) goto bad; - { - PyObject *old_code_object = code_object; - code_object = __Pyx_PyCode_Replace_For_AddTraceback(code_object, dict, py_py_line, py_funcname); - Py_DECREF(old_code_object); - } - if (unlikely(!code_object)) goto bad; - getframe = PySys_GetObject("_getframe"); - if (unlikely(!getframe)) goto bad; - if 
(unlikely(PyDict_SetItemString(dict, "_getframe", getframe))) goto bad; - frame = PyEval_EvalCode(code_object, dict, dict); - if (unlikely(!frame) || frame == Py_None) goto bad; - success = 1; - bad: - PyErr_Restore(exc_type, exc_value, exc_traceback); - Py_XDECREF(code_object); - Py_XDECREF(py_py_line); - Py_XDECREF(py_funcname); - Py_XDECREF(dict); - Py_XDECREF(replace); - if (success) { - PyTraceBack_Here( - (struct _frame*)frame); - } - Py_XDECREF(frame); -} -#else -static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( - const char *funcname, int c_line, - int py_line, const char *filename) { - PyCodeObject *py_code = NULL; - PyObject *py_funcname = NULL; - #if PY_MAJOR_VERSION < 3 - PyObject *py_srcfile = NULL; - py_srcfile = PyString_FromString(filename); - if (!py_srcfile) goto bad; - #endif - if (c_line) { - #if PY_MAJOR_VERSION < 3 - py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); - if (!py_funcname) goto bad; - #else - py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); - if (!py_funcname) goto bad; - funcname = PyUnicode_AsUTF8(py_funcname); - if (!funcname) goto bad; - #endif - } - else { - #if PY_MAJOR_VERSION < 3 - py_funcname = PyString_FromString(funcname); - if (!py_funcname) goto bad; - #endif - } - #if PY_MAJOR_VERSION < 3 - py_code = __Pyx_PyCode_New( - 0, - 0, - 0, - 0, - 0, - 0, - __pyx_empty_bytes, /*PyObject *code,*/ - __pyx_empty_tuple, /*PyObject *consts,*/ - __pyx_empty_tuple, /*PyObject *names,*/ - __pyx_empty_tuple, /*PyObject *varnames,*/ - __pyx_empty_tuple, /*PyObject *freevars,*/ - __pyx_empty_tuple, /*PyObject *cellvars,*/ - py_srcfile, /*PyObject *filename,*/ - py_funcname, /*PyObject *name,*/ - py_line, - __pyx_empty_bytes /*PyObject *lnotab*/ - ); - Py_DECREF(py_srcfile); - #else - py_code = PyCode_NewEmpty(filename, funcname, py_line); - #endif - Py_XDECREF(py_funcname); - return py_code; -bad: - Py_XDECREF(py_funcname); - #if PY_MAJOR_VERSION < 3 - 
Py_XDECREF(py_srcfile); - #endif - return NULL; -} -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename) { - PyCodeObject *py_code = 0; - PyFrameObject *py_frame = 0; - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject *ptype, *pvalue, *ptraceback; - if (c_line) { - c_line = __Pyx_CLineForTraceback(tstate, c_line); - } - py_code = __pyx_find_code_object(c_line ? -c_line : py_line); - if (!py_code) { - __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); - py_code = __Pyx_CreateCodeObjectForTraceback( - funcname, c_line, py_line, filename); - if (!py_code) { - /* If the code object creation fails, then we should clear the - fetched exception references and propagate the new exception */ - Py_XDECREF(ptype); - Py_XDECREF(pvalue); - Py_XDECREF(ptraceback); - goto bad; - } - __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); - __pyx_insert_code_object(c_line ? -c_line : py_line, py_code); - } - py_frame = PyFrame_New( - tstate, /*PyThreadState *tstate,*/ - py_code, /*PyCodeObject *code,*/ - __pyx_d, /*PyObject *globals,*/ - 0 /*PyObject *locals*/ - ); - if (!py_frame) goto bad; - __Pyx_PyFrame_SetLineNumber(py_frame, py_line); - PyTraceBack_Here(py_frame); -bad: - Py_XDECREF(py_code); - Py_XDECREF(py_frame); -} -#endif - -/* CIntFromPyVerify */ -#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) -#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) -#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ - {\ - func_type value = func_value;\ - if (sizeof(target_type) < sizeof(func_type)) {\ - if (unlikely(value != (func_type) (target_type) value)) {\ - func_type zero = 0;\ - if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ - return (target_type) -1;\ - if (is_unsigned && unlikely(value < 
zero))\ - goto raise_neg_overflow;\ - else\ - goto raise_overflow;\ - }\ - }\ - return (target_type) value;\ - } - -/* CIntFromPy */ -static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const size_t neg_one = (size_t) -1, const_zero = (size_t) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(size_t) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(size_t, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (size_t) val; - } - } -#endif - if (unlikely(!PyLong_Check(x))) { - size_t val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (size_t) -1; - val = __Pyx_PyInt_As_size_t(tmp); - Py_DECREF(tmp); - return val; - } - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(size_t, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(size_t) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) >= 2 * PyLong_SHIFT)) { - return (size_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(size_t) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((unsigned long)digits[2]) 
<< PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) >= 3 * PyLong_SHIFT)) { - return (size_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(size_t) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) >= 4 * PyLong_SHIFT)) { - return (size_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (size_t) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(size_t) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(size_t) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(size_t, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(size_t) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, long, -(long) 
(((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { - return (size_t) (((size_t)-1)*(((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(size_t) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { - return (size_t) ((((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { - return (size_t) (((size_t)-1)*(((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(size_t) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { - return (size_t) ((((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 
* sizeof(size_t) - 1 > 4 * PyLong_SHIFT)) { - return (size_t) (((size_t)-1)*(((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(size_t) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 4 * PyLong_SHIFT)) { - return (size_t) ((((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(size_t) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(size_t) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - size_t val; - int ret = -1; -#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API - Py_ssize_t bytes_copied = PyLong_AsNativeBytes( - x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); - if (unlikely(bytes_copied == -1)) { - } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { - goto raise_overflow; - } else { - ret = 0; - } -#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)x, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *v; - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - int chunk_size = (sizeof(long) < 8) ? 30 : 62; - if (likely(PyLong_CheckExact(x))) { - v = __Pyx_NewRef(x); - } else { - v = PyNumber_Long(x); - if (unlikely(!v)) return (size_t) -1; - assert(PyLong_CheckExact(v)); - } - { - int result = PyObject_RichCompareBool(v, Py_False, Py_LT); - if (unlikely(result < 0)) { - Py_DECREF(v); - return (size_t) -1; - } - is_negative = result == 1; - } - if (is_unsigned && unlikely(is_negative)) { - Py_DECREF(v); - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - Py_DECREF(v); - if (unlikely(!stepval)) - return (size_t) -1; - } else { - stepval = v; - } - v = NULL; - val = (size_t) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(size_t) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - long idigit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - val |= ((size_t) idigit) << bits; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - } - Py_DECREF(shift); shift = NULL; - 
Py_DECREF(mask); mask = NULL; - { - long idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(size_t) * 8) - bits - (is_unsigned ? 0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((size_t) idigit) << bits; - } - if (!is_unsigned) { - if (unlikely(val & (((size_t) 1) << (sizeof(size_t) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - if (unlikely(ret)) - return (size_t) -1; - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to size_t"); - return (size_t) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to size_t"); - return (size_t) -1; -} - -/* CIntToPy */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const long neg_one = (long) -1, const_zero = (long) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(long) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(long) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(long) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 - if (is_unsigned) { - return 
PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); - } else { - return PyLong_FromNativeBytes(bytes, sizeof(value), -1); - } -#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 - int one = 1; int little = (int)*(unsigned char *)&one; - return _PyLong_FromByteArray(bytes, sizeof(long), - little, !is_unsigned); -#else - int one = 1; int little = (int)*(unsigned char *)&one; - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(long)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? "little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - if (!is_unsigned) { - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; - } - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(kwds); - Py_XDECREF(arg_tuple); - Py_XDECREF(order_str); - Py_XDECREF(py_bytes); - Py_XDECREF(from_bytes); - return result; -#endif - } -} - -/* CIntToPy */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const int neg_one = (int) -1, const_zero = (int) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(int) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(int) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { - return 
PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(int) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 - if (is_unsigned) { - return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); - } else { - return PyLong_FromNativeBytes(bytes, sizeof(value), -1); - } -#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 - int one = 1; int little = (int)*(unsigned char *)&one; - return _PyLong_FromByteArray(bytes, sizeof(int), - little, !is_unsigned); -#else - int one = 1; int little = (int)*(unsigned char *)&one; - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(int)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? 
"little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - if (!is_unsigned) { - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; - } - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(kwds); - Py_XDECREF(arg_tuple); - Py_XDECREF(order_str); - Py_XDECREF(py_bytes); - Py_XDECREF(from_bytes); - return result; -#endif - } -} - -/* CIntFromPy */ -static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const int neg_one = (int) -1, const_zero = (int) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(int) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (int) val; - } - } -#endif - if (unlikely(!PyLong_Check(x))) { - int val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (int) -1; - val = __Pyx_PyInt_As_int(tmp); - Py_DECREF(tmp); - return val; - } - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | 
(unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 2 * PyLong_SHIFT)) { - return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 3 * PyLong_SHIFT)) { - return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 4 * PyLong_SHIFT)) { - return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (int) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(int) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(int) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - 
const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(int) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - 
__PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { - return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(int) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(int) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - int val; - int ret = -1; -#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API - Py_ssize_t bytes_copied = PyLong_AsNativeBytes( - x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); - if (unlikely(bytes_copied == -1)) { - } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { - goto raise_overflow; - } else { - ret = 0; - } -#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)x, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *v; - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - int chunk_size = (sizeof(long) < 8) ? 30 : 62; - if (likely(PyLong_CheckExact(x))) { - v = __Pyx_NewRef(x); - } else { - v = PyNumber_Long(x); - if (unlikely(!v)) return (int) -1; - assert(PyLong_CheckExact(v)); - } - { - int result = PyObject_RichCompareBool(v, Py_False, Py_LT); - if (unlikely(result < 0)) { - Py_DECREF(v); - return (int) -1; - } - is_negative = result == 1; - } - if (is_unsigned && unlikely(is_negative)) { - Py_DECREF(v); - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - Py_DECREF(v); - if (unlikely(!stepval)) - return (int) -1; - } else { - stepval = v; - } - v = NULL; - val = (int) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(int) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - long idigit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - val |= ((int) idigit) << bits; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - } - Py_DECREF(shift); shift = NULL; - Py_DECREF(mask); mask = NULL; 
- { - long idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(int) * 8) - bits - (is_unsigned ? 0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((int) idigit) << bits; - } - if (!is_unsigned) { - if (unlikely(val & (((int) 1) << (sizeof(int) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - if (unlikely(ret)) - return (int) -1; - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to int"); - return (int) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to int"); - return (int) -1; -} - -/* CIntFromPy */ -static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const char neg_one = (char) -1, const_zero = (char) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(char) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(char, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (char) val; - } - } -#endif - if (unlikely(!PyLong_Check(x))) { - char val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (char) -1; - val = __Pyx_PyInt_As_char(tmp); - Py_DECREF(tmp); - return val; - } - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(char, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - 
assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(char) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) >= 2 * PyLong_SHIFT)) { - return (char) (((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(char) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) >= 3 * PyLong_SHIFT)) { - return (char) (((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(char) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) >= 4 * PyLong_SHIFT)) { - return (char) (((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (char) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(char) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(char, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(char) <= 
sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(char, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(char, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(char) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { - return (char) (((char)-1)*(((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(char) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { - return (char) ((((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { - return (char) (((char)-1)*(((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(char) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned 
long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { - return (char) ((((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 4 * PyLong_SHIFT)) { - return (char) (((char)-1)*(((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(char) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 4 * PyLong_SHIFT)) { - return (char) ((((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(char) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(char, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(char) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(char, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - char val; - int ret = -1; -#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API - Py_ssize_t bytes_copied = PyLong_AsNativeBytes( - x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); - if (unlikely(bytes_copied == -1)) { - } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { - goto raise_overflow; - } else { - ret = 0; - } -#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)x, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *v; - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - int chunk_size = (sizeof(long) < 8) ? 30 : 62; - if (likely(PyLong_CheckExact(x))) { - v = __Pyx_NewRef(x); - } else { - v = PyNumber_Long(x); - if (unlikely(!v)) return (char) -1; - assert(PyLong_CheckExact(v)); - } - { - int result = PyObject_RichCompareBool(v, Py_False, Py_LT); - if (unlikely(result < 0)) { - Py_DECREF(v); - return (char) -1; - } - is_negative = result == 1; - } - if (is_unsigned && unlikely(is_negative)) { - Py_DECREF(v); - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - Py_DECREF(v); - if (unlikely(!stepval)) - return (char) -1; - } else { - stepval = v; - } - v = NULL; - val = (char) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(char) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - long idigit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - val |= ((char) idigit) << bits; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - } - Py_DECREF(shift); shift = NULL; - Py_DECREF(mask); mask = 
NULL; - { - long idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(char) * 8) - bits - (is_unsigned ? 0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((char) idigit) << bits; - } - if (!is_unsigned) { - if (unlikely(val & (((char) 1) << (sizeof(char) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - if (unlikely(ret)) - return (char) -1; - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to char"); - return (char) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to char"); - return (char) -1; -} - -/* CIntToPy */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_char(char value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const char neg_one = (char) -1, const_zero = (char) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(char) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(char) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(char) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(char) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(char) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 - if (is_unsigned) { - return PyLong_FromUnsignedNativeBytes(bytes, 
sizeof(value), -1); - } else { - return PyLong_FromNativeBytes(bytes, sizeof(value), -1); - } -#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 - int one = 1; int little = (int)*(unsigned char *)&one; - return _PyLong_FromByteArray(bytes, sizeof(char), - little, !is_unsigned); -#else - int one = 1; int little = (int)*(unsigned char *)&one; - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(char)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? "little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - if (!is_unsigned) { - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; - } - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(kwds); - Py_XDECREF(arg_tuple); - Py_XDECREF(order_str); - Py_XDECREF(py_bytes); - Py_XDECREF(from_bytes); - return result; -#endif - } -} - -/* FormatTypeName */ -#if CYTHON_COMPILING_IN_LIMITED_API -static __Pyx_TypeName -__Pyx_PyType_GetName(PyTypeObject* tp) -{ - PyObject *name = __Pyx_PyObject_GetAttrStr((PyObject *)tp, - __pyx_n_s_name); - if (unlikely(name == NULL) || unlikely(!PyUnicode_Check(name))) { - PyErr_Clear(); - Py_XDECREF(name); - name = __Pyx_NewRef(__pyx_n_s__13); - } - return name; -} -#endif - -/* CIntFromPy */ -static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const long neg_one = (long) -1, const_zero = (long) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned 
= neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(long) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (long) val; - } - } -#endif - if (unlikely(!PyLong_Check(x))) { - long val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (long) -1; - val = __Pyx_PyInt_As_long(tmp); - Py_DECREF(tmp); - return val; - } - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 2 * PyLong_SHIFT)) { - return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 3 * PyLong_SHIFT)) { - return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned 
long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 4 * PyLong_SHIFT)) { - return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (long) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(long) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(long) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(long) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - return 
(long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 4 * 
PyLong_SHIFT)) { - return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(long) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(long) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - long val; - int ret = -1; -#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API - Py_ssize_t bytes_copied = PyLong_AsNativeBytes( - x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); - if (unlikely(bytes_copied == -1)) { - } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { - goto raise_overflow; - } else { - ret = 0; - } -#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)x, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *v; - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - int chunk_size = (sizeof(long) < 8) ? 
30 : 62; - if (likely(PyLong_CheckExact(x))) { - v = __Pyx_NewRef(x); - } else { - v = PyNumber_Long(x); - if (unlikely(!v)) return (long) -1; - assert(PyLong_CheckExact(v)); - } - { - int result = PyObject_RichCompareBool(v, Py_False, Py_LT); - if (unlikely(result < 0)) { - Py_DECREF(v); - return (long) -1; - } - is_negative = result == 1; - } - if (is_unsigned && unlikely(is_negative)) { - Py_DECREF(v); - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - Py_DECREF(v); - if (unlikely(!stepval)) - return (long) -1; - } else { - stepval = v; - } - v = NULL; - val = (long) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(long) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - long idigit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - val |= ((long) idigit) << bits; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - } - Py_DECREF(shift); shift = NULL; - Py_DECREF(mask); mask = NULL; - { - long idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(long) * 8) - bits - (is_unsigned ? 
0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((long) idigit) << bits; - } - if (!is_unsigned) { - if (unlikely(val & (((long) 1) << (sizeof(long) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - if (unlikely(ret)) - return (long) -1; - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to long"); - return (long) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to long"); - return (long) -1; -} - -/* SwapException */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { - PyObject *tmp_type, *tmp_value, *tmp_tb; - #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 - _PyErr_StackItem *exc_info = tstate->exc_info; - tmp_value = exc_info->exc_value; - exc_info->exc_value = *value; - if (tmp_value == NULL || tmp_value == Py_None) { - Py_XDECREF(tmp_value); - tmp_value = NULL; - tmp_type = NULL; - tmp_tb = NULL; - } else { - tmp_type = (PyObject*) Py_TYPE(tmp_value); - Py_INCREF(tmp_type); - #if CYTHON_COMPILING_IN_CPYTHON - tmp_tb = ((PyBaseExceptionObject*) tmp_value)->traceback; - Py_XINCREF(tmp_tb); - #else - tmp_tb = PyException_GetTraceback(tmp_value); - #endif - } - #elif CYTHON_USE_EXC_INFO_STACK - _PyErr_StackItem *exc_info = tstate->exc_info; - tmp_type = exc_info->exc_type; - tmp_value = exc_info->exc_value; - tmp_tb = exc_info->exc_traceback; - exc_info->exc_type = *type; - exc_info->exc_value = *value; - exc_info->exc_traceback = *tb; - #else - tmp_type = tstate->exc_type; - tmp_value = tstate->exc_value; - tmp_tb = tstate->exc_traceback; - tstate->exc_type = *type; - tstate->exc_value = *value; - tstate->exc_traceback = *tb; - #endif - *type = tmp_type; - *value = tmp_value; - *tb = tmp_tb; -} 
-#else -static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb) { - PyObject *tmp_type, *tmp_value, *tmp_tb; - PyErr_GetExcInfo(&tmp_type, &tmp_value, &tmp_tb); - PyErr_SetExcInfo(*type, *value, *tb); - *type = tmp_type; - *value = tmp_value; - *tb = tmp_tb; -} -#endif - -/* CoroutineBase */ -#include -#if PY_VERSION_HEX >= 0x030b00a6 - #ifndef Py_BUILD_CORE - #define Py_BUILD_CORE 1 - #endif - #include "internal/pycore_frame.h" -#endif -#define __Pyx_Coroutine_Undelegate(gen) Py_CLEAR((gen)->yieldfrom) -static int __Pyx_PyGen__FetchStopIterationValue(PyThreadState *__pyx_tstate, PyObject **pvalue) { - PyObject *et, *ev, *tb; - PyObject *value = NULL; - CYTHON_UNUSED_VAR(__pyx_tstate); - __Pyx_ErrFetch(&et, &ev, &tb); - if (!et) { - Py_XDECREF(tb); - Py_XDECREF(ev); - Py_INCREF(Py_None); - *pvalue = Py_None; - return 0; - } - if (likely(et == PyExc_StopIteration)) { - if (!ev) { - Py_INCREF(Py_None); - value = Py_None; - } -#if PY_VERSION_HEX >= 0x030300A0 - else if (likely(__Pyx_IS_TYPE(ev, (PyTypeObject*)PyExc_StopIteration))) { - value = ((PyStopIterationObject *)ev)->value; - Py_INCREF(value); - Py_DECREF(ev); - } -#endif - else if (unlikely(PyTuple_Check(ev))) { - if (PyTuple_GET_SIZE(ev) >= 1) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - value = PyTuple_GET_ITEM(ev, 0); - Py_INCREF(value); -#else - value = PySequence_ITEM(ev, 0); -#endif - } else { - Py_INCREF(Py_None); - value = Py_None; - } - Py_DECREF(ev); - } - else if (!__Pyx_TypeCheck(ev, (PyTypeObject*)PyExc_StopIteration)) { - value = ev; - } - if (likely(value)) { - Py_XDECREF(tb); - Py_DECREF(et); - *pvalue = value; - return 0; - } - } else if (!__Pyx_PyErr_GivenExceptionMatches(et, PyExc_StopIteration)) { - __Pyx_ErrRestore(et, ev, tb); - return -1; - } - PyErr_NormalizeException(&et, &ev, &tb); - if (unlikely(!PyObject_TypeCheck(ev, (PyTypeObject*)PyExc_StopIteration))) { - __Pyx_ErrRestore(et, ev, tb); - return -1; - } - Py_XDECREF(tb); 
- Py_DECREF(et); -#if PY_VERSION_HEX >= 0x030300A0 - value = ((PyStopIterationObject *)ev)->value; - Py_INCREF(value); - Py_DECREF(ev); -#else - { - PyObject* args = __Pyx_PyObject_GetAttrStr(ev, __pyx_n_s_args); - Py_DECREF(ev); - if (likely(args)) { - value = PySequence_GetItem(args, 0); - Py_DECREF(args); - } - if (unlikely(!value)) { - __Pyx_ErrRestore(NULL, NULL, NULL); - Py_INCREF(Py_None); - value = Py_None; - } - } -#endif - *pvalue = value; - return 0; -} -static CYTHON_INLINE -void __Pyx_Coroutine_ExceptionClear(__Pyx_ExcInfoStruct *exc_state) { -#if PY_VERSION_HEX >= 0x030B00a4 - Py_CLEAR(exc_state->exc_value); -#else - PyObject *t, *v, *tb; - t = exc_state->exc_type; - v = exc_state->exc_value; - tb = exc_state->exc_traceback; - exc_state->exc_type = NULL; - exc_state->exc_value = NULL; - exc_state->exc_traceback = NULL; - Py_XDECREF(t); - Py_XDECREF(v); - Py_XDECREF(tb); -#endif -} -#define __Pyx_Coroutine_AlreadyRunningError(gen) (__Pyx__Coroutine_AlreadyRunningError(gen), (PyObject*)NULL) -static void __Pyx__Coroutine_AlreadyRunningError(__pyx_CoroutineObject *gen) { - const char *msg; - CYTHON_MAYBE_UNUSED_VAR(gen); - if ((0)) { - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_Coroutine_Check((PyObject*)gen)) { - msg = "coroutine already executing"; - #endif - #ifdef __Pyx_AsyncGen_USED - } else if (__Pyx_AsyncGen_CheckExact((PyObject*)gen)) { - msg = "async generator already executing"; - #endif - } else { - msg = "generator already executing"; - } - PyErr_SetString(PyExc_ValueError, msg); -} -#define __Pyx_Coroutine_NotStartedError(gen) (__Pyx__Coroutine_NotStartedError(gen), (PyObject*)NULL) -static void __Pyx__Coroutine_NotStartedError(PyObject *gen) { - const char *msg; - CYTHON_MAYBE_UNUSED_VAR(gen); - if ((0)) { - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_Coroutine_Check(gen)) { - msg = "can't send non-None value to a just-started coroutine"; - #endif - #ifdef __Pyx_AsyncGen_USED - } else if (__Pyx_AsyncGen_CheckExact(gen)) { - msg = 
"can't send non-None value to a just-started async generator"; - #endif - } else { - msg = "can't send non-None value to a just-started generator"; - } - PyErr_SetString(PyExc_TypeError, msg); -} -#define __Pyx_Coroutine_AlreadyTerminatedError(gen, value, closing) (__Pyx__Coroutine_AlreadyTerminatedError(gen, value, closing), (PyObject*)NULL) -static void __Pyx__Coroutine_AlreadyTerminatedError(PyObject *gen, PyObject *value, int closing) { - CYTHON_MAYBE_UNUSED_VAR(gen); - CYTHON_MAYBE_UNUSED_VAR(closing); - #ifdef __Pyx_Coroutine_USED - if (!closing && __Pyx_Coroutine_Check(gen)) { - PyErr_SetString(PyExc_RuntimeError, "cannot reuse already awaited coroutine"); - } else - #endif - if (value) { - #ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(gen)) - PyErr_SetNone(__Pyx_PyExc_StopAsyncIteration); - else - #endif - PyErr_SetNone(PyExc_StopIteration); - } -} -static -PyObject *__Pyx_Coroutine_SendEx(__pyx_CoroutineObject *self, PyObject *value, int closing) { - __Pyx_PyThreadState_declare - PyThreadState *tstate; - __Pyx_ExcInfoStruct *exc_state; - PyObject *retval; - assert(!self->is_running); - if (unlikely(self->resume_label == 0)) { - if (unlikely(value && value != Py_None)) { - return __Pyx_Coroutine_NotStartedError((PyObject*)self); - } - } - if (unlikely(self->resume_label == -1)) { - return __Pyx_Coroutine_AlreadyTerminatedError((PyObject*)self, value, closing); - } -#if CYTHON_FAST_THREAD_STATE - __Pyx_PyThreadState_assign - tstate = __pyx_tstate; -#else - tstate = __Pyx_PyThreadState_Current; -#endif - exc_state = &self->gi_exc_state; - if (exc_state->exc_value) { - #if CYTHON_COMPILING_IN_PYPY - #else - PyObject *exc_tb; - #if PY_VERSION_HEX >= 0x030B00a4 && !CYTHON_COMPILING_IN_CPYTHON - exc_tb = PyException_GetTraceback(exc_state->exc_value); - #elif PY_VERSION_HEX >= 0x030B00a4 - exc_tb = ((PyBaseExceptionObject*) exc_state->exc_value)->traceback; - #else - exc_tb = exc_state->exc_traceback; - #endif - if (exc_tb) { - PyTracebackObject *tb 
= (PyTracebackObject *) exc_tb; - PyFrameObject *f = tb->tb_frame; - assert(f->f_back == NULL); - #if PY_VERSION_HEX >= 0x030B00A1 - f->f_back = PyThreadState_GetFrame(tstate); - #else - Py_XINCREF(tstate->frame); - f->f_back = tstate->frame; - #endif - #if PY_VERSION_HEX >= 0x030B00a4 && !CYTHON_COMPILING_IN_CPYTHON - Py_DECREF(exc_tb); - #endif - } - #endif - } -#if CYTHON_USE_EXC_INFO_STACK - exc_state->previous_item = tstate->exc_info; - tstate->exc_info = exc_state; -#else - if (exc_state->exc_type) { - __Pyx_ExceptionSwap(&exc_state->exc_type, &exc_state->exc_value, &exc_state->exc_traceback); - } else { - __Pyx_Coroutine_ExceptionClear(exc_state); - __Pyx_ExceptionSave(&exc_state->exc_type, &exc_state->exc_value, &exc_state->exc_traceback); - } -#endif - self->is_running = 1; - retval = self->body(self, tstate, value); - self->is_running = 0; -#if CYTHON_USE_EXC_INFO_STACK - exc_state = &self->gi_exc_state; - tstate->exc_info = exc_state->previous_item; - exc_state->previous_item = NULL; - __Pyx_Coroutine_ResetFrameBackpointer(exc_state); -#endif - return retval; -} -static CYTHON_INLINE void __Pyx_Coroutine_ResetFrameBackpointer(__Pyx_ExcInfoStruct *exc_state) { -#if CYTHON_COMPILING_IN_PYPY - CYTHON_UNUSED_VAR(exc_state); -#else - PyObject *exc_tb; - #if PY_VERSION_HEX >= 0x030B00a4 - if (!exc_state->exc_value) return; - exc_tb = PyException_GetTraceback(exc_state->exc_value); - #else - exc_tb = exc_state->exc_traceback; - #endif - if (likely(exc_tb)) { - PyTracebackObject *tb = (PyTracebackObject *) exc_tb; - PyFrameObject *f = tb->tb_frame; - Py_CLEAR(f->f_back); - #if PY_VERSION_HEX >= 0x030B00a4 - Py_DECREF(exc_tb); - #endif - } -#endif -} -static CYTHON_INLINE -PyObject *__Pyx_Coroutine_MethodReturn(PyObject* gen, PyObject *retval) { - CYTHON_MAYBE_UNUSED_VAR(gen); - if (unlikely(!retval)) { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - if (!__Pyx_PyErr_Occurred()) { - PyObject *exc = PyExc_StopIteration; - #ifdef __Pyx_AsyncGen_USED - 
if (__Pyx_AsyncGen_CheckExact(gen)) - exc = __Pyx_PyExc_StopAsyncIteration; - #endif - __Pyx_PyErr_SetNone(exc); - } - } - return retval; -} -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) -static CYTHON_INLINE -PyObject *__Pyx_PyGen_Send(PyGenObject *gen, PyObject *arg) { -#if PY_VERSION_HEX <= 0x030A00A1 - return _PyGen_Send(gen, arg); -#else - PyObject *result; - if (PyIter_Send((PyObject*)gen, arg ? arg : Py_None, &result) == PYGEN_RETURN) { - if (PyAsyncGen_CheckExact(gen)) { - assert(result == Py_None); - PyErr_SetNone(PyExc_StopAsyncIteration); - } - else if (result == Py_None) { - PyErr_SetNone(PyExc_StopIteration); - } - else { -#if PY_VERSION_HEX < 0x030d00A1 - _PyGen_SetStopIterationValue(result); -#else - if (!PyTuple_Check(result) && !PyExceptionInstance_Check(result)) { - PyErr_SetObject(PyExc_StopIteration, result); - } else { - PyObject *exc = __Pyx_PyObject_CallOneArg(PyExc_StopIteration, result); - if (likely(exc != NULL)) { - PyErr_SetObject(PyExc_StopIteration, exc); - Py_DECREF(exc); - } - } -#endif - } - Py_DECREF(result); - result = NULL; - } - return result; -#endif -} -#endif -static CYTHON_INLINE -PyObject *__Pyx_Coroutine_FinishDelegation(__pyx_CoroutineObject *gen) { - PyObject *ret; - PyObject *val = NULL; - __Pyx_Coroutine_Undelegate(gen); - __Pyx_PyGen__FetchStopIterationValue(__Pyx_PyThreadState_Current, &val); - ret = __Pyx_Coroutine_SendEx(gen, val, 0); - Py_XDECREF(val); - return ret; -} -static PyObject *__Pyx_Coroutine_Send(PyObject *self, PyObject *value) { - PyObject *retval; - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject*) self; - PyObject *yf = gen->yieldfrom; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - PyObject *ret; - gen->is_running = 1; - #ifdef __Pyx_Generator_USED - if (__Pyx_Generator_CheckExact(yf)) { - ret = __Pyx_Coroutine_Send(yf, value); - } else - #endif - #ifdef 
__Pyx_Coroutine_USED - if (__Pyx_Coroutine_Check(yf)) { - ret = __Pyx_Coroutine_Send(yf, value); - } else - #endif - #ifdef __Pyx_AsyncGen_USED - if (__pyx_PyAsyncGenASend_CheckExact(yf)) { - ret = __Pyx_async_gen_asend_send(yf, value); - } else - #endif - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) - if (PyGen_CheckExact(yf)) { - ret = __Pyx_PyGen_Send((PyGenObject*)yf, value == Py_None ? NULL : value); - } else - #endif - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03050000 && defined(PyCoro_CheckExact) && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) - if (PyCoro_CheckExact(yf)) { - ret = __Pyx_PyGen_Send((PyGenObject*)yf, value == Py_None ? NULL : value); - } else - #endif - { - if (value == Py_None) - ret = __Pyx_PyObject_GetIterNextFunc(yf)(yf); - else - ret = __Pyx_PyObject_CallMethod1(yf, __pyx_n_s_send, value); - } - gen->is_running = 0; - if (likely(ret)) { - return ret; - } - retval = __Pyx_Coroutine_FinishDelegation(gen); - } else { - retval = __Pyx_Coroutine_SendEx(gen, value, 0); - } - return __Pyx_Coroutine_MethodReturn(self, retval); -} -static int __Pyx_Coroutine_CloseIter(__pyx_CoroutineObject *gen, PyObject *yf) { - PyObject *retval = NULL; - int err = 0; - #ifdef __Pyx_Generator_USED - if (__Pyx_Generator_CheckExact(yf)) { - retval = __Pyx_Coroutine_Close(yf); - if (!retval) - return -1; - } else - #endif - #ifdef __Pyx_Coroutine_USED - if (__Pyx_Coroutine_Check(yf)) { - retval = __Pyx_Coroutine_Close(yf); - if (!retval) - return -1; - } else - if (__Pyx_CoroutineAwait_CheckExact(yf)) { - retval = __Pyx_CoroutineAwait_Close((__pyx_CoroutineAwaitObject*)yf, NULL); - if (!retval) - return -1; - } else - #endif - #ifdef __Pyx_AsyncGen_USED - if (__pyx_PyAsyncGenASend_CheckExact(yf)) { - retval = __Pyx_async_gen_asend_close(yf, NULL); - } else - if (__pyx_PyAsyncGenAThrow_CheckExact(yf)) { - retval = __Pyx_async_gen_athrow_close(yf, NULL); - } else - #endif 
- { - PyObject *meth; - gen->is_running = 1; - meth = __Pyx_PyObject_GetAttrStrNoError(yf, __pyx_n_s_close); - if (unlikely(!meth)) { - if (unlikely(PyErr_Occurred())) { - PyErr_WriteUnraisable(yf); - } - } else { - retval = __Pyx_PyObject_CallNoArg(meth); - Py_DECREF(meth); - if (unlikely(!retval)) - err = -1; - } - gen->is_running = 0; - } - Py_XDECREF(retval); - return err; -} -static PyObject *__Pyx_Generator_Next(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject*) self; - PyObject *yf = gen->yieldfrom; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - PyObject *ret; - gen->is_running = 1; - #ifdef __Pyx_Generator_USED - if (__Pyx_Generator_CheckExact(yf)) { - ret = __Pyx_Generator_Next(yf); - } else - #endif - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) - if (PyGen_CheckExact(yf)) { - ret = __Pyx_PyGen_Send((PyGenObject*)yf, NULL); - } else - #endif - #ifdef __Pyx_Coroutine_USED - if (__Pyx_Coroutine_Check(yf)) { - ret = __Pyx_Coroutine_Send(yf, Py_None); - } else - #endif - ret = __Pyx_PyObject_GetIterNextFunc(yf)(yf); - gen->is_running = 0; - if (likely(ret)) { - return ret; - } - return __Pyx_Coroutine_FinishDelegation(gen); - } - return __Pyx_Coroutine_SendEx(gen, Py_None, 0); -} -static PyObject *__Pyx_Coroutine_Close_Method(PyObject *self, PyObject *arg) { - CYTHON_UNUSED_VAR(arg); - return __Pyx_Coroutine_Close(self); -} -static PyObject *__Pyx_Coroutine_Close(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - PyObject *retval, *raised_exception; - PyObject *yf = gen->yieldfrom; - int err = 0; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - Py_INCREF(yf); - err = __Pyx_Coroutine_CloseIter(gen, yf); - __Pyx_Coroutine_Undelegate(gen); - Py_DECREF(yf); - } - if (err == 0) - PyErr_SetNone(PyExc_GeneratorExit); - retval = 
__Pyx_Coroutine_SendEx(gen, NULL, 1); - if (unlikely(retval)) { - const char *msg; - Py_DECREF(retval); - if ((0)) { - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_Coroutine_Check(self)) { - msg = "coroutine ignored GeneratorExit"; - #endif - #ifdef __Pyx_AsyncGen_USED - } else if (__Pyx_AsyncGen_CheckExact(self)) { -#if PY_VERSION_HEX < 0x03060000 - msg = "async generator ignored GeneratorExit - might require Python 3.6+ finalisation (PEP 525)"; -#else - msg = "async generator ignored GeneratorExit"; -#endif - #endif - } else { - msg = "generator ignored GeneratorExit"; - } - PyErr_SetString(PyExc_RuntimeError, msg); - return NULL; - } - raised_exception = PyErr_Occurred(); - if (likely(!raised_exception || __Pyx_PyErr_GivenExceptionMatches2(raised_exception, PyExc_GeneratorExit, PyExc_StopIteration))) { - if (raised_exception) PyErr_Clear(); - Py_INCREF(Py_None); - return Py_None; - } - return NULL; -} -static PyObject *__Pyx__Coroutine_Throw(PyObject *self, PyObject *typ, PyObject *val, PyObject *tb, - PyObject *args, int close_on_genexit) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - PyObject *yf = gen->yieldfrom; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - PyObject *ret; - Py_INCREF(yf); - if (__Pyx_PyErr_GivenExceptionMatches(typ, PyExc_GeneratorExit) && close_on_genexit) { - int err = __Pyx_Coroutine_CloseIter(gen, yf); - Py_DECREF(yf); - __Pyx_Coroutine_Undelegate(gen); - if (err < 0) - return __Pyx_Coroutine_MethodReturn(self, __Pyx_Coroutine_SendEx(gen, NULL, 0)); - goto throw_here; - } - gen->is_running = 1; - if (0 - #ifdef __Pyx_Generator_USED - || __Pyx_Generator_CheckExact(yf) - #endif - #ifdef __Pyx_Coroutine_USED - || __Pyx_Coroutine_Check(yf) - #endif - ) { - ret = __Pyx__Coroutine_Throw(yf, typ, val, tb, args, close_on_genexit); - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_CoroutineAwait_CheckExact(yf)) { - ret = 
__Pyx__Coroutine_Throw(((__pyx_CoroutineAwaitObject*)yf)->coroutine, typ, val, tb, args, close_on_genexit); - #endif - } else { - PyObject *meth = __Pyx_PyObject_GetAttrStrNoError(yf, __pyx_n_s_throw); - if (unlikely(!meth)) { - Py_DECREF(yf); - if (unlikely(PyErr_Occurred())) { - gen->is_running = 0; - return NULL; - } - __Pyx_Coroutine_Undelegate(gen); - gen->is_running = 0; - goto throw_here; - } - if (likely(args)) { - ret = __Pyx_PyObject_Call(meth, args, NULL); - } else { - PyObject *cargs[4] = {NULL, typ, val, tb}; - ret = __Pyx_PyObject_FastCall(meth, cargs+1, 3 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); - } - Py_DECREF(meth); - } - gen->is_running = 0; - Py_DECREF(yf); - if (!ret) { - ret = __Pyx_Coroutine_FinishDelegation(gen); - } - return __Pyx_Coroutine_MethodReturn(self, ret); - } -throw_here: - __Pyx_Raise(typ, val, tb, NULL); - return __Pyx_Coroutine_MethodReturn(self, __Pyx_Coroutine_SendEx(gen, NULL, 0)); -} -static PyObject *__Pyx_Coroutine_Throw(PyObject *self, PyObject *args) { - PyObject *typ; - PyObject *val = NULL; - PyObject *tb = NULL; - if (unlikely(!PyArg_UnpackTuple(args, (char *)"throw", 1, 3, &typ, &val, &tb))) - return NULL; - return __Pyx__Coroutine_Throw(self, typ, val, tb, args, 1); -} -static CYTHON_INLINE int __Pyx_Coroutine_traverse_excstate(__Pyx_ExcInfoStruct *exc_state, visitproc visit, void *arg) { -#if PY_VERSION_HEX >= 0x030B00a4 - Py_VISIT(exc_state->exc_value); -#else - Py_VISIT(exc_state->exc_type); - Py_VISIT(exc_state->exc_value); - Py_VISIT(exc_state->exc_traceback); -#endif - return 0; -} -static int __Pyx_Coroutine_traverse(__pyx_CoroutineObject *gen, visitproc visit, void *arg) { - Py_VISIT(gen->closure); - Py_VISIT(gen->classobj); - Py_VISIT(gen->yieldfrom); - return __Pyx_Coroutine_traverse_excstate(&gen->gi_exc_state, visit, arg); -} -static int __Pyx_Coroutine_clear(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - Py_CLEAR(gen->closure); - Py_CLEAR(gen->classobj); - 
Py_CLEAR(gen->yieldfrom); - __Pyx_Coroutine_ExceptionClear(&gen->gi_exc_state); -#ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(self)) { - Py_CLEAR(((__pyx_PyAsyncGenObject*)gen)->ag_finalizer); - } -#endif - Py_CLEAR(gen->gi_code); - Py_CLEAR(gen->gi_frame); - Py_CLEAR(gen->gi_name); - Py_CLEAR(gen->gi_qualname); - Py_CLEAR(gen->gi_modulename); - return 0; -} -static void __Pyx_Coroutine_dealloc(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - PyObject_GC_UnTrack(gen); - if (gen->gi_weakreflist != NULL) - PyObject_ClearWeakRefs(self); - if (gen->resume_label >= 0) { - PyObject_GC_Track(self); -#if PY_VERSION_HEX >= 0x030400a1 && CYTHON_USE_TP_FINALIZE - if (unlikely(PyObject_CallFinalizerFromDealloc(self))) -#else - Py_TYPE(gen)->tp_del(self); - if (unlikely(Py_REFCNT(self) > 0)) -#endif - { - return; - } - PyObject_GC_UnTrack(self); - } -#ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(self)) { - /* We have to handle this case for asynchronous generators - right here, because this code has to be between UNTRACK - and GC_Del. 
*/ - Py_CLEAR(((__pyx_PyAsyncGenObject*)self)->ag_finalizer); - } -#endif - __Pyx_Coroutine_clear(self); - __Pyx_PyHeapTypeObject_GC_Del(gen); -} -static void __Pyx_Coroutine_del(PyObject *self) { - PyObject *error_type, *error_value, *error_traceback; - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - __Pyx_PyThreadState_declare - if (gen->resume_label < 0) { - return; - } -#if !CYTHON_USE_TP_FINALIZE - assert(self->ob_refcnt == 0); - __Pyx_SET_REFCNT(self, 1); -#endif - __Pyx_PyThreadState_assign - __Pyx_ErrFetch(&error_type, &error_value, &error_traceback); -#ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(self)) { - __pyx_PyAsyncGenObject *agen = (__pyx_PyAsyncGenObject*)self; - PyObject *finalizer = agen->ag_finalizer; - if (finalizer && !agen->ag_closed) { - PyObject *res = __Pyx_PyObject_CallOneArg(finalizer, self); - if (unlikely(!res)) { - PyErr_WriteUnraisable(self); - } else { - Py_DECREF(res); - } - __Pyx_ErrRestore(error_type, error_value, error_traceback); - return; - } - } -#endif - if (unlikely(gen->resume_label == 0 && !error_value)) { -#ifdef __Pyx_Coroutine_USED -#ifdef __Pyx_Generator_USED - if (!__Pyx_Generator_CheckExact(self)) -#endif - { - PyObject_GC_UnTrack(self); -#if PY_MAJOR_VERSION >= 3 || defined(PyErr_WarnFormat) - if (unlikely(PyErr_WarnFormat(PyExc_RuntimeWarning, 1, "coroutine '%.50S' was never awaited", gen->gi_qualname) < 0)) - PyErr_WriteUnraisable(self); -#else - {PyObject *msg; - char *cmsg; - #if CYTHON_COMPILING_IN_PYPY - msg = NULL; - cmsg = (char*) "coroutine was never awaited"; - #else - char *cname; - PyObject *qualname; - qualname = gen->gi_qualname; - cname = PyString_AS_STRING(qualname); - msg = PyString_FromFormat("coroutine '%.50s' was never awaited", cname); - if (unlikely(!msg)) { - PyErr_Clear(); - cmsg = (char*) "coroutine was never awaited"; - } else { - cmsg = PyString_AS_STRING(msg); - } - #endif - if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, cmsg, 1) < 0)) - 
PyErr_WriteUnraisable(self); - Py_XDECREF(msg);} -#endif - PyObject_GC_Track(self); - } -#endif - } else { - PyObject *res = __Pyx_Coroutine_Close(self); - if (unlikely(!res)) { - if (PyErr_Occurred()) - PyErr_WriteUnraisable(self); - } else { - Py_DECREF(res); - } - } - __Pyx_ErrRestore(error_type, error_value, error_traceback); -#if !CYTHON_USE_TP_FINALIZE - assert(Py_REFCNT(self) > 0); - if (likely(--self->ob_refcnt == 0)) { - return; - } - { - Py_ssize_t refcnt = Py_REFCNT(self); - _Py_NewReference(self); - __Pyx_SET_REFCNT(self, refcnt); - } -#if CYTHON_COMPILING_IN_CPYTHON - assert(PyType_IS_GC(Py_TYPE(self)) && - _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED); - _Py_DEC_REFTOTAL; -#endif -#ifdef COUNT_ALLOCS - --Py_TYPE(self)->tp_frees; - --Py_TYPE(self)->tp_allocs; -#endif -#endif -} -static PyObject * -__Pyx_Coroutine_get_name(__pyx_CoroutineObject *self, void *context) -{ - PyObject *name = self->gi_name; - CYTHON_UNUSED_VAR(context); - if (unlikely(!name)) name = Py_None; - Py_INCREF(name); - return name; -} -static int -__Pyx_Coroutine_set_name(__pyx_CoroutineObject *self, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__name__ must be set to a string object"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(self->gi_name, value); - return 0; -} -static PyObject * -__Pyx_Coroutine_get_qualname(__pyx_CoroutineObject *self, void *context) -{ - PyObject *name = self->gi_qualname; - CYTHON_UNUSED_VAR(context); - if (unlikely(!name)) name = Py_None; - Py_INCREF(name); - return name; -} -static int -__Pyx_Coroutine_set_qualname(__pyx_CoroutineObject *self, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if 
(unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__qualname__ must be set to a string object"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(self->gi_qualname, value); - return 0; -} -static PyObject * -__Pyx_Coroutine_get_frame(__pyx_CoroutineObject *self, void *context) -{ - PyObject *frame = self->gi_frame; - CYTHON_UNUSED_VAR(context); - if (!frame) { - if (unlikely(!self->gi_code)) { - Py_RETURN_NONE; - } - frame = (PyObject *) PyFrame_New( - PyThreadState_Get(), /*PyThreadState *tstate,*/ - (PyCodeObject*) self->gi_code, /*PyCodeObject *code,*/ - __pyx_d, /*PyObject *globals,*/ - 0 /*PyObject *locals*/ - ); - if (unlikely(!frame)) - return NULL; - self->gi_frame = frame; - } - Py_INCREF(frame); - return frame; -} -static __pyx_CoroutineObject *__Pyx__Coroutine_New( - PyTypeObject* type, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name) { - __pyx_CoroutineObject *gen = PyObject_GC_New(__pyx_CoroutineObject, type); - if (unlikely(!gen)) - return NULL; - return __Pyx__Coroutine_NewInit(gen, body, code, closure, name, qualname, module_name); -} -static __pyx_CoroutineObject *__Pyx__Coroutine_NewInit( - __pyx_CoroutineObject *gen, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name) { - gen->body = body; - gen->closure = closure; - Py_XINCREF(closure); - gen->is_running = 0; - gen->resume_label = 0; - gen->classobj = NULL; - gen->yieldfrom = NULL; - #if PY_VERSION_HEX >= 0x030B00a4 - gen->gi_exc_state.exc_value = NULL; - #else - gen->gi_exc_state.exc_type = NULL; - gen->gi_exc_state.exc_value = NULL; - gen->gi_exc_state.exc_traceback = NULL; - #endif -#if CYTHON_USE_EXC_INFO_STACK - gen->gi_exc_state.previous_item = NULL; -#endif - gen->gi_weakreflist = NULL; - Py_XINCREF(qualname); - gen->gi_qualname = qualname; - Py_XINCREF(name); - gen->gi_name = 
name; - Py_XINCREF(module_name); - gen->gi_modulename = module_name; - Py_XINCREF(code); - gen->gi_code = code; - gen->gi_frame = NULL; - PyObject_GC_Track(gen); - return gen; -} - -/* PatchModuleWithCoroutine */ -static PyObject* __Pyx_Coroutine_patch_module(PyObject* module, const char* py_code) { -#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) - int result; - PyObject *globals, *result_obj; - globals = PyDict_New(); if (unlikely(!globals)) goto ignore; - result = PyDict_SetItemString(globals, "_cython_coroutine_type", - #ifdef __Pyx_Coroutine_USED - (PyObject*)__pyx_CoroutineType); - #else - Py_None); - #endif - if (unlikely(result < 0)) goto ignore; - result = PyDict_SetItemString(globals, "_cython_generator_type", - #ifdef __Pyx_Generator_USED - (PyObject*)__pyx_GeneratorType); - #else - Py_None); - #endif - if (unlikely(result < 0)) goto ignore; - if (unlikely(PyDict_SetItemString(globals, "_module", module) < 0)) goto ignore; - if (unlikely(PyDict_SetItemString(globals, "__builtins__", __pyx_b) < 0)) goto ignore; - result_obj = PyRun_String(py_code, Py_file_input, globals, globals); - if (unlikely(!result_obj)) goto ignore; - Py_DECREF(result_obj); - Py_DECREF(globals); - return module; -ignore: - Py_XDECREF(globals); - PyErr_WriteUnraisable(module); - if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, "Cython module failed to patch module with custom type", 1) < 0)) { - Py_DECREF(module); - module = NULL; - } -#else - py_code++; -#endif - return module; -} - -/* PatchGeneratorABC */ -#ifndef CYTHON_REGISTER_ABCS -#define CYTHON_REGISTER_ABCS 1 -#endif -#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) -static PyObject* __Pyx_patch_abc_module(PyObject *module); -static PyObject* __Pyx_patch_abc_module(PyObject *module) { - module = __Pyx_Coroutine_patch_module( - module, "" -"if _cython_generator_type is not None:\n" -" try: Generator = _module.Generator\n" -" except AttributeError: pass\n" -" else: 
Generator.register(_cython_generator_type)\n" -"if _cython_coroutine_type is not None:\n" -" try: Coroutine = _module.Coroutine\n" -" except AttributeError: pass\n" -" else: Coroutine.register(_cython_coroutine_type)\n" - ); - return module; -} -#endif -static int __Pyx_patch_abc(void) { -#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) - static int abc_patched = 0; - if (CYTHON_REGISTER_ABCS && !abc_patched) { - PyObject *module; - module = PyImport_ImportModule((PY_MAJOR_VERSION >= 3) ? "collections.abc" : "collections"); - if (unlikely(!module)) { - PyErr_WriteUnraisable(NULL); - if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, - ((PY_MAJOR_VERSION >= 3) ? - "Cython module failed to register with collections.abc module" : - "Cython module failed to register with collections module"), 1) < 0)) { - return -1; - } - } else { - module = __Pyx_patch_abc_module(module); - abc_patched = 1; - if (unlikely(!module)) - return -1; - Py_DECREF(module); - } - module = PyImport_ImportModule("backports_abc"); - if (module) { - module = __Pyx_patch_abc_module(module); - Py_XDECREF(module); - } - if (!module) { - PyErr_Clear(); - } - } -#else - if ((0)) __Pyx_Coroutine_patch_module(NULL, NULL); -#endif - return 0; -} - -/* Generator */ -static PyMethodDef __pyx_Generator_methods[] = { - {"send", (PyCFunction) __Pyx_Coroutine_Send, METH_O, - (char*) PyDoc_STR("send(arg) -> send 'arg' into generator,\nreturn next yielded value or raise StopIteration.")}, - {"throw", (PyCFunction) __Pyx_Coroutine_Throw, METH_VARARGS, - (char*) PyDoc_STR("throw(typ[,val[,tb]]) -> raise exception in generator,\nreturn next yielded value or raise StopIteration.")}, - {"close", (PyCFunction) __Pyx_Coroutine_Close_Method, METH_NOARGS, - (char*) PyDoc_STR("close() -> raise GeneratorExit inside generator.")}, - {0, 0, 0, 0} -}; -static PyMemberDef __pyx_Generator_memberlist[] = { - {(char *) "gi_running", T_BOOL, offsetof(__pyx_CoroutineObject, is_running), READONLY, NULL}, - {(char*) 
"gi_yieldfrom", T_OBJECT, offsetof(__pyx_CoroutineObject, yieldfrom), READONLY, - (char*) PyDoc_STR("object being iterated by 'yield from', or None")}, - {(char*) "gi_code", T_OBJECT, offsetof(__pyx_CoroutineObject, gi_code), READONLY, NULL}, - {(char *) "__module__", T_OBJECT, offsetof(__pyx_CoroutineObject, gi_modulename), 0, 0}, -#if CYTHON_USE_TYPE_SPECS - {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CoroutineObject, gi_weakreflist), READONLY, 0}, -#endif - {0, 0, 0, 0, 0} -}; -static PyGetSetDef __pyx_Generator_getsets[] = { - {(char *) "__name__", (getter)__Pyx_Coroutine_get_name, (setter)__Pyx_Coroutine_set_name, - (char*) PyDoc_STR("name of the generator"), 0}, - {(char *) "__qualname__", (getter)__Pyx_Coroutine_get_qualname, (setter)__Pyx_Coroutine_set_qualname, - (char*) PyDoc_STR("qualified name of the generator"), 0}, - {(char *) "gi_frame", (getter)__Pyx_Coroutine_get_frame, NULL, - (char*) PyDoc_STR("Frame of the generator"), 0}, - {0, 0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_GeneratorType_slots[] = { - {Py_tp_dealloc, (void *)__Pyx_Coroutine_dealloc}, - {Py_tp_traverse, (void *)__Pyx_Coroutine_traverse}, - {Py_tp_iter, (void *)PyObject_SelfIter}, - {Py_tp_iternext, (void *)__Pyx_Generator_Next}, - {Py_tp_methods, (void *)__pyx_Generator_methods}, - {Py_tp_members, (void *)__pyx_Generator_memberlist}, - {Py_tp_getset, (void *)__pyx_Generator_getsets}, - {Py_tp_getattro, (void *) __Pyx_PyObject_GenericGetAttrNoDict}, -#if CYTHON_USE_TP_FINALIZE - {Py_tp_finalize, (void *)__Pyx_Coroutine_del}, -#endif - {0, 0}, -}; -static PyType_Spec __pyx_GeneratorType_spec = { - __PYX_TYPE_MODULE_PREFIX "generator", - sizeof(__pyx_CoroutineObject), - 0, - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, - __pyx_GeneratorType_slots -}; -#else -static PyTypeObject __pyx_GeneratorType_type = { - PyVarObject_HEAD_INIT(0, 0) - __PYX_TYPE_MODULE_PREFIX "generator", - sizeof(__pyx_CoroutineObject), - 0, - 
(destructor) __Pyx_Coroutine_dealloc, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, - 0, - (traverseproc) __Pyx_Coroutine_traverse, - 0, - 0, - offsetof(__pyx_CoroutineObject, gi_weakreflist), - 0, - (iternextfunc) __Pyx_Generator_Next, - __pyx_Generator_methods, - __pyx_Generator_memberlist, - __pyx_Generator_getsets, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -#if CYTHON_USE_TP_FINALIZE - 0, -#else - __Pyx_Coroutine_del, -#endif - 0, -#if CYTHON_USE_TP_FINALIZE - __Pyx_Coroutine_del, -#elif PY_VERSION_HEX >= 0x030400a1 - 0, -#endif -#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, -#endif -#if __PYX_NEED_TP_PRINT_SLOT - 0, -#endif -#if PY_VERSION_HEX >= 0x030C0000 - 0, -#endif -#if PY_VERSION_HEX >= 0x030d00A4 - 0, -#endif -#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, -#endif -}; -#endif -static int __pyx_Generator_init(PyObject *module) { -#if CYTHON_USE_TYPE_SPECS - __pyx_GeneratorType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_GeneratorType_spec, NULL); -#else - CYTHON_UNUSED_VAR(module); - __pyx_GeneratorType_type.tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - __pyx_GeneratorType_type.tp_iter = PyObject_SelfIter; - __pyx_GeneratorType = __Pyx_FetchCommonType(&__pyx_GeneratorType_type); -#endif - if (unlikely(!__pyx_GeneratorType)) { - return -1; - } - return 0; -} - -/* CheckBinaryVersion */ -static unsigned long __Pyx_get_runtime_version(void) { -#if __PYX_LIMITED_VERSION_HEX >= 0x030B00A4 - return Py_Version & ~0xFFUL; -#else - const char* rt_version = Py_GetVersion(); - unsigned long version = 0; - unsigned long factor = 0x01000000UL; - unsigned int digit = 0; - int i = 0; - while (factor) { - while ('0' <= rt_version[i] && rt_version[i] <= '9') { - digit = digit * 10 + (unsigned int) (rt_version[i] - '0'); - ++i; - } 
- version += factor * digit; - if (rt_version[i] != '.') - break; - digit = 0; - factor >>= 8; - ++i; - } - return version; -#endif -} -static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer) { - const unsigned long MAJOR_MINOR = 0xFFFF0000UL; - if ((rt_version & MAJOR_MINOR) == (ct_version & MAJOR_MINOR)) - return 0; - if (likely(allow_newer && (rt_version & MAJOR_MINOR) > (ct_version & MAJOR_MINOR))) - return 1; - { - char message[200]; - PyOS_snprintf(message, sizeof(message), - "compile time Python version %d.%d " - "of module '%.100s' " - "%s " - "runtime version %d.%d", - (int) (ct_version >> 24), (int) ((ct_version >> 16) & 0xFF), - __Pyx_MODULE_NAME, - (allow_newer) ? "was newer than" : "does not match", - (int) (rt_version >> 24), (int) ((rt_version >> 16) & 0xFF) - ); - return PyErr_WarnEx(NULL, message, 1); - } -} - -/* InitStrings */ -#if PY_MAJOR_VERSION >= 3 -static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) { - if (t.is_unicode | t.is_str) { - if (t.intern) { - *str = PyUnicode_InternFromString(t.s); - } else if (t.encoding) { - *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL); - } else { - *str = PyUnicode_FromStringAndSize(t.s, t.n - 1); - } - } else { - *str = PyBytes_FromStringAndSize(t.s, t.n - 1); - } - if (!*str) - return -1; - if (PyObject_Hash(*str) == -1) - return -1; - return 0; -} -#endif -static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { - while (t->p) { - #if PY_MAJOR_VERSION >= 3 - __Pyx_InitString(*t, t->p); - #else - if (t->is_unicode) { - *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); - } else if (t->intern) { - *t->p = PyString_InternFromString(t->s); - } else { - *t->p = PyString_FromStringAndSize(t->s, t->n - 1); - } - if (!*t->p) - return -1; - if (PyObject_Hash(*t->p) == -1) - return -1; - #endif - ++t; - } - return 0; -} - -#include -static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) { - size_t len = strlen(s); - if 
(unlikely(len > (size_t) PY_SSIZE_T_MAX)) { - PyErr_SetString(PyExc_OverflowError, "byte string is too long"); - return -1; - } - return (Py_ssize_t) len; -} -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { - Py_ssize_t len = __Pyx_ssize_strlen(c_str); - if (unlikely(len < 0)) return NULL; - return __Pyx_PyUnicode_FromStringAndSize(c_str, len); -} -static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char* c_str) { - Py_ssize_t len = __Pyx_ssize_strlen(c_str); - if (unlikely(len < 0)) return NULL; - return PyByteArray_FromStringAndSize(c_str, len); -} -static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { - Py_ssize_t ignore; - return __Pyx_PyObject_AsStringAndSize(o, &ignore); -} -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT -#if !CYTHON_PEP393_ENABLED -static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { - char* defenc_c; - PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); - if (!defenc) return NULL; - defenc_c = PyBytes_AS_STRING(defenc); -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - { - char* end = defenc_c + PyBytes_GET_SIZE(defenc); - char* c; - for (c = defenc_c; c < end; c++) { - if ((unsigned char) (*c) >= 128) { - PyUnicode_AsASCIIString(o); - return NULL; - } - } - } -#endif - *length = PyBytes_GET_SIZE(defenc); - return defenc_c; -} -#else -static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { - if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - if (likely(PyUnicode_IS_ASCII(o))) { - *length = PyUnicode_GET_LENGTH(o); - return PyUnicode_AsUTF8(o); - } else { - PyUnicode_AsASCIIString(o); - return NULL; - } -#else - return PyUnicode_AsUTF8AndSize(o, length); -#endif -} -#endif -#endif -static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { -#if 
__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT - if ( -#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - __Pyx_sys_getdefaultencoding_not_ascii && -#endif - PyUnicode_Check(o)) { - return __Pyx_PyUnicode_AsStringAndSize(o, length); - } else -#endif -#if (!CYTHON_COMPILING_IN_PYPY && !CYTHON_COMPILING_IN_LIMITED_API) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) - if (PyByteArray_Check(o)) { - *length = PyByteArray_GET_SIZE(o); - return PyByteArray_AS_STRING(o); - } else -#endif - { - char* result; - int r = PyBytes_AsStringAndSize(o, &result, length); - if (unlikely(r < 0)) { - return NULL; - } else { - return result; - } - } -} -static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { - int is_true = x == Py_True; - if (is_true | (x == Py_False) | (x == Py_None)) return is_true; - else return PyObject_IsTrue(x); -} -static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { - int retval; - if (unlikely(!x)) return -1; - retval = __Pyx_PyObject_IsTrue(x); - Py_DECREF(x); - return retval; -} -static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { - __Pyx_TypeName result_type_name = __Pyx_PyType_GetName(Py_TYPE(result)); -#if PY_MAJOR_VERSION >= 3 - if (PyLong_Check(result)) { - if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "__int__ returned non-int (type " __Pyx_FMT_TYPENAME "). 
" - "The ability to return an instance of a strict subclass of int is deprecated, " - "and may be removed in a future version of Python.", - result_type_name)) { - __Pyx_DECREF_TypeName(result_type_name); - Py_DECREF(result); - return NULL; - } - __Pyx_DECREF_TypeName(result_type_name); - return result; - } -#endif - PyErr_Format(PyExc_TypeError, - "__%.4s__ returned non-%.4s (type " __Pyx_FMT_TYPENAME ")", - type_name, type_name, result_type_name); - __Pyx_DECREF_TypeName(result_type_name); - Py_DECREF(result); - return NULL; -} -static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { -#if CYTHON_USE_TYPE_SLOTS - PyNumberMethods *m; -#endif - const char *name = NULL; - PyObject *res = NULL; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x) || PyLong_Check(x))) -#else - if (likely(PyLong_Check(x))) -#endif - return __Pyx_NewRef(x); -#if CYTHON_USE_TYPE_SLOTS - m = Py_TYPE(x)->tp_as_number; - #if PY_MAJOR_VERSION < 3 - if (m && m->nb_int) { - name = "int"; - res = m->nb_int(x); - } - else if (m && m->nb_long) { - name = "long"; - res = m->nb_long(x); - } - #else - if (likely(m && m->nb_int)) { - name = "int"; - res = m->nb_int(x); - } - #endif -#else - if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { - res = PyNumber_Int(x); - } -#endif - if (likely(res)) { -#if PY_MAJOR_VERSION < 3 - if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { -#else - if (unlikely(!PyLong_CheckExact(res))) { -#endif - return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); - } - } - else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, - "an integer is required"); - } - return res; -} -static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { - Py_ssize_t ival; - PyObject *x; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_CheckExact(b))) { - if (sizeof(Py_ssize_t) >= sizeof(long)) - return PyInt_AS_LONG(b); - else - return PyInt_AsSsize_t(b); - } -#endif - if (likely(PyLong_CheckExact(b))) { - #if CYTHON_USE_PYLONG_INTERNALS - if 
(likely(__Pyx_PyLong_IsCompact(b))) { - return __Pyx_PyLong_CompactValue(b); - } else { - const digit* digits = __Pyx_PyLong_Digits(b); - const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(b); - switch (size) { - case 2: - if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { - return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -2: - if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case 3: - if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { - return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -3: - if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case 4: - if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { - return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -4: - if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - } - } - #endif - return PyLong_AsSsize_t(b); - } - x = PyNumber_Index(b); - if (!x) return -1; - ival = PyInt_AsSsize_t(x); - Py_DECREF(x); - return ival; -} -static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { - if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { - return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o); -#if PY_MAJOR_VERSION < 3 - } else if (likely(PyInt_CheckExact(o))) { - return PyInt_AS_LONG(o); -#endif - } else { - Py_ssize_t ival; - PyObject *x; - x = PyNumber_Index(o); - if (!x) return -1; - ival = PyInt_AsLong(x); - Py_DECREF(x); - return ival; - } 
-} -static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { - return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); -} -static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { - return PyInt_FromSize_t(ival); -} - - -/* #### Code section: utility_code_pragmas_end ### */ -#ifdef _MSC_VER -#pragma warning( pop ) -#endif - - - -/* #### Code section: end ### */ -#endif /* Py_PYTHON_H */ From b203b88d45a0745fb2f744dd3402fd50ce7b0fc9 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 3 Jan 2025 14:07:04 +1100 Subject: [PATCH 18/43] ignore dev files --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 94cfaadd..0fc8e9a4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ # Mac stuff .DS_Store +# Development +dev/ + # Versioning src/jcvi/version.py From e5be70067c6559c45d4b7d550809fb689dacf10d Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 3 Jan 2025 14:08:13 +1100 Subject: [PATCH 19/43] Use setuptools for cython modules --- pyproject.toml | 47 ++++++++++------------------------------------- setup.py | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 37 deletions(-) create mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml index c56ca412..92a0f42b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,11 +3,10 @@ requires = [ "hatchling", # Build backend "hatch-vcs", # Version control system plugin for dynamic versioning - "hatch-cython", # Cython plugin for compiling C extensions - "cython", # Cython for compiling C extensions + "setuptools", # Setuptools for compiling C extensions + "cython", # Cython for compiling C extensions "numpy", # NumPy for numerical operations and C extension includes ] - build-backend = "hatchling.build" # Project metadata and configuration @@ -62,10 +61,8 @@ dependencies = [ "webcolors" ] -# Indicates that the version is dynamically determined dynamic = ["version"] -# Optional dependencies for testing 
[project.optional-dependencies] tests = [ "PyYAML", @@ -75,56 +72,32 @@ tests = [ "mock" ] -# Project URLs [project.urls] homepage = "http://github.com/tanghaibao/jcvi" -# Hatch metadata configuration [tool.hatch.metadata] allow-direct-references = true -# Hatch build configuration [tool.hatch.build] -source = "src" # Source directory -include = [ - "src/jcvi/**", # Include all files in the jcvi directory - "README.md", -] -exclude = [ - ".github/*", # Exclude GitHub workflows and configurations - "docker/*", # Exclude Docker-related files - "tests/*", # Exclude test files - ".*" # Exclude hidden files -] +packages = ["jcvi"] +source = "src" -# Hatch version configuration using VCS [tool.hatch.version] -source = "vcs" # Use version control system for versioning +source = "vcs" -# Version file location for VCS [tool.hatch.build.hooks.vcs] -version-file = "src/jcvi/version.py" # Path to write the version information +version-file = "src/jcvi/version.py" -# Version control system (VCS) versioning [tool.hatch.version.vcs] -tag-pattern = "v*" # Git tags starting with 'v' will be used for versioning +tag-pattern = "v*" fallback-version = "0.0.0" -# Cython build configuration -[tool.hatch.build.hooks.cython] -extensions = [ - {name = "jcvi_assembly_chic", sources = ["src/jcvi/assembly/chic.pyx"], include-dirs = ["{numpy_include}"], extra-compile-args = ["-O3"]}, - {name = "jcvi_formats_cblast", sources = ["src/jcvi/formats/cblast.pyx"], extra-compile-args = ["-O3"]} -] - [tool.hatch.build.targets.sdist] include = [ - "src/**", + "src/**/*.py", + "src/**/*.pyx", "README.md", ] [tool.hatch.build.targets.wheel] -include = [ - "src/**", - "README.md", -] \ No newline at end of file +packages = ["jcvi"] \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..ceb375e8 --- /dev/null +++ b/setup.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +"""Package setup for Cython extensions only""" + +from Cython.Build import build_ext +from 
setuptools import setup, Extension +import numpy as np + +ext_modules = [ + Extension( + "jcvi.assembly.chic", + ["src/jcvi/assembly/chic.pyx"], + include_dirs=[np.get_include()], + extra_compile_args=["-O3"], + ), + Extension( + "jcvi.formats.cblast", + ["src/jcvi/formats/cblast.pyx"], + extra_compile_args=["-O3"] + ), +] + +setup( + ext_modules=ext_modules, + cmdclass={"build_ext": build_ext}, +) From a6407761b472e016acdf08256427b1f69ce313e1 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 3 Jan 2025 14:31:58 +1100 Subject: [PATCH 20/43] automate run setup.py for cython build --- build.py | 8 ++++++++ pyproject.toml | 9 ++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 build.py diff --git a/build.py b/build.py new file mode 100644 index 00000000..1bcaac52 --- /dev/null +++ b/build.py @@ -0,0 +1,8 @@ +import subprocess +from hatchling.builders.hooks.plugin.interface import BuildHookInterface + +class CustomBuildHook(BuildHookInterface): + def initialize(self, version, build_data): + # Run setup.py build_ext before main build + subprocess.check_call(["python", "setup.py", "build_ext", "--inplace"]) + return super().initialize(version, build_data) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 92a0f42b..8a5d2952 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,9 @@ requires = [ ] build-backend = "hatchling.build" +[tool.hatch.build.hooks.custom] +path = "build.py" + # Project metadata and configuration [project] name = "jcvi" @@ -79,8 +82,8 @@ homepage = "http://github.com/tanghaibao/jcvi" allow-direct-references = true [tool.hatch.build] -packages = ["jcvi"] -source = "src" +packages = ["src/jcvi"] +#source = "src" [tool.hatch.version] source = "vcs" @@ -100,4 +103,4 @@ include = [ ] [tool.hatch.build.targets.wheel] -packages = ["jcvi"] \ No newline at end of file +packages = ["src/jcvi"] \ No newline at end of file From e6a722bbd2614af751f7cfc58ebe774448a18692 Mon Sep 17 00:00:00 2001 
From: Adam Taranto Date: Fri, 3 Jan 2025 15:49:05 +1100 Subject: [PATCH 21/43] setup_magick_home() assumes ImageMagick always installed with homebrew on MacOS, change error to warning if homebrew path not found. --- src/jcvi/apps/base.py | 2 +- src/jcvi/graphics/grabseeds.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/jcvi/apps/base.py b/src/jcvi/apps/base.py index cdee0853..ba9c761f 100644 --- a/src/jcvi/apps/base.py +++ b/src/jcvi/apps/base.py @@ -1232,7 +1232,7 @@ def setup_magick_home(): if op.isdir(magick_home): os.environ["MAGICK_HOME"] = magick_home else: - logger.error("MAGICK_HOME not set") + logger.warning("MAGICK_HOME not set") def popen(cmd, debug=True, shell="/bin/bash"): diff --git a/src/jcvi/graphics/grabseeds.py b/src/jcvi/graphics/grabseeds.py index 14bcbe09..756a2d78 100644 --- a/src/jcvi/graphics/grabseeds.py +++ b/src/jcvi/graphics/grabseeds.py @@ -18,6 +18,7 @@ from ..apps.base import setup_magick_home +# Attempt to set MAGICK_HOME ENV variable if imagemagick installed with homebrew on Mac setup_magick_home() from PIL.Image import open as iopen From c6d1140bfb13e22aab1a394ef748c76e7763847c Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 3 Jan 2025 15:51:17 +1100 Subject: [PATCH 22/43] install non-pip deps using conda. Install libmagic without homebrew.
--- env_osx64.yml | 5 ++++- environment.yml | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/env_osx64.yml b/env_osx64.yml index 758acfc1..74bf21e8 100644 --- a/env_osx64.yml +++ b/env_osx64.yml @@ -4,7 +4,10 @@ channels: - bioconda/osx-64 dependencies: - python 3.12 - - ImageMagick + - bedtools + - imagemagick + - libmagic # System-level magic library + - wand # Python bindings for ImageMagick - pip - pip: - hatch diff --git a/environment.yml b/environment.yml index 30f35514..5ecec614 100644 --- a/environment.yml +++ b/environment.yml @@ -4,6 +4,10 @@ channels: - bioconda dependencies: - python >=3.12 + - bedtools + - imagemagick + - libmagic # System-level magic library + - wand # Python bindings for ImageMagick - pip - pip: - pytest From 66583b82ec07363d5c589d2cf4c465e83416d6c3 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 3 Jan 2025 15:51:55 +1100 Subject: [PATCH 23/43] fix bug - incorrect selection of wget on MacOS without wget --- src/jcvi/utils/ez_setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/jcvi/utils/ez_setup.py b/src/jcvi/utils/ez_setup.py index 65d53c8a..3dfd4d51 100644 --- a/src/jcvi/utils/ez_setup.py +++ b/src/jcvi/utils/ez_setup.py @@ -101,7 +101,9 @@ def has_wget(): try: try: subprocess.check_call(cmd, stdout=devnull, stderr=devnull) - except FileNotFoundError: + except (FileNotFoundError, NotADirectoryError): + return False + except subprocess.CalledProcessError: return False finally: devnull.close() From f11b1a04bd424223d666c61f1860dbd873d3a756 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 3 Jan 2025 22:16:39 +1100 Subject: [PATCH 24/43] version to _version --- .gitignore | 2 +- pyproject.toml | 2 +- src/jcvi/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 0fc8e9a4..f42334d1 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,7 @@ dev/ # Versioning -src/jcvi/version.py +src/jcvi/_version.py # Ignore Cython
generated C files src/jcvi/assembly/chic.c diff --git a/pyproject.toml b/pyproject.toml index 8a5d2952..8747c2c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,7 @@ packages = ["src/jcvi"] source = "vcs" [tool.hatch.build.hooks.vcs] -version-file = "src/jcvi/version.py" +version-file = "src/jcvi/_version.py" [tool.hatch.version.vcs] tag-pattern = "v*" diff --git a/src/jcvi/__init__.py b/src/jcvi/__init__.py index ad657eb5..dbd33d8d 100644 --- a/src/jcvi/__init__.py +++ b/src/jcvi/__init__.py @@ -12,7 +12,7 @@ __status__ = "Development" try: - from .version import __version__ # noqa + from ._version import __version__ # noqa except ImportError as exc: # pragma: no cover raise ImportError( "Failed to find (autogenerated) version.py. " From 81ab0b1037bf86d7a13788eb38a3b3656a8ac485 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 3 Jan 2025 23:52:21 +1100 Subject: [PATCH 25/43] bump min Python version. --- pyproject.toml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8747c2c5..f79b6d4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ path = "build.py" name = "jcvi" description = "Python utility libraries on genome assembly, annotation and comparative genomics" readme = "README.md" -requires-python = ">=3.6" +requires-python = ">=3.8" license = {text = "BSD"} authors = [ {name = "Haibao Tang", email = "tanghaibao@gmail.com"}, @@ -34,17 +34,16 @@ classifiers = [ ] dependencies = [ - "CrossMap", - "Wand", "biopython", "boto3", "brewer2mpl", + "CrossMap", "deap", "ete3", "ftpretty", + "genomepy", "gffutils", "goatools", - "genomepy", "graphviz", "jinja2", "matplotlib", @@ -61,6 +60,7 @@ dependencies = [ "scikit-image", "scipy", "seaborn", + "Wand", "webcolors" ] @@ -68,11 +68,11 @@ dynamic = ["version"] [project.optional-dependencies] tests = [ - "PyYAML", - "pytest", - "pytest-cov", + "mock", "pytest-benchmark", - "mock" + "pytest-cov", + "pytest", + "PyYAML", 
] [project.urls] From 34a5eb976de8336da9be2641fb16dc050913793a Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Fri, 3 Jan 2025 23:53:26 +1100 Subject: [PATCH 26/43] conda env instructions --- README.md | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 15f862c2..c2115ca5 100644 --- a/README.md +++ b/README.md @@ -117,24 +117,48 @@ best way is to install them via `pip install` when you see ## Installation -The easiest way is to install it via PyPI: +**Installing jcvi in a Conda environment:** -```console -pip install jcvi +You can create a Python 3.12 environment with basic dependencies for JCVI using the YAML files in this repo. + +```bash +conda env create -f environment.yml + +conda activate jcvi +``` + +Note: If you are using a Mac with an ARM64 (Apple Silicon) processor, some dependencies are not currently available from Bioconda for this architecture. + +You can instead create a virtual OSX64 env like this: + +```bash +conda env create -f env_osx64.yml + +conda activate jcvi-osx64 ``` -To install the development version: +After activating the Conda environment install JCVI using one of the following options. + +**Installation options:** + +pip install the latest development version directly from this repo. ```console pip install git+git://github.com/tanghaibao/jcvi.git ``` +Install latest release from PyPi. + +```console +pip install jcvi +``` + Alternatively, if you want to install manually: ```console cd ~/code # or any directory of your choice -git clone git://github.com/tanghaibao/jcvi.git -pip install -e . +git clone git://github.com/tanghaibao/jcvi.git && cd jcvi +pip install -e '.[tests]' ``` In addition, a few module might ask for locations of external programs, From 63310a77c187eaf04d5c59c94e3042e83144e59a Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sun, 19 Feb 2023 13:41:49 +1100 Subject: [PATCH 27/43] Do not try to set usetex=False or resave if savefig fails. 
Instead just return error msg. --- jcvi/graphics/base.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/jcvi/graphics/base.py b/jcvi/graphics/base.py index c52a6d70..c4e451a5 100644 --- a/jcvi/graphics/base.py +++ b/jcvi/graphics/base.py @@ -352,7 +352,11 @@ def savefig(figname, dpi=150, iopts=None, cleanup=True, transparent=False): logging.debug(f"Attempting save as: {figname}") plt.savefig(figname, dpi=dpi, format=format, transparent=transparent) except Exception as e: - logger.error("savefig failed with message:\n%s", e) + message = "savefig failed with message:" + message += "\n{0}".format(str(e)) + logger.error(message) + logger.debug(f"Matplotlib backend is: {mpl.get_backend()}") + logger.debug(f"Attempted save as: {format}") logger.info("Try running again with --notex option to disable latex.") if op.exists(figname): if op.getsize(figname) < 1000: From 5535264da0fd08bb414107a98beca521659b893f Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sun, 19 Feb 2023 19:31:53 +1100 Subject: [PATCH 28/43] Replace check_call with check_out in sh() Allows collection of outputs. Added redirect_error arg so stderr can be redirected to stdout and user in error logging. 
--- jcvi/apps/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/jcvi/apps/base.py b/jcvi/apps/base.py index cdee0853..cc6b13fe 100644 --- a/jcvi/apps/base.py +++ b/jcvi/apps/base.py @@ -29,6 +29,9 @@ from typing import Any, Collection, List, Optional, Tuple, Union from urllib.parse import urlencode +#from optparse import OptionParser as OptionP, OptionGroup, SUPPRESS_HELP + + from natsort import natsorted from rich.console import Console from rich.logging import RichHandler From 95fe1c0e081577bacae9ba0bf676af18171fcc19 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 12:30:06 +1100 Subject: [PATCH 29/43] Move files back to jcvi/ --- jcvi/__init__.py | 21 + jcvi/_version.py | 16 + jcvi/algorithms/__init__.py | 0 jcvi/algorithms/__main__.py | 11 + jcvi/algorithms/ec.py | 213 + jcvi/algorithms/formula.py | 255 + jcvi/algorithms/graph.py | 514 + jcvi/algorithms/lis.py | 214 + jcvi/algorithms/lpsolve.py | 802 + jcvi/algorithms/matrix.py | 209 + jcvi/algorithms/maxsum.py | 51 + jcvi/algorithms/supermap.py | 176 + jcvi/algorithms/tsp.py | 393 + jcvi/annotation/__init__.py | 0 jcvi/annotation/__main__.py | 12 + jcvi/annotation/ahrd.py | 708 + jcvi/annotation/automaton.py | 287 + jcvi/annotation/depth.py | 240 + jcvi/annotation/evm.py | 268 + jcvi/annotation/maker.py | 537 + jcvi/annotation/pasa.py | 595 + jcvi/annotation/qc.py | 376 + jcvi/annotation/reformat.py | 1398 ++ jcvi/annotation/stats.py | 386 + jcvi/annotation/train.py | 227 + jcvi/annotation/trinity.py | 173 + jcvi/apps/__init__.py | 0 jcvi/apps/__main__.py | 11 + jcvi/apps/align.py | 713 + jcvi/apps/base.py | 2277 +++ jcvi/apps/biomart.py | 426 + jcvi/apps/blastplus.py | 132 + jcvi/apps/bowtie.py | 213 + jcvi/apps/bwa.py | 301 + jcvi/apps/cdhit.py | 260 + jcvi/apps/emboss.py | 103 + jcvi/apps/fetch.py | 729 + jcvi/apps/gbsubmit.py | 676 + jcvi/apps/gmap.py | 253 + jcvi/apps/grid.py | 664 + jcvi/apps/lastz.py | 272 + jcvi/apps/mask.py | 126 + jcvi/apps/phylo.py | 1204 ++ 
jcvi/apps/r.py | 82 + jcvi/apps/restriction.py | 168 + jcvi/apps/softlink.py | 155 + jcvi/apps/uclust.py | 1106 ++ jcvi/apps/uniprot.py | 216 + jcvi/apps/vecscreen.py | 133 + jcvi/assembly/__init__.py | 0 jcvi/assembly/__main__.py | 11 + jcvi/assembly/allmaps.py | 2018 +++ jcvi/assembly/allpaths.py | 530 + jcvi/assembly/automaton.py | 482 + jcvi/assembly/base.py | 210 + jcvi/assembly/chic.c | 14222 ++++++++++++++++++ jcvi/assembly/chic.pyx | 105 + jcvi/assembly/coverage.py | 160 + jcvi/assembly/gaps.py | 294 + jcvi/assembly/geneticmap.py | 714 + jcvi/assembly/goldenpath.py | 1192 ++ jcvi/assembly/hic.py | 1772 +++ jcvi/assembly/kmer.py | 1410 ++ jcvi/assembly/opticalmap.py | 427 + jcvi/assembly/patch.py | 968 ++ jcvi/assembly/postprocess.py | 537 + jcvi/assembly/preprocess.py | 735 + jcvi/assembly/sim.py | 215 + jcvi/assembly/soap.py | 331 + jcvi/assembly/syntenypath.py | 553 + jcvi/compara/__init__.py | 0 jcvi/compara/__main__.py | 11 + jcvi/compara/base.py | 164 + jcvi/compara/blastfilter.py | 325 + jcvi/compara/catalog.py | 982 ++ jcvi/compara/fractionation.py | 854 ++ jcvi/compara/ks.py | 1176 ++ jcvi/compara/pad.py | 314 + jcvi/compara/pedigree.py | 270 + jcvi/compara/phylogeny.py | 91 + jcvi/compara/quota.py | 288 + jcvi/compara/reconstruct.py | 379 + jcvi/compara/synfind.py | 279 + jcvi/compara/synteny.py | 1883 +++ jcvi/formats/__init__.py | 0 jcvi/formats/__main__.py | 11 + jcvi/formats/agp.py | 2188 +++ jcvi/formats/base.py | 1196 ++ jcvi/formats/bed.py | 2504 ++++ jcvi/formats/blast.py | 1543 ++ jcvi/formats/cblast.c | 16862 ++++++++++++++++++++++ jcvi/formats/cblast.pyx | 210 + jcvi/formats/cdt.py | 122 + jcvi/formats/chain.py | 311 + jcvi/formats/contig.py | 182 + jcvi/formats/coords.py | 612 + jcvi/formats/excel.py | 246 + jcvi/formats/fasta.py | 2642 ++++ jcvi/formats/fastq.py | 1104 ++ jcvi/formats/genbank.py | 522 + jcvi/formats/gff.py | 3768 +++++ jcvi/formats/html.py | 158 + jcvi/formats/maf.py | 286 + jcvi/formats/obo.py | 106 + 
jcvi/formats/paf.py | 127 + jcvi/formats/pdf.py | 101 + jcvi/formats/psl.py | 395 + jcvi/formats/pyblast.py | 101 + jcvi/formats/sam.py | 1025 ++ jcvi/formats/sizes.py | 289 + jcvi/formats/vcf.py | 849 ++ jcvi/graphics/__init__.py | 0 jcvi/graphics/__main__.py | 11 + jcvi/graphics/align.py | 554 + jcvi/graphics/assembly.py | 516 + jcvi/graphics/base.py | 843 ++ jcvi/graphics/blastplot.py | 345 + jcvi/graphics/chromosome.py | 730 + jcvi/graphics/coverage.py | 245 + jcvi/graphics/dotplot.py | 549 + jcvi/graphics/glyph.py | 761 + jcvi/graphics/grabseeds.py | 881 ++ jcvi/graphics/heatmap.py | 176 + jcvi/graphics/histogram.py | 387 + jcvi/graphics/karyotype.py | 476 + jcvi/graphics/landscape.py | 1316 ++ jcvi/graphics/mummerplot.py | 158 + jcvi/graphics/synteny.py | 736 + jcvi/graphics/table.py | 184 + jcvi/graphics/tree.py | 688 + jcvi/graphics/wheel.py | 225 + jcvi/projects/__init__.py | 0 jcvi/projects/__main__.py | 11 + jcvi/projects/age.py | 738 + jcvi/projects/allmaps.py | 532 + jcvi/projects/bites.py | 229 + jcvi/projects/ies.py | 426 + jcvi/projects/jcvi.py | 335 + jcvi/projects/misc.py | 777 + jcvi/projects/napus.py | 858 ++ jcvi/projects/pineapple.py | 411 + jcvi/projects/str.py | 2271 +++ jcvi/projects/sugarcane.py | 807 ++ jcvi/projects/synfind.py | 860 ++ jcvi/projects/tgbs.py | 696 + jcvi/projects/vanilla.py | 450 + jcvi/utils/__init__.py | 0 jcvi/utils/__main__.py | 11 + jcvi/utils/aws.py | 810 ++ jcvi/utils/cbook.py | 465 + jcvi/utils/console.py | 19 + jcvi/utils/data/Airswing.ttf | Bin 0 -> 16912 bytes jcvi/utils/data/Collegia.ttf | Bin 0 -> 103940 bytes jcvi/utils/data/HookedUp.ttf | Bin 0 -> 20468 bytes jcvi/utils/data/Humor-Sans.ttf | Bin 0 -> 25832 bytes jcvi/utils/data/TREDs.meta.csv | 33 + jcvi/utils/data/__init__.py | 0 jcvi/utils/data/adapters.fasta | 38 + jcvi/utils/data/blosum80.mat | 40 + jcvi/utils/data/chrY.hg38.unique_ccn.gc | 300 + jcvi/utils/data/colorchecker.txt | 4 + jcvi/utils/data/hg38.band.txt | 1294 ++ 
jcvi/utils/data/hg38.chrom.sizes | 455 + jcvi/utils/data/instance.json | 42 + jcvi/utils/db.py | 334 + jcvi/utils/ez_setup.py | 167 + jcvi/utils/grouper.py | 114 + jcvi/utils/orderedcollections.py | 297 + jcvi/utils/range.py | 529 + jcvi/utils/table.py | 145 + jcvi/utils/taxonomy.py | 200 + jcvi/utils/validator.py | 56 + jcvi/utils/webcolors.py | 54 + jcvi/variation/__init__.py | 0 jcvi/variation/__main__.py | 11 + jcvi/variation/cnv.py | 1509 ++ jcvi/variation/deconvolute.py | 258 + jcvi/variation/delly.py | 343 + jcvi/variation/impute.py | 384 + jcvi/variation/phase.py | 132 + jcvi/variation/snp.py | 369 + jcvi/variation/str.py | 1568 ++ 182 files changed, 118827 insertions(+) create mode 100644 jcvi/__init__.py create mode 100644 jcvi/_version.py create mode 100644 jcvi/algorithms/__init__.py create mode 100644 jcvi/algorithms/__main__.py create mode 100644 jcvi/algorithms/ec.py create mode 100644 jcvi/algorithms/formula.py create mode 100644 jcvi/algorithms/graph.py create mode 100755 jcvi/algorithms/lis.py create mode 100755 jcvi/algorithms/lpsolve.py create mode 100644 jcvi/algorithms/matrix.py create mode 100644 jcvi/algorithms/maxsum.py create mode 100755 jcvi/algorithms/supermap.py create mode 100644 jcvi/algorithms/tsp.py create mode 100644 jcvi/annotation/__init__.py create mode 100644 jcvi/annotation/__main__.py create mode 100644 jcvi/annotation/ahrd.py create mode 100644 jcvi/annotation/automaton.py create mode 100755 jcvi/annotation/depth.py create mode 100644 jcvi/annotation/evm.py create mode 100644 jcvi/annotation/maker.py create mode 100644 jcvi/annotation/pasa.py create mode 100644 jcvi/annotation/qc.py create mode 100644 jcvi/annotation/reformat.py create mode 100644 jcvi/annotation/stats.py create mode 100644 jcvi/annotation/train.py create mode 100644 jcvi/annotation/trinity.py create mode 100644 jcvi/apps/__init__.py create mode 100644 jcvi/apps/__main__.py create mode 100644 jcvi/apps/align.py create mode 100644 jcvi/apps/base.py create 
mode 100644 jcvi/apps/biomart.py create mode 100755 jcvi/apps/blastplus.py create mode 100644 jcvi/apps/bowtie.py create mode 100644 jcvi/apps/bwa.py create mode 100644 jcvi/apps/cdhit.py create mode 100644 jcvi/apps/emboss.py create mode 100644 jcvi/apps/fetch.py create mode 100644 jcvi/apps/gbsubmit.py create mode 100644 jcvi/apps/gmap.py create mode 100644 jcvi/apps/grid.py create mode 100755 jcvi/apps/lastz.py create mode 100755 jcvi/apps/mask.py create mode 100644 jcvi/apps/phylo.py create mode 100644 jcvi/apps/r.py create mode 100644 jcvi/apps/restriction.py create mode 100644 jcvi/apps/softlink.py create mode 100644 jcvi/apps/uclust.py create mode 100644 jcvi/apps/uniprot.py create mode 100644 jcvi/apps/vecscreen.py create mode 100644 jcvi/assembly/__init__.py create mode 100644 jcvi/assembly/__main__.py create mode 100644 jcvi/assembly/allmaps.py create mode 100644 jcvi/assembly/allpaths.py create mode 100644 jcvi/assembly/automaton.py create mode 100644 jcvi/assembly/base.py create mode 100644 jcvi/assembly/chic.c create mode 100644 jcvi/assembly/chic.pyx create mode 100644 jcvi/assembly/coverage.py create mode 100644 jcvi/assembly/gaps.py create mode 100644 jcvi/assembly/geneticmap.py create mode 100644 jcvi/assembly/goldenpath.py create mode 100644 jcvi/assembly/hic.py create mode 100644 jcvi/assembly/kmer.py create mode 100644 jcvi/assembly/opticalmap.py create mode 100644 jcvi/assembly/patch.py create mode 100644 jcvi/assembly/postprocess.py create mode 100644 jcvi/assembly/preprocess.py create mode 100644 jcvi/assembly/sim.py create mode 100644 jcvi/assembly/soap.py create mode 100644 jcvi/assembly/syntenypath.py create mode 100644 jcvi/compara/__init__.py create mode 100644 jcvi/compara/__main__.py create mode 100644 jcvi/compara/base.py create mode 100755 jcvi/compara/blastfilter.py create mode 100644 jcvi/compara/catalog.py create mode 100644 jcvi/compara/fractionation.py create mode 100644 jcvi/compara/ks.py create mode 100644 jcvi/compara/pad.py 
create mode 100644 jcvi/compara/pedigree.py create mode 100644 jcvi/compara/phylogeny.py create mode 100755 jcvi/compara/quota.py create mode 100644 jcvi/compara/reconstruct.py create mode 100755 jcvi/compara/synfind.py create mode 100755 jcvi/compara/synteny.py create mode 100644 jcvi/formats/__init__.py create mode 100644 jcvi/formats/__main__.py create mode 100644 jcvi/formats/agp.py create mode 100644 jcvi/formats/base.py create mode 100755 jcvi/formats/bed.py create mode 100644 jcvi/formats/blast.py create mode 100644 jcvi/formats/cblast.c create mode 100644 jcvi/formats/cblast.pyx create mode 100644 jcvi/formats/cdt.py create mode 100644 jcvi/formats/chain.py create mode 100644 jcvi/formats/contig.py create mode 100644 jcvi/formats/coords.py create mode 100644 jcvi/formats/excel.py create mode 100644 jcvi/formats/fasta.py create mode 100644 jcvi/formats/fastq.py create mode 100644 jcvi/formats/genbank.py create mode 100644 jcvi/formats/gff.py create mode 100644 jcvi/formats/html.py create mode 100644 jcvi/formats/maf.py create mode 100755 jcvi/formats/obo.py create mode 100644 jcvi/formats/paf.py create mode 100644 jcvi/formats/pdf.py create mode 100755 jcvi/formats/psl.py create mode 100644 jcvi/formats/pyblast.py create mode 100644 jcvi/formats/sam.py create mode 100644 jcvi/formats/sizes.py create mode 100644 jcvi/formats/vcf.py create mode 100644 jcvi/graphics/__init__.py create mode 100644 jcvi/graphics/__main__.py create mode 100644 jcvi/graphics/align.py create mode 100644 jcvi/graphics/assembly.py create mode 100644 jcvi/graphics/base.py create mode 100755 jcvi/graphics/blastplot.py create mode 100644 jcvi/graphics/chromosome.py create mode 100644 jcvi/graphics/coverage.py create mode 100755 jcvi/graphics/dotplot.py create mode 100644 jcvi/graphics/glyph.py create mode 100644 jcvi/graphics/grabseeds.py create mode 100644 jcvi/graphics/heatmap.py create mode 100644 jcvi/graphics/histogram.py create mode 100644 jcvi/graphics/karyotype.py create mode 
100644 jcvi/graphics/landscape.py create mode 100644 jcvi/graphics/mummerplot.py create mode 100644 jcvi/graphics/synteny.py create mode 100644 jcvi/graphics/table.py create mode 100644 jcvi/graphics/tree.py create mode 100644 jcvi/graphics/wheel.py create mode 100644 jcvi/projects/__init__.py create mode 100644 jcvi/projects/__main__.py create mode 100644 jcvi/projects/age.py create mode 100644 jcvi/projects/allmaps.py create mode 100644 jcvi/projects/bites.py create mode 100644 jcvi/projects/ies.py create mode 100644 jcvi/projects/jcvi.py create mode 100644 jcvi/projects/misc.py create mode 100644 jcvi/projects/napus.py create mode 100644 jcvi/projects/pineapple.py create mode 100644 jcvi/projects/str.py create mode 100644 jcvi/projects/sugarcane.py create mode 100644 jcvi/projects/synfind.py create mode 100644 jcvi/projects/tgbs.py create mode 100644 jcvi/projects/vanilla.py create mode 100644 jcvi/utils/__init__.py create mode 100644 jcvi/utils/__main__.py create mode 100644 jcvi/utils/aws.py create mode 100644 jcvi/utils/cbook.py create mode 100644 jcvi/utils/console.py create mode 100755 jcvi/utils/data/Airswing.ttf create mode 100755 jcvi/utils/data/Collegia.ttf create mode 100755 jcvi/utils/data/HookedUp.ttf create mode 100644 jcvi/utils/data/Humor-Sans.ttf create mode 100644 jcvi/utils/data/TREDs.meta.csv create mode 100644 jcvi/utils/data/__init__.py create mode 100644 jcvi/utils/data/adapters.fasta create mode 100644 jcvi/utils/data/blosum80.mat create mode 100644 jcvi/utils/data/chrY.hg38.unique_ccn.gc create mode 100644 jcvi/utils/data/colorchecker.txt create mode 100644 jcvi/utils/data/hg38.band.txt create mode 100644 jcvi/utils/data/hg38.chrom.sizes create mode 100644 jcvi/utils/data/instance.json create mode 100644 jcvi/utils/db.py create mode 100644 jcvi/utils/ez_setup.py create mode 100755 jcvi/utils/grouper.py create mode 100644 jcvi/utils/orderedcollections.py create mode 100644 jcvi/utils/range.py create mode 100644 jcvi/utils/table.py create 
mode 100644 jcvi/utils/taxonomy.py create mode 100644 jcvi/utils/validator.py create mode 100755 jcvi/utils/webcolors.py create mode 100644 jcvi/variation/__init__.py create mode 100644 jcvi/variation/__main__.py create mode 100644 jcvi/variation/cnv.py create mode 100644 jcvi/variation/deconvolute.py create mode 100644 jcvi/variation/delly.py create mode 100644 jcvi/variation/impute.py create mode 100644 jcvi/variation/phase.py create mode 100644 jcvi/variation/snp.py create mode 100644 jcvi/variation/str.py diff --git a/jcvi/__init__.py b/jcvi/__init__.py new file mode 100644 index 00000000..dbd33d8d --- /dev/null +++ b/jcvi/__init__.py @@ -0,0 +1,21 @@ +from datetime import datetime + +__author__ = ( + "Haibao Tang", + "Vivek Krishnakumar", + "Xingtan Zhang", + "Won Cheol Yim", +) +__copyright__ = f"Copyright (c) 2010-{datetime.now().year}, Haibao Tang" +__email__ = "tanghaibao@gmail.com" +__license__ = "BSD" +__status__ = "Development" + +try: + from ._version import __version__ # noqa +except ImportError as exc: # pragma: no cover + raise ImportError( + "Failed to find (autogenerated) version.py. " + "This might be because you are installing from GitHub's tarballs, " + "use the PyPI ones." + ) from exc diff --git a/jcvi/_version.py b/jcvi/_version.py new file mode 100644 index 00000000..d9acf684 --- /dev/null +++ b/jcvi/_version.py @@ -0,0 +1,16 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple, Union + VERSION_TUPLE = Tuple[Union[int, str], ...] 
+else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = '1.4.24.dev30+gf11b1a04' +__version_tuple__ = version_tuple = (1, 4, 24, 'dev30', 'gf11b1a04') diff --git a/jcvi/algorithms/__init__.py b/jcvi/algorithms/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/jcvi/algorithms/__main__.py b/jcvi/algorithms/__main__.py new file mode 100644 index 00000000..baf6ccd7 --- /dev/null +++ b/jcvi/algorithms/__main__.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +Implementations of several key algorithms, such as: TSP, Graph, SuperMap, Linear Programming, ML, etc. used by other modules. +""" + +from jcvi.apps.base import dmain + + +if __name__ == "__main__": + dmain(__file__) diff --git a/jcvi/algorithms/ec.py b/jcvi/algorithms/ec.py new file mode 100644 index 00000000..6106f877 --- /dev/null +++ b/jcvi/algorithms/ec.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +This module contains methods to interface with DEAP evolutionary computation +framewor, including a Genetic Algorithm (GA) based method to solve scaffold +ordering and orientation problem. +""" + +import array +import random +import multiprocessing + +from deap import base, creator, tools +from deap.algorithms import varAnd + +from ..apps.base import logger +from ..utils.console import printf + +from .lis import longest_monotonic_subseq_length + + +# This has to be in global space, otherwise runs into error "creator.Individual +# not found" when runnning on macOS. 
See also: +# https://github.com/DEAP/deap/issues/268 +creator.create("FitnessMax", base.Fitness, weights=(1.0,)) +creator.create("Individual", array.array, typecode="i", fitness=creator.FitnessMax) + + +def make_data(POINTS, SCF): + seq = range(POINTS) + scaffolds = [] + batch = POINTS // SCF + for i in range(SCF): + p = seq[i * batch : (i + 1) * batch] + scaffolds.append(p) + return scaffolds + + +def colinear_evaluate(tour, scaffolds): + series = [] + for t in tour: + series.extend(scaffolds[t]) + score, diff = longest_monotonic_subseq_length(series) + return (score,) + + +def genome_mutation(candidate): + """Return the mutants created by inversion mutation on the candidates. + + This function performs inversion or insertion. It randomly chooses two + locations along the candidate and reverses the values within that + slice. Insertion is done by popping one item and insert it back at random + position. + """ + size = len(candidate) + prob = random.random() + if prob > 0.5: # Inversion + p = random.randint(0, size - 1) + q = random.randint(0, size - 1) + if p > q: + p, q = q, p + q += 1 + s = candidate[p:q] + x = candidate[:p] + s[::-1] + candidate[q:] + return (creator.Individual(x),) + else: # Insertion + p = random.randint(0, size - 1) + q = random.randint(0, size - 1) + cq = candidate.pop(q) + candidate.insert(p, cq) + return (candidate,) + + +def genome_mutation_orientation(candidate): + size = len(candidate) + prob = random.random() + if prob > 0.5: # Range flip + p = random.randint(0, size - 1) + q = random.randint(0, size - 1) + if p > q: + p, q = q, p + q += 1 + for x in range(p, q): + candidate[x] = -candidate[x] + else: # Single flip + p = random.randint(0, size - 1) + candidate[p] = -candidate[p] + return (candidate,) + + +def GA_setup(guess): + toolbox = base.Toolbox() + + toolbox.register("individual", creator.Individual, guess) + toolbox.register("population", tools.initRepeat, list, toolbox.individual) + toolbox.register("mate", 
tools.cxPartialyMatched) + toolbox.register("mutate", genome_mutation) + toolbox.register("select", tools.selTournament, tournsize=3) + return toolbox + + +def eaSimpleConverge( + population, + toolbox, + cxpb, + mutpb, + ngen, + stats=None, + halloffame=None, + callback=None, + verbose=True, +): + """This algorithm reproduce the simplest evolutionary algorithm as + presented in chapter 7 of [Back2000]_. + + Modified to allow checking if there is no change for ngen, as a simple + rule for convergence. Interface is similar to eaSimple(). However, in + eaSimple, ngen is total number of iterations; in eaSimpleConverge, we + terminate only when the best is NOT updated for ngen iterations. + """ + # Evaluate the individuals with an invalid fitness + invalid_ind = [ind for ind in population if not ind.fitness.valid] + fitnesses = toolbox.map(toolbox.evaluate, invalid_ind) + for ind, fit in zip(invalid_ind, fitnesses): + ind.fitness.values = fit + + if halloffame is not None: + halloffame.update(population) + + record = stats.compile(population) if stats else {} + + # Begin the generational process + gen = 1 + best = (0,) + while True: + # Select the next generation individuals + offspring = toolbox.select(population, len(population)) + + # Vary the pool of individuals + offspring = varAnd(offspring, toolbox, cxpb, mutpb) + + # Evaluate the individuals with an invalid fitness + invalid_ind = [ind for ind in offspring if not ind.fitness.valid] + fitnesses = toolbox.map(toolbox.evaluate, invalid_ind) + for ind, fit in zip(invalid_ind, fitnesses): + ind.fitness.values = fit + + # Update the hall of fame with the generated individuals + if halloffame is not None: + halloffame.update(offspring) + + if callback is not None: + callback(halloffame[0], gen) + + # Replace the current population by the offspring + population[:] = offspring + + # Append the current generation statistics to the logbook + record = stats.compile(population) if stats else {} + current_best = 
record["max"] + if gen % 20 == 0 and verbose: + printf( + "Current iteration {0}: max_score={1}".format(gen, current_best), + ) + + if current_best > best: + best = current_best + updated = gen + + gen += 1 + if gen - updated > ngen: + break + + return population + + +def GA_run(toolbox, ngen=500, npop=100, seed=666, cpus=1, callback=None): + logger.debug("GA setup: ngen=%d npop=%d cpus=%d seed=%d", ngen, npop, cpus, seed) + if cpus > 1: + pool = multiprocessing.Pool(cpus) + toolbox.register("map", pool.map) + random.seed(seed) + pop = toolbox.population(n=npop) + hof = tools.HallOfFame(1) + + stats = tools.Statistics(lambda ind: ind.fitness.values) + stats.register("max", max) + stats.register("min", min) + + eaSimpleConverge( + pop, toolbox, 0.7, 0.2, ngen, stats=stats, halloffame=hof, callback=callback + ) + tour = hof[0] + if cpus > 1: + pool.terminate() + return tour, tour.fitness + + +if __name__ == "__main__": + POINTS, SCF = 200, 20 + scaffolds = make_data(POINTS, SCF) + + # Demo case: scramble of the list + guess = list(range(SCF)) + guess[5:15] = guess[5:15][::-1] + guess[7:18] = guess[7:18][::-1] + printf(guess) + + toolbox = GA_setup(guess) + toolbox.register("evaluate", colinear_evaluate, scaffolds=scaffolds) + tour, tour.fitness = GA_run(toolbox, cpus=8) + printf(tour, tour.fitness) diff --git a/jcvi/algorithms/formula.py b/jcvi/algorithms/formula.py new file mode 100644 index 00000000..64384c26 --- /dev/null +++ b/jcvi/algorithms/formula.py @@ -0,0 +1,255 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Some math formula for various calculations +""" +import sys + +from collections import Counter +from functools import lru_cache +from math import log, exp, sqrt + +import numpy as np +import scipy + +from ..utils.cbook import human_size + + +def mean_confidence_interval(data, confidence=0.95): + # Compute the confidence interval around the mean + + a = 1.0 * np.array(data) + n = len(a) + m, se = np.mean(a), scipy.stats.sem(a) + h = se * 
scipy.stats.t._ppf((1 + confidence) / 2.0, n - 1) + return m, m - h, m + h + + +def confidence_interval(data, confidence=0.95): + # Compute the confidence interval of the data + # Note the difference from mean_confidence_interval() + a = 1.0 * np.array(data) + n = len(a) + m, stdev = np.mean(a), np.std(a) + h = 1.96 * stdev + return m, m - h, m + h + + +def MAD_interval(data): + # Compute the MAD interval of the data + A = 1.0 * np.array(data) + M = np.median(A) + D = np.absolute(A - M) + MAD = np.median(D) + return M, M - MAD, M + MAD + + +def erf(x): + # save the sign of x + sign = 1 if x >= 0 else -1 + x = abs(x) + + # constants + a1 = 0.254829592 + a2 = -0.284496736 + a3 = 1.421413741 + a4 = -1.453152027 + a5 = 1.061405429 + p = 0.3275911 + + # A&S formula 7.1.26 + t = 1.0 / (1.0 + p * x) + y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) + return sign * y # erf(-x) = -erf(x) + + +def gaussian_prob_le(mu, sigma, x): + if sigma == 0: + return 1 if mu <= x else 0 + z = (x - mu) / (sigma * sqrt(2)) + return 0.5 + 0.5 * erf(z) + + +def choose_insertsize(readlen=150, step=20, cutoff=0.01): + """ + Calculate ratio of overlap for a range of insert sizes. Idea borrowed from + ALLPATHS code (`allpaths_cache/CacheToAllPathsInputs.pl`). + """ + print("Insert-size\tOverlap", file=sys.stderr) + for i in range(0, 3 * readlen, step): + p = gaussian_prob_le(i, i / 5, 2 * readlen) + if p < cutoff or p > 1 - cutoff: + continue + print("{0}bp\t{1}%".format(i, int(round(100 * p))), file=sys.stderr) + + +def get_kmeans(a, k, iter=100): + from scipy.cluster.vq import vq, kmeans + + a = np.array(a) + centroids, _ = kmeans(a, k, iter=iter) + centroids.sort() + idx, _ = vq(a, centroids) + return idx + + +def spearmanr(x, y): + """ + Michiel de Hoon's library (available in BioPython or standalone as + PyCluster) returns Spearman rsb which does include a tie correction. 
+ + >>> x = [5.05, 6.75, 3.21, 2.66] + >>> y = [1.65, 26.5, -5.93, 7.96] + >>> z = [1.65, 2.64, 2.64, 6.95] + >>> round(spearmanr(x, y), 4) + 0.4 + >>> round(spearmanr(x, z), 4) + -0.6325 + """ + from scipy import stats + + if not x or not y: + return 0 + corr, pvalue = stats.spearmanr(x, y) + return corr + + +def reject_outliers(a, threshold=3.5): + """ + Iglewicz and Hoaglin's robust test for multiple outliers (two sided test). + + + See also: + + + >>> a = [0, 1, 2, 4, 12, 58, 188, 189] + >>> list(reject_outliers(a)) + [False, False, False, False, False, True, True, True] + """ + if len(a) < 3: + return np.zeros(len(a), dtype=bool) + + A = np.array(a, dtype=float) + lb, ub = outlier_cutoff(A, threshold=threshold) + return np.logical_or(A > ub, A < lb) + + +def outlier_cutoff(a, threshold=3.5): + """ + Iglewicz and Hoaglin's robust, returns the cutoff values - lower bound and + upper bound. + """ + A = np.array(a, dtype=float) + M = np.median(A) + D = np.absolute(A - M) + MAD = np.median(D) + C = threshold / 0.67449 * MAD + return M - C, M + C + + +def recomb_probability(cM, method="kosambi"): + """ + + + >>> recomb_probability(1) + 0.009998666879965463 + >>> recomb_probability(100) + 0.48201379003790845 + >>> recomb_probability(10000) + 0.5 + """ + assert method in ("kosambi", "haldane") + d = cM / 100.0 + if method == "kosambi": + e4d = exp(4 * d) + return (e4d - 1) / (e4d + 1) / 2 + elif method == "haldane": + return (1 - exp(-2 * d)) / 2 + + +def jukesCantorD(p, L=100): + """ + >>> jukesCantorD(.1) + (0.10732563273050497, 0.001198224852071006) + >>> jukesCantorD(.7) + (2.0310376508266565, 0.47249999999999864) + """ + assert 0 <= p < 0.75 + + rD = 1 - 4.0 / 3 * p + D = -0.75 * log(rD) + varD = p * (1 - p) / (rD**2 * L) + + return D, varD + + +def jukesCantorP(D): + """ + >>> jukesCantorP(.1) + 0.09362001071778939 + >>> jukesCantorP(2) + 0.6978874115828988 + """ + rD = exp(-4.0 / 3 * D) + p = 0.75 * (1 - rD) + return p + + +def velvet(readsize, genomesize, 
numreads, K): + """ + Calculate velvet memory requirement. + + + Ram required for velvetg = -109635 + 18977*ReadSize + 86326*GenomeSize + + 233353*NumReads - 51092*K + + Read size is in bases. + Genome size is in millions of bases (Mb) + Number of reads is in millions + K is the kmer hash value used in velveth + """ + ram = ( + -109635 + 18977 * readsize + 86326 * genomesize + 233353 * numreads - 51092 * K + ) + print("ReadSize: {0}".format(readsize), file=sys.stderr) + print("GenomeSize: {0}Mb".format(genomesize), file=sys.stderr) + print("NumReads: {0}M".format(numreads), file=sys.stderr) + print("K: {0}".format(K), file=sys.stderr) + + ram = human_size(ram * 1000, a_kilobyte_is_1024_bytes=True) + print("RAM usage: {0} (MAXKMERLENGTH=31)".format(ram), file=sys.stderr) + + +@lru_cache(maxsize=None) +def calc_ldscore(a: str, b: str) -> float: + """ + Calculate Linkage disequilibrium (r2) between two genotypes. + """ + assert len(a) == len(b), f"{a}\n{b}" + # Assumes markers as A/B + c = Counter(zip(a, b)) + c_aa = c[("A", "A")] + c_ab = c[("A", "B")] + c_ba = c[("B", "A")] + c_bb = c[("B", "B")] + n = c_aa + c_ab + c_ba + c_bb + if n == 0: + return 0 + + f = 1.0 / n + x_aa = c_aa * f + x_ab = c_ab * f + x_ba = c_ba * f + x_bb = c_bb * f + p_a = x_aa + x_ab + p_b = x_ba + x_bb + q_a = x_aa + x_ba + q_b = x_ab + x_bb + D = x_aa - p_a * q_a + denominator = p_a * p_b * q_a * q_b + if denominator == 0: + return 0 + + r2 = D * D / denominator + return r2 diff --git a/jcvi/algorithms/graph.py b/jcvi/algorithms/graph.py new file mode 100644 index 00000000..ffa90dea --- /dev/null +++ b/jcvi/algorithms/graph.py @@ -0,0 +1,514 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Wrapper for the common graph algorithms. +""" +import sys + +from collections import deque + +import networkx as nx + +from more_itertools import pairwise + +from ..apps.base import logger +from ..formats.base import must_open + + +""" +Bidirectional graph. 
+""" +dirs = (">", "<") +trans = str.maketrans("+?-", ">><") + + +class BiNode(object): + def __init__(self, v): + self.v = v + self.ins = [] + self.outs = [] + + def get_next(self, tag="<"): + """ + This function is tricky and took me a while to figure out. + + The tag specifies the direction where the current edge came from. + + tag ntag + ---> V >----> U + cur next + + This means the next vertex should follow the outs since this tag is + inward '<'. Check if there are multiple branches if len(L) == 1, and + also check if the next it finds has multiple incoming edges though if + len(B) == 1. + """ + next, ntag = None, None + + L = self.outs if tag == "<" else self.ins + + if len(L) == 1: + (e,) = L + if e.v1.v == self.v: + next, ntag = e.v2, e.o2 + ntag = "<" if ntag == ">" else ">" # Flip tag if on other end + else: + next, ntag = e.v1, e.o1 + + if next: # Validate the next vertex + B = next.ins if ntag == "<" else next.outs + if len(B) > 1: + return None, None + + return next, ntag + + def __str__(self): + return str(self.v) + + __repr__ = __str__ + + +class BiEdge(object): + def __init__(self, v1, v2, o1, o2, color="black", length=None): + o1 = o1.translate(trans) + o2 = o2.translate(trans) + assert o1 in dirs and o2 in dirs + self.o1 = o1 + self.o2 = o2 + + self.color = color + self.length = length + + def __str__(self): + return "".join(str(x) for x in (self.v1, self.o1, "--", self.o2, self.v2)) + + def flip(self): + self.v2, self.v1 = self.v1, self.v2 + o1, o2 = self.o1, self.o2 + self.o1 = ">" if o2 == "<" else "<" + self.o2 = ">" if o1 == "<" else "<" + + +class BiGraph(object): + def __init__(self): + self.nodes = {} + self.edges = {} + + def __str__(self): + return "BiGraph with {0} nodes and {1} edges".format( + len(self.nodes), len(self.edges) + ) + + def add_node(self, v): + if v not in self.nodes: + self.nodes[v] = BiNode(v) + + def add_edge(self, v1, v2, o1, o2, color="black", length=None): + for v in (v1, v2): + self.add_node(v) + n1 = 
self.nodes.get(v1) + n2 = self.nodes.get(v2) + + if (v1, v2) in self.edges or (v2, v1) in self.edges: + return + + e = BiEdge(v1, v2, o1, o2, color=color, length=length) + l = n1.outs if e.o1 == ">" else n1.ins + r = n2.ins if e.o2 == ">" else n2.outs + l.append(e) + r.append(e) + e.v1, e.v2 = n1, n2 + if v1 > v2: + v1, v2 = v2, v1 + e.flip() + self.edges[(v1, v2)] = e + + def get_node(self, v): + return self.nodes[v] + + def get_edge(self, av, bv): + flip = False + if av > bv: + av, bv = bv, av + flip = True + e = self.edges[(av, bv)] + if flip: + e.flip() + return e + + def iter_paths(self): + + discovered = set() + for v, vv in self.nodes.items(): + if v in discovered: + continue + + path = deque([vv]) + + # print "cur", v + discovered.add(v) + prev, ptag = vv.get_next(tag=">") + while prev: + # print prev, ptag + if prev.v in discovered: + break + path.appendleft(prev) + discovered.add(prev.v) + prev, ptag = prev.get_next(tag=ptag) + + next, ntag = vv.get_next(tag="<") + while next: + # print next, ntag + if next.v in discovered: + break + path.append(next) + discovered.add(next.v) + next, ntag = next.get_next(tag=ntag) + + # discovered |= set(x.v for x in path) + yield path + + def path(self, path, flip=False): + oo = [] + if len(path) == 1: + m = "Singleton {0}".format(path[0]) + oo.append((path[0].v, True)) + return m, oo + + edges = [] + for a, b in pairwise(path): + av, bv = a.v, b.v + e = self.get_edge(av, bv) + + if not oo: # First edge imports two nodes + oo.append((e.v1.v, e.o1 == ">")) + last = oo[-1] + assert last == (e.v1.v, e.o1 == ">") + oo.append((e.v2.v, e.o2 == ">")) + + if flip: + se = str(e) + e.flip() + else: + se = str(e) + edges.append(se) + + return "|".join(edges), oo + + def read(self, filename, color="black"): + fp = open(filename) + nedges = 0 + for row in fp: + a, b = row.strip().split("--") + oa = a[-1] + ob = b[0] + a, b = a.strip("<>"), b.strip("<>") + self.add_edge(a, b, oa, ob, color=color) + nedges += 1 + logger.debug( + "A 
total of {0} edges imported from `{1}` (color={2}).".format( + nedges, filename, color + ) + ) + + def write(self, filename="stdout"): + + fw = must_open(filename, "w") + for e in self.edges.values(): + print(e, file=fw) + logger.debug("Graph written to `{0}`.".format(filename)) + + def draw( + self, + pngfile, + dpi=96, + verbose=False, + namestart=0, + nodehighlight=None, + prog="circo", + ): + import pygraphviz as pgv + + G = pgv.AGraph() + for e in self.edges.values(): + arrowhead = e.o1 == ">" + arrowtail = e.o2 == "<" + if e.o1 != e.o2: # Not sure why this is necessary + arrowhead = not arrowhead + arrowtail = not arrowtail + arrowhead = "normal" if arrowhead else "inv" + arrowtail = "normal" if arrowtail else "inv" + v1, v2 = e.v1, e.v2 + v1, v2 = str(v1)[namestart:], str(v2)[namestart:] + G.add_edge(v1, v2, color=e.color, arrowhead=arrowhead, arrowtail=arrowtail) + + if nodehighlight: + for n in nodehighlight: + n = n[namestart:] + n = G.get_node(n) + n.attr["shape"] = "box" + + G.graph_attr.update(dpi=str(dpi)) + if verbose: + G.write(sys.stderr) + G.draw(pngfile, prog=prog) + logger.debug("Graph written to `{0}`.".format(pngfile)) + + def get_next(self, node, tag="<"): + return self.get_node(node).get_next(tag) + + def get_path(self, n1, n2, tag="<"): + # return all intermediate nodes on path n1 -> n2 + path = deque() + next, ntag = self.get_next(n1, tag=tag) + while next: + if next.v == n2: + return path + path.append((next, ntag)) + next, ntag = next.get_next(tag=ntag) + return path if n2 is None else None + + +def graph_stats(G, diameter=False): + logger.debug("Graph stats: |V|={0}, |E|={1}".format(len(G), G.size())) + if diameter: + d = max(nx.diameter(H) for H in nx.connected_component_subgraphs(G)) + logger.debug("Graph diameter: {0}".format(d)) + + +def graph_local_neighborhood(G, query, maxdegree=10000, maxsize=10000): + c = [k for k, d in G.degree().iteritems() if d > maxdegree] + if c: + logger.debug("Remove {0} nodes with deg > 
{1}".format(len(c), maxdegree)) + G.remove_nodes_from(c) + + logger.debug("BFS search from {0}".format(query)) + + queue = set(query) + # BFS search of max depth + seen = set(query) + coresize = len(query) + depth = 0 + while True: + neighbors = set() + for q in queue: + if q not in G: + continue + neighbors |= set(G.neighbors(q)) + queue = neighbors - seen + if not queue: + break + + if len(seen | queue) > maxsize + coresize: + break + + seen |= queue + print( + "iter: {0}, graph size={1} ({2} excluding core)".format( + depth, len(seen), len(seen) - coresize + ), + file=sys.stderr, + ) + depth += 1 + + return G.subgraph(seen) + + +def graph_simplify(G): + """ + Simplify big graphs: remove spurs and contract unique paths. + """ + spurs = [] + path_nodes = [] + for k, d in G.degree().iteritems(): + if d == 1: + spurs.append(k) + elif d == 2: + path_nodes.append(k) + + logger.debug("Remove {0} spurs.".format(len(spurs))) + G.remove_nodes_from(spurs) + + SG = G.subgraph(path_nodes) + cc = nx.connected_components(SG) + for c in cc: + if len(c) == 1: + continue + c = set(c) + neighbors = set() + for x in c: + neighbors |= set(G.neighbors(x)) + neighbors -= c + newtag = list(c)[0] + "*" + for n in neighbors: + G.add_edge(newtag, n) + G.remove_nodes_from(c) + logger.debug( + "Contract {0} path nodes into {1} nodes.".format(len(path_nodes), len(cc)) + ) + + +def bigraph_test(): + g = BiGraph() + g.add_edge(1, 2, ">", "<") + g.add_edge(2, 3, "<", "<", color="red") + g.add_edge(2, 3, ">", ">", color="blue") + g.add_edge(5, 3, ">", ">") + g.add_edge(4, 3, "<", ">") + g.add_edge(4, 6, ">", ">") + g.add_edge(7, 1, ">", ">") + g.add_edge(7, 5, "<", ">") + g.add_edge(8, 6, ">", "<") + print(g) + g.write() + for path in g.iter_paths(): + p, oo = g.path(path) + print(p) + print(oo) + + # g.draw("demo.png", verbose=True) + + +def update_weight(G, a, b, w): + if G.has_edge(a, b): # Parallel edges found! 
+ G[a][b]["weight"] += w + else: + G.add_edge(a, b, weight=w) + + +def make_paths(paths, weights=None): + """ + Zip together paths. Called by merge_paths(). + """ + npaths = len(paths) + weights = weights or [1] * npaths + assert len(paths) == len(weights) + + G = nx.DiGraph() + for path, w in zip(paths, weights): + for a, b in pairwise(path): + update_weight(G, a, b, w) + return G + + +def reduce_paths(G): + """ + Make graph into a directed acyclic graph (DAG). + """ + from jcvi.algorithms.lpsolve import min_feedback_arc_set + + while not nx.is_directed_acyclic_graph(G): + edges = [] + for a, b, w in G.edges_iter(data=True): + w = w["weight"] + edges.append((a, b, w)) + mf, mf_score = min_feedback_arc_set(edges) + for a, b, w in mf: + G.remove_edge(a, b) + + assert nx.is_directed_acyclic_graph(G) + G = transitive_reduction(G) + return G + + +def draw_graph(G, pngfile, prog="dot"): + G = nx.to_agraph(G) + G.draw(pngfile, prog=prog) + logger.debug("Graph written to `{0}`.".format(pngfile)) + + +def transitive_reduction(G): + """ + Returns a transitive reduction of a graph. The original graph + is not modified. + + A transitive reduction H of G has a path from x to y if and + only if there was a path from x to y in G. Deleting any edge + of H destroys this property. A transitive reduction is not + unique in general. A transitive reduction has the same + transitive closure as the original graph. + + A transitive reduction of a complete graph is a tree. A + transitive reduction of a tree is itself. + + >>> G = nx.DiGraph([(1, 2), (1, 3), (2, 3), (2, 4), (3, 4)]) + >>> H = transitive_reduction(G) + >>> H.edges() + [(1, 2), (2, 3), (3, 4)] + """ + H = G.copy() + for a, b, w in G.edges_iter(data=True): + # Try deleting the edge, see if we still have a path + # between the vertices + H.remove_edge(a, b) + if not nx.has_path(H, a, b): # we shouldn't have deleted it + H.add_edge(a, b, w) + return H + + +def merge_paths(paths, weights=None): + """ + Zip together sorted lists. 
+ + >>> paths = [[1, 2, 3], [1, 3, 4], [2, 4, 5]] + >>> G = merge_paths(paths) + >>> nx.topological_sort(G) + [1, 2, 3, 4, 5] + >>> paths = [[1, 2, 3, 4], [1, 2, 3, 2, 4]] + >>> G = merge_paths(paths, weights=(1, 2)) + >>> nx.topological_sort(G) + [1, 2, 3, 4] + """ + G = make_paths(paths, weights=weights) + G = reduce_paths(G) + return G + + +def longest_path_weighted_nodes(G, source, target, weights=None): + """ + The longest path problem is the problem of finding a simple path of maximum + length in a given graph. While for general graph, this problem is NP-hard, + but if G is a directed acyclic graph (DAG), longest paths in G can be found + in linear time with dynamic programming. + + >>> G = nx.DiGraph([(1, 2), (1, 3), (2, "M"), (3, "M")]) + >>> longest_path_weighted_nodes(G, 1, "M", weights={1: 1, 2: 1, 3: 2, "M": 1}) + ([1, 3, 'M'], 4) + """ + assert nx.is_directed_acyclic_graph(G) + + tree = nx.topological_sort(G) + node_to_index = dict((t, i) for i, t in enumerate(tree)) + + nnodes = len(tree) + weights = [weights.get(x, 1) for x in tree] if weights else [1] * nnodes + score, fromc = weights[:], [-1] * nnodes + si = node_to_index[source] + ti = node_to_index[target] + for a in tree[si:ti]: + ai = node_to_index[a] + for b, w in G[a].items(): + bi = node_to_index[b] + w = w.get("weight", 1) + d = score[ai] + weights[bi] * w # Favor heavier edges + if d <= score[bi]: + continue + score[bi] = d # Update longest distance so far + fromc[bi] = ai + + # Backtracking + path = [] + while ti != -1: + path.append(ti) + ti = fromc[ti] + + path = [tree[x] for x in path[::-1]] + return path, score[ti] + + +if __name__ == "__main__": + import doctest + + doctest.testmod() + # bigraph_test() diff --git a/jcvi/algorithms/lis.py b/jcvi/algorithms/lis.py new file mode 100755 index 00000000..78b399a8 --- /dev/null +++ b/jcvi/algorithms/lis.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Longest increasing subsequence, code stolen from internet 
(thanks) +http://wordaligned.org/articles/patience-sort +""" +import bisect + +# We want a maximum function which accepts a default value +from functools import partial, reduce + +maximum = partial(reduce, max) + + +def patience_sort(xs): + """Patience sort an iterable, xs. + + This function generates a series of pairs (x, pile), where "pile" + is the 0-based index of the pile "x" should be placed on top of. + Elements of "xs" must be less-than comparable. + """ + pile_tops = list() + for x in xs: + pile = bisect.bisect_left(pile_tops, x) + if pile == len(pile_tops): + pile_tops.append(x) + else: + pile_tops[pile] = x + yield x, pile + + +def longest_monotonic_subseq_length(xs): + """Return the length of the longest monotonic subsequence of xs, second + return value is the difference between increasing and decreasing lengths. + + >>> longest_monotonic_subseq_length((4, 5, 1, 2, 3)) + (3, 1) + >>> longest_monotonic_subseq_length((1, 2, 3, 5, 4)) + (4, 2) + >>> longest_monotonic_subseq_length((1, 2, 1)) + (2, 0) + """ + li = longest_increasing_subseq_length(xs) + ld = longest_decreasing_subseq_length(xs) + return max(li, ld), li - ld + + +def longest_increasing_subseq_length(xs): + """Return the length of the longest increasing subsequence of xs. 
+ + >>> longest_increasing_subseq_length(range(3)) + 3 + >>> longest_increasing_subseq_length([3, 1, 2, 0]) + 2 + """ + return 1 + maximum((pile for x, pile in patience_sort(xs)), -1) + + +def longest_decreasing_subseq_length(xs): + return longest_increasing_subseq_length(reversed(xs)) + + +def longest_monotonic_subseq_length_loose(xs): + li = longest_increasing_subseq_length_loose(xs) + ld = longest_decreasing_subseq_length_loose(xs) + return max(li, ld), li - ld + + +def longest_increasing_subseq_length_loose(xs): + xs = [(x, i) for (i, x) in enumerate(xs)] + return longest_increasing_subseq_length(xs) + + +def longest_decreasing_subseq_length_loose(xs): + xs = [(x, -i) for (i, x) in enumerate(xs)] + return longest_decreasing_subseq_length(xs) + + +def longest_increasing_subsequence(xs): + """Return a longest increasing subsequence of xs. + + (Note that there may be more than one such subsequence.) + >>> longest_increasing_subsequence(range(3)) + [0, 1, 2] + >>> longest_increasing_subsequence([3, 1, 2, 0]) + [1, 2] + """ + # Patience sort xs, stacking (x, prev_ix) pairs on the piles. + # Prev_ix indexes the element at the top of the previous pile, + # which has a lower x value than the current x value. 
+ piles = [[]] # Create a dummy pile 0 + for x, p in patience_sort(xs): + if p + 1 == len(piles): + piles.append([]) + # backlink to the top of the previous pile + piles[p + 1].append((x, len(piles[p]) - 1)) + # Backtrack to find a longest increasing subsequence + npiles = len(piles) - 1 + prev = 0 + lis = list() + for pile in range(npiles, 0, -1): + x, prev = piles[pile][prev] + lis.append(x) + lis.reverse() + return lis + + +def longest_decreasing_subsequence(xs): + """ + Wrapper that calls longest_increasing_subsequence + >>> longest_decreasing_subsequence([23, 19, 97, 16, 37, 44, 88, 77, 26]) + [97, 88, 77, 26] + """ + return list(reversed(longest_increasing_subsequence(reversed(xs)))) + + +def longest_monotonic_subsequence(xs): + lis = longest_increasing_subsequence(xs) + lds = longest_decreasing_subsequence(xs) + if len(lis) >= len(lds): + return lis + return lds + + +def longest_monotonic_subsequence_loose(xs): + lis = longest_increasing_subsequence_loose(xs) + lds = longest_decreasing_subsequence_loose(xs) + if len(lis) >= len(lds): + return lis + return lds + + +def longest_increasing_subsequence_loose(xs): + xs = [(x, i) for (i, x) in enumerate(xs)] + ll = longest_increasing_subsequence(xs) + return [x for (x, i) in ll] + + +def longest_decreasing_subsequence_loose(xs): + xs = [(x, -i) for (i, x) in enumerate(xs)] + ll = longest_decreasing_subsequence(xs) + return [x for (x, i) in ll] + + +def backtracking(a, L, bestsofar): + """ + Start with the heaviest weight and emit index + """ + w, j = max(L.items()) + while j != -1: + yield j + w, j = bestsofar[j] + + +def heaviest_increasing_subsequence(a, debug=False): + """ + Returns the heaviest increasing subsequence for array a. Elements are (key, + weight) pairs. 
+ + >>> heaviest_increasing_subsequence([(3, 3), (2, 2), (1, 1), (0, 5)]) + ([(0, 5)], 5) + """ + # Stores the smallest idx of last element of a subsequence of weight w + L = {0: -1} + bestsofar = [(0, -1)] * len(a) # (best weight, from_idx) + for i, (key, weight) in enumerate(a): + + for w, j in list(L.items()): + if j != -1 and a[j][0] >= key: + continue + + new_weight = w + weight + if new_weight in L and a[L[new_weight]][0] <= key: + continue + + L[new_weight] = i + newbest = (new_weight, j) + if newbest > bestsofar[i]: + bestsofar[i] = newbest + + if debug: + # print (key, weight), L + print((key, weight), bestsofar) + + tb = reversed(list(backtracking(a, L, bestsofar))) + return [a[x] for x in tb], max(L.items())[0] + + +if __name__ == "__main__": + import doctest + + doctest.testmod() + + import numpy as np + + LENGTH = 20 + A = [np.random.randint(0, 20) for x in range(LENGTH)] + A = list(A) + B = list(zip(A, [1] * LENGTH)) + print(A) + lis = longest_increasing_subsequence(A) + print("longest increasing:", lis) + lds = longest_decreasing_subsequence(A) + print("longest decreasing:", lds) + lisl = longest_increasing_subsequence_loose(A) + print("longest increasing loose:", lisl) + ldsl = longest_decreasing_subsequence_loose(A) + print("longest decreasing loose:", ldsl) + # this should be the same as longest_increasing_subsequence + his, his_dd = heaviest_increasing_subsequence(B) + hlis, wts = zip(*his) + print("heaviest increasing (weight 1, compare with lis):", hlis) + assert len(lis) == len(his) diff --git a/jcvi/algorithms/lpsolve.py b/jcvi/algorithms/lpsolve.py new file mode 100755 index 00000000..0c85b6cf --- /dev/null +++ b/jcvi/algorithms/lpsolve.py @@ -0,0 +1,802 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Implement a few MIP solvers, based on benchmark found on +SCIP solver is ~16x faster than GLPK solver. However, I found in rare cases +it will segfault. 
Therefore the default is SCIP, the program will switch to +GLPK solver for crashed cases. + +The input lp_data is assumed in .lp format, see below + +>>> lp_data = ''' +... Maximize +... 5 x1 + 3 x2 + 2 x3 +... Subject to +... x2 + x3 <= 1 +... Binary +... x1 +... x2 +... x3 +... End''' +>>> print SCIPSolver(lp_data).results +[0, 1] +>>> print GLPKSolver(lp_data).results +[0, 1] +""" +import os.path as op + +from dataclasses import dataclass +from io import StringIO +from more_itertools import pairwise + +import networkx as nx + +from ..apps.base import cleanup, logger, mkdir, sh +from ..formats.base import flexible_cast +from ..utils.cbook import fill + +from .tsp import populate_edge_weights, node_to_edge + + +Work_dir = "lpsolve_work" + +# CPLEX LP format +# +MAXIMIZE = "Maximize" +MINIMIZE = "Minimize" +SUBJECTTO = "Subject To" +BOUNDS = "Bounds" +BINARY = "Binary" +GENERNAL = "General" +END = "End" + + +@dataclass +class MIPDataModel: + """Data model for use with OR-tools. Modeled after the tutorial.""" + + constraint_coeffs: list # List of dict of coefficients + bounds: list # Maximum value for each constraint clause + obj_coeffs: list # Coefficient in the objective function + num_vars: int + num_constraints: int + + def format_lp(self) -> str: + """Format data dictionary into MIP formatted string. 
+ + Returns: + str: MIP formatted string + """ + lp_handle = StringIO() + + lp_handle.write(f"{MAXIMIZE}\n ") + records = 0 + for i, score in enumerate(self.obj_coeffs): + lp_handle.write("+ %d x%d " % (score, i)) + # SCIP does not like really long string per row + records += 1 + if records % 10 == 0: + lp_handle.write("\n") + lp_handle.write("\n") + + lp_handle.write(f"{SUBJECTTO}\n") + for constraint, bound in zip(self.constraint_coeffs, self.bounds): + additions = " + ".join("x{}".format(i) for (i, x) in constraint.items()) + lp_handle.write(" %s <= %d\n" % (additions, bound)) + + self.log() + + lp_handle.write(f"{BINARY}\n") + for i in range(self.num_vars): + lp_handle.write(" x{}\n".format(i)) + + lp_handle.write(f"{END}\n") + + lp_data = lp_handle.getvalue() + lp_handle.close() + + return lp_data + + def create_solver(self, backend: str = "SCIP"): + """ + Create OR-tools solver instance. See also: + https://developers.google.com/optimization/mip/mip_var_array + + Args: + backend (str, optional): Backend for the MIP solver. Defaults to "SCIP". + + Returns: + OR-tools solver instance + """ + from ortools.linear_solver import pywraplp + + solver = pywraplp.Solver.CreateSolver(backend) + x = {} + for j in range(self.num_vars): + x[j] = solver.IntVar(0, 1, "x[%i]" % j) + + for bound, constraint_coeff in zip(self.bounds, self.constraint_coeffs): + constraint = solver.RowConstraint(0, bound, "") + for j, coeff in constraint_coeff.items(): + constraint.SetCoefficient(x[j], coeff) + + self.log() + + objective = solver.Objective() + for j, score in enumerate(self.obj_coeffs): + objective.SetCoefficient(x[j], score) + objective.SetMaximization() + + return solver, x + + def log(self): + """Log the size of the MIP instance""" + logger.info( + "Number of variables (%d), number of constraints (%d)", + self.num_vars, + self.num_constraints, + ) + + def solve(self, work_dir="work", verbose=False): + """Solve the MIP instance. 
This runs OR-tools as default solver, then + SCIP, GLPK in that order. + + Args: + work_dir (str, optional): Work directory, only used when OR-tools fail. Defaults to "work". + verbose (bool, optional): Verbosity level, only used when OR-tools fail. Defaults to False. + + Returns: + list[int]: List of indices that are selected + """ + filtered_list = [] + + if has_ortools(): + # Use OR-tools + from ortools.linear_solver import pywraplp + + solver, x = self.create_solver() + status = solver.Solve() + if status == pywraplp.Solver.OPTIMAL: + logger.info("Objective value = %d", solver.Objective().Value()) + filtered_list = [ + j for j in range(self.num_vars) if x[j].solution_value() == 1 + ] + logger.info("Problem solved in %d milliseconds", solver.wall_time()) + logger.info("Problem solved in %d iterations", solver.iterations()) + logger.info( + "Problem solved in %d branch-and-bound nodes", solver.nodes() + ) + + # Use custom formatter as a backup + if not filtered_list: + lp_data = self.format_lp() + filtered_list = SCIPSolver(lp_data, work_dir, verbose=verbose).results + if not filtered_list: + logger.error("SCIP fails... 
trying GLPK") + filtered_list = GLPKSolver(lp_data, work_dir, verbose=verbose).results + + return filtered_list + + +class AbstractMIPSolver(object): + """ + Base class for LP solvers + """ + + obj_val: float + + def __init__(self, lp_data, work_dir=Work_dir, clean=True, verbose=False): + + self.work_dir = work_dir + self.clean = clean + self.verbose = verbose + + mkdir(work_dir) + + lpfile = op.join(work_dir, "data.lp") # problem instance + logger.debug("Write MIP instance to `%s`", lpfile) + + fw = open(lpfile, "w") + fw.write(lp_data) + fw.close() + + retcode, outfile = self.run(lpfile) + if retcode < 0: + self.results = [] + else: + self.results = self.parse_output(outfile) + + if self.results: + logger.debug("Optimized objective value (%s)", self.obj_val) + + def run(self, lp_data): + raise NotImplementedError + + def parse_output(self): + raise NotImplementedError + + def cleanup(self): + cleanup(self.work_dir) + + +class GLPKSolver(AbstractMIPSolver): + """ + GNU Linear Programming Kit (GLPK) solver, wrapper for calling GLPSOL + """ + + def run(self, lpfile): + + outfile = op.join(self.work_dir, "data.lp.out") # verbose output + listfile = op.join(self.work_dir, "data.lp.list") # simple output + # cleanup in case something wrong happens + cleanup(outfile, listfile) + + cmd = "glpsol --cuts --fpump --lp {0} -o {1} -w {2}".format( + lpfile, outfile, listfile + ) + + outf = None if self.verbose else "/dev/null" + retcode = sh(cmd, outfile=outf) + + if retcode == 127: + logger.error( + "You need to install program `glpsol` [http://www.gnu.org/software/glpk/]" + ) + return -1, None + + return retcode, listfile + + def parse_output(self, listfile, clean=False): + """Extract results from the GLPK output. The GLPK output looks like + + c Problem: + c Rows: 306 + c Columns: 520 + c Non-zeros: 623 + c Status: INTEGER OPTIMAL + c Objective: obj = 23405 (MAXimum) + c + s mip 306 520 o 23405 + i 1 1 + i 2 1 + i 3 1 + i 4 1 + i 5 1 + i 6 1 + ... 
+ """ + fp = open(listfile) + results = [] + expected_cols, observed_cols = 0, 0 + for row in fp: + if row[0] == "s": + s, mip, rows, cols, o, obj_val = row.split() + expected_cols = int(cols) + self.obj_val = int(obj_val) + if row[0] != "j": + continue + observed_cols += 1 + tag, row_id, value = row.split() + assert tag == "j", "Expecting the first field == j" + row_id, value = int(row_id), int(value) + if value == 1: + results.append(row_id - 1) + + assert ( + expected_cols == observed_cols + ), "Number of columns mismatch: expected {}, observed {}".format( + expected_cols, observed_cols + ) + + fp.close() + + if self.clean: + self.cleanup() + + return results + + +class SCIPSolver(AbstractMIPSolver): + """ + SCIP solver, wrapper for calling SCIP executable + """ + + def run(self, lpfile): + + outfile = self.work_dir + "/data.lp.out" # verbose output + cleanup(outfile) + + cmd = "scip -f {0} -l {1}".format(lpfile, outfile) + + outf = None if self.verbose else "/dev/null" + retcode = sh(cmd, outfile=outf) + + if retcode == 127: + logger.error("You need to install program `scip` [http://scip.zib.de/]") + return -1, None + + return retcode, outfile + + def parse_output(self, outfile): + + fp = open(outfile) + for row in fp: + if row.startswith("objective value"): + obj_row = row + break + + results = [] + for row in fp: + """ + objective value: 8 + x1 1 (obj:5) + x2 1 (obj:3) + """ + if row.strip() == "": # blank line ends the section + break + x = row.split()[0] + results.append(int(x[1:]) - 1) # 0-based indexing + + if results: + self.obj_val = flexible_cast(obj_row.split(":")[1].strip()) + + fp.close() + + if self.clean: + self.cleanup() + + return results + + +class LPInstance(object): + """ + CPLEX LP format commonly contains three blocks: + objective, constraints, vars + spec + """ + + def __init__(self): + self.objective = MAXIMIZE + self.sum = "" + self.constraints = [] + self.bounds = [] + self.binaryvars = [] + self.generalvars = [] + + def 
print_instance(self): + self.handle = fw = StringIO() + print(self.objective, file=fw) + print(self.sum, file=fw) + print(SUBJECTTO, file=fw) + assert self.constraints, "Must contain constraints" + print("\n".join(self.constraints), file=fw) + if self.bounds: + print(BOUNDS, file=fw) + print("\n".join(self.bounds), file=fw) + if self.binaryvars: + print(BINARY, file=fw) + print("\n".join(self.binaryvars), file=fw) + if self.generalvars: + print(GENERNAL, file=fw) + print("\n".join(self.generalvars), file=fw) + print(END, file=fw) + + def add_objective(self, edges, objective=MAXIMIZE): + assert edges, "Edges must be non-empty" + self.objective = objective + items = [ + " + {0}x{1}".format(w, i + 1) for i, (a, b, w) in enumerate(edges) if w + ] + sums = fill(items, width=10) + self.sum = sums + + def add_vars(self, nedges, offset=1, binary=True): + vars = [" x{0}".format(i + offset) for i in range(nedges)] + if binary: + self.binaryvars = vars + else: + self.generalvars = vars + + def lpsolve(self, solver="scip", clean=True): + self.print_instance() + + solver = SCIPSolver if solver == "scip" else GLPKSolver + lp_data = self.handle.getvalue() + self.handle.close() + + g = solver(lp_data, clean=clean) + selected = set(g.results) + try: + obj_val = g.obj_val + except AttributeError: # No solution! + return None, None + return selected, obj_val + + +def has_ortools() -> bool: + """Do we have an installation of OR-tools? + + Returns: + bool: True if installed + """ + try: + from ortools.linear_solver import pywraplp + + return True + except ImportError: + return False + + +def summation(incident_edges): + s = "".join(" + x{0}".format(i + 1) for i in incident_edges) + return s + + +def edges_to_graph(edges): + G = nx.DiGraph() + for e in edges: + a, b = e[:2] + G.add_edge(a, b) + return G + + +def edges_to_path(edges): + """ + Connect edges and return a path. 
+ """ + if not edges: + return None + + G = edges_to_graph(edges) + path = nx.topological_sort(G) + return path + + +def hamiltonian(edges, directed=False): + """ + Calculates shortest path that traverses each node exactly once. Convert + Hamiltonian path problem to TSP by adding one dummy point that has a distance + of zero to all your other points. Solve the TSP and get rid of the dummy + point - what remains is the Hamiltonian Path. + + >>> g = [(1,2), (2,3), (3,4), (4,2), (3,5)] + >>> hamiltonian(g) + [1, 2, 4, 3, 5] + >>> g = [(1,2), (2,3), (1,4), (2,5), (3,6)] + >>> hamiltonian(g) + """ + edges = populate_edge_weights(edges) + _, _, nodes = node_to_edge(edges, directed=False) + if not directed: # Make graph symmetric + dual_edges = edges[:] + for a, b, w in edges: + dual_edges.append((b, a, w)) + edges = dual_edges + + DUMMY = "DUMMY" + dummy_edges = ( + edges + [(DUMMY, x, 0) for x in nodes] + [(x, DUMMY, 0) for x in nodes] + ) + + results = tsp_gurobi(dummy_edges) + if results: + results = [x for x in results if DUMMY not in x] + results = edges_to_path(results) + if not directed: + results = min(results, results[::-1]) + return results + + +def tsp_gurobi(edges): + """ + Modeled using GUROBI python example. 
+ """ + from gurobipy import Model, GRB, quicksum + + edges = populate_edge_weights(edges) + incoming, outgoing, nodes = node_to_edge(edges) + idx = dict((n, i) for i, n in enumerate(nodes)) + nedges = len(edges) + n = len(nodes) + + m = Model() + + def step(x): + return "u_{0}".format(x) + + # Create variables + vars = {} + for i, (a, b, w) in enumerate(edges): + vars[i] = m.addVar(obj=w, vtype=GRB.BINARY, name=str(i)) + for u in nodes[1:]: + u = step(u) + vars[u] = m.addVar(obj=0, vtype=GRB.INTEGER, name=u) + m.update() + + # Bounds for step variables + for u in nodes[1:]: + u = step(u) + vars[u].lb = 1 + vars[u].ub = n - 1 + + # Add degree constraint + for v in nodes: + incoming_edges = incoming[v] + outgoing_edges = outgoing[v] + m.addConstr(quicksum(vars[x] for x in incoming_edges) == 1) + m.addConstr(quicksum(vars[x] for x in outgoing_edges) == 1) + + # Subtour elimination + edge_store = dict(((idx[a], idx[b]), i) for i, (a, b, w) in enumerate(edges)) + + # Given a list of edges, finds the shortest subtour + def subtour(s_edges): + visited = [False] * n + cycles = [] + lengths = [] + selected = [[] for i in range(n)] + for x, y in s_edges: + selected[x].append(y) + while True: + current = visited.index(False) + thiscycle = [current] + while True: + visited[current] = True + neighbors = [x for x in selected[current] if not visited[x]] + if len(neighbors) == 0: + break + current = neighbors[0] + thiscycle.append(current) + cycles.append(thiscycle) + lengths.append(len(thiscycle)) + if sum(lengths) == n: + break + return cycles[lengths.index(min(lengths))] + + def subtourelim(model, where): + if where != GRB.callback.MIPSOL: + return + selected = [] + # make a list of edges selected in the solution + sol = model.cbGetSolution([model._vars[i] for i in range(nedges)]) + selected = [edges[i] for i, x in enumerate(sol) if x > 0.5] + selected = [(idx[a], idx[b]) for a, b, w in selected] + # find the shortest cycle in the selected edge list + tour = subtour(selected) 
+ if len(tour) == n: + return + # add a subtour elimination constraint + c = tour + incident = [edge_store[a, b] for a, b in pairwise(c + [c[0]])] + model.cbLazy(quicksum(model._vars[x] for x in incident) <= len(tour) - 1) + + m.update() + + m._vars = vars + m.params.LazyConstraints = 1 + m.optimize(subtourelim) + + selected = [v.varName for v in m.getVars() if v.x > 0.5] + selected = [int(x) for x in selected if x[:2] != "u_"] + results = ( + sorted(x for i, x in enumerate(edges) if i in selected) if selected else None + ) + return results + + +def tsp(edges, constraint_generation=False): + """ + Calculates shortest cycle that traverses each node exactly once. Also known + as the Traveling Salesman Problem (TSP). + """ + edges = populate_edge_weights(edges) + incoming, outgoing, nodes = node_to_edge(edges) + + nedges, nnodes = len(edges), len(nodes) + L = LPInstance() + + L.add_objective(edges, objective=MINIMIZE) + balance = [] + # For each node, select exactly 1 incoming and 1 outgoing edge + for v in nodes: + incoming_edges = incoming[v] + outgoing_edges = outgoing[v] + icc = summation(incoming_edges) + occ = summation(outgoing_edges) + balance.append("{0} = 1".format(icc)) + balance.append("{0} = 1".format(occ)) + + # Subtour elimination - Miller-Tucker-Zemlin (MTZ) formulation + # + # Desrochers and laporte, 1991 (DFJ) has a stronger constraint + # See also: + # G. 
Laporte / The traveling salesman problem: Overview of algorithms + start_step = nedges + 1 + u0 = nodes[0] + nodes_to_steps = dict((n, start_step + i) for i, n in enumerate(nodes[1:])) + edge_store = dict((e[:2], i) for i, e in enumerate(edges)) + mtz = [] + for i, e in enumerate(edges): + a, b = e[:2] + if u0 in (a, b): + continue + na, nb = nodes_to_steps[a], nodes_to_steps[b] + con_ab = " x{0} - x{1} + {2}x{3}".format(na, nb, nnodes - 1, i + 1) + if (b, a) in edge_store: # This extra term is the stronger DFJ formulation + j = edge_store[(b, a)] + con_ab += " + {0}x{1}".format(nnodes - 3, j + 1) + con_ab += " <= {0}".format(nnodes - 2) + mtz.append(con_ab) + + # Step variables u_i bound between 1 and n, as additional variables + bounds = [] + for i in range(start_step, nedges + nnodes): + bounds.append(" 1 <= x{0} <= {1}".format(i, nnodes - 1)) + + L.add_vars(nedges) + + """ + Constraint generation seek to find 'cuts' in the LP problem, by solving the + relaxed form. The subtours were then incrementally added to the constraints. 
+ """ + if constraint_generation: + L.constraints = balance + subtours = [] + while True: + selected, obj_val = L.lpsolve() + results = ( + sorted(x for i, x in enumerate(edges) if i in selected) + if selected + else None + ) + if not results: + break + G = edges_to_graph(results) + cycles = list(nx.simple_cycles(G)) + if len(cycles) == 1: + break + for c in cycles: + incident = [edge_store[a, b] for a, b in pairwise(c + [c[0]])] + icc = summation(incident) + subtours.append("{0} <= {1}".format(icc, len(incident) - 1)) + L.constraints = balance + subtours + else: + L.constraints = balance + mtz + L.add_vars(nnodes - 1, offset=start_step, binary=False) + L.bounds = bounds + selected, obj_val = L.lpsolve() + results = ( + sorted(x for i, x in enumerate(edges) if i in selected) + if selected + else None + ) + + return results + + +def path(edges, source, sink, flavor="longest"): + """ + Calculates shortest/longest path from list of edges in a graph + + >>> g = [(1,2,1),(2,3,9),(2,4,3),(2,5,2),(3,6,8),(4,6,10),(4,7,4)] + >>> g += [(6,8,7),(7,9,5),(8,9,6),(9,10,11)] + >>> path(g, 1, 8, flavor="shortest") + ([1, 2, 4, 6, 8], 21) + >>> path(g, 1, 8, flavor="longest") + ([1, 2, 3, 6, 8], 25) + """ + outgoing, incoming, nodes = node_to_edge(edges) + + nedges = len(edges) + L = LPInstance() + + assert flavor in ("longest", "shortest") + + objective = MAXIMIZE if flavor == "longest" else MINIMIZE + L.add_objective(edges, objective=objective) + + # Balancing constraint, incoming edges equal to outgoing edges except + # source and sink + + constraints = [] + for v in nodes: + incoming_edges = incoming[v] + outgoing_edges = outgoing[v] + icc = summation(incoming_edges) + occ = summation(outgoing_edges) + + if v == source: + if not outgoing_edges: + return None + constraints.append("{0} = 1".format(occ)) + elif v == sink: + if not incoming_edges: + return None + constraints.append("{0} = 1".format(icc)) + else: + # Balancing + constraints.append("{0}{1} = 0".format(icc, 
occ.replace("+", "-"))) + # Simple path + if incoming_edges: + constraints.append("{0} <= 1".format(icc)) + if outgoing_edges: + constraints.append("{0} <= 1".format(occ)) + + L.constraints = constraints + L.add_vars(nedges) + + selected, obj_val = L.lpsolve() + results = ( + sorted(x for i, x in enumerate(edges) if i in selected) if selected else None + ) + results = edges_to_path(results) + + return results, obj_val + + +def min_feedback_arc_set(edges, remove=False, maxcycles=20000): + """ + A directed graph may contain directed cycles, when such cycles are + undesirable, we wish to eliminate them and obtain a directed acyclic graph + (DAG). A feedback arc set has the property that it has at least one edge + of every cycle in the graph. A minimum feedback arc set is the set that + minimizes the total weight of the removed edges; or alternatively maximize + the remaining edges. See: . + + The MIP formulation proceeds as follows: use 0/1 indicator variable to + select whether an edge is in the set, subject to constraint that each cycle + must pick at least one such edge. 
+ + >>> g = [(1, 2, 2), (2, 3, 2), (3, 4, 2)] + [(1, 3, 1), (3, 2, 1), (2, 4, 1)] + >>> min_feedback_arc_set(g) + ([(3, 2, 1)], 1) + >>> min_feedback_arc_set(g, remove=True) # Return DAG + ([(1, 2, 2), (2, 3, 2), (3, 4, 2), (1, 3, 1), (2, 4, 1)], 1) + """ + G = nx.DiGraph() + edge_to_index = {} + for i, (a, b, w) in enumerate(edges): + G.add_edge(a, b) + edge_to_index[a, b] = i + + nedges = len(edges) + L = LPInstance() + + L.add_objective(edges, objective=MINIMIZE) + + constraints = [] + ncycles = 0 + for c in nx.simple_cycles(G): + cycle_edges = [] + rc = c + [c[0]] # Rotate the cycle + for a, b in pairwise(rc): + cycle_edges.append(edge_to_index[a, b]) + cc = summation(cycle_edges) + constraints.append("{0} >= 1".format(cc)) + ncycles += 1 + if ncycles == maxcycles: + break + logger.debug("A total of %d cycles found.", ncycles) + + L.constraints = constraints + L.add_vars(nedges) + + selected, obj_val = L.lpsolve(clean=False) + if remove: + results = ( + [x for i, x in enumerate(edges) if i not in selected] if selected else None + ) + else: + results = ( + [x for i, x in enumerate(edges) if i in selected] if selected else None + ) + + return results, obj_val + + +if __name__ == "__main__": + + import doctest + + doctest.testmod() diff --git a/jcvi/algorithms/matrix.py b/jcvi/algorithms/matrix.py new file mode 100644 index 00000000..2f845af4 --- /dev/null +++ b/jcvi/algorithms/matrix.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Matrix related subroutines +""" + +import math +import numpy as np + + +is_symmetric = lambda M: (M.T == M).all() + + +def compact(A, factor=2): + """Make a matrix compact by a compact_factor. + Reference: + https://stackoverflow.com/questions/36383107/how-to-evaluate-the-sum-of-values-within-array-blocks + + Args: + A (numpy.ndarray): 2D matrix + factor (int, optional): Compact factor. Defaults to 2. 
+ + Example: + >>> A = np.arange(16, dtype=int).reshape(4, 4); A + array([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11], + [12, 13, 14, 15]]) + >>> compact(A, factor=2) + array([[10, 18], + [42, 50]]) + >>> compact(A, factor=4) + array([[120]]) + """ + assert len(A.shape) == 2, "Input matrix must be 2D" + rows, cols = A.shape + new_rows = rows // factor * factor + new_cols = cols // factor * factor + if (new_rows, new_cols) != A.shape: + A = A[:new_rows, :new_cols] + A_reshaped = A.reshape(rows // factor, factor, cols // factor, factor) + return np.einsum("ijkl->ik", A_reshaped) + + +def moving_sum(a, window=10): + kernel = np.repeat(1, window) + return np.convolve(a, kernel, mode="same") + + +def moving_average(a, window=10): + kernel = np.repeat(1.0, window) / window + return np.convolve(a, kernel) + + +def chunk_average(a, window=10, offset=None): + # Fixed size window, take average within the window + offset = offset or window + + bins = int(math.ceil((a.size - window) * 1.0 / offset)) + 1 + r = np.zeros((bins,), dtype=np.float) + start = 0 + for i in range(bins): + r[i] = np.average(a[start : start + window]) + start += offset + return r + + +def determine_positions(nodes, edges): + """ + Construct the problem instance to solve the positions of contigs. + + The input for spring_system() is A, K, L, which looks like the following. 
+ A = np.array([[1, -1, 0], [0, 1, -1], [1, 0, -1]]) + K = np.eye(3, dtype=int) + L = np.array([1, 2, 3]) + + For example, A-B distance 1, B-C distance 2, A-C distance 3, solve positions + + >>> determine_positions([0, 1, 2], [(0, 1, 1), (1, 2, 2), (0, 2, 3)]) + array([0, 1, 3]) + """ + N = len(nodes) + E = len(edges) + + A = np.zeros((E, N), dtype=int) + for i, (a, b, distance) in enumerate(edges): + A[i, a] = 1 + A[i, b] = -1 + + K = np.eye(E, dtype=int) + L = np.array([x[-1] for x in edges]) + + s = spring_system(A, K, L) + return np.array([0] + [int(round(x, 0)) for x in s]) + + +def determine_signs(nodes, edges, cutoff=1e-10): + """ + Construct the orientation matrix for the pairs on N molecules. + + >>> determine_signs([0, 1, 2], [(0, 1, 1), (0, 2, -1), (1, 2, -1)]) + array([ 1, 1, -1]) + """ + N = len(nodes) + M = np.zeros((N, N), dtype=float) + for a, b, w in edges: + M[a, b] += w + M = symmetrize(M) + + return get_signs(M, cutoff=cutoff, validate=False) + + +def symmetrize(M): + """ + If M only has a triangle filled with values, all the rest are zeroes, + this function will copy stuff to the other triangle + """ + return M + M.T - np.diag(M.diagonal()) + + +def get_signs(M, cutoff=1e-10, validate=True, ambiguous=True): + """ + Given a numpy array M that contains pairwise orientations, find the largest + eigenvalue and associated eigenvector and return the signs for the + eigenvector. This should correspond to the original orientations for the + individual molecule. In the first example below, let's say 3 molecules A, B + and C, A-B:same direction, A-C:opposite direction, B-C:opposite + direction. The final solution is to flip C. + + >>> M = np.array([[0,1,-1],[1,0,-1],[-1,-1,0]]) + >>> get_signs(M) + array([ 1, 1, -1]) + >>> M = np.array([[0,1,-1],[1,0,0],[-1,0,0]]) + >>> get_signs(M) + array([ 1, 1, -1]) + """ + # Is this a symmetric matrix? 
+ assert is_symmetric(M), "the matrix is not symmetric:\n{0}".format(str(M)) + N, x = M.shape + + # eigh() works on symmetric matrix (Hermitian) + w, v = np.linalg.eigh(M) + m = np.argmax(w) + mv = v[:, m] + f = lambda x: (x if abs(x) > cutoff else 0) + mv = [f(x) for x in mv] + + sign_array = np.array(np.sign(mv), dtype=int) + + # it does not really matter, but we prefer as few flippings as possible + if np.sum(sign_array) < 0: + sign_array = -sign_array + + if validate: + diag = np.eye(N, dtype=int) * sign_array + # final = diag @ M @ diag + final = diag.dot(M).dot(diag) # Python2.7 compatible + # The final result should have all pairwise in the same direction + assert (final >= 0).all(), "result check fails:\n{0}".format(final) + + if not ambiguous: # Do we allow ambiguous orientation (=0) ? + sign_array[sign_array == 0] = 1 + + return sign_array + + +def spring_system(A, K, L): + """ + Solving the equilibrium positions of the objects, linked by springs of + length L, stiffness of K, and connectivity matrix A. Then solving: + + F_nodes = -A'KAx - A'KL = 0 + + In the context of scaffolding, lengths (L) are inferred by mate inserts, + stiffness (K) is inferred via the number of links, connectivity (A) is the + contigs they connect. The mate pairs form the linkages between the contigs, + and can be considered as "springs" of certain lengths. The "springs" are + stretched or compressed if the distance deviates from the expected insert size. + + See derivation from Dayarian et al. 2010. SOPRA paper. 
+ + o---------o--------------o + x0 x1 x2 + |~~~~L1~~~|~~~~~~L2~~~~~~| + |~~~~~~~~~~L3~~~~~~~~~~~~| + + >>> A = np.array([[1, -1, 0], [0, 1, -1], [1, 0, -1]]) + >>> K = np.eye(3, dtype=int) + >>> L = np.array([1, 2, 3]) + >>> spring_system(A, K, L) + array([1., 3.]) + """ + # Linear equation is A'KAx = -A'KL + C = np.dot(A.T, K) + left = np.dot(C, A) + right = -np.dot(C, L) + + left = left[1:, 1:] + right = right[1:] + x = np.linalg.solve(left, right) + + return x + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/jcvi/algorithms/maxsum.py b/jcvi/algorithms/maxsum.py new file mode 100644 index 00000000..8843a0af --- /dev/null +++ b/jcvi/algorithms/maxsum.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Implements the max sum segment algorithm, using Kadane's algorithm, see + + +""" +Infinity = 1e10000 + + +def max_sum(a): + """ + For an input array a, output the range that gives the largest sum + + >>> max_sum([4, 4, 9, -5, -6, -1, 5, -6, -8, 9]) + (17, 0, 2) + >>> max_sum([8, -10, 10, -9, -6, 9, -7, -4, -10, -8]) + (10, 2, 2) + >>> max_sum([10, 1, -10, -8, 6, 10, -10, 6, -3, 10]) + (19, 4, 9) + """ + + max_sum, max_start_index, max_end_index = -Infinity, 0, 0 + current_max_sum = 0 + current_start_index = 0 + for current_end_index, x in enumerate(a): + current_max_sum += x + if current_max_sum > max_sum: + max_sum, max_start_index, max_end_index = ( + current_max_sum, + current_start_index, + current_end_index, + ) + if current_max_sum < 0: + current_max_sum = 0 + current_start_index = current_end_index + 1 + + return max_sum, max_start_index, max_end_index + + +if __name__ == "__main__": + import doctest + + doctest.testmod() + + import numpy as np + + A = np.random.random_integers(-10, 10, 10) + print("max_sum(%s)" % list(A)) + print(max_sum(A)) diff --git a/jcvi/algorithms/supermap.py b/jcvi/algorithms/supermap.py new file mode 100755 index 00000000..e4f54bad --- /dev/null +++ 
b/jcvi/algorithms/supermap.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +%prog infile [options] + +This script combines pairwise alignments, sort and filter the alignments. +Infile expect BLAST tabular format (-m8) or nucmer .coords file. + +In order to handle dups, we have to run two monotonic chains in both genomes, +first chain using ref, and a second chain using query and we will have options +to keep either the union or the intersection of retain chained alignments from +both genomes, similar to the SUPERMAP algorithm. This operation is symmetrical. +""" +import sys + +from ..apps.base import OptionParser, logger +from ..formats.blast import BlastLine +from ..formats.coords import CoordsLine +from ..utils.range import Range, range_chain + + +def BlastOrCoordsLine(filename, filter="ref", dialect="blast", clip=0): + allowed_filters = ("ref", "query") + REF, QUERY = range(len(allowed_filters)) + + allowed_dialects = ("blast", "coords") + BLAST, COORDS = range(len(allowed_dialects)) + + assert filter in allowed_filters + filter = allowed_filters.index(filter) + + assert dialect in allowed_dialects + dialect = allowed_dialects.index(dialect) + + fp = open(filename) + for i, row in enumerate(fp): + if row[0] == "#": + continue + if dialect == BLAST: + b = BlastLine(row) + if filter == QUERY: + query, start, end = b.query, b.qstart, b.qstop + else: + query, start, end = b.subject, b.sstart, b.sstop + else: + try: + b = CoordsLine(row) + except AssertionError: + continue + + if filter == QUERY: + query, start, end = b.query, b.start2, b.end2 + else: + query, start, end = b.ref, b.start1, b.end1 + + if start > end: + start, end = end, start + + if clip: + # clip cannot be more than 5% of the range + r = end - start + 1 + cc = min(0.05 * r, clip) + start = start + cc + end = end - cc + + yield Range(query, start, end, b.score, i) + + +def supermap(blast_file, filter="intersection", dialect="blast", clip=0): + # filter by query + if filter 
!= "ref": + logger.debug("filter by query") + ranges = list( + BlastOrCoordsLine(blast_file, filter="query", dialect=dialect, clip=clip) + ) + + query_selected, query_score = range_chain(ranges) + query_idx = set(x.id for x in query_selected) + + # filter by ref + if filter != "query": + logger.debug("filter by ref") + ranges = list( + BlastOrCoordsLine(blast_file, filter="ref", dialect=dialect, clip=clip) + ) + + ref_selected, ref_score = range_chain(ranges) + ref_idx = set(x.id for x in ref_selected) + + if filter == "ref": + selected_idx = ref_idx + + elif filter == "query": + selected_idx = query_idx + + elif filter == "intersection": + logger.debug("perform intersection") + selected_idx = ref_idx & query_idx + + elif filter == "union": + logger.debug("perform union") + selected_idx = ref_idx | query_idx + + assert len(selected_idx) != 0 + + # selected_idx is in fact the lineno in the BLAST file + fp = open(blast_file) + + if filter == "intersection": + tag = "" + else: + tag = "." + filter + supermapfile = blast_file + tag + ".supermap" + fw = open(supermapfile, "w") + + selected_idx = iter(sorted(selected_idx)) + selected = next(selected_idx) + for i, row in enumerate(fp): + if i < selected: + continue + print(row.rstrip(), file=fw) + try: + selected = next(selected_idx) + except StopIteration: + break + + logger.debug("Write output file to `{0}`".format(supermapfile)) + fw.close() + + from jcvi.formats.blast import sort + + ofilter = "ref" if filter == "ref" else "query" + args = [supermapfile, "--" + ofilter] + if dialect == "coords": + args += ["--coords"] + + sort(args) + + return supermapfile + + +if __name__ == "__main__": + + p = OptionParser(__doc__) + + filter_choices = ("ref", "query", "intersection", "union") + dialect_choices = ("blast", "coords") + p.add_argument( + "--filter", + choices=filter_choices, + default="intersection", + help="Available filters", + ) + p.add_argument("--dialect", choices=dialect_choices, help="Input format") + 
p.add_argument( + "--clip", + default=0, + type=int, + help="Clip ranges so that to allow minor overlaps", + ) + + opts, args = p.parse_args() + + if len(args) != 1: + sys.exit(p.print_help()) + + (blast_file,) = args + + dialect = opts.dialect + if not dialect: + # guess from the suffix + dialect = "coords" if blast_file.endswith(".coords") else "blast" + logger.debug("dialect is %s" % dialect) + + supermap(blast_file, filter=opts.filter, dialect=dialect, clip=opts.clip) diff --git a/jcvi/algorithms/tsp.py b/jcvi/algorithms/tsp.py new file mode 100644 index 00000000..2288d25d --- /dev/null +++ b/jcvi/algorithms/tsp.py @@ -0,0 +1,393 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +TSP solver using Concorde or OR-tools. This is much faster than the LP-formulation in +algorithms.lpsolve.tsp(). See also: +https://developers.google.com/optimization/routing/tsp +""" +import os.path as op + +from collections import defaultdict +from dataclasses import dataclass +from itertools import combinations + +import numpy as np + +from more_itertools import pairwise + +from jcvi.formats.base import must_open +from jcvi.apps.base import cleanup, logger, mkdir, sh, which + + +INF = 10000 +NEG_INF = -INF +Work_dir = "tsp_work" + + +@dataclass +class TSPDataModel: + edges: list # List of tuple (source, target, weight) + + def distance_matrix(self, precision=0) -> tuple: + """Compute the distance matrix + + Returns: + np.array: Numpy square matrix with integer entries as distance + """ + _, _, nodes = node_to_edge(self.edges, directed=False) + nodes_indices = dict((n, i) for i, n in enumerate(nodes)) + nnodes = len(nodes) + + # TSPLIB requires explicit weights to be integral, and non-negative + weights = [x[-1] for x in self.edges] + max_x, min_x = max(weights), min(weights) + inf = 2 * max(abs(max_x), abs(min_x)) + factor = 10**precision + logger.debug( + "TSP rescale: max_x=%d, min_x=%d, inf=%d, factor=%d", + max_x, + min_x, + inf, + factor, + ) + + D = np.ones((nnodes, 
nnodes), dtype=float) * inf + for a, b, w in self.edges: + ia, ib = nodes_indices[a], nodes_indices[b] + D[ia, ib] = D[ib, ia] = w + D = (D - min_x) * factor + D = D.astype(int) + return D, nodes + + def solve(self, time_limit=5, concorde=False, precision=0) -> list: + """Solve the TSP instance. + + Args: + time_limit (int, optional): Time limit to run. Default to 5 seconds. + concorde (bool, optional): Shall we run concorde? Defaults to False. + precision (int, optional): Float precision of distance. Defaults to 0. + + Returns: + list: Ordered list of node indices to visit + """ + if concorde: + return Concorde(self, precision=precision).tour + + # Use OR-tools + from ortools.constraint_solver import routing_enums_pb2 + from ortools.constraint_solver import pywrapcp + + D, nodes = self.distance_matrix(precision) + nnodes = len(nodes) + + # Create the routing index manager + manager = pywrapcp.RoutingIndexManager(nnodes, 1, 0) + + # Create routing model + routing = pywrapcp.RoutingModel(manager) + + def distance_callback(from_index, to_index): + """Returns the distance between the two nodes.""" + from_node = manager.IndexToNode(from_index) + to_node = manager.IndexToNode(to_index) + return D[from_node, to_node] + + transit_callback_index = routing.RegisterTransitCallback(distance_callback) + + # Define cost of each arc + routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index) + + # Search strategy + search_parameters = pywrapcp.DefaultRoutingSearchParameters() + search_parameters.local_search_metaheuristic = ( + routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH + ) + search_parameters.time_limit.seconds = time_limit + + # Solve the problem + solution = routing.SolveWithParameters(search_parameters) + + tour = [] + logger.info("Objective: %d", solution.ObjectiveValue()) + index = routing.Start(0) + route_distance = 0 + while not routing.IsEnd(index): + tour.append(manager.IndexToNode(index)) + previous_index = index + index = 
solution.Value(routing.NextVar(index)) + route_distance = routing.GetArcCostForVehicle(previous_index, index, 0) + logger.info("Route distance: %d", route_distance) + + return [nodes[x] for x in tour] + + +class Concorde(object): + def __init__( + self, + data: TSPDataModel, + work_dir=Work_dir, + clean=True, + verbose=False, + precision=0, + seed=666, + ): + """Run concorde on TSP instance + + Args: + data (TSPDataModel): TSP instance with edge weights + work_dir ([type], optional): Path to the work dir. Defaults to Work_dir. + clean (bool, optional): Clean up intermediate results. Defaults to True. + verbose (bool, optional): Show verbose messages. Defaults to False. + precision (int, optional): Float precision of distance. Defaults to 0. + seed (int, optional): Random seed. Defaults to 666. + """ + self.data = data + self.work_dir = work_dir + self.clean = clean + self.verbose = verbose + + mkdir(work_dir) + tspfile = op.join(work_dir, "data.tsp") + self.print_to_tsplib(tspfile, precision=precision) + _, outfile = self.run_concorde(tspfile, seed=seed) + self.tour = self.parse_output(outfile) + + if clean: + cleanup(work_dir) + residual_output = ["data.sol", "data.res", "Odata.res"] + cleanup(residual_output) + + def print_to_tsplib(self, tspfile, precision=0): + """ + See TSPlib format: + + + NAME: bayg29 + TYPE: TSP + COMMENT: 29 Cities in Bavaria, geographical distances + DIMENSION: 29 + EDGE_WEIGHT_TYPE: EXPLICIT + EDGE_WEIGHT_FORMAT: UPPER_ROW + DISPLAY_DATA_TYPE: TWOD_DISPLAY + EDGE_WEIGHT_SECTION + (... numbers ...) 
+ """ + fw = must_open(tspfile, "w") + D, nodes = self.data.distance_matrix(precision) + self.nodes = nodes + self.nnodes = len(nodes) + + print("NAME: data", file=fw) + print("TYPE: TSP", file=fw) + print("DIMENSION: {}".format(self.nnodes), file=fw) + print("EDGE_WEIGHT_TYPE: EXPLICIT", file=fw) + print("EDGE_WEIGHT_FORMAT: FULL_MATRIX", file=fw) + print("EDGE_WEIGHT_SECTION", file=fw) + + for row in D: # Dump the full matrix + print(" " + " ".join(str(x) for x in row), file=fw) + + print("EOF", file=fw) + fw.close() + logger.debug("Write TSP instance to `%s`", tspfile) + + def run_concorde(self, tspfile, seed=666): + outfile = op.join(self.work_dir, "data.sol") + cleanup(outfile) + + cc = "concorde" + assert which(cc), ( + "You must install `concorde` on your PATH" + + " [http://www.math.uwaterloo.ca/tsp/concorde.html]" + ) + cmd = "{0} -s {1} -x -o {2} {3}".format(cc, seed, outfile, tspfile) + + outf = None if self.verbose else "/dev/null" + retcode = sh(cmd, outfile=outf, errfile=outf) + return retcode, outfile + + def parse_output(self, outfile): + fp = open(outfile) + dimension = int(next(fp).strip()) # header + assert dimension == self.nnodes + tour = [] + for row in fp: + tour += [int(x) for x in row.split()] + tour = [self.nodes[x] for x in tour] + return tour + + +def node_to_edge(edges, directed=True): + """ + From list of edges, record per node, incoming and outgoing edges + """ + outgoing = defaultdict(set) + incoming = defaultdict(set) if directed else outgoing + nodes = set() + for i, edge in enumerate(edges): + ( + a, + b, + ) = edge[:2] + outgoing[a].add(i) + incoming[b].add(i) + nodes.add(a) + nodes.add(b) + nodes = list(nodes) + return outgoing, incoming, nodes + + +def populate_edge_weights(edges): + # assume weight is 1 if not specified + new_edges = [] + for e in edges: + assert len(e) in (2, 3) + if len(e) == 2: + a, b = e + w = 1 + else: + a, b, w = e + new_edges.append((a, b, w)) + return new_edges + + +def hamiltonian(edges, 
directed=False, time_limit=5, concorde=False, precision=0): + """ + Calculates shortest path that traverses each node exactly once. Convert + Hamiltonian path problem to TSP by adding one dummy point that has a distance + of zero to all your other points. Solve the TSP and get rid of the dummy + point - what remains is the Hamiltonian Path. + + >>> g = [(1,2), (2,3), (3,4), (4,2), (3,5)] + >>> hamiltonian(g) + [1, 2, 4, 3, 5] + >>> hamiltonian([(1, 2), (2, 3)], directed=True) + [1, 2, 3] + """ + edges = populate_edge_weights(edges) + _, _, nodes = node_to_edge(edges, directed=False) + DUMMY = "DUMMY" + dummy_edges = edges + [(DUMMY, x, 0) for x in nodes] + if directed: + dummy_edges += [(x, DUMMY, 0) for x in nodes] + dummy_edges = reformulate_atsp_as_tsp(dummy_edges) + + tour = tsp( + dummy_edges, time_limit=time_limit, concorde=concorde, precision=precision + ) + + dummy_index = tour.index(DUMMY) + tour = tour[dummy_index:] + tour[:dummy_index] + if directed: + dummy_star_index = tour.index((DUMMY, "*")) + assert dummy_star_index in (1, len(tour) - 1), tour + if dummy_star_index == len(tour) - 1: # need to flip + tour = tour[1:] + tour[:1] + tour = tour[::-1] + path = tour[1:] + path = [x for x in path if not isinstance(x, tuple)] + else: + path = tour[1:] + + return path + + +def tsp(edges, time_limit=5, concorde=False, precision=0) -> list: + """Compute TSP solution + + Args: + edges (list): List of tuple (source, target, weight) + time_limit (int, optional): Time limit to run. Default to 5 seconds. + concorde (bool, optional): Shall we run concorde? Defaults to False. + precision (int, optional): Float precision of distance. Defaults to 0. + + Returns: + list: List of nodes to visit + """ + data = TSPDataModel(edges) + return data.solve(time_limit=time_limit, concorde=concorde, precision=precision) + + +def reformulate_atsp_as_tsp(edges): + """ + To reformulate the ATSP as a TSP, for each city a dummy city (e.g, for New + York, a dummy city New York* is added. 
Between each city and its + corresponding dummy city a negative or very small distance with value cheap + is used. This makes sure that each cities always occurs in the solution + together with its dummy city. The original distances are used between the + cities and the dummy cities, where each city is responsible for the distance + going to the city and the dummy city is responsible for the distance coming + from the city. The distances between all cities and the distances between + all dummy cities are set to infeasible. + """ + _, _, nodes = node_to_edge(edges, directed=False) + new_edges = [] + for a, b, w in edges: + new_edges.append(((a, "*"), b, w)) + for n in nodes: + new_edges.append((n, (n, "*"), NEG_INF)) # A negative weight + return new_edges + + +def make_data(N, directed=False): + x = np.random.randn(N) + y = np.random.randn(N) + xy = list(zip(x, y)) + M = np.zeros((N, N), dtype=float) + for ia, ib in combinations(range(N), 2): + ax, ay = xy[ia] + bx, by = xy[ib] + d = ((ax - bx) ** 2 + (ay - by) ** 2) ** 0.5 + M[ia, ib] = M[ib, ia] = d + + edges = [] + for ia, ib in combinations(range(N), 2): + edges.append((ia, ib, M[ia, ib])) + if directed: + edges.append((ib, ia, M[ib, ia])) + + return x, y, M, edges + + +def evaluate(tour, M): + score = 0 + for ia, ib in pairwise(tour): + score += M[ia, ib] + return score + + +def plot_data(x, y, tour, M): + from jcvi.graphics.base import plt, savefig + + plt.plot(x, y, "ro") + for ia, ib in pairwise(tour): + plt.plot((x[ia], x[ib]), (y[ia], y[ib]), "r-") + + score = evaluate(tour, M) + plt.title("Score={0:.2f}".format(score)) + + savefig("demo.pdf") + + +def concorde_demo(POINTS=100): + x, y, M, edges = make_data(POINTS) + ctour = hamiltonian(edges, precision=3) + plot_data(x, y, ctour, M) + + +def compare_lpsolve_to_concorde(POINTS=80, directed=False): + from jcvi.algorithms.lpsolve import hamiltonian as lhamiltonian + + _, _, M, edges = make_data(POINTS, directed=directed) + ltour = lhamiltonian(edges, 
directed=directed) + print(ltour, evaluate(ltour, M)) + + ctour = hamiltonian(edges, directed=directed, precision=3) + print(ctour, evaluate(ctour, M)) + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/jcvi/annotation/__init__.py b/jcvi/annotation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/jcvi/annotation/__main__.py b/jcvi/annotation/__main__.py new file mode 100644 index 00000000..74cb9a18 --- /dev/null +++ b/jcvi/annotation/__main__.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +Collection of scripts to run gene finders, execute annotation pipelines, perform QC checks and generate summary statistics +""" + + +from ..apps.base import dmain + + +if __name__ == "__main__": + dmain(__file__) diff --git a/jcvi/annotation/ahrd.py b/jcvi/annotation/ahrd.py new file mode 100644 index 00000000..b8b28208 --- /dev/null +++ b/jcvi/annotation/ahrd.py @@ -0,0 +1,708 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Utility to run Automated Human Readable Description (AHRD) pipeline. + + +""" +import os.path as op +import sys +import re + +from os import symlink + +from ..apps.base import ActionDispatcher, OptionParser, logger, mkdir, glob +from ..formats.base import must_open + + +# --- Compiled RegExps ---- +# Cellular locations +loc_pat = re.compile(r",\s*(chloroplastic|cytoplasmic|mitochondrial).*?\s$", re.I) +# Any word that matches e.g. Os02g0234800 +osg_pat = re.compile(r"\bOs\d{2}g\d{7}.*?\s", re.I) +# (fragment) +frag_pat = re.compile(r"\(fragment[s]?\)", re.I) +# Trailing protein numeric copy (e.g. Myb 1) +trail_pat = re.compile(r"(? 
' +apos_pat = re.compile(r"'?") + +# > => none +gt_pat = re.compile(r">") + +# -like to -like protein +like_pat = re.compile(r"[-]like$", re.I) + +# 'repeat$' to 'repeat protein' +repeat_pat = re.compile(r"repeat$", re.I) + +# re used by the following 3 cases +Protein_pat = re.compile(r"Protein\s+", re.I) + +# 'binding$' to 'binding protein' +binding_pat = re.compile(r"binding$", re.I) + +# 'domain$' to 'domain-containing protein' +domain_pat = re.compile(r"domain$", re.I) + +# 'related$' to '-like protein' +related_pat = re.compile(r"[,\s+]*[\s+|-]*related$", re.I) + +# '[0-9]+ homolog' to '-like protein' +homolog_pat1 = re.compile(r"(? sulfur +# sulph -> sulf +sulfer_pat = re.compile(r"sulfer") +sulph_pat = re.compile(r"sulph") + +# monoxy to monooxy +monoxy_pat = re.compile(r"monoxy") + +# proteine to protein +proteine_pat = re.compile(r"proteine") + +# signalling to signaling +signalling_pat = re.compile(r"signalling") + +# aluminium to aluminum +aluminium_pat = re.compile(r"aluminium", re.I) + +# haem to heme +# haemo to hemo +haem_pat = re.compile(r"\bhaem\b", re.I) +haemo_pat = re.compile(r"haemo", re.I) + +# assessory -> accessory +assessory_pat = re.compile(r"assessory") + +# british to american spelling conversion +# -ise -> -ize +# -ised -> -ized +# -isation -> -ization +# -bre -> -ber +ise_pat = re.compile(r"\b([A-z]+)ise([d]?)\b") +isation_pat = re.compile(r"\b([A-z]+)isation\b") +bre_pat = re.compile(r"\b([A-z]+)bre\b") + +# /with \S+ and \S+/ pattern +# /, and \S+/ pattern +# identify names with two domains +with_and_pat = re.compile(r"[with|,]\s*\S+and\S+") + +Template = """ +proteins_fasta: {2} +token_score_bit_score_weight: {4} +token_score_database_score_weight: {5} +token_score_overlap_score_weight: {6} +description_score_relative_description_frequency_weight: 0.6 +output: {3} +blast_dbs: + swissprot: + weight: 100 + file: ./swissprot/{1}.swissprot.tab + database: ./dbs/swissprot.fasta + blacklist: {0}/blacklist_descline.txt + filter: 
{0}/filter_descline_sprot.txt + token_blacklist: {0}/blacklist_token.txt + description_score_bit_score_weight: 0.2 + + tair: + weight: 50 + file: ./tair/{1}.tair.tab + database: ./dbs/tair.fasta + blacklist: {0}/blacklist_descline.txt + filter: {0}/filter_descline_tair.txt + fasta_header_regex: "^>(?[aA][tT][0-9mMcC][gG]\\\\d+(\\\\.\\\\d+)?)\\\\s+\\\\|[^\\\\|]+\\\\|\\\\s+(?[^\\\\|]+)(\\\\s*\\\\|.*)?$" + short_accession_regex: "^(?.+)$" + token_blacklist: {0}/blacklist_token.txt + description_score_bit_score_weight: 0.4 + + trembl: + weight: 10 + file: ./trembl/{1}.trembl.tab + database: ./dbs/trembl.fasta + blacklist: {0}/blacklist_descline.txt + filter: {0}/filter_descline_trembl.txt + token_blacklist: {0}/blacklist_token.txt + description_score_bit_score_weight: 0.4 +{7} +""" + +iprscanTemplate = """ +interpro_database: ./interpro.xml +interpro_result: {0} +""" + +# Necessary for the script to know the location of `interpro.xml` and `interpro.dtd` +iprscan_datadir = "/usr/local/devel/ANNOTATION/iprscan/iprscan_v4.7/data" + + +def main(): + + actions = ( + ("batch", "batch run AHRD"), + ("merge", "merge AHRD run results"), + ("fix", "fix AHRD names"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +Unknown = "Unknown protein" +Hypothetical = "hypothetical protein" + + +def read_interpro(ipr): + store = {} + fp = open(ipr) + # Aco000343.1 0d98a55eb3399a408e06252a2e24efcf 2083 Pfam + # PF00476 DNA polymerase family A 1685 2075 1.70E-55 T + # 10-10-2014 IPR001098 "DNA-directed DNA polymerase, family A, + # palm domain" GO:0003677|GO:0003887|GO:0006260 KEGG: + # 00230+2.7.7.7|KEGG: 00240+2.7.7.7 + for row in fp: + ( + accession, + md5, + seqlen, + analysis, + signature, + signature_description, + start, + stop, + score, + status, + date, + interpro, + interpro_description, + GO, + pathway, + ) = row.split("\t") + accession = accession.split(".")[0] + interpro_description = interpro_description.replace('"', "") + pathway = pathway.strip() + if 
accession not in ipr: + store[accession] = (interpro, interpro_description, GO, pathway) + return store + + +def fix_text(s, ignore_sym_pat=False): + + if not ignore_sym_pat: + # Fix descriptions like D7TDB1 ( + s = re.sub(r"([A-Z0-9]){6} \(", "", s) + s = s.split(";")[0] + + # Fix parantheses containing names + s = s.strip("[]") + s = s.replace("(-)", "[-]") + s = s.replace("(+)", "[+]") + s = s.replace("(Uncharacterized protein)", "") + if not ignore_sym_pat: + s = s.strip("()") + + # fix minor typos, seen in `autonaming` output + # change 'protei ' to 'protein ' + # change 'hypthetical' to 'hypothetical' + # fix string starting with 'ytochrome' + if "protei " in s: + s = s.replace("protei ", "protein ") + if "hypthetical" in s: + s = s.replace("hypthetical", "hypothetical") + if s.startswith("ytochrome"): + s = s.replace("ytochrome", "cytochrome") + + # before trimming off at the first ";", check if name has glycosidic + # linkage information (e.g 1,3 or 1,4). If so, also check if multiple + # linkages are separated by ";". If so, replace ";" by "-" + m = re.findall(glycosidic_link_pat, s) + if m and ";" in s: + s = re.sub(r";\s*", "-", s) + + # remove underscore from description + s = re.sub("_", " ", s) + + # Cellular locations + # Any word that matches e.g. AT5G54690 + # Any word that matches e.g. Os02g0234800 + # (fragment) + # UPF + # Remove 'DDB_G\d+' ID + # '_At[0-9]+g[0-9]+' to '' + for pat in (loc_pat, osg_pat, frag_pat, upf_pat, ddb_pat): + # below is a hack since word boundaries don't work on / + s = s.strip() + " " + s = re.sub(pat, "", s) + + # '? 
=> ' + s = re.sub(apos_pat, "'", s) + # > => none + s = re.sub(gt_pat, "", s) + # reduce runs such as -- ''' + s = re.sub(r"[-]+", "-", s) + s = re.sub(r"[']+", "'", s) + + s = s.strip() + + # -like to -like protein + s = re.sub(like_pat, "-like protein", s) + + # 'repeat$' to 'repeat protein' + if re.search(repeat_pat, s): + s += "-containing protein" + + # 'binding$' to 'binding protein' + if re.search(binding_pat, s): + s += " protein" + if re.match(Protein_pat, s): + s = re.sub(Protein_pat, "", s) + + # 'domain$' to 'domain-containing protein' + if re.search(domain_pat, s): + s += "-containing protein" + if re.search(r"-domain", s): + s = re.sub(r"-domain", " domain", s) + if re.match(Protein_pat, s): + s = re.sub(Protein_pat, "", s) + + # 'related$' to '-like protein' + if re.search(related_pat, s): + s = re.sub(related_pat, "-like protein", s) + if re.match(Protein_pat, s) and not re.match(r"Protein kinase", s): + s = re.sub(Protein_pat, "", s) + + # '[0-9]+ homolog' to '-like protein' + if re.search(homolog_pat1, s): + s = re.sub(homolog_pat1, "-like protein", s) + if re.match(Protein_pat, s): + s = re.sub(Protein_pat, "", s) + + # 'Protein\s+(.*)\s+homolog' to '$1-like protein' + match = re.search(homolog_pat2, s) + if match and not re.match(r"Protein kinase", s): + ret = match.group(1) + s = re.sub(homolog_pat2, ret + "-like protein", s) + s = re.sub(r"^\s+", "", s) + s = s.capitalize() + + # 'homolog protein' to '-like protein' + # 'homologue$' to '-like protein' + # 'homolog$' to '-like protein' + for pat in (homolog_pat3, homolog_pat5, homolog_pat6): + if re.search(pat, s): + s = re.sub(pat, "-like protein", s) + + # 'Agenet domain-containing protein / bromo-adjacent homology (BAH) domain-containing protein' + # to 'Agenet and bromo-adjacent homology (BAH) domain-containing protein' + if re.search(agenet_pat, s): + s = re.sub(agenet_pat, "Agenet and ", s) + + # plural to singular + if re.search(plural_pat, s): + if (s.find("biogenesis") == -1 and 
s.find("Topors") == -1) or ( + not re.search(with_and_pat, s) + ): + s = re.sub(r"s$", "", s) + + # 'like_TBP' or 'likeTBP' to 'like TBP' + if re.search(tbp_pat, s): + s = re.sub(tbp_pat, "like TBP", s) + + # 'protein protein' to 'protein' + if re.search(prot_pat, s): + s = re.sub(prot_pat, "protein", s) + + # 'dimerisation' to 'dimerization' + if re.search(dimer_pat, s): + s = re.sub(dimer_pat, "dimerization", s) + + # Any AHRD that matches e.g. "AT5G54690-like protein" + # Any AHRD that contains the words '^Belongs|^Encoded|^Expression|^highly' + for pat in (atg_pat, athila_pat1): + if re.search(pat, s): + s = Unknown + + # remove 'arabidopsis[ thaliana]' and/or embedded Atg IDs + for pat in (atg_id_pat, athila_pat2, athila_pat3, athila_pat4): + # below is a hack since word boundaries don't work on / + s = s.strip() + " " + s = re.sub(pat, "", s) + + # remove "\s+LENGTH=\d+" from TAIR deflines + if re.search(length_pat, s): + s = re.sub(length_pat, "", s) + + # if name has a dot followed by a space (". ") in it and contains multiple + # parts separated by a comma, strip name starting from first occurrence of "," + if re.search(r"\. ", s): + if re.search(r",", s): + s = s.split(",")[0] + + # if name contains any of the disallowed words, + # remove word occurrence from name + # if name contains references to any other organism, trim name upto + # that occurrence + for pat in (disallow_pat, organism_pat): + if re.search(pat, s): + s = re.sub(pat, "", s) + + s = s.strip() + + if not ignore_sym_pat: + # 'homolog \d+' to '-like protein' + if re.search(homolog_pat4, s): + s = re.sub(homolog_pat4, "", s) + + # Trailing protein numeric copy (e.g. 
Myb 1) + if re.search(trail_pat, s): + s = re.sub(trail_pat, "", s) + + # if name is entirely a gene symbol-like (all capital letters, maybe followed by numbers) + # add a "-like protein" at the end + if (re.search(sym_pat, s) or re.search(lc_sym_pat, s)) and not re.search( + spada_pat, s + ): + s = s + "-like protein" + + # if gene symbol in parantheses at EOL, remove symbol + if re.search(eol_sym_pat, s): + s = re.sub(eol_sym_pat, "", s) + + # if name terminates at a symbol([^A-Za-z0-9_]), trim it off + if re.search(r"\W+$", s) and not re.search(r"\)$", s): + s = re.sub(r"\W+$", "", s) + + if "uncharacterized" in s: + s = "uncharacterized protein" + + # change sulfer to sulfur + if re.search(sulfer_pat, s): + s = re.sub(sulfer_pat, "sulfur", s) + + # change sulph to sulf + if re.search(sulph_pat, s): + s = re.sub(sulph_pat, "sulf", s) + + # change monoxy to monooxy + if re.search(monoxy_pat, s): + s = re.sub(monoxy_pat, "monooxy", s) + + # change proteine to protein + if re.search(proteine_pat, s): + s = re.sub(proteine_pat, "protein", s) + + # change signalling to signaling + if re.search(signalling_pat, s): + s = re.sub(signalling_pat, "signaling", s) + + # change aluminium to aluminum + if re.search(aluminium_pat, s): + s = re.sub(aluminium_pat, "aluminum", s) + + # change haem to heme + if re.search(haem_pat, s): + s = re.sub(haem_pat, "heme", s) + + # chage haemo to hemo + if re.search(haemo_pat, s): + s = re.sub(haemo_pat, "hemo", s) + + # change assessory to accessory + if re.search(assessory_pat, s): + s = re.sub(assessory_pat, "accessory", s) + + # change -ise/-ised/-isation to -ize/-ized/-ization + match = re.search(ise_pat, s) + if match: + ret = match.group(1) + if match.group(2): + suff = match.group(2) + s = re.sub(ise_pat, "{0}ize{1}".format(ret, suff), s) + else: + s = re.sub(ise_pat, "{0}ize".format(ret), s) + + match = re.search(isation_pat, s) + if match: + ret = match.group(1) + s = re.sub(isation_pat, "{0}ization".format(ret), s) + + # change 
-bre to -ber + match = re.search(bre_pat, s) + if match: + ret = match.group(1) + s = re.sub(bre_pat, "{0}ber".format(ret), s) + + if not s.startswith(Hypothetical): + # 'Candidate|Hypothetical|Novel|Predicted|Possible|Probable|Uncharacterized' to 'Putative' + if s.startswith("Uncharacterized") and any( + pat in s for pat in ("UCP", "UPF", "protein") + ): + pass + else: + if re.search(put_pat, s): + s = re.sub(put_pat, "Putative", s) + + sl = s.lower() + + # Any mention of `clone` or `contig` is not informative + if "clone" in sl or "contig" in sl: + s = Unknown + + # All that's left is `protein` is not informative + if sl in ("protein", "protein, putative", ""): + s = Unknown + + if Unknown.lower() in sl: + s = Unknown + + if "FUNCTIONS IN".lower() in sl and "unknown" in sl: + s = Unknown + + if "LOCATED IN".lower() in sl: + s = Unknown + + s = re.sub(r"[,]*\s+putative$", "", s) + + if s == Unknown or s.strip() == "protein": + s = Hypothetical + + # Compact all spaces + s = " ".join(s.split()) + + assert s.strip() + + return s + + +def fix(args): + """ + %prog fix ahrd.csv > ahrd.fixed.csv + + Fix ugly names from Uniprot. + """ + p = OptionParser(fix.__doc__) + p.add_argument( + "--ignore_sym_pat", + default=False, + action="store_true", + help="Do not fix names matching symbol patterns i.e." + + " names beginning or ending with gene symbols or a series of numbers." + + " e.g. 
`ARM repeat superfamily protein`, `beta-hexosaminidase 3`," + + " `CYCLIN A3;4`, `WALL ASSOCIATED KINASE (WAK)-LIKE 10`", + ) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + (csvfile,) = args + fp = open(csvfile) + fw = must_open(opts.outfile, "w") + for row in fp: + if row[0] == "#": + continue + if row.strip() == "": + continue + atoms = row.rstrip("\r\n").split("\t") + name, hit, ahrd_code, desc = ( + atoms[:4] if len(atoms) > 2 else (atoms[0], None, None, atoms[-1]) + ) + + newdesc = fix_text(desc, ignore_sym_pat=opts.ignore_sym_pat) + if hit and hit.strip() != "" and newdesc == Hypothetical: + newdesc = "conserved " + newdesc + print("\t".join(atoms[:4] + [newdesc] + atoms[4:]), file=fw) + + +def merge(args): + """ + %prog merge output/*.csv > ahrd.csv + + Merge AHRD results, remove redundant headers, empty lines, etc. If there are + multiple lines containing the same ID (first column). Then whatever comes + the first will get retained. + """ + p = OptionParser(merge.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + csvfiles = args + cf = csvfiles[0] + fp = open(cf) + for row in fp: + if row.startswith("Protein"): + break + header = row.rstrip() + print(header) + + seen = set() + for cf in csvfiles: + fp = open(cf) + for row in fp: + if row[0] == "#": + continue + if row.strip() == "": + continue + if row.strip() == header: + continue + + atoms = row.rstrip().split("\t") + id = atoms[0] + if id in seen: + logger.error("ID `%s` ignored.", id) + continue + + seen.add(id) + print(row.strip()) + + +def batch(args): + """ + %prog batch splits output + + The arguments are two folders. + Input FASTA sequences are in splits/. + Output csv files are in output/. + + Must have folders swissprot/, tair/, trembl/ that contains the respective + BLAST output. 
Once finished, you can run, for example: + + $ parallel java -Xmx2g -jar ~/code/AHRD/dist/ahrd.jar {} ::: output/*.yml + """ + p = OptionParser(batch.__doc__) + + ahrd_weights = {"blastp": [0.5, 0.3, 0.2], "blastx": [0.6, 0.4, 0.0]} + blast_progs = tuple(ahrd_weights.keys()) + + p.add_argument( + "--path", + default="~/code/AHRD/", + help="Path where AHRD is installed", + ) + p.add_argument( + "--blastprog", + default="blastp", + choices=blast_progs, + help="Specify the blast program being run. Based on this option," + + " the AHRD parameters (score_weights) will be modified", + ) + p.add_argument( + "--iprscan", + default=None, + help="Specify path to InterProScan results file if available." + + " If specified, the yml conf file will be modified" + + " appropriately", + ) + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + splits, output = args + mkdir(output) + + bit_score, db_score, ovl_score = ahrd_weights[opts.blastprog] + + for f in glob("{0}/*.fa*".format(splits)): + fb = op.basename(f).rsplit(".", 1)[0] + fw = open(op.join(output, fb + ".yml"), "w") + + path = op.expanduser(opts.path) + dir = op.join(path, "test/resources") + outfile = op.join(output, fb + ".csv") + interpro = iprscanTemplate.format(opts.iprscan) if opts.iprscan else "" + + print( + Template.format( + dir, fb, f, outfile, bit_score, db_score, ovl_score, interpro + ), + file=fw, + ) + + if opts.iprscan: + if not op.lexists("interpro.xml"): + symlink(op.join(iprscan_datadir, "interpro.xml"), "interpro.xml") + + if not op.lexists("interpro.dtd"): + symlink(op.join(iprscan_datadir, "interpro.dtd"), "interpro.dtd") + + +if __name__ == "__main__": + main() diff --git a/jcvi/annotation/automaton.py b/jcvi/annotation/automaton.py new file mode 100644 index 00000000..98782db1 --- /dev/null +++ b/jcvi/annotation/automaton.py @@ -0,0 +1,287 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Automate genome annotation by iterating processing a set of 
files, individually. +""" + +import os.path as op +import sys + +from functools import partial +from tempfile import mkdtemp + +from ..assembly.automaton import iter_project +from ..apps.grid import Jobs, MakeManager +from ..formats.base import FileMerger, split +from ..apps.base import ( + ActionDispatcher, + OptionParser, + cleanup, + iglob, + logger, + mkdir, + need_update, + sh, +) + + +def main(): + + actions = ( + ("augustus", "run parallel AUGUSTUS"), + ("cufflinks", "run cufflinks following tophat"), + ("star", "run star alignment"), + ("tophat", "run tophat on a list of inputs"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def augustuswrap(fastafile, species="maize", gff3=True, cfgfile=None, hintsfile=None): + cmd = "augustus {0}".format(fastafile) + if gff3: + cmd += " --gff3=on" + cmd += " --species={0}".format(species) + if cfgfile: + cmd += " --extrinsicCfgFile={0}".format(cfgfile) + if hintsfile: + cmd += " --alternatives-from-evidence=true" + cmd += " --hintsfile={0} --allow_hinted_splicesites=atac".format(hintsfile) + cmd += " --introns=on --genemodel=complete" + suffix = ".gff3" if gff3 else ".out" + outfile = fastafile.rsplit(".", 1)[0] + suffix + sh(cmd, outfile=outfile) + return outfile + + +def augustus(args): + """ + %prog augustus fastafile + + Run parallel AUGUSTUS. Final results can be reformatted using + annotation.reformat.augustus(). 
+ """ + p = OptionParser(augustus.__doc__) + p.add_argument( + "--species", default="maize", help="Use species model for prediction" + ) + p.add_argument("--hintsfile", help="Hint-guided AUGUSTUS") + p.add_argument( + "--nogff3", default=False, action="store_true", help="Turn --gff3=off" + ) + p.set_home("augustus") + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + cpus = opts.cpus + mhome = opts.augustus_home + gff3 = not opts.nogff3 + suffix = ".gff3" if gff3 else ".out" + cfgfile = op.join(mhome, "config/extrinsic/extrinsic.M.RM.E.W.cfg") + + outdir = mkdtemp(dir=".") + fs = split([fastafile, outdir, str(cpus)]) + + augustuswrap_params = partial( + augustuswrap, + species=opts.species, + gff3=gff3, + cfgfile=cfgfile, + hintsfile=opts.hintsfile, + ) + g = Jobs(augustuswrap_params, fs.names) + g.run() + + gff3files = [x.rsplit(".", 1)[0] + suffix for x in fs.names] + outfile = fastafile.rsplit(".", 1)[0] + suffix + FileMerger(gff3files, outfile=outfile).merge() + cleanup(outdir) + + if gff3: + from jcvi.annotation.reformat import augustus as reformat_augustus + + reformat_outfile = outfile.replace(".gff3", ".reformat.gff3") + reformat_augustus([outfile, "--outfile={0}".format(reformat_outfile)]) + + +def star(args): + """ + %prog star folder reference + + Run star on a folder with reads. 
+ """ + p = OptionParser(star.__doc__) + p.add_argument( + "--single", default=False, action="store_true", help="Single end mapping" + ) + p.set_fastq_names() + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + cpus = opts.cpus + mm = MakeManager() + + num = 1 if opts.single else 2 + folder, reference = args + gd = "GenomeDir" + mkdir(gd) + STAR = "STAR --runThreadN {0} --genomeDir {1}".format(cpus, gd) + + # Step 0: build genome index + genomeidx = op.join(gd, "Genome") + if need_update(reference, genomeidx): + cmd = STAR + " --runMode genomeGenerate" + cmd += " --genomeFastaFiles {0}".format(reference) + mm.add(reference, genomeidx, cmd) + + # Step 1: align + for p, prefix in iter_project(folder, opts.names, num): + pf = "{0}_star".format(prefix) + bamfile = pf + "Aligned.sortedByCoord.out.bam" + cmd = STAR + " --readFilesIn {0}".format(" ".join(p)) + if p[0].endswith(".gz"): + cmd += " --readFilesCommand zcat" + cmd += " --outSAMtype BAM SortedByCoordinate" + cmd += " --outFileNamePrefix {0}".format(pf) + cmd += " --twopassMode Basic" + # Compatibility for cufflinks + cmd += " --outSAMstrandField intronMotif" + cmd += " --outFilterIntronMotifs RemoveNoncanonical" + mm.add(p, bamfile, cmd) + + mm.write() + + +def cufflinks(args): + """ + %prog cufflinks folder reference + + Run cufflinks on a folder containing tophat results. 
+ """ + p = OptionParser(cufflinks.__doc__) + p.add_argument("--gtf", help="Reference annotation") + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + folder, reference = args + cpus = opts.cpus + gtf = opts.gtf + transcripts = "transcripts.gtf" + + mm = MakeManager() + gtfs = [] + for bam in iglob(folder, "*.bam"): + pf = op.basename(bam).split(".")[0] + outdir = pf + "_cufflinks" + cmd = "cufflinks" + cmd += " -o {0}".format(outdir) + cmd += " -p {0}".format(cpus) + if gtf: + cmd += " -g {0}".format(gtf) + cmd += " --frag-bias-correct {0}".format(reference) + cmd += " --multi-read-correct" + cmd += " {0}".format(bam) + cgtf = op.join(outdir, transcripts) + mm.add(bam, cgtf, cmd) + gtfs.append(cgtf) + + assemblylist = "assembly_list.txt" + cmd = 'find . -name "{0}" > {1}'.format(transcripts, assemblylist) + mm.add(gtfs, assemblylist, cmd) + + mergedgtf = "merged/merged.gtf" + cmd = "cuffmerge" + cmd += " -o merged" + cmd += " -p {0}".format(cpus) + if gtf: + cmd += " -g {0}".format(gtf) + cmd += " -s {0}".format(reference) + cmd += " {0}".format(assemblylist) + mm.add(assemblylist, mergedgtf, cmd) + + mm.write() + + +def tophat(args): + """ + %prog tophat folder reference + + Run tophat on a folder of reads. 
+ """ + from jcvi.apps.bowtie import check_index + from jcvi.formats.fastq import guessoffset + + p = OptionParser(tophat.__doc__) + p.add_argument("--gtf", help="Reference annotation") + p.add_argument( + "--single", default=False, action="store_true", help="Single end mapping" + ) + p.add_argument( + "--intron", + default=15000, + type=int, + help="Max intron size", + ) + p.add_argument( + "--dist", + default=-50, + type=int, + help="Mate inner distance", + ) + p.add_argument( + "--stdev", + default=50, + type=int, + help="Mate standard deviation", + ) + p.set_phred() + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + num = 1 if opts.single else 2 + folder, reference = args + reference = check_index(reference) + for p, prefix in iter_project(folder, n=num): + outdir = "{0}_tophat".format(prefix) + outfile = op.join(outdir, "accepted_hits.bam") + if op.exists(outfile): + logger.debug("File `%s` found. Skipping.", outfile) + continue + + cmd = "tophat -p {0}".format(opts.cpus) + if opts.gtf: + cmd += " -G {0}".format(opts.gtf) + cmd += " -o {0}".format(outdir) + + if num == 1: # Single-end + (a,) = p + else: # Paired-end + a, b = p + cmd += " --max-intron-length {0}".format(opts.intron) + cmd += " --mate-inner-dist {0}".format(opts.dist) + cmd += " --mate-std-dev {0}".format(opts.stdev) + + phred = opts.phred or str(guessoffset([a])) + if phred == "64": + cmd += " --phred64-quals" + cmd += " {0} {1}".format(reference, " ".join(p)) + + sh(cmd) + + +if __name__ == "__main__": + main() diff --git a/jcvi/annotation/depth.py b/jcvi/annotation/depth.py new file mode 100755 index 00000000..2fbd8971 --- /dev/null +++ b/jcvi/annotation/depth.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +From genomeCovergeBed results, initialize the count array, set cutoffs +and optimize against the truth, to determine the cutoff for incorporating +RNA-seq into annotation pipelines. 
+""" +import sys +import os.path as op + +from itertools import groupby + +import numpy as np + +from ..apps.base import ActionDispatcher, OptionParser, logger +from ..formats.base import BaseFile, must_open +from ..formats.sizes import Sizes + + +class BinFile(BaseFile): + """ + The binfile contains per base count, fastafile provides the coordinate + system. + """ + + def __init__(self, binfile, dtype=np.uint8): + super().__init__(binfile) + assert op.exists( + binfile + ), "Binary file `{0}` not found. Rerun depth.count().".format(binfile) + self.dtype = dtype + + @property + def array(self): + binfile = self.filename + return np.fromfile(binfile, dtype=self.dtype) + + @property + def mmarray(self): + binfile = self.filename + return np.memmap(binfile, dtype=self.dtype, mode="r") + + +def main(): + + actions = ( + ("count", "initialize the count array"), + ("query", "query the count array to get depth at particular site"), + ("merge", "merge several count arrays into one"), + ( + "bed", + "write bed files where the bases have at least certain depth", + ), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def bed(args): + """ + %prog bed binfile fastafile + + Write bed files where the bases have at least certain depth. 
+ """ + p = OptionParser(bed.__doc__) + p.add_argument( + "-o", + dest="output", + default="stdout", + help="Output file name", + ) + p.add_argument( + "--cutoff", + dest="cutoff", + default=10, + type=int, + help="Minimum read depth to report intervals", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + binfile, fastafile = args + fw = must_open(opts.output, "w") + cutoff = opts.cutoff + assert cutoff >= 0, "Need non-negative cutoff" + + b = BinFile(binfile) + ar = b.array + + fastasize, sizes, offsets = get_offsets(fastafile) + s = Sizes(fastafile) + for ctg, ctglen in s.iter_sizes(): + offset = offsets[ctg] + subarray = ar[offset : offset + ctglen] + key = lambda x: x[1] >= cutoff + for tf, array_elements in groupby(enumerate(subarray), key=key): + array_elements = list(array_elements) + if not tf: + continue + + # 0-based system => 1-based system + start = array_elements[0][0] + 1 + end = array_elements[-1][0] + 1 + + mean_depth = sum([x[1] for x in array_elements]) / len(array_elements) + mean_depth = int(mean_depth) + + name = "na" + print( + "\t".join(str(x) for x in (ctg, start - 1, end, name, mean_depth)), + file=fw, + ) + + +def merge(args): + """ + %prog merge *.bin merged.bin + + Merge several count arrays into one. Overflows will be capped at uint8_max + (255). + """ + p = OptionParser(merge.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + binfiles = args[:-1] + mergedbin = args[-1] + if op.exists(mergedbin): + logger.error("`{0}` file exists. 
Remove before proceed.".format(mergedbin)) + return + + b = BinFile(binfiles[0]) + ar = b.mmarray + (fastasize,) = ar.shape + logger.debug("Initialize array of uint16 with size {0}".format(fastasize)) + + merged_ar = np.zeros(fastasize, dtype=np.uint16) + for binfile in binfiles: + b = BinFile(binfile) + merged_ar += b.array + + logger.debug("Resetting the count max to 255.") + merged_ar[merged_ar > 255] = 255 + + logger.debug("Compact array back to uint8 with size {0}".format(fastasize)) + merged_ar = np.array(merged_ar, dtype=np.uint8) + merged_ar.tofile(mergedbin) + logger.debug("Merged array written to `{0}`".format(mergedbin)) + + +def query(args): + """ + %prog query binfile fastafile ctgID baseID + + Get the depth at a particular base. + """ + p = OptionParser(query.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 4: + sys.exit(not p.print_help()) + + binfile, fastafile, ctgID, baseID = args + b = BinFile(binfile) + ar = b.mmarray + + fastasize, sizes, offsets = get_offsets(fastafile) + oi = offsets[ctgID] + int(baseID) - 1 + print("\t".join((ctgID, baseID, str(ar[oi])))) + + +def update_array(ar, coveragefile, offsets): + fp = open(coveragefile) + logger.debug("Parse file `{0}`".format(coveragefile)) + for k, rows in groupby(fp, key=(lambda x: x.split()[0])): + rows = list(rows) + offset = offsets[k] + ctglen = len(rows) + + if ctglen < 100000: + sys.stdout.write(".") + else: + print(k, offset) + + # assert ctglen == sizes[k] + for i, row in enumerate(rows): + ctgID, baseID, count = row.split() + oi = offset + i + newcount = ar[oi] + int(count) + if newcount > 255: + newcount = 255 + + ar[oi] = newcount + + +def get_offsets(fastafile): + s = Sizes(fastafile) + fastasize = s.totalsize + sizes = s.mapping + offsets = s.cumsizes_mapping + return fastasize, sizes, offsets + + +def count(args): + """ + %prog count t.coveragePerBase fastafile + + Serialize the genomeCoverage results. 
The coordinate system of the count array + will be based on the fastafile. + """ + p = OptionParser(count.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + coveragefile, fastafile = args + + countsfile = coveragefile.split(".")[0] + ".bin" + if op.exists(countsfile): + logger.error("`{0}` file exists. Remove before proceed.".format(countsfile)) + return + + fastasize, sizes, offsets = get_offsets(fastafile) + logger.debug("Initialize array of uint8 with size {0}".format(fastasize)) + ar = np.zeros(fastasize, dtype=np.uint8) + + update_array(ar, coveragefile, offsets) + + ar.tofile(countsfile) + logger.debug("Array written to `{0}`".format(countsfile)) + + +if __name__ == "__main__": + main() diff --git a/jcvi/annotation/evm.py b/jcvi/annotation/evm.py new file mode 100644 index 00000000..6764470d --- /dev/null +++ b/jcvi/annotation/evm.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Wrapper for running series of EVM commands. There are two flavors of running +EVM - TIGR only mode which communicates with the Sybase db; evm mode which +communicates with GFF file. 
+""" +import os.path as op +import sys + +from collections import defaultdict + +from ..apps.base import ActionDispatcher, OptionParser, need_update, sh +from ..formats.base import write_file +from ..formats.fasta import ids + + +EVMRUN = r""" +W=`pwd`/weights.txt + +$EVM/EvmUtils/write_EVM_commands.pl --genome genome.fasta --weights $W \ + --gene_predictions {0} \ + --transcript_alignments {1} \ + --protein_alignments {2} \ + --terminalExons pasa.terminal_exons.gff3 \ + --output_file_name evm.out --partitions partitions_list.out > commands.list + +$EGC_SCRIPTS/run_cmds_on_grid.pl commands.list 0372 + +#$EVM/EvmUtils/execute_EVM_commands.pl commands.list +""" + +EVMLOAD = r""" +$EVM/EvmUtils/recombine_EVM_partial_outputs.pl \ + --partitions partitions_list.out \ + --output_file_name evm.out + +$EVM/TIGR-only/TIGR_EVM_loader.pl --db {0} \ + --partitions partitions_list.out \ + --output_file_name evm.out \ + --ev_type {1} + +#$EVM/EvmUtils/convert_EVM_outputs_to_GFF3.pl \ +# --partitions partitions_list.out \ +# --output evm.out +""" + + +def main(): + + actions = ( + ("pasa", "extract terminal exons"), + ("tigrprepare", "run EVM in TIGR-only mode"), + ("tigrload", "load EVM results into TIGR db"), + ("maker", "run EVM based on MAKER output"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def partition(evs): + partition_list = "partitions_list.out" + A, T, P = evs + if not need_update(evs, partition_list): + return + + cmd = "$EVM/EvmUtils/partition_EVM_inputs.pl --genome genome.fasta" + cmd += " --gene_predictions {0}".format(A) + cmd += " --transcript_alignments {0}".format(T) + cmd += " --protein_alignments {0}".format(P) + cmd += " --segmentSize 500000 --overlapSize 10000 " + cmd += " --partition_listing partitions_list.out" + + termexons = "pasa.terminal_exons.gff3" + if op.exists(termexons): + cmd += " --pasaTerminalExons {0}".format(termexons) + + sh(cmd) + + +def maker(args): + """ + %prog maker maker.gff3 genome.fasta + + Prepare EVM 
inputs by separating tracks from MAKER. + """ + from jcvi.formats.base import SetFile + from jcvi.apps.base import cleanup + + A, T, P = "ABINITIO_PREDICTION", "TRANSCRIPT", "PROTEIN" + # Stores default weights and types + Registry = { + "maker": (A, 5), + "augustus_masked": (A, 1), + "snap_masked": (A, 1), + "genemark": (A, 1), + "est2genome": (T, 5), + "est_gff": (T, 5), + "protein2genome": (P, 5), + "blastx": (P, 1), + } + + p = OptionParser(maker.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + gffile, fastafile = args + + types = "type.ids" + if need_update(gffile, types): + cmd = "cut -f2 -s {0} | sort -u".format(gffile) + sh(cmd, outfile=types) + + types = SetFile(types) + reg = defaultdict(list) + weightsfile = "weights.txt" + contents = [] + for s in types: + rs = s.split(":")[0] + if rs not in Registry: + continue + + type, weight = Registry[rs] + reg[type].append(s) + contents.append("\t".join(str(x) for x in (type, s, weight))) + + contents = "\n".join(sorted(contents)) + write_file(weightsfile, contents) + + evs = [x + ".gff" for x in (A, T, P)] + cleanup(evs) + + for type, tracks in reg.items(): + for t in tracks: + cmd = "grep '\t{0}' {1} | grep -v '_match\t' >> {2}.gff".format( + t, gffile, type + ) + sh(cmd) + + partition(evs) + runfile = "run.sh" + contents = EVMRUN.format(*evs) + write_file(runfile, contents) + + +def tigrload(args): + """ + %prog tigrload db ev_type + + Load EVM results into TIGR db. Actually, just write a load.sh script. The + ev_type should be set, e.g. "EVM1", "EVM2", etc. + """ + p = OptionParser(tigrload.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + db, ev_type = args + + runfile = "load.sh" + contents = EVMLOAD.format(db, ev_type) + write_file(runfile, contents) + + +def pasa(args): + """ + %prog pasa pasa_db fastafile + + Run EVM in TIGR-only mode. 
+ """ + p = OptionParser(pasa.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + pasa_db, fastafile = args + + termexons = "pasa.terminal_exons.gff3" + if need_update(fastafile, termexons): + cmd = "$ANNOT_DEVEL/PASA2/scripts/pasa_asmbls_to_training_set.dbi" + cmd += ' -M "{0}:mysql.tigr.org" -p "access:access"'.format(pasa_db) + cmd += " -g {0}".format(fastafile) + sh(cmd) + + cmd = "$EVM/PasaUtils/retrieve_terminal_CDS_exons.pl" + cmd += " trainingSetCandidates.fasta trainingSetCandidates.gff" + sh(cmd, outfile=termexons) + + return termexons + + +def fix_transcript(): + # Fix `transcript_alignments.gff3` + transcript = "transcript_alignments.gff3" + fixedtranscript = "transcript_alignments.fixed.gff3" + if need_update(transcript, fixedtranscript): + fp = open(transcript) + fw = open(fixedtranscript, "w") + stack = "" + for row in fp: + row = row.rstrip() + goodline = len(row.split()) == 9 + if goodline: + if stack: + print(stack, file=fw) + stack = row + else: + print(stack + row, file=fw) + stack = "" + + fw.close() + + return fixedtranscript + + +def tigrprepare(args): + """ + %prog tigrprepare asmbl.fasta asmbl.ids db pasa.terminal_exons.gff3 + + Run EVM in TIGR-only mode. 
+ """ + p = OptionParser(tigrprepare.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 4: + sys.exit(not p.print_help()) + + fastafile, asmbl_id, db, pasa_db = args + if asmbl_id == "all": + idsfile = fastafile + ".ids" + if need_update(fastafile, idsfile): + ids([fastafile, "-o", idsfile]) + else: + idsfile = asmbl_id + + oneid = next(open(idsfile)).strip() + + weightsfile = "weights.txt" + if need_update(idsfile, weightsfile): + cmd = "$EVM/TIGR-only/create_sample_weights_file.dbi" + cmd += " {0} {1} | tee weights.txt".format(db, oneid) + sh(cmd) + + evs = [ + "gene_predictions.gff3", + "transcript_alignments.gff3", + "protein_alignments.gff3", + ] + if need_update(weightsfile, evs): + cmd = "$EVM/TIGR-only/write_GFF3_files.dbi" + cmd += " --db {0} --asmbl_id {1} --weights {2}".format(db, idsfile, weightsfile) + sh(cmd) + + evs[1] = fix_transcript() + + partition(evs) + runfile = "run.sh" + contents = EVMRUN.format(*evs) + write_file(runfile, contents) + + +if __name__ == "__main__": + main() diff --git a/jcvi/annotation/maker.py b/jcvi/annotation/maker.py new file mode 100644 index 00000000..bdb7c139 --- /dev/null +++ b/jcvi/annotation/maker.py @@ -0,0 +1,537 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Utility script for annotations based on MAKER. + +Many of the routines in this script is to select among a set of conflicting +models, either through accuracy (batcheval) or simply the length (longest). 
class CTLine(object):
    """
    One row of a control file, split into `tag=value # comment`.

    Any of the three parts may be empty. `str()` reassembles the row in a
    normalized form: `tag=value`, ` # comment` appended when both are present,
    or `# comment` alone for a comment-only row.
    """

    def __init__(self, row):
        row = row.strip()
        # Everything after the first `#` is comment. A row without `#` is all
        # content (previously such rows were discarded as empty).
        real, _, comment = row.partition("#")
        tag = value = ""
        if "=" in real:
            tag, value = real.split("=", 1)

        self.tag = tag.strip()
        self.value = value.strip()
        self.comment = comment.strip()

    def __str__(self):
        tag = self.tag
        value = self.value
        comment = self.comment

        # `value` may have been replaced by a non-string (e.g. an int), hence str().
        s = "=".join(str(x) for x in (tag, value)) if tag else ""
        if comment:
            s += (" # " if s else "# ") + comment
        return s
def main():
    """Hand the sub-command table and this module's globals to ActionDispatcher."""
    # (name, help text) pairs.
    # NOTE(review): no `longest` function is visible in this module - confirm
    # that the last entry can actually be dispatched.
    dispatcher = ActionDispatcher(
        (
            ("parallel", "partition the genome into parts and run separately"),
            ("merge", "generate the gff files after parallel"),
            ("validate", "validate after MAKER run to check for failures"),
            ("datastore", "generate a list of gff filenames to merge"),
            ("split", "split MAKER models by checking against evidences"),
            ("batcheval", "calls bed.evaluate() in batch"),
            ("longest", "pick the longest model per group"),
        )
    )
    dispatcher.dispatch(globals())
def get_fsnames(outdir):
    """
    Collect the files matching `*.fa*` inside `outdir`.

    Returns a `(names, suffix)` tuple: `names` holds each match's basename with
    its last extension removed, and `suffix` is the last extension (leading dot
    included) of the first match only.
    """
    paths = glob(op.join(outdir, "*.fa*"))

    # The extension is read from the first match and assumed for the rest.
    extension = paths[0].split(".")[-1]
    suffix = "." + extension

    stems = []
    for path in paths:
        base = op.basename(path)
        stems.append(base.rsplit(".", 1)[0])

    return stems, suffix
+ """ + from jcvi.formats.gff import merge as gmerge + + p = OptionParser(merge.__doc__) + p.set_home("maker") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + outdir, outputgff = args + fsnames, suffix = get_fsnames(outdir) + nfs = len(fsnames) + cmd = op.join(opts.maker_home, "bin/gff3_merge") + + outfile = "merge.sh" + write_file(outfile, mergesh.format(suffix, cmd)) + + # Generate per split directory + # Note that gff3_merge write to /tmp, so I limit processes here to avoid + # filling up disk space + sh("parallel -j 8 merge.sh {} ::: " + " ".join(fsnames)) + + # One final output + gffnames = glob("*.all.gff") + assert len(gffnames) == nfs + + # Again, DO NOT USE gff3_merge to merge with a smallish /tmp/ area + gfflist = "gfflist" + fw = open(gfflist, "w") + print("\n".join(gffnames), file=fw) + fw.close() + + nlines = sum(1 for _ in open(gfflist)) + assert nlines == nfs # Be extra, extra careful to include all results + gmerge([gfflist, "-o", outputgff]) + logger.debug("Merged GFF file written to `{0}`".format(outputgff)) + + +def validate(args): + """ + %prog validate outdir genome.fasta + + Validate current folder after MAKER run and check for failures. Failed batch + will be written to a directory for additional work. 
def batcheval(args):
    """
    %prog batcheval model.ids gff_file evidences.bed fastafile

    Get the accuracy for a list of models against evidences in the range of the
    genes. For example:

    $ %prog batcheval isoforms.ids all.gff3 proteins.bed scaffolds.fasta

    The scores are written to `<prefix>.scores`, where prefix is the model ids
    filename without its last extension (`isoforms.scores` in the example).
    """
    from jcvi.formats.bed import evaluate
    from jcvi.formats.gff import make_index

    # Use this command's own docstring for usage/help (it takes four
    # positional arguments; the usage of `evaluate` would be wrong here).
    p = OptionParser(batcheval.__doc__)
    p.add_argument(
        "--type",
        default="CDS",
        help="list of features to extract, use comma to separate (e.g."
        "'five_prime_UTR,CDS,three_prime_UTR')",
    )
    opts, args = p.parse_args(args)

    if len(args) != 4:
        sys.exit(not p.print_help())

    model_ids, gff_file, evidences_bed, fastafile = args
    featuretypes = set(opts.type.split(","))

    g = make_index(gff_file)
    prefix = model_ids.rsplit(".", 1)[0]
    # Scratch BED file, overwritten for every model id.
    cidbed = prefix + ".bed"

    with open(model_ids) as fp, open(prefix + ".scores", "w") as fwscores:
        for row in fp:
            cid = row.strip()
            # The first level-1 parent supplies the query range.
            parent = next(g.parents(cid, 1))
            query = "{0}:{1}-{2}".format(parent.chrom, parent.start, parent.stop)

            with open(cidbed, "w") as fw:
                for c in g.children(cid, 1):
                    if c.featuretype not in featuretypes:
                        continue

                    fw.write(c.to_bed())

            b = evaluate(
                [cidbed, evidences_bed, fastafile, "--query={0}".format(query)]
            )
            print("\t".join((cid, b.score)), file=fwscores)
            # Flush per model so partial results survive an interrupted run.
            fwscores.flush()
def get_accuracy(query, gff_file, evidences_bed, sizesfile, type, key):
    """
    Get sensitivity, specificity and accuracy given gff_file, and a query range
    that look like "chr1:1-10000".
    """
    from jcvi.formats.bed import evaluate

    # BED file for the requested feature type(s); get_bed_file decides
    # whether it has to be regenerated.
    evaluate_args = [
        get_bed_file(gff_file, type, key),
        evidences_bed,
        sizesfile,
        "--query={0}".format(query),
    ]
    return evaluate(evaluate_args)
def datastore(args):
    """
    %prog datastore datastore.log > gfflist.log

    Generate a list of gff filenames to merge. The `datastore.log` file can be
    generated by something like:

    $ find
    /usr/local/scratch/htang/EVM_test/gannotation/maker/1132350111853_default/i1/
    -maxdepth 4 -name "*datastore*.log" > datastore.log
    """
    p = OptionParser(datastore.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (ds,) = args
    # `with` closes each handle promptly; the input may list many log files
    # and every one of them used to stay open until interpreter exit.
    with open(ds) as fp:
        for row in fp:
            fn = row.strip()
            assert op.exists(fn), "Missing datastore log `{0}`".format(fn)
            pp, logfile = op.split(fn)
            with open(fn) as flog:
                for inner_row in flog:
                    ctg, folder, status = inner_row.split()
                    # Only rows marked FINISHED contribute a gff path.
                    if status != "FINISHED":
                        continue

                    gff_file = op.join(pp, folder, ctg + ".gff")
                    assert op.exists(gff_file), "Missing gff `{0}`".format(gff_file)
                    print(gff_file)
def main():
    """Hand the sub-command table and this module's globals to ActionDispatcher."""
    # (name, help text) pairs.
    dispatcher = ActionDispatcher(
        (
            ("assemble", "run pasa alignment assembly pipeline"),
            ("compare", "run pasa annotation comparison pipeline"),
            ("longest", "label longest transcript per gene as full-length"),
            (
                "consolidate",
                "generate consolidated annotation set from 2 or more annot compare results",
            ),
        )
    )
    dispatcher.dispatch(globals())
transcripts-dn.fasta [transcript-gg.fasta] + + Run the PASA alignment assembly pipeline + + If two transcript fasta files (Trinity denovo and genome guided) are provided + and the `--compreh` param is enabled, the PASA Comprehensive Transcriptome DB + protocol is followed + + Using the `--prepare` option creates a shell script with the run commands without + executing the pipeline + """ + p = OptionParser(assemble.__doc__) + p.set_pasa_opts() + p.add_argument( + "--prepare", + default=False, + action="store_true", + help="Prepare PASA run script with commands", + ) + p.set_grid() + p.set_grid_opts() + opts, args = p.parse_args(args) + + if len(args) not in (3, 4): + sys.exit(not p.print_help()) + + ( + pasa_db, + genome, + dnfasta, + ) = args[:3] + ggfasta = args[3] if len(args) == 4 else None + + PASA_HOME = opts.pasa_home + if not op.isdir(PASA_HOME): + logger.error("PASA_HOME={0} directory does not exist".format(PASA_HOME)) + sys.exit() + + aligners = opts.aligners.split(",") + for aligner in aligners: + if aligner not in ALLOWED_ALIGNERS: + logger.error("Error: Unknown aligner `{0}`".format(aligner)) + logger.error( + "Can be any of {0}, ".format("|".join(ALLOWED_ALIGNERS)) + + "combine multiple aligners in list separated by comma" + ) + sys.exit() + + clean = opts.clean + seqclean = op.join(opts.tgi_home, "seqclean") + + accn_extract = which(op.join(PASA_HOME, "misc_utilities", "accession_extractor.pl")) + launch_pasa = which(op.join(PASA_HOME, "scripts", "Launch_PASA_pipeline.pl")) + build_compreh_trans = which( + op.join(PASA_HOME, "scripts", "build_comprehensive_transcriptome.dbi") + ) + + fl_accs = opts.fl_accs + cpus = opts.cpus + grid = opts.grid + prepare, runfile = opts.prepare, "run.sh" + pctcov, pctid = opts.pctcov, opts.pctid + compreh_pctid = opts.compreh_pctid + compreh_pctcov, bpsplice = opts.compreh_pctcov, opts.bpsplice + + cmds = [] + + # set PASAHOME env variable if preparing shell script + if prepare: + env_cmd = 'export 
PASAHOME="{0}"'.format(PASA_HOME) + cmds.append(env_cmd) + + if ggfasta: + transcripts = FileMerger([dnfasta, ggfasta], tfasta).merge() + accn_extract_cmd = "cat {0} | {1} > {2}".format(dnfasta, accn_extract, tdn) + cmds.append(accn_extract_cmd) + if not prepare: + sh(accn_extract_cmd) + else: + symlink(dnfasta, tfasta) + transcripts = tfasta + + if opts.grid and not opts.threaded: + opts.threaded = opts.cpus + + prjobid = None + if clean: + ccpus = 16 if cpus >= 16 else cpus + cleancmd = "{0} {1} -c {2} -l 60".format(seqclean, transcripts, ccpus) + if prepare: + cmds.append(cleancmd) + else: + prjobid = sh(cleancmd, grid=grid, grid_opts=opts) + + aafw = must_open(aaconf, "w") + print( + alignAssembly_conf.format("{0}_pasa".format(pasa_db), pctcov, pctid, bpsplice), + file=aafw, + ) + aafw.close() + + symlink(genome, gfasta) + + aacmd = "{0} -c {1} -C -R -g {2}".format(launch_pasa, aaconf, gfasta) + aacmd += ( + " -t {0}.clean -T -u {0}".format(transcripts) + if clean + else " -t {0}".format(transcripts) + ) + if fl_accs: + symlink(fl_accs, flaccs) + aacmd += " -f {0}".format(flaccs) + if ggfasta: + aacmd += " --TDN {0}".format(tdn) + aacmd += " --ALIGNERS {0} -I {1} --CPU {2}".format( + ",".join(aligners), opts.intron, cpus + ) + + if prepare: + cmds.append(aacmd) + else: + opts.hold_jid = prjobid + prjobid = sh(aacmd, grid=grid, grid_opts=opts) + + if opts.compreh and ggfasta: + comprehcmd = "{0} -c {1} -t {2}".format( + build_compreh_trans, aaconf, transcripts + ) + comprehcmd += " --min_per_ID {0} --min_per_aligned {1}".format( + compreh_pctid, compreh_pctcov + ) + + if prepare: + cmds.append(comprehcmd) + else: + opts.hold_jid = prjobid + prjobid = sh(comprehcmd, grid=grid, grid_opts=opts) + + if prepare: + write_file(runfile, "\n".join(cmds)) # initialize run script + + +def compare(args): + """ + %prog compare pasa_db_name [--annots_gff3=annotation.gff3] + + Run the PASA annotation comparison pipeline + + This assumes that PASA alignment assembly has alredy 
been completed and + run directory contains `genome.fasta` and `transcript.fasta` files. + + If `--annots_gff3` is specified, the PASA database is loaded with the annotations + first before starting annotation comparison. Otherwise, it uses previously + loaded annotation data. + + Using the `--prepare` option creates a shell script with the run commands without + executing the pipeline + """ + p = OptionParser(compare.__doc__) + p.set_pasa_opts(action="compare") + p.add_argument( + "--prepare", + default=False, + action="store_true", + help="Prepare PASA run script with commands", + ) + p.set_grid() + p.set_grid_opts() + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + (pasa_db,) = args + + PASA_HOME = opts.pasa_home + if not op.isdir(PASA_HOME): + logger.error("PASA_HOME={0} directory does not exist".format(PASA_HOME)) + sys.exit() + + launch_pasa = which(op.join(PASA_HOME, "scripts", "Launch_PASA_pipeline.pl")) + + annots_gff3 = opts.annots_gff3 + grid = opts.grid + prepare, runfile = opts.prepare, "run.sh" + + os.chdir(pasa_db) + + if prepare: + write_file(runfile, "", append=True, skipcheck=True) # initialize run script + + acfw = must_open(acconf, "w") + print( + annotCompare_conf.format( + "{0}_pasa".format(pasa_db), + opts.pctovl, + opts.pct_coding, + opts.pctid_prot, + opts.pctlen_FL, + opts.pctlen_nonFL, + opts.orf_size, + opts.pct_aln, + opts.pctovl_gene, + opts.stompovl, + opts.trust_FL, + opts.utr_exons, + ), + file=acfw, + ) + acfw.close() + + if not op.exists(gfasta): + sys.exit("Genome fasta file `{0}` does not exist".format(gfasta)) + + transcripts = tfasta + if not op.exists(transcripts): + sys.exit("Transcript fasta file `{0}` does not exist".format(transcripts)) + + if op.exists("{0}.clean".format(transcripts)): + transcripts = "{0}.clean".format(transcripts) + + accmd = "{0} -c {1} -A -g {2} -t {3} --GENETIC_CODE {4}".format( + launch_pasa, acconf, gfasta, transcripts, opts.genetic_code + ) + + if 
def longest(args):
    """
    %prog longest pasa.fasta output.subclusters.out

    Find the longest PASA assembly and label it as full-length. Also removes
    transcripts shorter than half the length of the longest, or shorter than
    200bp. The assemblies for the same locus is found in
    `output.subclusters.out`. In particular the lines that look like:

    sub-cluster: asmbl_25 asmbl_26 asmbl_27
    """
    from jcvi.formats.fasta import Fasta, SeqIO
    from jcvi.formats.sizes import Sizes

    p = OptionParser(longest.__doc__)
    p.add_argument(
        "--prefix",
        default="pasa",
        help="Replace asmbl_ with prefix",
    )
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, subclusters = args
    prefix = fastafile.rsplit(".", 1)[0]

    idsfile = prefix + ".fl.ids"
    sizes = Sizes(fastafile).mapping

    def name_convert(name):
        # Swap the literal "asmbl" in an id for the user-supplied prefix.
        return name.replace("asmbl", opts.prefix)

    keep = set()  # IDs to write to the cleaned fasta
    nrecs = 0
    # Both handles are closed on exit, including when a lookup raises.
    with open(subclusters) as fp, open(idsfile, "w") as fw:
        for row in fp:
            if not row.startswith("sub-cluster:"):
                continue
            asmbls = row.split()[1:]
            longest_asmbl = max(asmbls, key=lambda x: sizes[x])
            longest_size = sizes[longest_asmbl]
            print(name_convert(longest_asmbl), file=fw)
            nrecs += 1
            # Keep members at least half as long as the longest and >= 200.
            cutoff = max(longest_size / 2, 200)
            keep.update(x for x in asmbls if sizes[x] >= cutoff)

    logger.debug("{0} fl-cDNA records written to `{1}`.".format(nrecs, idsfile))

    f = Fasta(fastafile, lazy=True)
    newfastafile = prefix + ".clean.fasta"
    nrecs = 0
    with open(newfastafile, "w") as fw:
        for name, rec in f.iteritems_ordered():
            if name not in keep:
                continue

            rec.id = name_convert(name)
            rec.description = ""
            SeqIO.write([rec], fw, "fasta")
            nrecs += 1

    logger.debug("{0} valid records written to `{1}`.".format(nrecs, newfastafile))
for odbn in gffdbx if dbn != odbn] + for gene in gffdbx[dbn].features_of_type("gene", order_by=("seqid", "start")): + if mode == "name": + loci.join(gene.id, (gene.id, dbn)) + else: + if (gene.id, dbn) not in loci: + loci.join((gene.id, dbn)) + gene_cds = list( + gffdbx[dbn].children(gene, featuretype="CDS", order_by="start") + ) + gene_cds_start, gene_cds_stop = gene_cds[0].start, gene_cds[-1].stop + for odbn in odbns: + for ogene_cds in gffdbx[odbn].region( + seqid=gene.seqid, + start=gene_cds_start, + end=gene_cds_stop, + strand=gene.strand, + featuretype="CDS", + ): + for ogene in gffdbx[odbn].parents( + ogene_cds, featuretype="gene" + ): + loci.join((gene.id, dbn), (ogene.id, odbn)) + + gfeats = {} + mrna = AutoVivification() + for i, locus in enumerate(loci): + gene = "gene_{0:0{pad}}".format(i, pad=6) if mode == "coords" else None + + for elem in locus: + if type(elem) == tuple: + _gene, dbn = elem + if gene is None: + gene = _gene + + g = gffdbx[dbn][_gene] + if gene not in gfeats: + gfeats[gene] = g + gfeats[gene].attributes["ID"] = [gene] + else: + if g.start < gfeats[gene].start: + gfeats[gene].start = g.start + if g.stop > gfeats[gene].stop: + gfeats[gene].stop = g.stop + + c = list( + gffdbx[dbn].children(_gene, featuretype="mRNA", order_by="start") + ) + if len(c) > 0: + mrna[gene][dbn] = c + + fw = must_open(opts.outfile, "w") + print("##gff-version 3", file=fw) + seen = {} + if opts.summary: + summaryfile = "{0}.summary.txt".format(opts.outfile.rsplit(".")[0]) + sfw = must_open(summaryfile, "w") + summary = ["id"] + summary.extend(gffdbx.keys()) + print("\t".join(str(x) for x in summary), file=sfw) + if opts.clusters: + clustersfile = "{0}.clusters.txt".format(opts.outfile.rsplit(".")[0]) + cfw = must_open(clustersfile, "w") + clusters = ["id", "dbns", "members", "trlens"] + print("\t".join(str(x) for x in clusters), file=cfw) + for gene in mrna: + g = Grouper() + dbns = list(combinations(mrna[gene], 2)) + if len(dbns) > 0: + for dbn1, dbn2 in dbns: 
+ dbx1, dbx2 = gffdbx[dbn1], gffdbx[dbn2] + for mrna1, mrna2 in product(mrna[gene][dbn1], mrna[gene][dbn2]): + mrna1s, mrna2s = ( + mrna1.stop - mrna1.start + 1, + mrna2.stop - mrna2.start + 1, + ) + g.join((dbn1, mrna1.id, mrna1s)) + g.join((dbn2, mrna2.id, mrna2s)) + + if match_subfeats(mrna1, mrna2, dbx1, dbx2, featuretype="CDS"): + res = [] + ftypes = ( + ["exon"] + if inferUTR + else ["five_prime_UTR", "three_prime_UTR"] + ) + for ftype in ftypes: + res.append( + match_subfeats( + mrna1, + mrna2, + dbx1, + dbx2, + featuretype=ftype, + slop=slop, + ) + ) + + if all(res): + g.join((dbn1, mrna1.id, mrna1s), (dbn2, mrna2.id, mrna2s)) + else: + for dbn1 in mrna[gene]: + for mrna1 in mrna[gene][dbn1]: + g.join((dbn1, mrna1.id, mrna1.stop - mrna1.start + 1)) + + print(gfeats[gene], file=fw) + + for group in g: + group.sort(key=lambda x: x[2], reverse=True) + dbs, mrnas = [el[0] for el in group], [el[1] for el in group] + d, m = dbs[0], mrnas[0] + + dbid, _mrnaid = "|".join(str(x) for x in set(dbs)), [] + for x in mrnas: + if x not in _mrnaid: + _mrnaid.append(x) + mrnaid = "{0}|{1}".format(dbid, "-".join(_mrnaid)) + if mrnaid not in seen: + seen[mrnaid] = 0 + else: + seen[mrnaid] += 1 + mrnaid = "{0}-{1}".format(mrnaid, seen[mrnaid]) + + _mrna = gffdbx[d][m] + _mrna.attributes["ID"] = [mrnaid] + _mrna.attributes["Parent"] = [gene] + children = gffdbx[d].children(m, order_by="start") + print(_mrna, file=fw) + for child in children: + child.attributes["ID"] = ["{0}|{1}".format(dbid, child.id)] + child.attributes["Parent"] = [mrnaid] + print(child, file=fw) + + if opts.summary: + summary = [mrnaid] + summary.extend(["Y" if db in set(dbs) else "N" for db in gffdbx]) + print("\t".join(str(x) for x in summary), file=sfw) + + if opts.clusters: + clusters = [mrnaid] + clusters.append(",".join(str(el[0]) for el in group)) + clusters.append(",".join(str(el[1]) for el in group)) + clusters.append(",".join(str(el[2]) for el in group)) + print("\t".join(str(x) for x in clusters), 
file=cfw) + + fw.close() + if opts.summary: + sfw.close() + if opts.clusters: + cfw.close() + + +if __name__ == "__main__": + main() diff --git a/jcvi/annotation/qc.py b/jcvi/annotation/qc.py new file mode 100644 index 00000000..72d8b1d1 --- /dev/null +++ b/jcvi/annotation/qc.py @@ -0,0 +1,376 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Run quality control (QC) on gene annotation. MAKER output was used during +testing. Several aspects of annotation QC are implemented in this script. + +- Trim UTRs. MAKER sometimes predict UTRs that extend into other genes. +- Remove overlapping models. +""" +import sys + +from ..apps.base import ActionDispatcher, OptionParser +from ..formats.gff import ( + Gff, + get_piles, + make_index, + import_feats, + populate_children, + to_range, +) +from ..formats.base import must_open +from ..formats.sizes import Sizes +from ..utils.range import range_chain, range_minmax, range_overlap + + +def main(): + + actions = ( + ("trimUTR", "remove UTRs in the annotation set"), + ("uniq", "remove overlapping gene models"), + ("nmd", "identify transcript variant candidates for nonsense-mediated decay"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def uniq(args): + """ + %prog uniq gffile cdsfasta + + Remove overlapping gene models. Similar to formats.gff.uniq(), overlapping + 'piles' are processed, one by one. + + Here, we use a different algorithm, that retains the best non-overlapping + subset witin each pile, rather than single best model. Scoring function is + also different, rather than based on score or span, we optimize for the + subset that show the best combined score. 
def trim(c, start, end, trim5=False, trim3=False, both=True):
    """
    Clip feature `c` in place so that it does not extend beyond [start, end].

    `c` needs `id`, `start`, `end` and `strand` attributes. Which edge is
    clipped depends on the strand: on "+" the 5' end is `c.start`, on "-" the
    5' end is `c.end`. `trim5` / `trim3` select a single end; `both` (the
    default) clips both ends. The outcome is reported on stderr.
    """
    old_start, old_end = c.start, c.end
    on_plus, on_minus = c.strand == "+", c.strand == "-"
    clip_five = trim5 or both
    clip_three = trim3 or both

    # Left edge: 5' end of a "+" feature, 3' end of a "-" feature.
    if (clip_five and on_plus) or (clip_three and on_minus):
        c.start = max(old_start, start)
    # Right edge: 3' end of a "+" feature, 5' end of a "-" feature.
    if (clip_three and on_plus) or (clip_five and on_minus):
        c.end = min(old_end, end)

    if c.start == old_start and c.end == old_end:
        print(c.id, "no change", file=sys.stderr)
        return

    print(
        c.id,
        "trimmed [{0}, {1}] => [{2}, {3}]".format(old_start, old_end, c.start, c.end),
        file=sys.stderr,
    )
def fprint(c, fw):
    """
    Write feature `c` to the open handle `fw`, unless its coordinates have
    been inverted (start > end); such features are reported on stderr and
    not written.
    """
    still_valid = not (c.start > c.end)
    if still_valid:
        print(c, file=fw)
        return

    print(c.id, "destroyed [{0} > {1}]".format(c.start, c.end), file=sys.stderr)
+ + Note: After running trimUTR, it is advised to also run + `python -m jcvi.formats.gff fixboundaries` on the resultant GFF3 + to adjust the boundaries of all parent 'gene' features + """ + import gffutils + from jcvi.formats.base import SetFile + + p = OptionParser(trimUTR.__doc__) + p.add_argument( + "--trim5", + default=None, + type=str, + help="File containing gene list for 5' UTR trimming", + ) + p.add_argument( + "--trim3", + default=None, + type=str, + help="File containing gene list for 3' UTR trimming", + ) + p.add_argument( + "--trimrange", + default=None, + type=str, + help="File containing gene list for UTR trim back" + + "based on suggested (start, stop) coordinate range", + ) + p.add_argument( + "--refgff", + default=None, + type=str, + help="Reference GFF3 used as fallback to replace UTRs", + ) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gffile,) = args + gff = make_index(gffile) + + trim_both = False if (opts.trim5 or opts.trim3) else True + trim5 = SetFile(opts.trim5) if opts.trim5 else set() + trim3 = SetFile(opts.trim3) if opts.trim3 else set() + trimrange = dict() + if opts.trimrange: + trf = must_open(opts.trimrange) + for tr in trf: + assert ( + len(tr.split("\t")) == 3 + ), "Must specify (start, stop) coordinate range" + id, start, stop = tr.split("\t") + trimrange[id] = (int(start), int(stop)) + trf.close() + + refgff = make_index(opts.refgff) if opts.refgff else None + + fw = must_open(opts.outfile, "w") + for feat in gff.iter_by_parent_childs( + featuretype="gene", order_by=("seqid", "start"), level=1 + ): + for c in feat: + cid, ctype, cparent = ( + c.id, + c.featuretype, + c.attributes.get("Parent", [None])[0], + ) + t5, t3 = False, False + if ctype == "gene": + t5 = True if cid in trim5 else False + t3 = True if cid in trim3 else False + start, end = get_cds_minmax(gff, cid) + trim(c, start, end, trim5=t5, trim3=t3, both=trim_both) + fprint(c, fw) + elif ctype == 
def nmd(args):
    """
    %prog nmd gffile

    Identify transcript variants which might be candidates for nonsense
    mediated decay (NMD)

    A transcript is considered to be a candidate for NMD when the CDS stop
    codon is located more than 50nt upstream of terminal splice site donor

    References:
    http://www.nature.com/horizon/rna/highlights/figures/s2_spec1_f3.html
    http://www.biomedcentral.com/1741-7007/7/23/figure/F1
    """
    # NOTE: the docstring above is handed to OptionParser and therefore is the
    # command's usage text; maintainer notes are kept in comments instead.
    #
    # Output: one tab-separated row per mRNA with
    #   gene id, mRNA id, total CDS length (bp), distance, NMD flag
    # where `distance` is measured between the end of the terminal CDS and the
    # edge of the exon that contains it, and NMD is True when distance > 50.
    # mRNAs without any CDS are reported with distance 0 and NMD False.
    from jcvi.utils.cbook import enumerate_reversed

    p = OptionParser(nmd.__doc__)
    p.set_outfile()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (gffile,) = args
    gff = make_index(gffile)

    fw = must_open(opts.outfile, "w")
    try:
        for gene in gff.features_of_type("gene", order_by=("seqid", "start")):
            # Exons are fetched in ascending start order, so the 3'-most exon
            # is the last one on "+" and the first one on "-". Scan from the
            # 3' side (enumerate_reversed presumably yields (index, item)
            # pairs from the end of the list -- defined in jcvi.utils.cbook).
            _enumerate = enumerate if gene.strand == "-" else enumerate_reversed
            for mrna in gff.children(gene, featuretype="mRNA", order_by="start"):
                exons = list(gff.children(mrna, featuretype="exon", order_by="start"))
                cds_by_exon = [None] * len(exons)

                # Index of the exon holding the terminal (3'-most) CDS segment.
                tcds_pos = None
                for i, exon in _enumerate(exons):
                    for cds in gff.region(
                        region=exon, featuretype="CDS", completely_within=True
                    ):
                        if mrna.id in cds["Parent"]:
                            cds_by_exon[i] = cds
                            tcds_pos = i
                            break
                    # Compare against None: index 0 is a valid hit and must
                    # stop the scan too (a plain truth test kept scanning and
                    # ended up on the wrong CDS for minus-strand transcripts).
                    if tcds_pos is not None:
                        break

                NMD, distance = False, 0
                # Only transcripts with at least one exon downstream of the
                # terminal CDS exon have a splice junction after the stop.
                has_downstream_exon = tcds_pos is not None and (
                    (mrna.strand == "+" and tcds_pos + 1 < len(exons))
                    or (mrna.strand == "-" and tcds_pos - 1 >= 0)
                )
                if has_downstream_exon:
                    tcds = cds_by_exon[tcds_pos]
                    texon = exons[tcds_pos]

                    PTC = tcds.end if mrna.strand == "+" else tcds.start
                    TDSS = texon.end if mrna.strand == "+" else texon.start
                    distance = abs(TDSS - PTC)
                    NMD = distance > 50

                print(
                    "\t".join(
                        str(x)
                        for x in (
                            gene.id,
                            mrna.id,
                            gff.children_bp(mrna, child_featuretype="CDS"),
                            distance,
                            NMD,
                        )
                    ),
                    file=fw,
                )
    finally:
        # Release the output handle even when a malformed record aborts the run.
        fw.close()
# Tags attached to gene ids as "name|TAG". PRIORITY lists them from most to
# least preferred; the overlap resolution in `renumber` keeps the group member
# whose tag has the lowest index in this tuple.
FRAME, RETAIN, OVERLAP, NEW = "FRAME", "RETAIN", "OVERLAP", "NEW"
PRIORITY = (FRAME, RETAIN, OVERLAP, NEW)

# Identifier shape of features in a freshly predicted `new.bed`,
# e.g. 23231.m312389, 23231.t004898, 23231.tRNA.144
new_id_pat = re.compile(r"^\d+\.[cemtx]+\S+")

# ATG-style locus names, e.g. Medtr1g004990.1:
#   locus  = whole locus name without the isoform suffix  (Medtr1g004990)
#   prefix = species prefix                               (Medtr)
#   chr    = chromosome token                             (1)
#   sep    = feature separator such as g / te / trna      (g)
#   rank   = zero-padded rank on the chromosome           (004990)
#   iso    = optional isoform index                       (1)
# The group names must stay in sync with the `m.group(...)` lookups in
# `atg_name` ("chr", "sep", "rank", "locus", "iso").
# NOTE(review): inside the `chr` character class the "|" is a literal pipe,
# not an alternation -- kept as-is to preserve matching behavior.
atg_name_pat = re.compile(
    r"""
        ^(?P<locus>
            (?:(?P<prefix>\w+[\D\d\D])\.?)(?P<chr>[\d|C|M]+)(?P<sep>[A-z]+)(?P<rank>\d+)
        )
        \.?(?P<iso>\d+)?
        """,
    re.VERBOSE,
)
class NameRegister(object):
    """
    Hands out gene identifiers for stretches of NEW genes.

    The register remembers every (chromosome, rank) pair that is already
    taken in `self.black`, so that newly allocated ranks never collide with
    blacklisted or previously assigned ones.
    """

    def __init__(self, prefix="Medtr", pad0=6, uc=False):
        self.black = set()  # (chr, rank) pairs that must not be handed out
        self.gaps = []  # kept for backward compatibility; not used here
        self.gapfile = None  # set by get_gaps(); required by allocate()
        self.prefix = prefix
        self.pad0 = pad0
        self.uc = uc

    def get_blacklist(self, filename):
        """Load ids from `filename` and reserve their (chr, rank) pairs."""
        black = SetFile(filename)
        for x in black:
            chr, rank = atg_name(x)
            self.black.add((chr, rank))

    def get_gaps(self, filename):
        """Remember the gaps BED file that allocate() intersects against."""
        self.gapfile = filename

    def allocate(self, info, chr, start_id, end_id, id_table, extended_stride=False):
        """
        Assign identifiers to one block of features located between two
        already-named flanking genes.

        info: non-empty list of BED lines (sorted by position) needing names
        chr: chromosome name as it appears in the BED file
        start_id, end_id: ranks of the left / right flanking genes
        id_table: dict updated in place, mapping each line's accn to its name
        extended_stride: also try the extended Stride configurations

        Gaps overlapping the block reserve extra ranks (one per 10 kb of the
        gap's score column) so that numbering leaves room for them. When no
        stride fits into the free ranks, ids spill over into a range offset
        by 400000 from `start_id`, stepping by 10.

        Raises ValueError when get_gaps() has not been called.
        """
        import shlex

        if self.gapfile is None:
            # Previously surfaced as an AttributeError deep inside the method.
            raise ValueError("No gaps file registered; call get_gaps() first")

        start_bp = info[0].start
        end_bp = info[-1].end

        current_chr = chr_number(chr)
        needed = info
        assert end_id > start_id, "end ({0}) > start ({1})".format(end_id, start_id)

        spots = end_id - start_id - 1
        available = [
            x for x in range(start_id + 1, end_id) if (current_chr, x) not in self.black
        ]

        message = "{0} need {1} ids, has {2} spots ({3} available)".format(
            chr, len(needed), spots, len(available)
        )

        start_gene = gene_name(
            current_chr, start_id, prefix=self.prefix, pad0=self.pad0, uc=self.uc
        )
        end_gene = gene_name(
            current_chr, end_id, prefix=self.prefix, pad0=self.pad0, uc=self.uc
        )
        message += " between {0} - {1}\n".format(start_gene, end_gene)

        assert end_bp > start_bp

        # Ask intersectBed which gaps fall inside the block. Both the region
        # line and the file name are shell-quoted since they end up in a
        # shell pipeline.
        b = "\t".join(str(x) for x in (chr, start_bp - 1, end_bp))
        cmd = "echo {0} |".format(shlex.quote(b))
        cmd += " intersectBed -a {0} -b stdin".format(shlex.quote(str(self.gapfile)))
        gaps = list(BedLine(x) for x in popen(cmd, debug=False))
        ngaps = len(gaps)

        gapsexpanded = []
        GeneDensity = 10000.0  # assume 10Kb per gene
        for gap in gaps:
            gap_bp = int(gap.score)
            gap_ids = int(round(gap_bp / GeneDensity))
            # Each repeat of the gap line consumes one rank during assignment.
            gapsexpanded += [gap] * gap_ids

        lines = sorted(info + gapsexpanded, key=lambda x: x.start)

        message += "between bp: {0} - {1}, there are {2} gaps (total {3} ids)".format(
            start_bp, end_bp, ngaps, len(lines)
        )

        needed = lines
        stride = Stride(needed, available, extended=extended_stride)
        conf = stride.conf
        message += " stride: {0}".format(conf)
        print(message, file=sys.stderr)

        nneeded = len(needed)
        if conf is None:  # prefix rule - prepend version number for spills
            magic = 400000  # version 4
            firstdigit = 100000
            step = 10  # stride for the prefixed ids
            rank = start_id + magic
            if rank > magic + firstdigit:
                rank -= firstdigit
            available = []
            while len(available) != nneeded:
                rank += step
                if (current_chr, rank) in self.black:  # avoid blacklisted ids
                    continue
                available.append(rank)

        else:  # follow the best stride
            available = stride.available
            if start_id == 0:  # follow right flank at start of chr
                available = available[-nneeded:]
            else:  # follow left flank otherwise
                available = available[:nneeded]

        # Finally assign the ids
        assert len(needed) == len(available)
        for b, rank in zip(needed, available):
            name = gene_name(
                current_chr, rank, prefix=self.prefix, pad0=self.pad0, uc=self.uc
            )
            print("\t".join((str(b), name)), file=sys.stderr)
            id_table[b.accn] = name
            self.black.add((current_chr, rank))
        print(file=sys.stderr)
def plot(args):
    """
    %prog plot tagged.new.bed chr1

    Plot gene identifiers along a particular chromosome, often to illustrate the
    gene id assignment procedure.
    """
    # NOTE: the docstring above is handed to OptionParser and therefore is the
    # command's usage text; maintainer notes are kept in comments instead.
    #
    # Draws gene order (x) against identifier rank (y) for one chromosome:
    # existing ids in red, NEW ids in blue. Entries whose name contains "te"
    # and entries tagged OVERLAP are left out.
    from jcvi.graphics.base import plt, savefig
    from jcvi.graphics.chromosome import ChromosomeMap

    p = OptionParser(plot.__doc__)
    p.add_argument("--firstn", type=int, help="Only plot the first N genes")
    p.add_argument("--ymax", type=int, help="Y-axis max value")
    p.add_argument("--log", action="store_true", help="Write plotting data")
    opts, args, iopts = p.set_image_options(args, figsize="6x4")

    if len(args) != 2:
        sys.exit(not p.print_help())

    taggedbed, chr = args
    bed = Bed(taggedbed)
    beds = list(bed.sub_bed(chr))
    old, new = [], []
    i = 0
    for b in beds:
        accn = b.extra[0]
        if "te" in accn:
            continue

        accn, tag = accn.split("|")
        if tag == "OVERLAP":
            continue

        _, rank = atg_name(accn)
        if tag == "NEW":
            new.append((i, rank))
        else:
            old.append((i, rank))
        i += 1

    ngenes = i
    assert ngenes == len(new) + len(old)

    logger.debug("Imported {0} ranks on {1}.".format(ngenes, chr))
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    xstart, xend = 0.2, 0.8
    ystart, yend = 0.2, 0.8
    pad = 0.02

    ngenes = opts.firstn or ngenes
    ymax = opts.ymax or 500000

    title = "Assignment of Medtr identifiers"
    # NOTE(review): the "first N genes" wording is selected by --ymax, not by
    # --firstn; looks like it expects both options to be given together --
    # confirm before changing.
    if opts.ymax:
        subtitle = "{0}, first {1} genes".format(chr, ngenes)
    else:
        subtitle = "{0}, {1} genes ({2} new)".format(chr, ngenes, len(new))

    chr_map = ChromosomeMap(
        fig, root, xstart, xend, ystart, yend, pad, 0, ymax, 5, title, subtitle
    )

    ax = chr_map.axes

    if opts.log:
        from jcvi.utils.table import write_csv

        header = ["x", "y"]
        write_csv(header, new, filename=chr + ".new")
        write_csv(header, old, filename=chr + ".old")

    # A chromosome may have no NEW (or no pre-existing) genes; unpacking
    # zip(*[]) would raise ValueError, so empty series are simply not drawn.
    for points, marker in ((new, "b,"), (old, "r,")):
        if not points:
            continue
        x, y = zip(*points)
        ax.plot(x, y, marker)

    # Legends
    ymid = (ystart + yend) / 2
    y = ymid + pad
    root.plot([0.2], [y], "r.", lw=2)
    root.text(0.2 + pad, y, "Existing Medtr ids", va="center", size=10)
    y = ymid - pad
    root.plot([0.2], [y], "b.", lw=2)
    root.text(0.2 + pad, y, "Newly instantiated ids", va="center", size=10)

    ax.set_xlim(0, ngenes)
    ax.set_ylim(0, ymax)
    ax.set_axis_off()

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chr + ".identifiers." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
+ bed = Bed(taggedbed) + outputbed = taggedbed.rsplit(".", 1)[0] + ".new.bed" + fw = open(outputbed, "w") + + tagkey = lambda x: x.rsplit("|", 1)[-1] + for chr, sbed in bed.sub_beds(): + current_chr = chr_number(chr) + if not current_chr: + continue + + sbed = list(sbed) + + ranks = [] + for i, s in enumerate(sbed): + nametag = s.extra[0] + tag = tagkey(nametag) + + if tag in (NEW, FRAME): + ranks.append((i, nametag)) + + blocks = [] + for tag, names in groupby(ranks, key=lambda x: tagkey(x[-1])): + names = list(names) + if tag == NEW: + blocks.append((tag, [sbed[x[0]] for x in names])) + else: + start, end = names[0][-1], names[-1][-1] + start, end = ( + atg_name(start, retval="rank"), + atg_name(end, retval="rank"), + ) + blocks.append((tag, [start, end])) + + id_table = {} # old to new name conversion + for i, (tag, info) in enumerate(blocks): + if tag != NEW: + continue + + start_id = 0 if i == 0 else blocks[i - 1][1][-1] + end_id = start_id + 10000 if i == len(blocks) - 1 else blocks[i + 1][1][0] + + r.allocate( + info, + chr, + start_id, + end_id, + id_table, + extended_stride=opts.extended_stride, + ) + + # Output new names + for i, s in enumerate(sbed): + nametag = s.extra[0] + name, tag = nametag.split("|") + + if tag == NEW: + assert name == "." 
# Trailing chromosome token of a sequence name: an optional non-digit prefix
# followed by digits and/or C / M (case-insensitive), anchored at the end.
# NOTE(review): the "|" inside the character class is a literal pipe, not an
# alternation -- kept as-is to preserve matching behavior.
_CHR_NUMBER_PAT = re.compile(
    r"(?P<prefix>\D*)(?P<chr>[\d|C|M]+)$", re.VERBOSE | re.IGNORECASE
)


def chr_number(chr):
    """
    Extract the chromosome token from a sequence name such as "chr5".

    Returns the token passed through `flexible_cast` (presumably converting
    digit strings to numbers -- see jcvi.formats.base), or None when `chr`
    is None or does not end in a chromosome token.
    """
    if chr is None:
        return None

    m = _CHR_NUMBER_PAT.match(chr)
    if m is None:
        return None

    return flexible_cast(m.group("chr"))
def renumber(args):
    """
    %prog renumber Mt35.consolidated.bed > tagged.bed

    Renumber genes for annotation updates.
    """
    # NOTE: the docstring above is handed to OptionParser and therefore is the
    # command's usage text; maintainer notes are kept in comments instead.
    #
    # Prints every line of the per-id BED file (`*.a.bed`) to stdout with an
    # extra "name|TAG" column, where TAG is one of FRAME, RETAIN, OVERLAP, NEW.
    from jcvi.algorithms.lis import longest_increasing_subsequence

    p = OptionParser(renumber.__doc__)
    p.set_annot_reformat_opts()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (bedfile,) = args

    # prepare() writes two derived files: `.a.bed` with one line per id and
    # `.b.bed` with the cleaned, ';'-joined ids of each original line.
    pf = bedfile.rsplit(".", 1)[0]
    abedfile = pf + ".a.bed"
    bbedfile = pf + ".b.bed"
    if need_update(bedfile, (abedfile, bbedfile)):
        prepare(bedfile)

    # Ids that share a ';'-joined line in `.b.bed` form one overlap group.
    mbed = Bed(bbedfile)
    g = Grouper()
    for s in mbed:
        accn = s.accn
        g.join(*accn.split(";"))

    bed = Bed(abedfile)
    for chr, sbed in bed.sub_beds():
        current_chr = chr_number(chr)
        # Skip sequences without a usable chromosome token (None or 0).
        if not current_chr:
            continue

        # Collect the ranks of ids that are named after this chromosome.
        ranks = []
        gg = set()
        for s in sbed:
            accn = s.accn
            achr, arank = atg_name(accn)
            if achr != current_chr:
                continue
            ranks.append(arank)
            gg.add(accn)

        # Ranks on the longest increasing subsequence keep their position
        # and become the FRAME that new ids are later interpolated into.
        lranks = longest_increasing_subsequence(ranks)
        print(
            current_chr,
            len(sbed),
            "==>",
            len(ranks),
            "==>",
            len(lranks),
            file=sys.stderr,
        )

        # FRAME names are rebuilt from the ranks, once with the default gene
        # separator and once with the "te" separator.
        granks = set(
            gene_name(current_chr, x, prefix=opts.prefix, pad0=opts.pad0, uc=opts.uc)
            for x in lranks
        ) | set(
            gene_name(
                current_chr, x, prefix=opts.prefix, pad0=opts.pad0, sep="te", uc=opts.uc
            )
            for x in lranks
        )

        # First pass: tag every id on its own merits.
        tagstore = {}
        for s in sbed:
            achr, arank = atg_name(s.accn)
            accn = s.accn
            if accn in granks:
                tag = (accn, FRAME)
            elif accn in gg:
                tag = (accn, RETAIN)
            else:
                tag = (".", NEW)

            tagstore[accn] = tag

        # Find cases where genes overlap
        # Second pass: within each overlap group the member with the best
        # (lowest-index) PRIORITY tag wins; ties fall back to the smaller id
        # because the (priority, id) tuples are compared as a whole. Every
        # other member is tagged OVERLAP and points at the winner.
        for s in sbed:
            accn = s.accn
            gaccn = g[accn]
            tags = [((tagstore[x][-1] if x in tagstore else NEW), x) for x in gaccn]
            group = [(PRIORITY.index(tag), x) for tag, x in tags]
            best = min(group)[-1]

            if accn != best:
                tag = (best, OVERLAP)
            else:
                tag = tagstore[accn]

            print("\t".join((str(s), "|".join(tag))))
help="Minimum overlap fraction (0.0 - 1.0)", + ) + g2.add_argument( + "-r", + dest="r", + default=False, + action="store_true", + help="Require fraction overlap to be reciprocal", + ) + g2.add_argument( + "-s", + dest="s", + default=True, + action="store_true", + help="Require same strandedness", + ) + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + nbedfile, obedfile = args + npf, opf = nbedfile.rsplit(".", 1)[0], obedfile.rsplit(".", 1)[0] + + # Make consolidated.bed + cbedfile = "consolidated.bed" + if not os.path.isfile(cbedfile): + consolidate(nbedfile, obedfile, cbedfile) + else: + logger.warning("`{0}` already exists. Skipping step".format(cbedfile)) + + logger.warning( + "Resolving ID assignment ambiguity based on `{0}`".format(opts.resolve) + ) + + if opts.resolve == "alignment": + # Get pairs and prompt to run needle + pairsfile = "nw.pairs" + scoresfile = "nw.scores" + if not os.path.isfile(pairsfile): + get_pairs(cbedfile, pairsfile) + else: + logger.warning( + "`{0}` already exists. Checking for needle output".format(pairsfile) + ) + + # If needle scores do not exist, prompt user to run needle + if not os.path.isfile(scoresfile): + logger.error( + "`{0}` does not exist. Please process {1} using `needle`".format( + scoresfile, pairsfile + ) + ) + sys.exit() + else: + scoresfile = "ovl.scores" + # Calculate overlap length using intersectBed + calculate_ovl(nbedfile, obedfile, opts, scoresfile) + + logger.warning("`{0}' exists. 
def read_scores(scoresfile, opts=None, sort=False, trimsuffix=True):
    """
    Load a four-column, tab-separated scores file into a dict.

    Each row holds (new id, old id, identity, score). The result maps every
    new id to a list of `(new, old, pid, score)` tuples with `pid` and
    `score` converted to float.

    scoresfile: path of the scores file
    opts: optional options object providing `pid`, `score` and `resolve`;
        without it no cutoffs apply and rows are parsed in "alignment" mode
    sort: order each list by decreasing pid, then decreasing score
    trimsuffix: strip a trailing ".<digits>" from the old id

    In "alignment" mode the identity column must look like `N/M (xx.x%)`;
    rows below the pid or score cutoff are dropped. In any other mode the
    identity column is used as the pid directly and no cutoff is applied.

    Raises ValueError when an alignment-mode identity cannot be parsed.
    """
    scores = {}
    _pid, _score, resolve = (
        (0.0, 0.0, "alignment")
        if opts is None
        else (opts.pid, opts.score, opts.resolve)
    )
    identity_pat = re.compile(r"\d+/\d+\s+\(\s*(\d+\.\d+)%\)")
    suffix_pat = re.compile(r"\.\d+$")

    fp = must_open(scoresfile)
    logger.debug("Load scores file `{0}`".format(scoresfile))
    try:
        for row in fp:
            row = row.strip()
            if not row:  # tolerate blank / trailing empty lines
                continue

            (new, old, identity, score) = row.split("\t")
            if trimsuffix:
                old = suffix_pat.sub("", old)
            if resolve == "alignment":
                match = identity_pat.search(identity)
                if match is None:
                    # Fail with the offending record instead of an opaque
                    # AttributeError on `None.group`.
                    raise ValueError(
                        "Cannot parse identity `{0}` for pair ({1}, {2}) in `{3}`".format(
                            identity, new, old, scoresfile
                        )
                    )
                pid = match.group(1)
                if float(pid) < _pid or float(score) < _score:
                    continue
            else:
                pid = identity

            scores.setdefault(new, []).append((new, old, float(pid), float(score)))
    finally:
        fp.close()

    if sort:
        for hits in scores.values():
            hits.sort(key=lambda k: (-k[2], -k[3]))

    return scores
annotate_chr(chr, chrbed, g, scores, abedline, opts, splits): + current_chr = chr_number(chr) + + for line in chrbed: + accn = line.accn + if accn not in g or (opts.atg_name and not current_chr): + abedline[accn] = line + continue + + gaccns = g[accn] + new = [a for a in gaccns if re.search(new_id_pat, a)] + newgrp = ";".join(sorted(new)) + + if accn in scores: + scores[accn] = sorted(scores[accn], key=lambda x: x[1]) + scores[accn] = sorted(scores[accn], key=lambda x: float(x[3]), reverse=True) + + accns = [] + print(accn, file=sys.stderr) + for elem in scores[accn]: + print("\t" + ", ".join([str(x) for x in elem[1:]]), file=sys.stderr) + if opts.atg_name: + achr, arank = atg_name(elem[1]) + if not achr or achr != current_chr: + continue + + accns.append(elem[1]) + if len(new) > 1: + if newgrp not in scores: + scores[newgrp] = [] + scores[newgrp].append(elem) + else: + accns[0:0] = [accn] + line.accn = ";".join([str(x) for x in accns]) + if len(scores[accn]) > 1: + break + + if len(new) > 1: + splits.add(newgrp) + else: + abedline[line.accn] = line + + return abedline, splits + + +def process_splits(splits, scores, nbedline, abedline): + for newgrp in splits: + new = newgrp.split(";") + print(new, file=sys.stderr) + if newgrp in scores: + best = {} + scores[newgrp] = sorted(scores[newgrp], key=lambda x: (x[0], x[1])) + scores[newgrp] = sorted( + scores[newgrp], key=lambda x: float(x[3]), reverse=True + ) + + for elem in scores[newgrp]: + if elem[1] not in best: + best[elem[1]] = elem[0] + + for n in new: + line = nbedline[n] + if n in scores: + accns = set() + scores[n] = sorted(scores[n], key=lambda x: x[1]) + scores[n] = sorted( + scores[n], key=lambda x: float(x[3]), reverse=True + ) + accns.add(n) + print("\t" + n, file=sys.stderr) + for elem in scores[n]: + if not elem[0] == n: + continue + print( + "\t\t" + ", ".join([str(x) for x in elem[1:]]), + file=sys.stderr, + ) + if elem[1] in best and n == best[elem[1]]: + accns.add(elem[1]) + accns = sorted(accns) + 
def get_pairs(cbedfile, pairsfile):
    """
    Write every (new id, old id) combination found on the same line of the
    consolidated BED file to `pairsfile`, one tab-separated pair per line.

    Only lines whose accn holds several ';'-joined ids are considered. An id
    counts as "new" when it matches `new_id_pat`, and as "old" otherwise.
    """
    # The context manager closes the output even if reading the BED fails.
    with open(pairsfile, "w") as fp:
        for bedline in Bed(cbedfile):
            if ";" not in bedline.accn:
                continue

            # Split the ids into new / old in one pass, preserving order.
            new, old = [], []
            for gene in bedline.accn.split(";"):
                (new if re.search(new_id_pat, gene) else old).append(gene)

            for new_id, old_id in product(new, old):
                print("\t".join((new_id, old_id)), file=fp)
+ + When we encounter gaps, we would like the increment to be larger. For example, + Bo1g00120, , Bo1g01120... + + Gaps bed file is optional. + """ + import string + + p = OptionParser(rename.__doc__) + p.add_argument( + "-a", + dest="gene_increment", + default=10, + type=int, + help="Increment for continuous genes", + ) + p.add_argument( + "-b", + dest="gap_increment", + default=1000, + type=int, + help="Increment for gaps", + ) + p.add_argument( + "--pad0", + default=6, + type=int, + help="Pad gene identifiers with 0", + ) + p.add_argument( + "--spad0", + default=4, + type=int, + help="Pad gene identifiers on small scaffolds", + ) + p.add_argument("--prefix", default="Bo", help="Genome prefix") + p.add_argument( + "--jgi", + default=False, + action="store_true", + help="Create JGI style identifier PREFIX.NN[G|TE]NNNNN.1", + ) + opts, args = p.parse_args(args) + + if len(args) not in (1, 2): + sys.exit(not p.print_help()) + + genebed = args[0] + gapbed = args[1] if len(args) == 2 else None + prefix = opts.prefix + gene_increment = opts.gene_increment + gap_increment = opts.gap_increment + + genes = Bed(genebed) + if gapbed: + fp = open(gapbed) + for row in fp: + genes.append(BedLine(row)) + + genes.sort(key=genes.key) + idsfile = prefix + ".ids" + newbedfile = prefix + ".bed" + gap_increment -= gene_increment + assert gap_increment >= 0 + + if opts.jgi: + prefix += "." 
+ fw = open(idsfile, "w") + for chr, lines in groupby(genes, key=lambda x: x.seqid): + lines = list(lines) + pad0 = opts.pad0 if len(lines) > 1000 else opts.spad0 + isChr = chr[0].upper() == "C" + digits = "".join(x for x in chr if x in string.digits) + gs = "g" if isChr else "s" + pp = prefix + digits + gs + idx = 0 + if isChr: + idx += gap_increment + + for r in lines: + isGap = r.strand not in ("+", "-") + if isGap: + idx += gap_increment + continue + else: + idx += gene_increment + accn = pp + "{0:0{1}d}".format(idx, pad0) + oldaccn = r.accn + print("\t".join((oldaccn, accn)), file=fw) + r.accn = accn + + genes.print_to_file(newbedfile) + logger.debug("Converted IDs written to `{0}`.".format(idsfile)) + logger.debug("Converted bed written to `{0}`.".format(newbedfile)) + + +def parse_prefix(identifier): + """ + Parse identifier such as a|c|le|d|li|re|or|AT4G00480.1 and return + tuple of prefix string (separated at '|') and suffix (AGI identifier) + """ + pf, id = (), identifier + if "|" in identifier: + pf, id = tuple(identifier.split("|")[:-1]), identifier.split("|")[-1] + + return pf, id + + +def reindex(args): + """ + %prog reindex gffile pep.fasta ref.pep.fasta + + Reindex the splice isoforms (mRNA) in input GFF file, preferably + generated after PASA annotation update + + In the input GFF file, there can be several types of mRNA within a locus: + * CDS matches reference, UTR extended, inherits reference mRNA ID + * CDS (slightly) different from reference, inherits reference mRNA ID + * Novel isoform added by PASA, have IDs like "LOCUS.1.1", "LOCUS.1.2" + * Multiple mRNA collapsed due to shared structure, have IDs like "LOCUS.1-LOCUS.1.1" + + In the case of multiple mRNA which have inherited the same reference mRNA ID, + break ties by comparing the new protein with the reference protein using + EMBOSS `needle` to decide which mRNA retains ID and which is assigned a new ID. + + All mRNA identifiers should follow the AGI naming conventions. 
+ + When reindexing the isoform identifiers, order mRNA based on: + * decreasing transcript length + * decreasing support from multiple input datasets used to run pasa.consolidate() + """ + from jcvi.formats.gff import make_index + from jcvi.formats.fasta import Fasta + from jcvi.apps.emboss import needle + from tempfile import mkstemp + + p = OptionParser(reindex.__doc__) + p.add_argument( + "--scores", type=str, help="read from existing EMBOSS `needle` scores file" + ) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + ( + gffile, + pep, + refpep, + ) = args + gffdb = make_index(gffile) + reffasta = Fasta(refpep) + + if not opts.scores: + fh, pairsfile = mkstemp(prefix="pairs", suffix=".txt", dir=".") + fw = must_open(pairsfile, "w") + + conflict, novel = AutoVivification(), {} + for gene in gffdb.features_of_type("gene", order_by=("seqid", "start")): + geneid = atg_name(gene.id, retval="locus") + novel[geneid] = [] + updated_mrna, hybrid_mrna = [], [] + for mrna in gffdb.children( + gene, featuretype="mRNA", order_by=("seqid", "start") + ): + if re.match(atg_name_pat, mrna.id) is not None and "_" not in mrna.id: + pf, mrnaid = parse_prefix(mrna.id) + mlen = gffdb.children_bp(mrna, child_featuretype="exon") + if "-" in mrna.id: + hybrid_mrna.append((mrna.id, mrna.start, mlen, len(pf))) + else: + updated_mrna.append((mrna.id, mrna.start, mlen, len(pf))) + + for mrna in sorted(updated_mrna, key=lambda k: (k[1], -k[2], -k[3])): + pf, mrnaid = parse_prefix(mrna[0]) + mstart, mlen = mrna[1], mrna[2] + + iso = atg_name(mrnaid, retval="iso") + newiso = "{0}{1}".format(iso, re.sub(atg_name_pat, "", mrnaid)) + if iso == newiso: + if iso not in conflict[geneid]: + conflict[geneid][iso] = [] + conflict[geneid][iso].append( + (mrna[0], iso, newiso, mstart, mlen, len(pf)) + ) + else: + novel[geneid].append((mrna[0], None, newiso, mstart, mlen, len(pf))) + + for mrna in sorted(hybrid_mrna, key=lambda k: (k[1], -k[2], 
-k[3])): + pf, mrnaid = parse_prefix(mrna[0]) + mstart, mlen = mrna[1], mrna[2] + + _iso, _newiso = [], [] + for id in sorted(mrnaid.split("-")): + a = atg_name(id, retval="iso") + b = "{0}{1}".format(a, re.sub(atg_name_pat, "", id)) + _iso.append(a) + _newiso.append(b) + + _novel = None + newiso = "-".join(str(x) for x in set(_newiso)) + for iso, niso in zip(_iso, _newiso): + if iso == niso: + if iso not in conflict[geneid]: + conflict[geneid][iso] = [ + (mrna[0], iso, newiso, mstart, mlen, len(pf)) + ] + _novel = None + break + + _novel = True + + if _novel is not None: + novel[geneid].append((mrna[0], None, newiso, mstart, mlen, len(pf))) + + if not opts.scores: + for isoform in sorted(conflict[geneid]): + mrnaid = "{0}.{1}".format(geneid, isoform) + if mrnaid in reffasta.keys(): + for mrna in conflict[geneid][isoform]: + print("\t".join(str(x) for x in (mrnaid, mrna[0])), file=fw) + + if not opts.scores: + fw.close() + needle([pairsfile, refpep, pep]) + cleanup(pairsfile) + scoresfile = "{0}.scores".format(pairsfile.rsplit(".")[0]) + else: + scoresfile = opts.scores + + scores = read_scores(scoresfile, sort=True, trimsuffix=False) + + primary = {} + for geneid in conflict: + primary[geneid] = [] + for iso in sorted(conflict[geneid]): + conflict[geneid][iso].sort(key=lambda k: (k[3], -k[4], -k[5])) + _iso = "{0}.{1}".format(geneid, iso) + if _iso not in scores: + novel[geneid].extend(conflict[geneid][iso]) + continue + top_score = scores[_iso][0][1] + result = next( + (i for i, v in enumerate(conflict[geneid][iso]) if v[0] == top_score), + None, + ) + if result is not None: + primary[geneid].append(conflict[geneid][iso][result]) + del conflict[geneid][iso][result] + if geneid not in novel: + novel[geneid] = [] + novel[geneid].extend(conflict[geneid][iso]) + novel[geneid].sort(key=lambda k: (k[3], -k[4], -k[5])) + + fw = must_open(opts.outfile, "w") + for gene in gffdb.features_of_type("gene", order_by=("seqid", "start")): + geneid = gene.id + print(gene, 
file=fw) + seen = [] + if geneid in primary: + all_mrna = primary[geneid] + all_mrna.extend(novel[geneid]) + for iso, mrna in enumerate(all_mrna): + _mrna = gffdb[mrna[0]] + _iso = mrna[1] + if mrna not in novel[geneid]: + seen.append(int(mrna[1])) + else: + mseen = 0 if len(seen) == 0 else max(seen) + _iso = (mseen + iso + 1) - len(seen) + + _mrnaid = "{0}.{1}".format(geneid, _iso) + _mrna["ID"], _mrna["_old_ID"] = [_mrnaid], [_mrna.id] + + print(_mrna, file=fw) + for c in gffdb.children(_mrna, order_by="start"): + c["Parent"] = [_mrnaid] + print(c, file=fw) + else: + for feat in gffdb.children(gene, order_by=("seqid", "start")): + print(feat, file=fw) + + fw.close() + + +def publocus(args): + """ + %prog publocus idsfile > idsfiles.publocus + + Given a list of model identifiers, convert each into a GenBank approved + pub_locus. + + Example output: + Medtr1g007020.1 MTR_1g007020 + Medtr1g007030.1 MTR_1g007030 + Medtr1g007060.1 MTR_1g007060A + Medtr1g007060.2 MTR_1g007060B + """ + p = OptionParser(publocus.__doc__) + p.add_argument("--locus_tag", default="MTR_", help="GenBank locus tag") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + locus_tag = opts.locus_tag + + index = AutoVivification() + (idsfile,) = args + fp = must_open(idsfile) + for row in fp: + locus, chrom, sep, rank, iso = atg_name(row, retval="locus,chr,sep,rank,iso") + if None in (locus, chrom, sep, rank, iso): + logger.warning("{0} is not a valid gene model identifier".format(row)) + continue + if locus not in index.keys(): + pub_locus = gene_name(chrom, rank, prefix=locus_tag, sep=sep) + index[locus]["pub_locus"] = pub_locus + index[locus]["isos"] = set() + + index[locus]["isos"].add(int(iso)) + + for locus in index: + pub_locus = index[locus]["pub_locus"] + index[locus]["isos"] = sorted(index[locus]["isos"]) + if len(index[locus]["isos"]) > 1: + new = [chr(n + 64) for n in index[locus]["isos"] if n < 27] + for i, ni in zip(index[locus]["isos"], new): + 
print( + "\t".join( + x + for x in ( + "{0}.{1}".format(locus, i), + "{0}{1}".format(pub_locus, ni), + ) + ) + ) + else: + print( + "\t".join( + x + for x in ( + "{0}.{1}".format(locus, index[locus]["isos"][0]), + pub_locus, + ) + ) + ) + + +def augustus(args): + """ + %prog augustus augustus.gff3 > reformatted.gff3 + + AUGUSTUS does generate a gff3 (--gff3=on) but need some refinement. + """ + from jcvi.formats.gff import Gff + + p = OptionParser(augustus.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (ingff3,) = args + gff = Gff(ingff3) + fw = must_open(opts.outfile, "w") + seen = defaultdict(int) + for g in gff: + if g.type not in ("gene", "transcript", "CDS"): + continue + + if g.type == "transcript": + g.type = "mRNA" + + prefix = g.seqid + "_" + pid = prefix + g.id + newid = "{0}-{1}".format(pid, seen[pid]) if pid in seen else pid + seen[pid] += 1 + g.attributes["ID"] = [newid] + g.attributes["Parent"] = [(prefix + x) for x in g.attributes["Parent"]] + g.update_attributes() + print(g, file=fw) + fw.close() + + +def tRNAscan(args): + """ + %prog tRNAscan all.trna > all.trna.gff3 + + Convert tRNAscan-SE output into gff3 format. 
+ + Sequence tRNA Bounds tRNA Anti Intron Bounds Cove + Name tRNA # Begin End Type Codon Begin End Score + -------- ------ ---- ------ ---- ----- ----- ---- ------ + 23231 1 335355 335440 Tyr GTA 335392 335404 69.21 + 23231 2 1076190 1076270 Leu AAG 0 0 66.33 + + Conversion based on PERL one-liner in: + + """ + from jcvi.formats.gff import sort + + p = OptionParser(tRNAscan.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (trnaout,) = args + gffout = trnaout + ".gff3" + fp = open(trnaout) + fw = open(gffout, "w") + + next(fp) + next(fp) + row = next(fp) + assert row.startswith("--------") + + for row in fp: + atoms = [x.strip() for x in row.split("\t")] + contig, trnanum, start, end, aa, codon, intron_start, intron_end, score = atoms + + start, end = int(start), int(end) + orientation = "+" + if start > end: + start, end = end, start + orientation = "-" + + source = "tRNAscan" + type = "tRNA" + if codon == "???": + codon = "XXX" + + comment = "ID={0}.tRNA.{1};Name=tRNA-{2} (anticodon: {3})".format( + contig, trnanum, aa, codon + ) + + print( + "\t".join( + str(x) + for x in ( + contig, + source, + type, + start, + end, + score, + orientation, + ".", + comment, + ) + ), + file=fw, + ) + + fw.close() + sort([gffout, "-i"]) + + +if __name__ == "__main__": + main() diff --git a/jcvi/annotation/stats.py b/jcvi/annotation/stats.py new file mode 100644 index 00000000..ee60436d --- /dev/null +++ b/jcvi/annotation/stats.py @@ -0,0 +1,386 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Collect gene statistics based on gff file: +Exon length, Intron length, Gene length, Exon count +""" +import os.path as op +import sys + +from ..apps.base import ActionDispatcher, OptionParser, logger, mkdir, need_update +from ..formats.base import DictFile, must_open +from ..formats.fasta import Fasta +from ..formats.gff import make_index +from ..utils.cbook import SummaryStats, human_size, percentage +from ..utils.range import 
range_interleave +from ..utils.table import tabulate + + +metrics = ("Exon_Length", "Intron_Length", "Gene_Length", "Exon_Count") + + +class GeneStats(object): + def __init__(self, feat, conf_class, transcript_sizes, exons): + self.fid = feat.id + self.conf_class = conf_class + self.num_exons = len(exons) + self.num_transcripts = len(transcript_sizes) + self.locus_size = feat.stop - feat.start + 1 + self.cum_transcript_size = sum(transcript_sizes) + self.cum_exon_size = sum((stop - start + 1) for (c, start, stop) in exons) + + def __str__(self): + return "\t".join( + str(x) + for x in ( + self.fid, + self.conf_class, + self.num_exons, + self.num_transcripts, + self.locus_size, + self.cum_transcript_size, + self.cum_exon_size, + ) + ) + + +def main(): + + actions = ( + ("stats", "collect gene statistics based on gff file"), + ("statstable", "print gene statistics table based on output of stats"), + ("histogram", "plot gene statistics based on output of stats"), + # summary tables of various styles + ("genestats", "print detailed gene statistics"), + ("summary", "print detailed gene/exon/intron statistics"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def gc(seqs): + gc = total = 0 + for s in seqs: + s = s.upper() + gc += s.count("G") + s.count("C") + total += sum(s.count(x) for x in "ACGT") + return percentage(gc, total, precision=0, mode=-1) + + +def summary(args): + """ + %prog summary gffile fastafile + + Print summary stats, including: + - Gene/Exon/Intron + - Number + - Average size (bp) + - Median size (bp) + - Total length (Mb) + - % of genome + - % GC + """ + p = OptionParser(summary.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + gff_file, ref = args + s = Fasta(ref) + g = make_index(gff_file) + geneseqs, exonseqs, intronseqs = [], [], [] # Calc % GC + for f in g.features_of_type("gene"): + fid = f.id + fseq = s.sequence({"chr": f.chrom, "start": f.start, "stop": f.stop}) + 
geneseqs.append(fseq) + exons = set( + (c.chrom, c.start, c.stop) + for c in g.children(fid, 2) + if c.featuretype == "exon" + ) + exons = list(exons) + for chrom, start, stop in exons: + fseq = s.sequence({"chr": chrom, "start": start, "stop": stop}) + exonseqs.append(fseq) + introns = range_interleave(exons) + for chrom, start, stop in introns: + fseq = s.sequence({"chr": chrom, "start": start, "stop": stop}) + intronseqs.append(fseq) + + r = {} # Report + for t, tseqs in zip(("Gene", "Exon", "Intron"), (geneseqs, exonseqs, intronseqs)): + tsizes = [len(x) for x in tseqs] + tsummary = SummaryStats(tsizes, dtype=int) + r[t, "Number"] = tsummary.size + r[t, "Average size (bp)"] = tsummary.mean + r[t, "Median size (bp)"] = tsummary.median + r[t, "Total length (Mb)"] = human_size(tsummary.sum, precision=0, target="Mb") + r[t, "% of genome"] = percentage( + tsummary.sum, s.totalsize, precision=0, mode=-1 + ) + r[t, "% GC"] = gc(tseqs) + + print(tabulate(r), file=sys.stderr) + + +def genestats(args): + """ + %prog genestats gffile + + Print summary stats, including: + - Number of genes + - Number of single-exon genes + - Number of multi-exon genes + - Number of distinct exons + - Number of genes with alternative transcript variants + - Number of predicted transcripts + - Mean number of distinct exons per gene + - Mean number of transcripts per gene + - Mean gene locus size (first to last exon) + - Mean transcript size (UTR, CDS) + - Mean exon size + + Stats modeled after barley genome paper Table 1. 
+ A physical, genetic and functional sequence assembly of the barley genome + """ + p = OptionParser(genestats.__doc__) + p.add_argument( + "--groupby", default="conf_class", help="Print separate stats groupby" + ) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gff_file,) = args + gb = opts.groupby + g = make_index(gff_file) + + tf = gff_file + ".transcript.sizes" + if need_update(gff_file, tf): + fw = open(tf, "w") + for feat in g.features_of_type("mRNA"): + fid = feat.id + conf_class = feat.attributes.get(gb, "all") + tsize = sum( + (c.stop - c.start + 1) + for c in g.children(fid, 1) + if c.featuretype == "exon" + ) + print("\t".join((fid, str(tsize), conf_class)), file=fw) + fw.close() + + tsizes = DictFile(tf, cast=int) + conf_classes = DictFile(tf, valuepos=2) + logger.debug("A total of {0} transcripts populated.".format(len(tsizes))) + + genes = [] + for feat in g.features_of_type("gene"): + fid = feat.id + transcripts = [c.id for c in g.children(fid, 1) if c.featuretype == "mRNA"] + if len(transcripts) == 0: + continue + transcript_sizes = [tsizes[x] for x in transcripts] + exons = set( + (c.chrom, c.start, c.stop) + for c in g.children(fid, 2) + if c.featuretype == "exon" + ) + conf_class = conf_classes[transcripts[0]] + gs = GeneStats(feat, conf_class, transcript_sizes, exons) + genes.append(gs) + + r = {} # Report + distinct_groups = set(conf_classes.values()) + for g in distinct_groups: + num_genes = num_single_exon_genes = num_multi_exon_genes = 0 + num_genes_with_alts = num_transcripts = num_exons = max_transcripts = 0 + cum_locus_size = cum_transcript_size = cum_exon_size = 0 + for gs in genes: + if gs.conf_class != g: + continue + num_genes += 1 + if gs.num_exons == 1: + num_single_exon_genes += 1 + else: + num_multi_exon_genes += 1 + num_exons += gs.num_exons + if gs.num_transcripts > 1: + num_genes_with_alts += 1 + if gs.num_transcripts > max_transcripts: + max_transcripts = 
gs.num_transcripts + num_transcripts += gs.num_transcripts + cum_locus_size += gs.locus_size + cum_transcript_size += gs.cum_transcript_size + cum_exon_size += gs.cum_exon_size + + mean_num_exons = num_exons * 1.0 / num_genes + mean_num_transcripts = num_transcripts * 1.0 / num_genes + mean_locus_size = cum_locus_size * 1.0 / num_genes + mean_transcript_size = cum_transcript_size * 1.0 / num_transcripts + mean_exon_size = cum_exon_size * 1.0 / num_exons if num_exons != 0 else 0 + + r[("Number of genes", g)] = num_genes + r[("Number of single-exon genes", g)] = percentage( + num_single_exon_genes, num_genes, mode=1 + ) + r[("Number of multi-exon genes", g)] = percentage( + num_multi_exon_genes, num_genes, mode=1 + ) + r[("Number of distinct exons", g)] = num_exons + r[("Number of genes with alternative transcript variants", g)] = percentage( + num_genes_with_alts, num_genes, mode=1 + ) + r[("Number of predicted transcripts", g)] = num_transcripts + r[("Mean number of distinct exons per gene", g)] = mean_num_exons + r[("Mean number of transcripts per gene", g)] = mean_num_transcripts + r[("Max number of transcripts per gene", g)] = max_transcripts + r[("Mean gene locus size (first to last exon)", g)] = mean_locus_size + r[("Mean transcript size (UTR, CDS)", g)] = mean_transcript_size + r[("Mean exon size", g)] = mean_exon_size + + fw = must_open(opts.outfile, "w") + print(tabulate(r), file=fw) + fw.close() + + +def statstable(args): + """ + %prog statstable *.gff + + Print gene statistics table. 
+ """ + p = OptionParser(statstable.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + gff_files = args + for metric in metrics: + logger.debug("Parsing files in `{0}`..".format(metric)) + + table = {} + for x in gff_files: + pf = op.basename(x).split(".")[0] + numberfile = op.join(metric, pf + ".txt") + ar = [int(x.strip()) for x in open(numberfile)] + sum = SummaryStats(ar).todict().items() + keys, vals = zip(*sum) + keys = [(pf, x) for x in keys] + table.update(dict(zip(keys, vals))) + + print(tabulate(table), file=sys.stderr) + + +def histogram(args): + """ + %prog histogram *.gff + + Plot gene statistics based on output of stats. For each gff file, look to + see if the metrics folder (i.e. Exon_Length) contains the data and plot + them. + """ + from jcvi.graphics.histogram import histogram_multiple + + p = OptionParser(histogram.__doc__) + p.add_argument( + "--bins", + dest="bins", + default=40, + type=int, + help="number of bins to plot in the histogram", + ) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + gff_files = args + # metrics = ("Exon_Length", "Intron_Length", "Gene_Length", "Exon_Count") + colors = ("red", "green", "blue", "black") + vmaxes = (1000, 1000, 4000, 20) + xlabels = ("bp", "bp", "bp", "number") + for metric, color, vmax, xlabel in zip(metrics, colors, vmaxes, xlabels): + logger.debug("Parsing files in `{0}`..".format(metric)) + numberfiles = [ + op.join(metric, op.basename(x).split(".")[0] + ".txt") for x in gff_files + ] + + histogram_multiple( + numberfiles, + 0, + vmax, + xlabel, + metric, + bins=opts.bins, + facet=True, + fill=color, + prefix=metric + ".", + ) + + +def stats(args): + """ + %prog stats infile.gff + + Collect gene statistics based on gff file. There are some terminology issues + here and so normally we call "gene" are actually mRNA, and sometimes "exon" + are actually CDS, but they are configurable. 
+ + Thee numbers are written to text file in four separate folders, + corresponding to the four metrics: + + Exon length, Intron length, Gene length, Exon count + + With data written to disk then you can run %prog histogram + """ + p = OptionParser(stats.__doc__) + p.add_argument("--gene", default="mRNA", help="The gene type") + p.add_argument("--exon", default="CDS", help="The exon type") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gff_file,) = args + g = make_index(gff_file) + exon_lengths = [] + intron_lengths = [] + gene_lengths = [] + exon_counts = [] + for feat in g.features_of_type(opts.gene): + exons = [] + for c in g.children(feat.id, 1): + if c.featuretype != opts.exon: + continue + exons.append((c.chrom, c.start, c.stop)) + introns = range_interleave(exons) + feat_exon_lengths = [(stop - start + 1) for (chrom, start, stop) in exons] + feat_intron_lengths = [(stop - start + 1) for (chrom, start, stop) in introns] + exon_lengths += feat_exon_lengths + intron_lengths += feat_intron_lengths + gene_lengths.append(sum(feat_exon_lengths)) + exon_counts.append(len(feat_exon_lengths)) + + a = SummaryStats(exon_lengths) + b = SummaryStats(intron_lengths) + c = SummaryStats(gene_lengths) + d = SummaryStats(exon_counts) + for x, title in zip((a, b, c, d), metrics): + x.title = title + print(x, file=sys.stderr) + + prefix = gff_file.split(".")[0] + for x in (a, b, c, d): + dirname = x.title + mkdir(dirname) + txtfile = op.join(dirname, prefix + ".txt") + x.tofile(txtfile) + + +if __name__ == "__main__": + main() diff --git a/jcvi/annotation/train.py b/jcvi/annotation/train.py new file mode 100644 index 00000000..40c35b3d --- /dev/null +++ b/jcvi/annotation/train.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Train ab initio gene predictors. 
+""" +import os +import os.path as op +import sys + +from ..apps.base import ActionDispatcher, OptionParser, logger, mkdir, need_update, sh + + +def main(): + + actions = ( + ("pasa", "extract pasa training models"), + ("snap", "train snap model"), + ("augustus", "train augustus model"), + ("genemark", "train genemark model"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def pasa(args): + """ + %prog ${pasadb}.assemblies.fasta ${pasadb}.pasa_assemblies.gff3 + + Wraps `pasa_asmbls_to_training_set.dbi`. + """ + from jcvi.formats.base import SetFile + from jcvi.formats.gff import Gff + + p = OptionParser(pasa.__doc__) + p.set_home("pasa") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, gffile = args + transcodergff = fastafile + ".transdecoder.gff3" + transcodergenomegff = fastafile + ".transdecoder.genome.gff3" + if need_update((fastafile, gffile), (transcodergff, transcodergenomegff)): + cmd = "{0}/scripts/pasa_asmbls_to_training_set.dbi".format(opts.pasa_home) + cmd += " --pasa_transcripts_fasta {0} --pasa_transcripts_gff3 {1}".format( + fastafile, gffile + ) + sh(cmd) + + completeids = fastafile.rsplit(".", 1)[0] + ".complete.ids" + if need_update(transcodergff, completeids): + cmd = "grep complete {0} | cut -f1 | sort -u".format(transcodergff) + sh(cmd, outfile=completeids) + + complete = SetFile(completeids) + seen = set() + completegff = transcodergenomegff.rsplit(".", 1)[0] + ".complete.gff3" + fw = open(completegff, "w") + gff = Gff(transcodergenomegff) + for g in gff: + a = g.attributes + if "Parent" in a: + id = a["Parent"][0] + else: + id = a["ID"][0] + asmbl_id = id.split("|")[0] + if asmbl_id not in complete: + continue + print(g, file=fw) + if g.type == "gene": + seen.add(id) + + fw.close() + logger.debug( + "A total of {0} complete models extracted to `{1}`.".format( + len(seen), completegff + ) + ) + + +def genemark(args): + """ + %prog genemark species fastafile + + Train 
GENEMARK model given fastafile. GENEMARK self-trains so no trainig + model gff file is needed. + """ + p = OptionParser(genemark.__doc__) + p.add_argument("--junctions", help="Path to `junctions.bed` from Tophat2") + p.set_home("gmes") + p.set_cpus(cpus=32) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + species, fastafile = args + junctions = opts.junctions + mhome = opts.gmes_home + + license = op.expanduser("~/.gm_key") + assert op.exists(license), "License key ({0}) not found!".format(license) + cmd = "{0}/gmes_petap.pl --sequence {1}".format(mhome, fastafile) + cmd += " --cores {0}".format(opts.cpus) + if junctions: + intronsgff = "introns.gff" + if need_update(junctions, intronsgff): + jcmd = "{0}/bet_to_gff.pl".format(mhome) + jcmd += " --bed {0} --gff {1} --label Tophat2".format(junctions, intronsgff) + sh(jcmd) + cmd += " --ET {0} --et_score 10".format(intronsgff) + else: + cmd += " --ES" + sh(cmd) + + logger.debug("GENEMARK matrix written to `output/gmhmm.mod") + + +def snap(args): + """ + %prog snap species gffile fastafile + + Train SNAP model given gffile and fastafile. 
Whole procedure taken from: + + """ + p = OptionParser(snap.__doc__) + p.set_home("maker") + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + species, gffile, fastafile = args + gffile = os.path.abspath(gffile) + fastafile = os.path.abspath(fastafile) + mhome = opts.maker_home + snapdir = "snap" + mkdir(snapdir) + + cwd = os.getcwd() + os.chdir(snapdir) + + newgffile = "training.gff3" + logger.debug("Construct GFF file combined with sequence ...") + sh("cat {0} > {1}".format(gffile, newgffile)) + sh('echo "##FASTA" >> {0}'.format(newgffile)) + sh("cat {0} >> {1}".format(fastafile, newgffile)) + + logger.debug("Make models ...") + sh("{0}/src/bin/maker2zff training.gff3".format(mhome)) + sh("{0}/exe/snap/fathom -categorize 1000 genome.ann genome.dna".format(mhome)) + sh("{0}/exe/snap/fathom -export 1000 -plus uni.ann uni.dna".format(mhome)) + sh("{0}/exe/snap/forge export.ann export.dna".format(mhome)) + sh("{0}/exe/snap/hmm-assembler.pl {1} . > {1}.hmm".format(mhome, species)) + + os.chdir(cwd) + logger.debug("SNAP matrix written to `{0}/{1}.hmm`".format(snapdir, species)) + + +def augustus(args): + """ + %prog augustus species gffile fastafile + + Train AUGUSTUS model given gffile and fastafile. 
Whole procedure taken from: + + """ + p = OptionParser(augustus.__doc__) + p.add_argument( + "--autotrain", + default=False, + action="store_true", + help="Run autoAugTrain.pl to iteratively train AUGUSTUS", + ) + p.set_home("augustus") + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + species, gffile, fastafile = args + gffile = os.path.abspath(gffile) + fastafile = os.path.abspath(fastafile) + mhome = opts.augustus_home + augdir = "augustus" + + cwd = os.getcwd() + mkdir(augdir) + os.chdir(augdir) + target = "{0}/config/species/{1}".format(mhome, species) + + if op.exists(target): + logger.debug("Removing existing target `{0}`".format(target)) + sh("rm -rf {0}".format(target)) + + config_path = "{0}/config".format(mhome) + sh( + "{0}/scripts/new_species.pl --species={1} --AUGUSTUS_CONFIG_PATH={2}".format( + mhome, species, config_path + ) + ) + sh( + "{0}/scripts/gff2gbSmallDNA.pl {1} {2} 1000 raw.gb".format( + mhome, gffile, fastafile + ) + ) + sh("{0}/bin/etraining --species={1} raw.gb 2> train.err".format(mhome, species)) + sh(r"cat train.err | perl -pe 's/.*in sequence (\S+): .*/$1/' > badgenes.lst") + sh("{0}/scripts/filterGenes.pl badgenes.lst raw.gb > training.gb".format(mhome)) + sh("grep -c LOCUS raw.gb training.gb") + + # autoAugTrain failed to execute, disable for now + if opts.autotrain: + sh("rm -rf {0}".format(target)) + sh( + "{0}/scripts/autoAugTrain.pl --trainingset=training.gb --species={1}".format( + mhome, species + ) + ) + + os.chdir(cwd) + sh("cp -r {0} augustus/".format(target)) + + +if __name__ == "__main__": + main() diff --git a/jcvi/annotation/trinity.py b/jcvi/annotation/trinity.py new file mode 100644 index 00000000..2f7e31f1 --- /dev/null +++ b/jcvi/annotation/trinity.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Trinity assembly of RNAseq reads. Contains de novo (DN) method and genome-guided +(GG) method. 
def prepare(args):
    """
    %prog prepare [--options] folder [--bam rnaseq.coordSorted.bam]

    Run Trinity on a folder of reads. When paired-end (--paired) mode is on,
    filenames will be scanned based on whether they contain the patterns
    ("_1_" and "_2_") or (".1." and ".2.") or ("_1." and "_2.").

    By default, prepare script for DN-Trinity.

    If coord-sorted BAM is provided, prepare script for GG-Trinity, using BAM
    as starting point.

    Newer versions of trinity can take multiple fastq files as input.
    If "--merge" is specified, the fastq files are merged together before assembling
    """
    # NOTE: the docstring above doubles as the CLI usage text.
    p = OptionParser(prepare.__doc__)
    p.add_argument(
        "--paired",
        default=False,
        action="store_true",
        help="Paired-end mode",
    )
    p.add_argument(
        "--merge",
        default=False,
        action="store_true",
        help="Merge individual input fastq's into left/right/single file(s)",
    )
    p.set_trinity_opts()
    p.set_fastq_names()
    p.set_grid()
    opts, args = p.parse_args(args)

    if len(args) not in (1, 2):
        sys.exit(not p.print_help())

    (inparam,) = args[:1]

    paired = opts.paired
    merge = opts.merge
    trinity_home = opts.trinity_home
    hpc_grid_runner_home = opts.hpcgridrunner_home

    # Genome-guided (GG) mode is selected only when the BAM actually exists;
    # otherwise fall back to de novo (DN) assembly.
    method = "DN"
    bam = opts.bam
    if bam and op.exists(bam):
        bam = op.abspath(bam)
        method = "GG"

    pf = inparam.split(".")[0]
    tfolder = "{0}_{1}".format(pf, method)

    cwd = os.getcwd()
    mkdir(tfolder)
    os.chdir(tfolder)
    # Everything below runs inside `tfolder` (hence the "../" prefixes).
    # The try/finally guarantees the caller's working directory is restored
    # even if one of the assertions or the file merge fails.
    try:
        cmds = []

        # set TRINITY_HOME env variable when preparing shell script
        env_cmd = 'export TRINITY_HOME="{0}"'.format(trinity_home)
        cmds.append(env_cmd)

        if method == "DN":
            assert op.exists("../" + inparam)

            flist = iglob("../" + inparam, opts.names)
            if paired:
                f1 = [
                    x
                    for x in flist
                    if "_1_" in x or ".1." in x or "_1." in x or "_R1" in x
                ]
                f2 = [
                    x
                    for x in flist
                    if "_2_" in x or ".2." in x or "_2." in x or "_R2" in x
                ]
                assert len(f1) == len(f2)
                if merge:
                    r1, r2 = "left.fastq", "right.fastq"
                    reads = ((f1, r1), (f2, r2))
            else:
                if merge:
                    r = "single.fastq"
                    reads = ((flist, r),)

            if merge:
                for fl, r in reads:
                    fm = FileMerger(fl, r)
                    fm.merge(checkexists=True)

        cmd = op.join(trinity_home, "Trinity")
        cmd += " --seqType fq --max_memory {0} --CPU {1}".format(
            opts.max_memory, opts.cpus
        )
        cmd += " --min_contig_length {0}".format(opts.min_contig_length)

        if opts.bflyGCThreads:
            cmd += " --bflyGCThreads {0}".format(opts.bflyGCThreads)

        if method == "GG":
            cmd += " --genome_guided_bam {0}".format(bam)
            cmd += " --genome_guided_max_intron {0}".format(opts.max_intron)
        else:
            if paired:
                if merge:
                    cmd += " --left {0} --right {1}".format(
                        reads[0][-1], reads[1][-1]
                    )
                else:
                    cmd += " --left {0}".format(",".join(f1))
                    cmd += " --right {0}".format(",".join(f2))
            else:
                if merge:
                    cmd += " --single {0}".format(reads[0][-1])
                else:
                    for f in flist:
                        cmd += " --single {0}".format(f)

        if opts.grid and opts.grid_conf_file:
            hpc_grid_runner = op.join(hpc_grid_runner_home, "hpc_cmds_GridRunner.pl")
            hpc_grid_conf_file = op.join(
                hpc_grid_runner_home, "hpc_conf", opts.grid_conf_file
            )
            assert op.exists(
                hpc_grid_conf_file
            ), "HpcGridRunner conf file does not exist: {0}".format(hpc_grid_conf_file)

            cmd += ' --grid_exec "{0} --grid_conf {1} -c"'.format(
                hpc_grid_runner, hpc_grid_conf_file
            )

        if opts.extra:
            cmd += " {0}".format(opts.extra)

        cmds.append(cmd)

        if opts.cleanup:
            # Keep only the final assembly artifacts; names differ by method.
            cleanup_cmd = (
                'rm -rf !("Trinity.fasta"|"Trinity.gene_trans_map"|"Trinity.timing")'
                if method == "DN"
                else 'rm -rf !("Trinity-GG.fasta"|"Trinity-GG.gene_trans_map"|"Trinity.timing")'
            )
            cmds.append(cleanup_cmd)

        runfile = "run.sh"
        write_file(runfile, "\n".join(cmds))
    finally:
        os.chdir(cwd)
@depends
def run_megablast(
    infile=None,
    outfile=None,
    db=None,
    wordsize=None,
    pctid=98,
    hitlen=100,
    best=None,
    evalue=0.01,
    task="megablast",
    cpus=16,
):
    """
    Run `blastn` (default task: megablast) of `infile` against `db`, writing
    tabular (-outfmt 6) hits to `outfile`.

    The BLAST database is built first via run_formatdb(). When both `pctid`
    and `hitlen` are truthy, the hits are additionally passed through
    run_blast_filter() and the filtered file replaces `outfile`.
    """
    assert db, "Need to specify database fasta file."

    db = get_abs_path(db)
    # Use the first volume's index (`.00.nin`) as the target when it exists,
    # otherwise the plain `.nin` index.
    nin00 = db + ".00.nin"
    nin = nin00 if op.exists(nin00) else (db + ".nin")
    run_formatdb(infile=db, outfile=nin)

    cmd = "blastn"
    cmd += " -query {0} -db {1} -out {2}".format(infile, db, outfile)
    cmd += " -evalue {0} -outfmt 6 -num_threads {1}".format(evalue, cpus)
    cmd += " -task {0}".format(task)
    if wordsize:
        cmd += " -word_size {0}".format(wordsize)
    if pctid:
        cmd += " -perc_identity {0}".format(pctid)
    if best:
        cmd += " -max_target_seqs {0}".format(best)
    sh(cmd)

    if pctid and hitlen:
        blastfile = outfile
        filtered_blastfile = outfile + ".P{0}L{1}".format(pctid, hitlen)
        run_blast_filter(
            infile=blastfile, outfile=filtered_blastfile, pctid=pctid, hitlen=hitlen
        )
        # Overwrite the raw hits with the filtered ones
        shutil.move(filtered_blastfile, blastfile)
+ """ + from jcvi.formats.fasta import Fasta + + p = OptionParser(minimap.__doc__) + p.add_argument( + "--chunks", + type=int, + default=2000000, + help="Split ref.fasta into chunks of size in self-scan mode", + ) + p.set_outdir(outdir="outdir") + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + ref, query = args + chunks = opts.chunks + outdir = opts.outdir + if ref != query: + raise NotImplementedError + + # "self-scan" mode + # build faidx (otherwise, parallel make may complain) + sh("samtools faidx {}".format(ref)) + f = Fasta(ref) + mkdir(outdir) + mm = MakeManager() + for name, size in f.itersizes(): + start = 0 + for end in range(chunks, size, chunks): + fafile = op.join(outdir, "{}_{}_{}.fa".format(name, start + 1, end)) + cmd = "samtools faidx {} {}:{}-{} -o {}".format( + ref, name, start + 1, end, fafile + ) + mm.add(ref, fafile, cmd) + + paffile = fafile.rsplit(".", 1)[0] + ".paf" + cmd = "minimap2 -P {} {} > {}".format(fafile, fafile, paffile) + mm.add(fafile, paffile, cmd) + + epsfile = fafile.rsplit(".", 1)[0] + ".eps" + cmd = "minidot {} > {}".format(paffile, epsfile) + mm.add(paffile, epsfile, cmd) + start += chunks + + mm.write() + + +def nucmer(args): + """ + %prog nucmer ref.fasta query.fasta + + Run NUCMER using query against reference. 
def blasr(args):
    """
    %prog blasr ref.fasta fofn

    Run blasr on a set of PacBio reads. This is based on a divide-and-conquer
    strategy described below.
    """
    from more_itertools import grouper

    p = OptionParser(blasr.__doc__)
    p.set_cpus(cpus=8)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    reffasta, fofn = args
    with open(fofn) as fh:
        flist = sorted(x.strip() for x in fh)

    h5list = []
    mm = MakeManager()
    # Align the reads in chunks of three input files each
    for i, fl in enumerate(grouper(flist, 3)):
        # grouper() pads the last group with None when len(flist) is not a
        # multiple of 3; drop the padding so the join below cannot fail.
        fl = [x for x in fl if x is not None]
        chunkname = "chunk{0:03d}".format(i)
        fn = chunkname + ".fofn"
        h5 = chunkname + ".cmp.h5"
        with open(fn, "w") as fw:
            print("\n".join(fl), file=fw)

        cmd = "pbalign {0} {1} {2}".format(fn, reffasta, h5)
        cmd += " --nproc {0} --forQuiver --tmpDir .".format(opts.cpus)
        mm.add((fn, reffasta), h5, cmd)
        h5list.append(h5)

    # Merge h5, sort and repack
    allh5 = "all.cmp.h5"
    tmph5 = "tmp.cmp.h5"
    cmd_merge = "cmph5tools.py merge --outFile {0}".format(allh5)
    cmd_merge += " " + " ".join(h5list)
    cmd_sort = "cmph5tools.py sort --deep {0} --tmpDir .".format(allh5)
    cmd_repack = "h5repack -f GZIP=1 {0} {1}".format(allh5, tmph5)
    cmd_repack += " && mv {0} {1}".format(tmph5, allh5)
    mm.add(h5list, allh5, [cmd_merge, cmd_sort, cmd_repack])

    # Quiver
    pf = reffasta.rsplit(".", 1)[0]
    variantsgff = pf + ".variants.gff"
    consensusfasta = pf + ".consensus.fasta"
    cmd_faidx = "samtools faidx {0}".format(reffasta)
    cmd = "quiver -j 32 {0}".format(allh5)
    cmd += " -r {0} -o {1} -o {2}".format(reffasta, variantsgff, consensusfasta)
    mm.add(allh5, consensusfasta, [cmd_faidx, cmd])

    mm.write()
+ """ + p = OptionParser(blat.__doc__) + p.set_align(pctid=95, hitlen=30) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + reffasta, queryfasta = args + blastfile = get_outfile(reffasta, queryfasta, suffix="blat") + + run_blat( + infile=queryfasta, + outfile=blastfile, + db=reffasta, + pctid=opts.pctid, + hitlen=opts.hitlen, + cpus=opts.cpus, + overwrite=False, + ) + + return blastfile + + +def blast(args): + """ + %prog blast ref.fasta query.fasta + + Calls blast and then filter the BLAST hits. Default is megablast. + """ + task_choices = ("blastn", "blastn-short", "dc-megablast", "megablast", "vecscreen") + p = OptionParser(blast.__doc__) + p.set_align(pctid=0, evalue=0.01) + p.add_argument("--wordsize", type=int, help="Word size") + p.add_argument("--best", default=1, type=int, help="Only look for best N hits") + p.add_argument( + "--task", default="megablast", choices=task_choices, help="Task of the blastn" + ) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + reffasta, queryfasta = args + blastfile = get_outfile(reffasta, queryfasta) + + run_megablast( + infile=queryfasta, + outfile=blastfile, + db=reffasta, + wordsize=opts.wordsize, + pctid=opts.pctid, + evalue=opts.evalue, + hitlen=None, + best=opts.best, + task=opts.task, + cpus=opts.cpus, + ) + + return blastfile + + +def lastgenome(args): + """ + %prog genome_A.fasta genome_B.fasta + + Run LAST by calling LASTDB, LASTAL. 
def lastgenomeuniq(args):
    """
    %prog genome_A.fasta genome_B.fasta

    Run LAST by calling LASTDB, LASTAL and LAST-SPLIT. The recipe is based on
    tutorial here:

    The script runs the following steps:
    $ lastdb -P0 -uNEAR -R01 Chr10A-NEAR Chr10A.fa
    $ lastal -E0.05 -C2 Chr10A-NEAR Chr10B.fa | last-split -m1 | maf-swap | last-split -m1 -fMAF > Chr10A.Chr10B.1-1.maf
    $ maf-convert -n blasttab Chr10A.Chr10B.1-1.maf > Chr10A.Chr10B.1-1.blast

    Works with LAST v959.
    """
    # Use this function's own docstring as usage text (it previously showed
    # the help of `lastgenome`).
    p = OptionParser(lastgenomeuniq.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    gA, gB = args
    mm = MakeManager()

    def bb(x):
        # basename without its final extension
        return op.basename(x).rsplit(".", 1)[0]

    gA_pf, gB_pf = bb(gA), bb(gB)

    # Build LASTDB
    # `dbname` is the database name given to both lastdb and lastal (as in the
    # recipe above); `dbfile` (dbname + ".suf") is only the make target used
    # to track that the database was built.
    dbname = "-".join((gA_pf, "NEAR"))
    dbfile = dbname + ".suf"
    build_db_cmd = "lastdb -P0 -uNEAR -R01 {} {}".format(dbname, gA)
    mm.add(gA, dbfile, build_db_cmd)

    # Run LASTAL
    maffile = "{}.{}.1-1.maf".format(gA_pf, gB_pf)
    lastal_cmd = "lastal -E0.05 -C2 {} {}".format(dbname, gB)
    lastal_cmd += " | last-split -m1"
    lastal_cmd += " | maf-swap"
    lastal_cmd += " | last-split -m1 -fMAF > {}".format(maffile)
    mm.add([dbfile, gB], maffile, lastal_cmd)

    # Convert to BLAST format
    blastfile = maffile.replace(".maf", ".blast")
    convert_cmd = "maf-convert -n blasttab {} > {}".format(maffile, blastfile)
    mm.add(maffile, blastfile, convert_cmd)

    mm.write()
+ """ + p = OptionParser(last.__doc__) + p.add_argument( + "--dbtype", + default="nucl", + choices=("nucl", "prot"), + help="Molecule type of subject database", + ) + p.add_argument("--path", help="Specify LAST path") + p.add_argument( + "--mask", default=False, action="store_true", help="Invoke -c in lastdb" + ) + p.add_argument( + "--format", + default="BlastTab", + choices=("TAB", "MAF", "BlastTab", "BlastTab+"), + help="Output format", + ) + p.add_argument( + "--minlen", + default=0, + type=int, + help="Filter alignments by how many bases match", + ) + p.add_argument("--minid", default=0, type=int, help="Minimum sequence identity") + p.set_cpus() + p.set_outdir() + p.set_params() + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + subject, query = args + path = opts.path + cpus = opts.cpus + if not dbtype: + dbtype = opts.dbtype + getpath = lambda x: op.join(path, x) if path else x + lastdb_bin = getpath("lastdb") + lastal_bin = getpath("lastal") + for bin in (lastdb_bin, lastal_bin): + if not which(bin): + logger.fatal("`%s` not found on PATH. 
Have you installed LAST?", bin) + sys.exit(1) + + subjectdb = subject.rsplit(".", 1)[0] + run_lastdb( + infile=subject, + outfile=subjectdb + ".prj", + mask=opts.mask, + lastdb_bin=lastdb_bin, + dbtype=dbtype, + ) + + u = 2 if opts.mask else 0 + cmd = "{0} -u {1} -i3G".format(lastal_bin, u) + cmd += " -f {0}".format(opts.format) + + minlen = opts.minlen + minid = opts.minid + extra = opts.extra + assert minid != 100, "Perfect match not yet supported" + mm = minid / (100 - minid) + + if minlen: + extra += " -e{0}".format(minlen) + if minid: + extra += " -r1 -q{0} -a{0} -b{0}".format(mm) + if extra: + cmd += " " + extra.strip() + + lastfile = get_outfile(subject, query, suffix="last", outdir=opts.outdir) + # Make several attempts to run LASTAL + try: + sh( + cmd + f" -P {cpus} {subjectdb} {query}", + outfile=lastfile, + check=True, + redirect_error=STDOUT, + ) + except CalledProcessError as e: # multi-threading disabled + message = "lastal failed with message:" + message += "\n{0}".format(e.output.decode()) + logger.error(message) + try: + logger.debug("Failed to run `lastal` with multi-threading. Trying again.") + sh( + cmd + f" -P 1 {subjectdb} {query}", + outfile=lastfile, + check=True, + redirect_error=STDOUT, + ) + except CalledProcessError as e: + message = "lastal failed with message:" + message += "\n{0}".format(e.output.decode()) + logger.error(message) + logger.fatal("Failed to run `lastal`. Aborted.") + cleanup(lastfile) + sys.exit(1) + return lastfile + + +def blast_main(args, dbtype=None): + """ + %prog database.fasta query.fasta + + Run blastp/blastn by calling BLAST+ blastp/blastn depends on dbtype. 
def diamond_blastp_main(args, dbtype="prot"):
    """
    %prog database.fasta query.fasta

    Run diamond blastp for protein alignment.
    """
    # NOTE: `dbtype` is kept in the signature for parity with last() and
    # blast_main(), but nothing here depends on it. The parser defines no
    # --dbtype option, so it must not be looked up on `opts`.
    p = OptionParser(diamond_blastp_main.__doc__)

    p.add_argument("--path", help="Specify diamond path for diamond blastp")

    p.set_cpus()
    p.set_outdir()
    p.set_params()

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    subject, query = args
    path = opts.path
    cpus = opts.cpus

    diamond_bin = op.join(path, "diamond") if path else "diamond"
    if not which(diamond_bin):
        logger.fatal("`%s` not found on PATH. Have you installed Diamond?", diamond_bin)
        sys.exit(1)
    # Run the binary that was just validated, so --path is honored.
    # NOTE(review): run_diamond_makedb() still invokes a bare `diamond`.
    cmd = f"{diamond_bin} blastp"

    run_diamond_makedb(
        infile=subject,
        outfile=subject + ".dmnd",
    )

    blastfile = get_outfile(subject, query, suffix="last", outdir=opts.outdir)
    # check=True is required for a failing diamond run to raise
    # CalledProcessError (with the captured stderr/stdout in `e.output`);
    # with check=False the handler below could never fire.
    try:
        sh(
            cmd
            + f" --threads {cpus} --query {query} --db {subject} --out {blastfile}"
            + " --ultra-sensitive --max-target-seqs 1000 --evalue 1e-5 --outfmt 6",
            check=True,
            redirect_error=STDOUT,
        )
    except CalledProcessError as e:
        message = f"{cmd} failed with message:"
        message += "\n{0}".format(e.output.decode())
        logger.error(message)
        logger.fatal("Failed to run `diamond blastp`. Aborted.")
        # Do not leave a partial result file behind
        cleanup(blastfile)
        sys.exit(1)
    return blastfile
+ """ + log = logging.getLogger(name) + if log.hasHandlers(): + log.handlers.clear() + log.addHandler(RichHandler(console=Console(stderr=True))) + log.propagate = False + log.setLevel(level) + return log + + +logger = get_logger("jcvi") + + +class ActionDispatcher(object): + """ + This class will be invoked + a) when the base package is run via __main__, listing all MODULESs + a) when a directory is run via __main__, listing all SCRIPTs + b) when a script is run directly, listing all ACTIONs + + This is controlled through the meta variable, which is automatically + determined in get_meta(). + """ + + def __init__(self, actions): + self.actions = actions + if not actions: + actions = [(None, None)] + self.valid_actions, self.action_helps = zip(*actions) + + def get_meta(self): + args = splitall(sys.argv[0])[-3:] + args[-1] = args[-1].replace(".py", "") + if args[-2] == "jcvi": + meta = "MODULE" + elif args[-1] == "__main__": + meta = "SCRIPT" + else: + meta = "ACTION" + return meta, args + + def print_help(self): + meta, args = self.get_meta() + if meta == "MODULE": + del args[0] + args[-1] = meta + elif meta == "SCRIPT": + args[-1] = meta + else: + args[-1] += " " + meta + + help = "Usage:\n python -m {0}\n\n\n".format(".".join(args)) + help += "Available {0}s:\n".format(meta) + max_action_len = max(len(action) for action, ah in self.actions) + for action, action_help in sorted(self.actions): + action = action.rjust(max_action_len + 4) + help += ( + " | ".join((action, action_help[0].upper() + action_help[1:])) + "\n" + ) + help += "\n" + JCVIHELP + + sys.stderr.write(help) + sys.exit(1) + + def dispatch(self, globals): + from difflib import get_close_matches + + meta = "ACTION" # function is only invoked for listing ACTIONs + if len(sys.argv) == 1: + self.print_help() + + action = sys.argv[1] + + if not action in self.valid_actions: + print("[error] {0} not a valid {1}\n".format(action, meta), file=sys.stderr) + alt = get_close_matches(action, self.valid_actions) 
def add_help_from_choices(self, o):
    """
    Normalize the help string of argparse action `o` in place.

    The first letter is capitalized, any trailing "[...]" annotation is
    replaced by a fresh "[default: ...]" tag, and for options that restrict
    their values the allowed choices are listed. Actions whose help is
    SUPPRESS are left untouched.
    """
    if o.help == SUPPRESS:
        return

    default_tag = "%(default)s"
    assert o.help, "Option {0} do not have help string".format(o)
    help_pf = o.help[:1].upper() + o.help[1:]
    if "[" in help_pf:
        help_pf = help_pf.rsplit("[", 1)[0]
    # Strip choice text added by an earlier call so that running this twice
    # on the same action does not duplicate it.
    choice_marker = ", must be one of "
    if choice_marker in help_pf:
        help_pf = help_pf.split(choice_marker, 1)[0]
    help_pf = help_pf.strip()

    # argparse has no "choice" type (that was optparse); an option with
    # restricted values is recognized by a non-empty `choices` attribute.
    if o.choices:
        if o.default is None:
            default_tag = "guess"
        ctext = "|".join(natsorted(str(x) for x in o.choices))
        if len(ctext) > 100:
            ctext = ctext[:100] + " ... "
        choice_text = "must be one of {0}".format(ctext)
        o.help = "{0}, {1} [default: {2}]".format(help_pf, choice_text, default_tag)
    else:
        o.help = help_pf
        if o.default is None:
            default_tag = "disabled"
        # --help / --version carry no meaningful default
        if not set(o.option_strings) & set(("--help", "--version")):
            o.help += " [default: {0}]".format(default_tag)
def set_cpus(self, cpus=0):
    """
    Register the --cpus option.

    The default is `cpus` when it lies strictly between 0 and the machine's
    CPU count; any other value falls back to the CPU count itself.
    """
    from multiprocessing import cpu_count

    available = cpu_count()
    default_cpus = cpus if 0 < cpus < available else available
    self.add_argument(
        "--cpus",
        type=int,
        default=default_cpus,
        help="Number of CPUs to use, 0=unlimited",
    )
def set_histogram(self, vmin=0, vmax=None, bins=20, xlabel="value", title=None):
    """
    Register the histogram options --vmin, --vmax, --bins, --xlabel and
    --title, using the given values as their defaults.
    """
    # Integer-valued options first, in their original registration order
    numeric_opts = (
        ("--vmin", vmin, "Minimum value, inclusive"),
        ("--vmax", vmax, "Maximum value, inclusive"),
        ("--bins", bins, "Number of bins to plot in the histogram"),
    )
    for flag, value, text in numeric_opts:
        self.add_argument(flag, default=value, type=int, help=text)

    # Free-text options
    text_opts = (
        ("--xlabel", xlabel, "Label on the X-axis"),
        ("--title", title, "Title of the plot"),
    )
    for flag, value, text in text_opts:
        self.add_argument(flag, default=value, help=text)
def set_align(
    self,
    pctid=None,
    hitlen=None,
    pctcov=None,
    evalue=None,
    compreh_pctid=None,
    compreh_pctcov=None,
    intron=None,
    bpsplice=None,
):
    """
    Register alignment cutoff options.

    An option is added only when its keyword is not None; the supplied value
    becomes that option's default.
    """
    # (flag, default, type, help), in registration order
    specs = (
        ("--pctid", pctid, float, "Sequence percent identity"),
        ("--hitlen", hitlen, int, "Minimum overlap length"),
        ("--pctcov", pctcov, int, "Percentage coverage cutoff"),
        ("--evalue", evalue, float, "E-value cutoff"),
        (
            "--compreh_pctid",
            compreh_pctid,
            int,
            "Sequence percent identity cutoff used to build PASA comprehensive transcriptome",
        ),
        (
            "--compreh_pctcov",
            compreh_pctcov,
            int,
            "Percent coverage cutoff used to build PASA comprehensive transcriptome",
        ),
        ("--intron", intron, int, "Maximum intron length used for mapping"),
        ("--bpsplice", bpsplice, int, "Number of bp of perfect splice boundary"),
    )
    for flag, value, caster, text in specs:
        if value is None:
            continue
        self.add_argument(flag, default=value, type=caster, help=text)
+ """ + from jcvi.graphics.base import ( + GRAPHIC_FORMATS, + ImageOptions, + is_tex_available, + setup_theme, + ) + + allowed_fonts = ( + "Helvetica", + "Liberation Sans", + "Palatino", + "Schoolbook", + "Arial", + ) + allowed_styles = ("darkgrid", "whitegrid", "dark", "white", "ticks") + allowed_diverge = ( + "BrBG", + "PiYG", + "PRGn", + "PuOr", + "RdBu", + "RdGy", + "RdYlBu", + "RdYlGn", + "Spectral", + ) + + group = self.add_argument_group("Image options") + group.add_argument( + "--figsize", default=figsize, help="Figure size `width`x`height` in inches" + ) + group.add_argument( + "--dpi", + default=dpi, + type=int, + help="Physical dot density (dots per inch)", + ) + group.add_argument( + "--format", + default=format, + choices=GRAPHIC_FORMATS, + help="Generate image of format", + ) + group.add_argument( + "--font", default=font, choices=allowed_fonts, help="Font name" + ) + group.add_argument( + "--style", default=style, choices=allowed_styles, help="Axes background" + ) + group.add_argument( + "--diverge", + default="PiYG", + choices=allowed_diverge, + help="Contrasting color scheme", + ) + group.add_argument("--cmap", default=cmap, help="Use this color map") + group.add_argument( + "--notex", default=False, action="store_true", help="Do not use tex" + ) + # https://github.com/tanghaibao/jcvi/issues/515#issuecomment-1327305211 + if ( + "--seed" not in self._option_string_actions + and "--seed" not in group._option_string_actions + ): + group.add_argument( + "--seed", + default=seed, + type=int, + help="Random seed when assigning colors (supported only for some plots)", + ) + + if args is None: + args = sys.argv[1:] + + opts, args = self.parse_args(args) + + assert opts.dpi > 0 + assert "x" in opts.figsize + + iopts = ImageOptions(opts) + + if opts.notex: + logger.info("--notex=%s. latex use is disabled.", opts.notex) + elif not is_tex_available(): + if not bool(which("latex")): + logger.info("`latex` not found. 
latex use is disabled.") + if not bool(which("lp")): + logger.info("`lp` not found. latex use is disabled.") + + setup_theme(style=opts.style, font=opts.font, usetex=iopts.usetex) + + return opts, args, iopts + + def set_dotplot_opts(self, theme: int = 2): + """ + Used in compara.catalog and graphics.dotplot + """ + from jcvi.graphics.base import set1 + + group = self.add_argument_group("Dot plot parameters") + group.add_argument( + "--skipempty", + default=False, + action="store_true", + help="Skip seqids that do not have matches", + ) + group.add_argument( + "--nochpf", + default=False, + action="store_true", + help="Do not change the contig name", + ) + group.add_argument( + "--nostdpf", + default=False, + action="store_true", + help="Do not standardize contig names", + ) + group.add_argument( + "--genomenames", + type=str, + default=None, + help="genome names for labeling axes in the form of qname_sname, " + 'eg. "*Vitis vinifera*_*Oryza sativa*"', + ) + group.add_argument( + "--theme", + choices=[str(x) for x in range(len(set1))], + default=str(theme), + help="Color index within the palette for contig grid boundaries. 
Palette contains: {}".format( + "|".join(set1) + ), + ) + return group + + def set_depth(self, depth=50): + self.add_argument("--depth", default=depth, type=float, help="Desired depth") + + def set_rclip(self, rclip=0): + self.add_argument( + "--rclip", + default=rclip, + type=int, + help="Pair ID is derived from rstrip N chars", + ) + + def set_chr(self, chr=",".join([str(x) for x in range(1, 23)] + ["X", "Y", "MT"])): + self.add_argument("--chr", default=chr, help="Chromosomes to process") + + def set_ref(self, ref="/mnt/ref"): + self.add_argument("--ref", default=ref, help="Reference folder") + + def set_cutoff(self, cutoff=0): + self.add_argument( + "--cutoff", + default=cutoff, + type=int, + help="Distance to call valid links between mates", + ) + + def set_mateorientation(self, mateorientation=None): + self.add_argument( + "--mateorientation", + default=mateorientation, + choices=("++", "--", "+-", "-+"), + help="Use only certain mate orientations", + ) + + def set_mates(self, rclip=0, cutoff=0, mateorientation=None): + self.set_rclip(rclip=rclip) + self.set_cutoff(cutoff=cutoff) + self.set_mateorientation(mateorientation=mateorientation) + + def set_bedpe(self): + self.add_argument( + "--norc", + dest="rc", + default=True, + action="store_false", + help="Do not reverse complement, expect innie reads", + ) + self.add_argument( + "--minlen", default=2000, type=int, help="Minimum insert size" + ) + self.add_argument( + "--maxlen", default=8000, type=int, help="Maximum insert size" + ) + self.add_argument( + "--dup", + default=10, + type=int, + help="Filter duplicates with coordinates within this distance", + ) + + def set_fastq_names(self): + self.add_argument( + "--names", + default="*.fq,*.fastq,*.fq.gz,*.fastq.gz", + help="File names to search, use comma to separate multiple", + ) + + def set_pairs(self): + """ + %prog pairs + + Report how many paired ends mapped, avg distance between paired ends, etc. 
+ Paired reads must have the same prefix, use --rclip to remove trailing + part, e.g. /1, /2, or .f, .r, default behavior is to truncate until last + char. + """ + self.usage = self.set_pairs.__doc__ + + self.add_argument( + "--pairsfile", default=None, help="Write valid pairs to pairsfile" + ) + self.add_argument( + "--nrows", default=200000, type=int, help="Only use the first n lines" + ) + self.set_mates() + self.add_argument( + "--pdf", + default=False, + action="store_true", + help="Print PDF instead ASCII histogram", + ) + self.add_argument( + "--bins", default=20, type=int, help="Number of bins in the histogram" + ) + self.add_argument( + "--distmode", + default="ss", + choices=("ss", "ee"), + help="Distance mode between paired reads, ss is outer distance, " + "ee is inner distance", + ) + + def set_sep(self, sep="\t", help="Separator in the tabfile", multiple=False): + if multiple: + help += ", multiple values allowed" + self.add_argument("--sep", default=sep, help=help) + + def set_firstN(self, firstN=100000): + self.add_argument( + "--firstN", default=firstN, type=int, help="Use only the first N reads" + ) + + def set_tag(self, tag=False, specify_tag=False): + if not specify_tag: + self.add_argument( + "--tag", + default=tag, + action="store_true", + help="Add tag (/1, /2) to the read name", + ) + else: + tag_choices = ["/1", "/2"] + self.add_argument( + "--tag", + default=None, + choices=tag_choices, + help="Specify tag to be added to read name", + ) + + def set_phred(self, phred=None): + phdchoices = ("33", "64") + self.add_argument( + "--phred", + default=phred, + choices=phdchoices, + help="Phred score offset {0} [default: guess]".format(phdchoices), + ) + + def set_size(self, size=0): + self.add_argument( + "--size", + default=size, + type=int, + help="Insert mean size, stdev assumed to be 20% around mean", + ) + + def set_trinity_opts(self): + self.set_home("trinity") + self.set_home("hpcgridrunner") + self.set_cpus() + 
self.set_params(prog="Trinity") + topts = self.add_argument_group("General Trinity options") + topts.add_argument( + "--max_memory", + default="128G", + type=str, + help="Jellyfish memory allocation", + ) + topts.add_argument( + "--min_contig_length", + default=90, + type=int, + help="Minimum assembled contig length to report", + ) + topts.add_argument( + "--bflyGCThreads", + default=None, + type=int, + help="Threads for garbage collection", + ) + topts.add_argument( + "--grid_conf_file", + default="JCVI_SGE.0689.conf", + type=str, + help="HpcGridRunner config file for supported compute farms", + ) + topts.add_argument( + "--cleanup", + default=False, + action="store_true", + help="Force clean-up of unwanted files after Trinity run is complete", + ) + ggopts = self.add_argument_group("Genome-guided Trinity options") + ggopts.add_argument( + "--bam", + default=None, + type=str, + help="provide coord-sorted bam file as starting point", + ) + ggopts.add_argument( + "--max_intron", + default=15000, + type=int, + help="maximum allowed intron length", + ) + + def set_pasa_opts(self, action="assemble"): + self.set_home("pasa") + if action == "assemble": + self.set_home("tgi") + self.add_argument( + "--clean", + default=False, + action="store_true", + help="Clean transcripts using tgi seqclean", + ) + self.set_align(pctid=95, pctcov=90, intron=15000, bpsplice=3) + self.add_argument( + "--aligners", + default="blat,gmap", + help="Specify splice aligners to use for mapping", + ) + self.add_argument( + "--fl_accs", + default=None, + type=str, + help="File containing list of FL-cDNA accessions", + ) + self.set_cpus() + self.add_argument( + "--compreh", + default=False, + action="store_true", + help="Run comprehensive transcriptome assembly", + ) + self.set_align(compreh_pctid=95, compreh_pctcov=30) + self.add_argument( + "--prefix", + default="compreh_init_build", + type=str, + help="Prefix for compreh_trans output file names", + ) + elif action == "compare": + 
self.add_argument( + "--annots_gff3", + default=None, + type=str, + help="Reference annotation to load and compare against", + ) + genetic_code = [ + "universal", + "Euplotes", + "Tetrahymena", + "Candida", + "Acetabularia", + ] + self.add_argument( + "--genetic_code", + default="universal", + choices=genetic_code, + help="Choose translation table", + ) + self.add_argument( + "--pctovl", + default=50, + type=int, + help="Minimum pct overlap between gene and FL assembly", + ) + self.add_argument( + "--pct_coding", + default=50, + type=int, + help="Minimum pct of cDNA sequence to be protein coding", + ) + self.add_argument( + "--orf_size", + default=0, + type=int, + help="Minimum size of ORF encoded protein", + ) + self.add_argument( + "--utr_exons", default=2, type=int, help="Maximum number of UTR exons" + ) + self.add_argument( + "--pctlen_FL", + default=70, + type=int, + help="Minimum protein length for comparisons involving " + + "FL assemblies", + ) + self.add_argument( + "--pctlen_nonFL", + default=70, + type=int, + help="Minimum protein length for comparisons involving " + + "non-FL assemblies", + ) + self.add_argument( + "--pctid_prot", + default=70, + type=int, + help="Minimum pctid allowed for protein pairwise comparison", + ) + self.add_argument( + "--pct_aln", + default=70, + type=int, + help="Minimum pct of shorter protein length aligning to " + + "update protein or isoform", + ) + self.add_argument( + "--pctovl_gene", + default=80, + type=int, + help="Minimum pct overlap among genome span of the ORF of " + + "each overlapping gene to allow merging", + ) + self.add_argument( + "--stompovl", + default="", + action="store_true", + help="Ignore alignment results, only consider genome span of ORF", + ) + self.add_argument( + "--trust_FL", + default="", + action="store_true", + help="Trust FL-status of cDNA", + ) + + def set_annot_reformat_opts(self): + self.add_argument( + "--pad0", default=6, type=int, help="Pad gene identifiers with 0" + ) + 
self.add_argument("--prefix", default="Medtr", help="Genome prefix") + self.add_argument( + "--uc", + default=False, + action="store_true", + help="Toggle gene identifier upper case", + ) + + def set_home(self, prog, default=None): + tag = f"--{prog}_home" + if default is None: # Last attempt at guessing the path + try: + default = op.dirname(which(prog)) + except: + default = None + else: + default = op.expanduser(default) + help = f"Home directory for {prog.upper()}" + self.add_argument(tag, default=default, help=help) + + def set_aligner(self, aligner="bowtie"): + valid_aligners = ("bowtie", "bwa") + self.add_argument( + "--aligner", default=aligner, choices=valid_aligners, help="Use aligner" + ) + + def set_verbose(self, help="Print detailed reports"): + self.add_argument("--verbose", default=False, action="store_true", help=help) + + +def ConfigSectionMap(Config, section): + """ + Read a specific section from a ConfigParser() object and return + a dict of all key-value pairs in that section + """ + cfg = {} + options = Config.options(section) + for option in options: + try: + cfg[option] = Config.get(section, option) + if cfg[option] == -1: + logger.debug("Skip: %s", option) + except: + logger.error("Exception on %s", option) + cfg[option] = None + return cfg + + +def get_abs_path(link_name): + source = link_name + if op.islink(source): + source = os.readlink(source) + else: + source = op.basename(source) + + link_dir = op.dirname(link_name) + source = op.normpath(op.join(link_dir, source)) + source = op.abspath(source) + if source == link_name: + return source + else: + return get_abs_path(source) + + +datadir = get_abs_path(op.join(op.dirname(__file__), "../utils/data")) + + +def datafile(x: str, datadir: str = datadir): + """ + Return the full path to the data file in the data directory. 
+ """ + return op.join(datadir, x) + + +def splitall(path): + allparts = [] + while True: + path, p1 = op.split(path) + if not p1: + break + allparts.append(p1) + allparts = allparts[::-1] + return allparts + + +def get_module_docstring(filepath): + """Get module-level docstring of Python module at filepath, e.g. 'path/to/file.py'.""" + co = compile(open(filepath).read(), filepath, "exec") + if co.co_consts and isinstance(co.co_consts[0], str): + docstring = co.co_consts[0] + else: + docstring = None + return docstring + + +def dmain(mainfile, type="action"): + cwd = op.dirname(mainfile) + pyscripts = ( + [x for x in glob(op.join(cwd, "*", "__main__.py"))] + if type == "module" + else glob(op.join(cwd, "*.py")) + ) + actions = [] + for ps in sorted(pyscripts): + action = ( + op.basename(op.dirname(ps)) + if type == "module" + else op.basename(ps).replace(".py", "") + ) + if action[0] == "_": # hidden namespace + continue + pd = get_module_docstring(ps) + action_help = ( + [ + x.rstrip(":.,\n") + for x in pd.splitlines(True) + if len(x.strip()) > 10 and x[0] != "%" + ][0] + if pd + else "no docstring found" + ) + actions.append((action, action_help)) + + a = ActionDispatcher(actions) + a.print_help() + + +def backup(filename): + bakname = filename + ".bak" + if op.exists(filename): + logger.debug("Backup `%s` to `%s`", filename, bakname) + sh("mv {0} {1}".format(filename, bakname)) + return bakname + + +def getusername(): + from getpass import getuser + + return getuser() + + +def getdomainname(): + from socket import getfqdn + + return ".".join(str(x) for x in getfqdn().split(".")[1:]) + + +def sh( + cmd, + grid=False, + infile=None, + outfile=None, + errfile=None, + append=False, + background=False, + threaded=None, + log=True, + grid_opts=None, + silent=False, + shell="/bin/bash", + check=False, + redirect_error=None, +): + """ + simple wrapper for system calls + """ + if not cmd: + return 1 + if silent: + outfile = errfile = "/dev/null" + if grid: + from 
jcvi.apps.grid import GridProcess + + pr = GridProcess( + cmd, + infile=infile, + outfile=outfile, + errfile=errfile, + threaded=threaded, + grid_opts=grid_opts, + ) + pr.start() + return pr.jobid + else: + if infile: + cat = "cat" + if infile.endswith(".gz"): + cat = "zcat" + cmd = "{0} {1} |".format(cat, infile) + cmd + if outfile and outfile not in ("-", "stdout"): + if outfile.endswith(".gz"): + cmd += " | gzip" + tag = ">" + if append: + tag = ">>" + cmd += " {0}{1}".format(tag, outfile) + if errfile: + if errfile == outfile: + errfile = "&1" + cmd += " 2>{0}".format(errfile) + if background: + cmd += " &" + + if log: + logger.debug(cmd) + + call_func = check_output if check else call + return call_func(cmd, shell=True, executable=shell, stderr=redirect_error) + + +def Popen(cmd, stdin=None, stdout=PIPE, debug=False, shell="/bin/bash"): + """ + Capture the cmd stdout output to a file handle. + """ + from subprocess import Popen as P + + if debug: + logger.debug(cmd) + # See: + proc = P(cmd, bufsize=1, stdin=stdin, stdout=stdout, shell=True, executable=shell) + return proc + + +def get_system_processor() -> Tuple[str, str]: + """ + Get the system and processor information. + """ + return platform.system(), platform.processor() + + +def is_macOS_arm() -> bool: + """ + Check if the system is macOS on ARM. + """ + system, processor = get_system_processor() + return system == "Darwin" and "arm" in processor + + +def setup_magick_home(): + """ + Set MAGICK_HOME for ImageMagick. + """ + if "MAGICK_HOME" not in os.environ: + if is_macOS_arm(): + magick_home = "/opt/homebrew/opt/imagemagick" + if op.isdir(magick_home): + os.environ["MAGICK_HOME"] = magick_home + else: + logger.warning("MAGICK_HOME not set") + + +def popen(cmd, debug=True, shell="/bin/bash"): + return Popen(cmd, debug=debug, shell=shell).stdout + + +def is_exe(fpath): + return op.isfile(fpath) and os.access(fpath, os.X_OK) + + +def which(program): + """ + Emulates the unix which command. 
+ + >>> which("cat") + "/bin/cat" + >>> which("nosuchprogram") + """ + fpath, _ = op.split(program) + if fpath: + if is_exe(program): + return program + else: + for path in os.environ["PATH"].split(os.pathsep): + exe_file = op.join(path, program) + if is_exe(exe_file): + return exe_file + + return None + + +def glob(pathname, pattern=None): + """ + Wraps around glob.glob(), but return a sorted list. + """ + import glob as gl + + if pattern: + pathname = op.join(pathname, pattern) + return natsorted(gl.glob(pathname)) + + +def iglob(pathname, patterns): + """ + Allow multiple file formats. This is also recursive. For example: + + >>> iglob("apps", "*.py,*.pyc") + """ + matches = [] + patterns = patterns.split(",") if "," in patterns else listify(patterns) + for root, dirnames, filenames in os.walk(pathname): + matching = [] + for pattern in patterns: + matching.extend(fnmatch.filter(filenames, pattern)) + for filename in matching: + matches.append(op.join(root, filename)) + return natsorted(matches) + + +def symlink(target, link_name): + try: + os.symlink(target, link_name) + except OSError as e: + if e.errno == errno.EEXIST: + os.remove(link_name) + os.symlink(target, link_name) + + +def mkdir(dirname, overwrite=False): + """ + Wraps around os.mkdir(), but checks for existence first. + """ + if op.isdir(dirname): + if overwrite: + cleanup(dirname) + os.mkdir(dirname) + logger.debug("Overwrite folder `%s`", dirname) + else: + return False # Nothing is changed + else: + try: + os.mkdir(dirname) + except: + os.makedirs(dirname) + logger.debug("`%s` not found. 
Creating new.", dirname) + + return True + + +def is_newer_file(a, b): + """ + Check if the file a is newer than file b + """ + if not (op.exists(a) and op.exists(b)): + return False + am = os.stat(a).st_mtime + bm = os.stat(b).st_mtime + return am > bm + + +def parse_multi_values(param): + values = None + if param: + if op.isfile(param): + values = list(set(x.strip() for x in open(param))) + else: + values = list(set(param.split(","))) + return values + + +def listify(a: TextCollection) -> TextCollection: + """ + Convert something to a list if it is not already a list. + """ + return a if isinstance(a, (list, tuple)) else [a] # type: ignore + + +def last_updated(a: str) -> float: + """ + Check the time since file was last updated. + """ + return time.time() - op.getmtime(a) + + +def need_update(a: TextCollection, b: TextCollection, warn: bool = False) -> bool: + """ + Check if file a is newer than file b and decide whether or not to update + file b. Can generalize to two lists. + + Args: + a: file or list of files + b: file or list of files + warn: whether or not to print warning message + + Returns: + True if file a is newer than file b + """ + a = listify(a) + b = listify(b) + + should_update = ( + any((not op.exists(x)) for x in b) + or all((os.stat(x).st_size == 0 for x in b)) + or any(is_newer_file(x, y) for x in a for y in b) + ) + if (not should_update) and warn: + logger.debug("File `%s` found. Computation skipped.", ", ".join(b)) + return should_update + + +def flatten(input_list: Iterable) -> list: + """ + Flatten a list of lists and stop at the first non-list element. + """ + ans = [] + for i in input_list: + if isinstance(i, Iterable) and not isinstance(i, str): + for subc in flatten(i): + ans.append(subc) + else: + ans.append(i) + return ans + + +def cleanup(*args: Union[str, Iterable]) -> None: + """ + Remove a bunch of files in args; ignore if not found. 
+ """ + for path in flatten(args): + if op.exists(path): + if op.isdir(path): + shutil.rmtree(path) + else: + os.remove(path) + + +def get_today(): + """ + Returns the date in 2010-07-14 format + """ + from datetime import date + + return str(date.today()) + + +def ls_ftp(dir): + """List the contents of a remote FTP server path. + + Args: + dir (URL): URL of a remote FTP server path. + + Returns: + [str]: List of remote paths available, analogous to `ls`. + """ + from urllib.parse import urlparse + from ftpretty import ftpretty + + o = urlparse(dir) + + ftp = ftpretty(o.netloc, "anonymous", "anonymous@") + return [op.basename(x) for x in ftp.list(o.path)] + + +def download( + url, filename=None, debug=True, cookies=None, handle_gzip=False, downloader=None +): + """Download URL to local + + Args: + url (str): Link to the file on the internet. + filename (str, optional): Local file name. Defaults to None. + debug (bool, optional): Print debug messages. Defaults to True. + cookies (str, optional): cookies file. Defaults to None. + handle_gzip (bool, optional): Postprocess .gz files, either compress or + uncompress. Defaults to False. + downloader (str, optional): Use a given downloader. One of wget|curl|powershell|insecure. + Defaults to None. + + Returns: + str: Local file name. 
+ """ + from urllib.parse import urlsplit + + _, _, path, _, _ = urlsplit(url) + basepath = op.basename(path) + if basepath: + url_gzipped = basepath.endswith(".gz") + filename_gzipped = filename and filename.endswith(".gz") + need_gunzip = url_gzipped and (not filename_gzipped) + need_gzip = (not url_gzipped) and filename_gzipped + if handle_gzip and ( + need_gunzip or need_gzip + ): # One more compress/decompress step after download + target = basepath + else: # Just download + target = filename or basepath + else: + need_gunzip, need_gzip = False, False + target = filename or "index.html" + + success = False + final_filename = filename or target + if op.exists(final_filename): + if debug: + logger.info("File `%s` exists. Download skipped.", final_filename) + success = True + else: + from jcvi.utils.ez_setup import get_best_downloader + + downloader = get_best_downloader(downloader=downloader) + if downloader: + try: + downloader(url, target, cookies=cookies) + success = True + except (CalledProcessError, KeyboardInterrupt) as e: + print(e, file=sys.stderr) + else: + print("Cannot find a suitable downloader", file=sys.stderr) + + if success and handle_gzip: + if need_gunzip: + sh("gzip -dc {}".format(target), outfile=filename) + cleanup(target) + elif need_gzip: + sh("gzip -c {}".format(target), outfile=filename) + cleanup(target) + + if not success: + cleanup(target) + + return final_filename + + +def getfilesize(filename, ratio=None): + rawsize = op.getsize(filename) + if not filename.endswith(".gz"): + return rawsize + + import struct + + fo = open(filename, "rb") + fo.seek(-4, 2) + r = fo.read() + fo.close() + size = struct.unpack(" 2**32: + logger.warning("Gzip file estimated uncompressed size: %d", size) + + return size + + +def main(): + actions = ( + ("expand", "move files in subfolders into the current folder"), + ("less", "enhance the unix `less` command"), + ("mdownload", "multiple download a list of files"), + ("mergecsv", "merge a set of tsv files"), 
+ ("notify", "send an email/push notification"), + ("timestamp", "record timestamps for all files in the current folder"), + ("touch", "recover timestamps for files in the current folder"), + ("waitpid", "wait for a PID to finish and then perform desired action"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def mdownload(args): + """ + %prog mdownload links.txt + + Multiple download a list of files. Use formats.html.links() to extract the + links file. + """ + from jcvi.apps.grid import Jobs + + p = OptionParser(mdownload.__doc__) + _, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (linksfile,) = args + links = [(x.strip(),) for x in open(linksfile)] + j = Jobs(download, links) + j.run() + + +def expand(args): + """ + %prog expand */* + + Move files in subfolders into the current folder. Use --symlink to create a + link instead. + """ + p = OptionParser(expand.__doc__) + p.add_argument( + "--symlink", default=False, action="store_true", help="Create symbolic link" + ) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + seen = set() + for a in args: + oa = a.replace("/", "_") + if oa in seen: + logger.debug("Name collision `%s`, ignored", oa) + continue + + cmd = "cp -s" if opts.symlink else "mv" + cmd += " {0} {1}".format(a, oa) + sh(cmd) + seen.add(oa) + + +def fname(): + return sys._getframe().f_back.f_code.co_name + + +def get_times(filename): + st = os.stat(filename) + atime = st.st_atime + mtime = st.st_mtime + return atime, mtime + + +def timestamp(args): + """ + %prog timestamp path > timestamp.info + + Record the timestamps for all files in the current folder. + filename atime mtime + + This file can be used later to recover previous timestamps through touch(). 
+ """ + p = OptionParser(timestamp.__doc__) + _, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (path,) = args + for root, _, files in os.walk(path): + for f in files: + filename = op.join(root, f) + atime, mtime = get_times(filename) + print(filename, atime, mtime) + + +def touch(args): + """ + %prog touch timestamp.info + + Recover timestamps for files in the current folder. + CAUTION: you must execute this in the same directory as timestamp(). + """ + p = OptionParser(touch.__doc__) + _, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (info,) = args + fp = open(info) + for row in fp: + path, atime, mtime = row.split() + atime = float(atime) + mtime = float(mtime) + current_atime, current_mtime = get_times(path) + + # Check if the time has changed, with resolution up to 1 sec + if int(atime) == int(current_atime) and int(mtime) == int(current_mtime): + continue + + times = [ctime(x) for x in (current_atime, current_mtime, atime, mtime)] + msg = "{0} : ".format(path) + msg += "({0}, {1}) => ({2}, {3})".format(*times) + print(msg, file=sys.stderr) + os.utime(path, (atime, mtime)) + + +def snapshot(fp, p, fsize, counts=None): + pos = int(p * fsize) + print("==>> File `{0}`: {1} ({2}%)".format(fp.name, pos, int(p * 100))) + fp.seek(pos) + next(fp) + for i, row in enumerate(fp): + if counts and i > counts: + break + try: + sys.stdout.write(row) + except IOError: + break + + +def less(args): + """ + %prog less filename position | less + + Enhance the unix `less` command by seeking to a file location first. This is + useful to browse big files. Position is relative 0.00 - 1.00, or bytenumber. 
+ + $ %prog less myfile 0.1 # Go to 10% of the current file and streaming + $ %prog less myfile 0.1,0.2 # Stream at several positions + $ %prog less myfile 100 # Go to certain byte number and streaming + $ %prog less myfile 100,200 # Stream at several positions + $ %prog less myfile all # Generate a snapshot every 10% (10%, 20%, ..) + """ + from jcvi.formats.base import must_open + + p = OptionParser(less.__doc__) + _, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + filename, pos = args + fsize = getfilesize(filename) + + if pos == "all": + pos = [x / 10.0 for x in range(0, 10)] + else: + pos = [float(x) for x in pos.split(",")] + + if pos[0] > 1: + pos = [x / fsize for x in pos] + + if len(pos) > 1: + counts = 20 + else: + counts = None + + fp = must_open(filename) + for p in pos: + snapshot(fp, p, fsize, counts=counts) + + +# notification specific variables +valid_notif_methods = ["email"] +available_push_api = {"push": ["pushover", "nma", "pushbullet"]} + + +def pushover( + message, token, user, title="JCVI: Job Monitor", priority=0, timestamp=None +): + """ + pushover.net python API + + + """ + assert -1 <= priority <= 2, "Priority should be an int() between -1 and 2" + + if timestamp is None: + from time import time + + timestamp = int(time()) + + retry, expire = (300, 3600) if priority == 2 else (None, None) + + conn = HTTPSConnection("api.pushover.net:443") + conn.request( + "POST", + "/1/messages.json", + urlencode( + { + "token": token, + "user": user, + "message": message, + "title": title, + "priority": priority, + "timestamp": timestamp, + "retry": retry, + "expire": expire, + } + ), + {"Content-type": "application/x-www-form-urlencoded"}, + ) + conn.getresponse() + + +def nma(description, apikey, event="JCVI: Job Monitor", priority=0): + """ + notifymyandroid.com API + + + """ + assert -2 <= priority <= 2, "Priority should be an int() between -2 and 2" + + conn = HTTPSConnection("www.notifymyandroid.com") + 
conn.request( + "POST", + "/publicapi/notify", + urlencode( + { + "apikey": apikey, + "application": "python notify", + "event": event, + "description": description, + "priority": priority, + } + ), + {"Content-type": "application/x-www-form-urlencoded"}, + ) + conn.getresponse() + + +def pushbullet(body, apikey, device, title="JCVI: Job Monitor"): + """ + pushbullet.com API + + + """ + import base64 + + headers = {} + auth = base64.encodestring("{0}:".format(apikey).encode("utf-8")).strip() + headers["Authorization"] = "Basic {0}".format(auth) + headers["Content-type"] = "application/x-www-form-urlencoded" + + conn = HTTPSConnection("api.pushbullet.com".format(apikey)) + conn.request( + "POST", + "/api/pushes", + urlencode({"iden": device, "type": "note", "title": title, "body": body}), + headers, + ) + conn.getresponse() + + +def pushnotify(subject, message, api="pushover", priority=0, timestamp=None): + """ + Send push notifications using pre-existing APIs + + Requires a config `pushnotify.ini` file in the user home area containing + the necessary api tokens and user keys. + + Default API: "pushover" + + Config file format: + ------------------- + [pushover] + token: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + user: yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy + + [nma] + apikey: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz + + [pushbullet] + apikey: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + iden: dddddddddddddddddddddddddddddddddddd + """ + assert ( + type(priority) is int and -1 <= priority <= 2 + ), "Priority should be and int() between -1 and 2" + + cfgfile = op.join(op.expanduser("~"), "pushnotify.ini") + Config = ConfigParser() + if op.exists(cfgfile): + Config.read(cfgfile) + else: + sys.exit( + "Push notification config file `{0}`".format(cfgfile) + " does not exist!" 
def is_valid_email(email):
    """
    Return True when `email` is a syntactically valid RFC822 address.

    The pattern is assembled bottom-up from the RFC822 grammar productions
    (atom, quoted-string, domain-literal, ...) and must match the whole string.

    RFC822 Email Address Regex
    --------------------------

    Originally written by Cal Henderson
    c.f. http://iamcal.com/publish/articles/php/parsing_email/

    Translated to Python by Tim Fletcher, with changes suggested by Dan Kubb.

    Licensed under a Creative Commons Attribution-ShareAlike 2.5 License
    http://creativecommons.org/licenses/by-sa/2.5/
    """
    import re

    # Character classes
    qtext = "[^\\x0d\\x22\\x5c\\x80-\\xff]"
    dtext = "[^\\x0d\\x5b-\\x5d\\x80-\\xff]"
    atom = "[^\\x00-\\x20\\x22\\x28\\x29\\x2c\\x2e\\x3a-\\x3c\\x3e\\x40\\x5b-\\x5d\\x7f-\\xff]+"
    quoted_pair = "\\x5c[\\x00-\\x7f]"

    # Composite tokens
    quoted_string = "\\x22(?:%s|%s)*\\x22" % (qtext, quoted_pair)
    domain_literal = "\\x5b(?:%s|%s)*\\x5d" % (dtext, quoted_pair)
    word = "(?:%s|%s)" % (atom, quoted_string)
    sub_domain = "(?:%s|%s)" % (atom, domain_literal)

    # local-part "@" domain
    local_part = "%s(?:\\x2e%s)*" % (word, word)
    domain = "%s(?:\\x2e%s)*" % (sub_domain, sub_domain)
    addr_spec = "%s\\x40%s" % (local_part, domain)

    return re.match(r"\A%s\Z" % addr_spec, email) is not None
timestamp in unix format", + ) + + opts, args = p.parse_args(args) + + if len(args) == 0: + logger.error("Please provide a brief message to be sent") + sys.exit(not p.print_help()) + + subject = opts.subject + message = " ".join(args).strip() + + if opts.method == "email": + toaddr = opts.email.split(",") # TO address should be in a list + for addr in toaddr: + if not is_valid_email(addr): + logger.debug("Email address `%s` is not valid!", addr) + sys.exit() + send_email(fromaddr, toaddr, subject, message) + else: + pushnotify( + subject, + message, + api=opts.api, + priority=opts.priority, + timestamp=opts.timestamp, + ) + + +def pid_exists(pid): + """Check whether pid exists in the current process table.""" + if pid < 0: + return False + + try: + os.kill(pid, 0) + except OSError as e: + return e.errno == errno.EPERM + else: + return True + + +class TimeoutExpired(Exception): + pass + + +def _waitpid(pid, interval=None, timeout=None): + """ + Wait for process with pid 'pid' to terminate and return its + exit status code as an integer. + + If pid is not a children of os.getpid() (current process) just + waits until the process disappears and return None. + + If pid does not exist at all return None immediately. + + Raise TimeoutExpired on timeout expired (if specified). 
+ + Source: http://code.activestate.com/recipes/578022-wait-for-pid-and-check-for-pid-existance-posix + """ + + def check_timeout(delay): + if timeout is not None: + if time.time() >= stop_at: + raise TimeoutExpired + time.sleep(delay) + return min(delay * 2, interval) + + if timeout is not None: + waitcall = lambda: os.waitpid(pid, os.WNOHANG) + stop_at = time.time() + timeout + else: + waitcall = lambda: os.waitpid(pid, 0) + + delay = 0.0001 + while 1: + try: + retpid, status = waitcall() + except OSError as err: + if err.errno == errno.EINTR: + delay = check_timeout(delay) + continue + elif err.errno == errno.ECHILD: + # This has two meanings: + # - pid is not a child of os.getpid() in which case + # we keep polling until it's gone + # - pid never existed in the first place + # In both cases we'll eventually return None as we + # can't determine its exit status code. + while 1: + if pid_exists(pid): + delay = check_timeout(delay) + else: + return + else: + raise + else: + if retpid == 0: + # WNOHANG was used, pid is still running + delay = check_timeout(delay) + continue + + # process exited due to a signal; return the integer of + # that signal + if os.WIFSIGNALED(status): + return os.WTERMSIG(status) + # process exited using exit(2) system call; return the + # integer exit(2) system call has been called with + elif os.WIFEXITED(status): + return os.WEXITSTATUS(status) + else: + # should never happen + raise RuntimeError("unknown process exit status") + + +def waitpid(args): + """ + %prog waitpid PID ::: "./command_to_run param1 param2 ...." 
+ + Given a PID, this script will wait for the PID to finish running and + then perform a desired action (notify user and/or execute a new command) + + Specify "--notify=METHOD` to send the user a notification after waiting for PID + Specify `--grid` option to send the new process to the grid after waiting for PID + """ + import shlex + + valid_notif_methods.extend(flatten(available_push_api.values())) + + p = OptionParser(waitpid.__doc__) + p.add_argument( + "--notify", + default="email", + choices=valid_notif_methods, + help="Specify type of notification to be sent after waiting", + ) + p.add_argument( + "--interval", + default=120, + type=int, + help="Specify PID polling interval in seconds", + ) + p.add_argument("--message", help="Specify notification message") + p.set_email() + p.set_grid() + opts, args = p.parse_args(args) + + if len(args) == 0: + sys.exit(not p.print_help()) + + sep = ":::" + cmd = None + if sep in args: + sepidx = args.index(sep) + cmd = " ".join(args[sepidx + 1 :]).strip() + args = args[:sepidx] + + pid = int(" ".join(args).strip()) + + status = pid_exists(pid) + if status: + if opts.message: + msg = opts.message + else: + get_origcmd = "ps -p {0} -o cmd h".format(pid) + msg = check_output(shlex.split(get_origcmd)).strip() + _waitpid(pid, interval=opts.interval) + else: + logger.debug("Process with PID %d does not exist", pid) + sys.exit() + + if opts.notify: + notifycmd = ["[{0}] `{1}`".format(gethostname(), msg)] + if opts.notify != "email": + notifycmd.append("--method={0}".format("push")) + notifycmd.append("--api={0}".format(opts.notify)) + else: + notifycmd.append("--email={0}".format(opts.email)) + notify(notifycmd) + + if cmd is not None: + bg = False if opts.grid else True + sh(cmd, grid=opts.grid, background=bg) + + +def get_config(path): + config = RawConfigParser() + try: + config.read(path) + except ParsingError: + e = sys.exc_info()[1] + logger.error( + "There was a problem reading or parsing your credentials file: %s", + 
def sample_N(a: Collection, N: int, seed: Optional[int] = None) -> List:
    """
    When size of N is > size of a, random.sample() will emit an error:
    ValueError: sample larger than population

    This method handles such restrictions by repeatedly sampling when that
    happens. Guaranteed to cover all items if N is > size of a.

    Args:
        a: Items to draw from. Any collection is accepted; it is copied into
            a list first because random.sample() requires a sequence.
        N: Number of items to return.
        seed: Seed for a private random generator, so the caller's global
            `random` state is left untouched.

    Raises:
        ValueError: if `a` is empty while N > 0, or if N is negative.

    Examples:
    >>> sample_N([1, 2, 3], 2, seed=666)
    [2, 3]
    >>> sample_N([1, 2, 3], 3, seed=666)
    [2, 3, 1]
    >>> sample_N([1, 2, 3], 4, seed=666)
    [2, 3, 1, 2]
    """
    import random

    population = list(a)
    size = len(population)
    if size == 0 and N > 0:
        # The refill loop below would never make progress on an empty population
        raise ValueError("Cannot sample from an empty collection")

    rng = random.Random(seed)

    ret = []
    while N > size:
        # Each full pass is a permutation, so every item appears once per pass
        ret += rng.sample(population, size)
        N -= size

    return ret + rng.sample(population, N)
+ + Args: + element (ElementTree): XML parse tree + """ + tag = element.tag + assert tag in self.TAGS_OF_INTEREST + self.url = None + if tag == "file": + self.name = element.attrib["filename"] + self.url = element.attrib["url"] + else: + self.name = element.attrib["name"] + self.tag = tag + for child in list(element): + if child.tag not in self.TAGS_OF_INTEREST: + continue + child = PhytozomePath(child) + self[child.name] = child + + @property + def has_genome_release(self): + """Only the folders that contain both `assembly` and `annotation` are of interest here.""" + return "assembly" in self and "annotation" in self + + def download(self, name, base_url, cookies, downloader=None): + """Download the file if it has an URL. Otherwise, this will recursively search the children. + + See also: + + + Args: + name (str, optional): Name of the file. Defaults to None. + base_url (str): Link to the file on the internet. + cookies (str, optional): cookies file. Defaults to None. + downloader (str, optional): Use a given downloader. One of wget|curl|powershell|insecure. + Defaults to None. 
class MartXMLParser(ElementTree):
    """Parse BioMart registry/configuration XML into Mart, Attribute and Filter objects."""

    def __init__(self, xml_data):
        """
        Args:
            xml_data: File name or file object holding the XML document.
        """
        super().__init__()
        self.parse(xml_data)

    def parse_marts(self):
        """Yield a Mart for every `MartURLLocation` element flagged visible="1"."""
        # Element/ElementTree.getiterator() no longer exists in Python >= 3.9;
        # iter() is the replacement.
        for t in self.iter("MartURLLocation"):
            # Elements without a `visible` attribute are treated as hidden
            if t.attrib.get("visible") == "1":
                yield Mart(**t.attrib)

    def parse_configuration(self):
        """Yield every Attribute, then every Filter (with its options attached)."""
        # the attributes
        for t in self.iter("AttributeDescription"):
            yield Attribute(**t.attrib)

        # the filters
        for t in self.iter("FilterDescription"):
            f = Filter(**t.attrib)
            options = [Option(**x.attrib) for x in t.iter("Option")]
            f.add_arguments(options)
            yield f
class Dataset(object):
    """
    Connect to a specified dataset in the database
    """

    def __init__(self, name, description, last_updated, mart):
        """
        Args:
            name: Dataset name, sent as the `dataset` request parameter.
            description: Description of the dataset.
            last_updated: Last-update stamp of the dataset, as a string.
            mart: Owning Mart; its `url` is used for requests.
        """
        self.name = name
        self.description = description
        self.last_updated = last_updated
        self.mart = mart

        # Both are filled lazily by get_configuration(), keyed by internalName
        self.attributes = {}
        self.filters = {}

    def __str__(self):
        return "\t".join((self.name, self.description, self.last_updated))

    def get_configuration(self):
        """Download the dataset configuration and index its attributes and filters."""
        # urllib.urlencode()/urllib.urlopen() are Python 2 names; on Python 3
        # they live in urllib.parse / urllib.request.
        from urllib.parse import urlencode
        from urllib.request import urlopen

        params = urlencode(dict(type="configuration", dataset=self.name))
        # POST bodies must be bytes on Python 3
        xml_data = urlopen(self.mart.url, params.encode("ascii"))

        parser = MartXMLParser(xml_data)
        for t in parser.parse_configuration():
            if isinstance(t, Attribute):
                self.attributes[t.internalName] = t
            elif isinstance(t, Filter):
                self.filters[t.internalName] = t

    def list_attributes(self):
        """Print all attributes, fetching the configuration first if needed."""
        if len(self.attributes) == 0:
            self.get_configuration()
        # The values define no ordering, so sort on their string form
        for m in sorted(self.attributes.values(), key=str):
            print(m)

    def list_filters(self):
        """Print all filters, fetching the configuration first if needed."""
        if len(self.filters) == 0:
            self.get_configuration()
        for m in sorted(self.filters.values(), key=str):
            print(m)

    def query(self, filters=None, attributes=()):
        """
        Run a query against this dataset.

        Args:
            filters: Mapping of filter name to value; None means no filter.
            attributes: Iterable of attribute names to retrieve.

        Returns:
            Whatever MartQuery.execute() returns.
        """
        q = MartQuery(dataset=self)
        q.add_filters(**(filters or {}))
        q.add_attributes(attributes)
        return q.execute()
class MartArgument(object):
    """Attribute bag: every keyword passed to the constructor becomes an instance attribute."""

    def __init__(self, **attrib):
        # Replace the instance dict wholesale with a private copy of the keywords
        self.__dict__ = dict(attrib)

    def __str__(self):
        # Concrete class name followed by the attribute mapping
        return "".join((type(self).__name__, str(self.__dict__)))
def bed(args):
    """
    %prog bed genes.ids

    Get gene bed from phytozome. `genes.ids` contains the list of gene you want
    to pull from Phytozome. Write output to .bed file.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    p = OptionParser(bed.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (idsfile,) = args
    with open(idsfile) as fp:
        # De-duplicate, and drop blank lines so no empty gene id is queried
        ids = set(x.strip() for x in fp if x.strip())
    data = get_bed_from_phytozome(list(ids))

    pf = idsfile.rsplit(".", 1)[0]
    bedfile = pf + ".bed"
    # Count the rows actually written: the enumerate() index also counted
    # skipped blank rows and was unbound when the response was empty.
    nrecords = 0
    with open(bedfile, "w") as fw:
        for row in data:
            if isinstance(row, bytes):
                # HTTP responses iterate as bytes on Python 3
                row = row.decode("utf-8")
            row = row.strip()
            if row == "":
                continue

            print(row, file=fw)
            nrecords += 1

    logger.debug("A total of %d records written to `%s`.", nrecords, bedfile)
See complete list here: " + "http://www.ncbi.nlm.nih.gov/books/NBK52640/#chapter1.Installation", + ) + p.set_align(evalue=0.01) + p.add_argument( + "--best", + default=1, + type=int, + help="Only look for best N hits", + ) + p.set_cpus() + p.add_argument( + "--nprocs", + default=1, + type=int, + help="number of BLAST processes to run in parallel. " + + "split query.fa into `nprocs` chunks, " + + "each chunk uses -num_threads=`cpus`", + ) + p.set_params() + p.set_outfile() + opts, args = p.parse_args() + + if len(args) != 2 or opts.blast_program is None: + sys.exit(not p.print_help()) + + bfasta_fn, afasta_fn = args + for fn in (afasta_fn, bfasta_fn): + assert op.exists(fn) + + afasta_fn = op.abspath(afasta_fn) + bfasta_fn = op.abspath(bfasta_fn) + out_fh = must_open(opts.outfile, "w") + + extra = opts.extra + blast_path = opts.blast_path + blast_program = opts.blast_program + + blast_bin = blast_path or blast_program + if op.basename(blast_bin) != blast_program: + blast_bin = op.join(blast_bin, blast_program) + + nprocs, cpus = opts.nprocs, opts.cpus + if nprocs > 1: + logger.debug("Dispatch job to %d processes", nprocs) + outdir = "outdir" + fs = split([afasta_fn, outdir, str(nprocs)]) + queries = fs.names + else: + queries = [afasta_fn] + + dbtype = "prot" if op.basename(blast_bin) in ("blastp", "blastx") else "nucl" + + db = bfasta_fn + if dbtype == "prot": + nin = db + ".pin" + else: + nin00 = db + ".00.nin" + nin = nin00 if op.exists(nin00) else (db + ".nin") + + run_formatdb(infile=db, outfile=nin, dbtype=dbtype) + + lock = Lock() + + blastplus_template = "{0} -db {1} -outfmt {2}" + blast_cmd = blastplus_template.format(blast_bin, bfasta_fn, opts.format) + blast_cmd += " -evalue {0} -max_target_seqs {1}".format(opts.evalue, opts.best) + blast_cmd += " -num_threads {0}".format(cpus) + if extra: + blast_cmd += " " + extra.strip() + + args = [(out_fh, blast_cmd, query, lock) for query in queries] + g = Jobs(target=blastplus, args=args) + g.run() + + +if __name__ 
class BowtieLogFile(BaseFile):
    """
    Simple file that contains mapping rate:

    100000 reads; of these:
      100000 (100.00%) were unpaired; of these:
        88453 (88.45%) aligned 0 times
        9772 (9.77%) aligned exactly 1 time
        1775 (1.77%) aligned >1 times
    11.55% overall alignment rate
    """

    def __init__(self, filename):
        """Read the six summary lines of `filename`, in the order shown above."""
        super().__init__(filename)
        # The context manager closes the handle even when a line fails to parse
        with open(filename) as fp:
            # Only the first whitespace-delimited token of each line is used
            self.total = int(first_tag(fp))
            self.unpaired = int(first_tag(fp))
            self.unmapped = int(first_tag(fp))
            self.unique = int(first_tag(fp))
            self.multiple = int(first_tag(fp))
            self.rate = float(first_tag(fp).rstrip("%"))
        self.mapped = self.unique + self.multiple

    def __str__(self):
        return "Total mapped: {0}".format(percentage(self.mapped, self.total))

    __repr__ = __str__
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of args.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    # Returns (samfile, logfile).
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_argument(
        "--full",
        default=False,
        action="store_true",
        help="Enforce end-to-end alignment [default: local]",
    )
    p.add_argument(
        "--reorder",
        default=False,
        action="store_true",
        help="Keep the input read order",
    )
    p.add_argument(
        "--null",
        default=False,
        action="store_true",
        help="Do not write to SAM/BAM output",
    )
    p.add_argument(
        "--fasta", default=False, action="store_true", help="Query reads are FASTA"
    )
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)

    opts, args = p.parse_args(args)
    extra = opts.extra
    mo = opts.mateorientation
    # "+-" adds no flag. The leading space keeps the orientation flag from
    # being glued onto user-supplied extra options ("-k 5--rf").
    if mo == "-+":
        extra += " --rf"
    elif mo != "+-":
        extra += " --ff"

    PE = True
    if len(args) == 2:
        logger.debug("Single-end alignment")
        PE = False
    elif len(args) == 3:
        logger.debug("Paired-end alignment")
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    fasta = opts.fasta
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    samfile, mapped, unmapped = get_samfile(
        readfile, dbfile, bowtie=True, mapped=mapped, unmapped=unmapped, bam=opts.bam
    )
    logfile = prefix + ".log"
    if not fasta:
        # Only computed for non-FASTA input; used for the --phred flag below
        offset = guessoffset([readfile])

    if not need_update(dbfile, samfile):
        logger.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    if fasta:
        cmd += " -f"
    else:
        cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    if opts.null:
        samfile = "/dev/null"

    cmd = output_bam(cmd, samfile)
    sh(cmd)
    # Echo the log that bowtie2 wrote to `logfile`, closing the handle afterwards
    with open(logfile) as fp:
        print(fp.read(), file=sys.stderr)

    return samfile, logfile
def check_index(dbfile):
    """
    Make sure `dbfile` has a bwa index and return its absolute path.

    The presence of `<dbfile>.sa` is the marker that `bwa index` already ran.
    """
    dbfile = get_abs_path(dbfile)
    suffix_array = dbfile + ".sa"

    if op.exists(suffix_array):
        logger.error("`{0}` exists. `bwa index` already run.".format(suffix_array))
        return dbfile

    sh("bwa index {0}".format(dbfile))
    return dbfile
def set_align_options(p):
    """Register the options shared by align() and batch() on parser `p`."""
    read_types = ("pacbio", "pbread", "ont2d", "intractg")
    # (flag, keyword arguments) pairs, registered in declaration order
    shared_options = (
        ("--bwa", dict(default="bwa", help="Run bwa at this path")),
        ("--rg", dict(help="Read group")),
        ("--readtype", dict(choices=read_types, help="Read type in bwa-mem")),
    )
    for flag, kwargs in shared_options:
        p.add_argument(flag, **kwargs)
    p.set_cutoff(cutoff=800)
def mem(args, opts):
    """
    %prog mem database.fasta read1.fq [read2.fq]

    Wrapper for `bwa mem`. Output will be read1.sam.
    """
    # Contract: returns (cmd, samfile). `cmd` is "" when need_update() reports
    # that `samfile` is already current with respect to the first read file.
    # Reads opts.readtype, opts.rg, opts.bam, opts.unmapped, opts.bwa,
    # opts.cpus and opts.extra.
    dbfile, read1file = args[:2]
    readtype = opts.readtype
    platform = readtype or "illumina"

    prefix = op.basename(read1file).split(".")[0]
    rg = opts.rg or r"@RG\tID:{0}\tSM:sm\tLB:lb\tPL:{1}".format(prefix, platform)
    dbfile = check_index(dbfile)
    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )
    if not need_update(read1file, samfile):
        logger.error("`{0}` exists. `bwa mem` already run.".format(samfile))
        return "", samfile

    # -M Mark shorter split hits as secondary (for Picard compatibility).
    parts = [
        "{} mem".format(opts.bwa),
        "-M -t {0}".format(opts.cpus),
        '-R "{0}"'.format(rg),
    ]
    if readtype:
        parts.append("-x {0}".format(readtype))
    # Every piece is joined with a single space. Previously the extra options
    # were concatenated directly onto the database path ("-k 10db.fasta").
    if opts.extra:
        parts.append(opts.extra)
    # Use the path returned by check_index() in place of args[0], without
    # writing back into the caller's list.
    parts.append(dbfile)
    parts.extend(args[1:])

    return " ".join(parts), samfile
def filter(args):
    """
    %prog filter *.consensus.fasta

    Filter consensus sequence with min cluster size.
    """
    # NOTE: the docstring above doubles as the CLI usage text.
    # For every input FASTA, consensus records whose cluster size is at least
    # --minsize are written to --outfile with an "sNNN_" id prefix (NNN is the
    # zero-padded index of the input file); singleton records are only counted.
    from jcvi.formats.fasta import Fasta, SeqIO

    p = OptionParser(filter.__doc__)
    p.add_argument("--minsize", default=2, type=int, help="Minimum cluster size")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fastafiles = args
    minsize = opts.minsize
    totalreads = totalassembled = 0
    fw = must_open(opts.outfile, "w")
    for i, fastafile in enumerate(fastafiles):
        f = Fasta(fastafile, lazy=True)
        pf = "s{0:03d}".format(i)
        nreads = nsingletons = nclusters = 0
        for desc, rec in f.iterdescriptions_ordered():
            nclusters += 1
            if desc.startswith("singleton"):
                nsingletons += 1
                nreads += 1
                continue
            # consensus_for_cluster_0 with 63 sequences
            name, w, size, seqs = desc.split()
            assert w == "with"
            size = int(size)
            nreads += size
            if size < minsize:
                continue
            rec.description = rec.description.split(None, 1)[-1]
            rec.id = pf + "_" + rec.id
            SeqIO.write(rec, fw, "fasta")
        logger.debug("Scanned {0} clusters with {1} reads ..".format(nclusters, nreads))
        cclusters, creads = nclusters - nsingletons, nreads - nsingletons
        # A file holding only singletons (or no records at all) has zero
        # consensus clusters; report an average of 0 instead of dividing by 0.
        avg = creads / cclusters if cclusters else 0
        logger.debug(
            "Saved {0} clusters (min={1}) with {2} reads (avg:{3}) [{4}]".format(
                cclusters, minsize, creads, avg, pf
            )
        )
        totalreads += nreads
        totalassembled += nreads - nsingletons
    logger.debug("Total assembled: {0}".format(percentage(totalassembled, totalreads)))
def summary(args):
    """
    %prog summary cdhit.clstr

    Parse cdhit.clstr file to get distribution of cluster sizes.
    """
    # NOTE: the docstring above doubles as the CLI usage text.
    from jcvi.graphics.histogram import loghistogram

    parser = OptionParser(summary.__doc__)
    _, positional = parser.parse_args(args)
    if len(positional) != 1:
        sys.exit(not parser.print_help())

    # One entry per cluster: the number of members parsed for that cluster.
    clusters = ClstrFile(positional[0])
    cluster_sizes = [size for size in clusters.iter_sizes()]
    loghistogram(cluster_sizes, summary=True)
+ """ + p = OptionParser(deduplicate.__doc__) + p.set_align(pctid=96, pctcov=0) + p.add_argument( + "--fast", + default=False, + action="store_true", + help="Place sequence in the first cluster", + ) + p.add_argument( + "--consensus", + default=False, + action="store_true", + help="Compute consensus sequences", + ) + p.add_argument( + "--reads", + default=False, + action="store_true", + help="Use `cd-hit-454` to deduplicate", + ) + p.add_argument( + "--samestrand", + default=False, + action="store_true", + help="Enforce same strand alignment", + ) + p.set_home("cdhit") + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + identity = opts.pctid / 100.0 + fastafile, qualfile = fasta([fastafile, "--seqtk"]) + + ocmd = "cd-hit-454" if opts.reads else "cd-hit-est" + cmd = op.join(opts.cdhit_home, ocmd) + cmd += " -c {0}".format(identity) + if ocmd == "cd-hit-est": + cmd += " -d 0" # include complete defline + if opts.samestrand: + cmd += " -r 0" + if not opts.fast: + cmd += " -g 1" + if opts.pctcov != 0: + cmd += " -aL {0} -aS {0}".format(opts.pctcov / 100.0) + + dd = fastafile + ".P{0}.cdhit".format(opts.pctid) + clstr = dd + ".clstr" + + cmd += " -M 0 -T {0} -i {1} -o {2}".format(opts.cpus, fastafile, dd) + if need_update(fastafile, (dd, clstr)): + sh(cmd) + + if opts.consensus: + cons = dd + ".consensus" + cmd = op.join(opts.cdhit_home, "cdhit-cluster-consensus") + cmd += " clustfile={0} fastafile={1} output={2} maxlen=1".format( + clstr, fastafile, cons + ) + if need_update((clstr, fastafile), cons): + sh(cmd) + + return dd + + +if __name__ == "__main__": + main() diff --git a/jcvi/apps/emboss.py b/jcvi/apps/emboss.py new file mode 100644 index 00000000..01764208 --- /dev/null +++ b/jcvi/apps/emboss.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Run EMBOSS programs. 
def needle(args):
    """
    %prog needle nw.pairs a.pep.fasta b.pep.fasta

    Take protein pairs and needle them
    Automatically writes output file `nw.scores`
    """
    # NOTE: the docstring above doubles as the CLI usage text.
    # Each row of the pairs file names one record from each FASTA; the two
    # records are written to per-pair temporary FASTA files and handed to
    # _needle() on a process pool. Result lines accumulate in a manager list.
    from os.path import splitext

    from jcvi.formats.fasta import Fasta, SeqIO

    p = OptionParser(needle.__doc__)

    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    manager = mp.Manager()
    results = manager.list()
    needle_pool = mp.Pool(processes=mp.cpu_count())

    pairsfile, apep, bpep = args
    afasta, bfasta = Fasta(apep), Fasta(bpep)
    fp = must_open(pairsfile)
    for i, row in enumerate(fp):
        a, b = row.split()
        a, b = afasta[a], bfasta[b]
        fa, fb = must_open("{0}_{1}_a.fasta".format(pairsfile, i), "w"), must_open(
            "{0}_{1}_b.fasta".format(pairsfile, i), "w"
        )
        SeqIO.write([a], fa, "fasta")
        SeqIO.write([b], fb, "fasta")
        fa.close()
        fb.close()

        needlefile = "{0}_{1}_ab.needle".format(pairsfile, i)
        needle_pool.apply_async(
            _needle, (fa.name, fb.name, needlefile, a.id, b.id, results)
        )

    needle_pool.close()
    needle_pool.join()

    fp.close()

    # Strip only the final extension. The previous `rsplit(".")[0]` split on
    # every dot, so "./data/nw.pairs" produced ".scores" and "a.b.pairs"
    # produced "a.scores".
    scoresfile = "{0}.scores".format(splitext(pairsfile)[0])
    fw = must_open(scoresfile, "w")
    for result in results:
        print(result, file=fw)
    fw.close()
logger.debug("Search term %s", term) + success = False + ids = None + if not term: + continue + + while not success: + try: + search_handle = Entrez.esearch(db=db, retmax=retmax, term=term) + rec = Entrez.read(search_handle) + success = True + ids = rec["IdList"] + except (HTTPError, URLError, RuntimeError, KeyError) as e: + logger.error(e) + logger.debug("wait 5 seconds to reconnect...") + time.sleep(5) + + if not ids: + logger.error("term {0} not found".format(term)) + continue + + assert ids + nids = len(ids) + if nids > 1: + logger.debug("A total of {0} results found.".format(nids)) + + if batchsize != 1: + logger.debug("Use a batch size of {0}.".format(batchsize)) + + ids = list(grouper(ids, batchsize)) + + for id in ids: + id = [x for x in id if x] + size = len(id) + id = ",".join(id) + + success = False + while not success: + try: + fetch_handle = Entrez.efetch( + db=db, id=id, rettype=rettype, email=email + ) + success = True + except (HTTPError, URLError, RuntimeError) as e: + logger.error(e) + logger.debug("wait 5 seconds to reconnect...") + time.sleep(5) + + yield id, size, term, fetch_handle + + +def main(): + + actions = ( + ("entrez", "fetch records from entrez using a list of GenBank accessions"), + ("bisect", "determine the version of the accession by querying entrez"), + ( + "phytozome9", + "retrieve genomes and annotations from phytozome version 9.0 (legacy)", + ), + ("phytozome", "retrieve genomes and annotations from phytozome"), + ("ensembl", "retrieve genomes and annotations from ensembl"), + ("sra", "retrieve files from SRA via the sra-instant FTP"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def ensembl(args): + """ + %prog ensembl species + + Retrieve genomes and annotations from ensembl FTP. Available species + listed below. Use comma to give a list of species to download. 
def get_cookies(cookies=PHYTOZOME_COOKIES):
    """Return the path of a usable cookies file, or None on failure.

    An existing cookies file for which `last_updated()` returns less than
    3600 is reused as is. Otherwise curl posts the login form to the JGI
    sign-on URL, reading and writing cookies at `cookies`.

    Args:
        cookies (str): path of the cookies file.

    Returns:
        The `cookies` path, or None when curl is not installed or the
        cookies file was not created by the sign-on request.
    """
    import shlex

    from jcvi.utils.console import console

    # Check if cookies is still good
    if op.exists(cookies) and last_updated(cookies) < 3600:
        return cookies

    # Look for curl before prompting: no point asking for a password that
    # cannot be used.
    curlcmd = which("curl")
    if curlcmd is None:
        logger.error("curl command not installed. Aborting.")
        return None

    if console.is_terminal:
        username = console.input("[bold green]Phytozome Login: ")
        pw = console.input("[bold green]Phytozome Password: ", password=True)
    else:
        username, pw = None, None

    # The command is run through a shell, so every user-supplied value is
    # quoted with shlex.quote(); a quote character in the password used to
    # break out of the hand-written '...' quoting.
    cmd = "{} https://signon.jgi.doe.gov/signon/create".format(curlcmd)
    cmd += " --data-urlencode {0} --data-urlencode {1} -b {2} -c {2}".format(
        shlex.quote("login={0}".format(username)),
        shlex.quote("password={0}".format(pw)),
        shlex.quote(cookies),
    )
    # log=False keeps the command (which contains the password) out of the log.
    sh(cmd, outfile="/dev/null", errfile="/dev/null", log=False)
    if not op.exists(cookies):
        logger.error("Cookies file `{}` not created. Aborting.".format(cookies))
        return None

    return cookies
def _first_with_suffix(names, suffix):
    """Return the first name in `names` ending with `suffix`, or None."""
    return next((x for x in names if x.endswith(suffix)), None)


def download_species_phytozome(
    genomes, species, valid_species, base_url, cookies, assembly=False, downloader=None
):
    """Download assembly FASTA and annotation GFF.

    Args:
        genomes (dict): Dictionary parsed from Globus XML.
        species (str): Target species to download.
        valid_species (List[str]): Allowed set of species
        base_url (str): URL.
        cookies (str): cookies file path.
        assembly (bool, optional): Do we download assembly FASTA (can be big).
            Defaults to False.
        downloader (str, optional): Use a given downloader. One of wget|curl|powershell|insecure.
            Defaults to None.

    Returns:
        dict: whatever each folder's ``download()`` returned, stored under the
        keys "asm", "gff" and "cds". A key is absent when no file with the
        expected suffix is listed; the dict is empty when `species` has no
        entry in `genomes`.

    Raises:
        AssertionError: if `species` is not in `valid_species`.
    """
    assert species in valid_species, "{} is not in the species list".format(species)
    res = {}
    genome = genomes.get(species)
    if not genome:
        return res

    # (result key, folder name, file suffix); the assembly is opt-in.
    wanted = []
    if assembly:
        wanted.append(("asm", "assembly", ".fa.gz"))
    wanted.append(("gff", "annotation", ".gene.gff3.gz"))
    wanted.append(("cds", "annotation", ".cds.fa.gz"))

    for key, folder_name, suffix in wanted:
        folder = genome.get(folder_name)
        if not folder:
            continue
        # A bare next() on the generator raised StopIteration when nothing
        # matched; a missing file now simply leaves the key out of `res`.
        name = _first_with_suffix(folder, suffix)
        if name:
            res[key] = folder.download(name, base_url, cookies, downloader=downloader)

    return res
def format_bed_and_cds(species, gff, cdsfa):
    """Produce `<species>.bed` and `<species>.cds` from a GFF and a CDS FASTA.

    Calls gff.bed() and fasta.format() to prepare the input files for the
    MCscan synteny workflow.

    https://github.com/tanghaibao/jcvi/wiki/MCscan-(Python-version)

    Args:
        species (str): Name of the species
        gff (str): Path to the GFF file
        cdsfa (str): Path to the FASTA file
    """
    from jcvi.formats.gff import bed as gff_bed
    from jcvi.formats.fasta import format as fasta_format

    # Gene names and mRNA names can mismatch; for the species listed below
    # the mRNA features are extracted instead of the gene features.
    species_keyed_by_id = set()
    species_typed_as_mrna = {
        "Cclementina",
        "Creinhardtii",
        "Csinensis",
        "Fvesca",
        "Lusitatissimum",
        "Mesculenta",
        "Mguttatus",
        "Ppersica",
        "Pvirgatum",
        "Rcommunis",
        "Sitalica",
        "Tcacao",
        "Thalophila",
        "Vcarteri",
        "Vvinifera",
        "Zmays",
    }

    if species in species_keyed_by_id:
        key = "ID"
    else:
        key = "Name"
    if species in species_typed_as_mrna:
        ttype = "mRNA"
    else:
        ttype = "gene"

    gff_args = [
        gff,
        "--type={}".format(ttype),
        "--key={}".format(key),
        "-o",
        species + ".bed",
    ]
    gff_bed(gff_args)
    fasta_format([cdsfa, species + ".cds", r"--sep=|"])
def bisect(args):
    """
    %prog bisect acc accession.fasta

    determine the version of the accession by querying entrez, based on a fasta file.
    This proceeds by a sequential search from xxxx.1 to the latest record.
    """
    # NOTE: the docstring above doubles as the CLI usage text.
    # Versions acc.1 .. acc.99 are fetched one at a time and compared with the
    # first record of the FASTA file; the first matching version is printed.
    p = OptionParser(bisect.__doc__)
    p.set_email()

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    acc, fastafile = args
    arec = get_first_rec(fastafile)

    valid = None
    for i in range(1, 100):
        term = "%s.%d" % (acc, i)
        try:
            query = list(batch_entrez([term], email=opts.email))
        except AssertionError as e:
            logger.debug(f"no records found for {term}. terminating. {e}")
            return

        # batch_entrez() skips terms it cannot find, so an empty list (not an
        # exception) is what marks the end of the version series.
        if not query:
            logger.debug(f"no records found for {term}. terminating.")
            return

        # batch_entrez() yields (id, size, term, handle); unpacking three
        # names here raised ValueError on every call.
        _, _, term, handle = query[0]
        brec = next(SeqIO.parse(handle, "fasta"))

        match = print_first_difference(
            arec, brec, ignore_case=True, ignore_N=True, rc=True
        )
        if match:
            valid = term
            break

    if valid:
        printf()
        printf("[green]{} matches the sequence in `{}`".format(valid, fastafile))
+ """ + p = OptionParser(entrez.__doc__) + + allowed_databases = { + "fasta": ["genome", "nuccore", "nucgss", "protein", "nucest"], + "asn.1": ["genome", "nuccore", "nucgss", "protein", "gene"], + "xml": ["genome", "nuccore", "nucgss", "nucest", "gene"], + "gb": ["genome", "nuccore", "nucgss"], + "est": ["nucest"], + "gss": ["nucgss"], + "acc": ["nuccore"], + } + + valid_formats = tuple(allowed_databases.keys()) + valid_databases = ("genome", "nuccore", "nucest", "nucgss", "protein", "gene") + + p.add_argument( + "--noversion", + dest="noversion", + default=False, + action="store_true", + help="Remove trailing accession versions", + ) + p.add_argument( + "--format", + default="fasta", + choices=valid_formats, + help="download format", + ) + p.add_argument( + "--database", + default="nuccore", + choices=valid_databases, + help="search database", + ) + p.add_argument( + "--retmax", + default=1000000, + type=int, + help="how many results to return", + ) + p.add_argument( + "--skipcheck", + default=False, + action="store_true", + help="turn off prompt to check file existence", + ) + p.add_argument( + "--batchsize", + default=500, + type=int, + help="download the results in batch for speed-up", + ) + p.set_outdir(outdir=None) + p.add_argument("--outprefix", default="out", help="output file name prefix") + p.set_email() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(p.print_help()) + + (filename,) = args + if op.exists(filename): + pf = filename.rsplit(".", 1)[0] + list_of_terms = [row.strip() for row in open(filename)] + if opts.noversion: + list_of_terms = [x.rsplit(".", 1)[0] for x in list_of_terms] + else: + pf = filename + # the filename is the search term + list_of_terms = [filename.strip()] + + fmt = opts.format + database = opts.database + batchsize = opts.batchsize + + assert ( + database in allowed_databases[fmt] + ), "For output format '{0}', allowed databases are: {1}".format( + fmt, allowed_databases[fmt] + ) + assert batchsize >= 1, 
"batchsize must >= 1" + + if " " in pf: + pf = opts.outprefix + + outfile = "{0}.{1}".format(pf, fmt) + + outdir = opts.outdir + if outdir: + mkdir(outdir) + + # If noprompt, will not check file existence + if not outdir: + fw = must_open(outfile, "w", checkexists=True, skipcheck=opts.skipcheck) + if fw is None: + return + + seen = set() + totalsize = 0 + for id, size, term, handle in batch_entrez( + list_of_terms, + retmax=opts.retmax, + rettype=fmt, + db=database, + batchsize=batchsize, + email=opts.email, + ): + if outdir: + outfile = urljoin(outdir, "{0}.{1}".format(term, fmt)) + fw = must_open(outfile, "w", checkexists=True, skipcheck=opts.skipcheck) + if fw is None: + continue + + rec = handle.read() + if id in seen: + logger.error("Duplicate key ({0}) found".format(rec)) + continue + + totalsize += size + print(rec, file=fw) + print(file=fw) + + seen.add(id) + + if seen: + printf( + "A total of {0} {1} records downloaded.".format(totalsize, fmt.upper()), + ) + + return outfile + + +def sra(args): + """ + %prog sra [term|term.ids] + + Given an SRA run ID, fetch the corresponding .sra file from the sra-instant FTP. + The term can also be a file containing list of SRR ids, one per line. + + Once downloaded, the SRA file is processed through `fastq-dump` to produce + FASTQ formatted sequence files, which are gzipped by default. 
def download_srr_term(term):
    """Download the .sra file for one run identifier and return download()'s result.

    `term` must be DRR/ERR/SRR followed by 6 or 7 digits. The URL is built
    as <base>/<3-letter prefix>/<first 6 characters>/<term>/<term>.sra.

    Exits the process with status 1 when `term` does not have that form.
    """
    sra_base_url = "ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/"
    sra_run_id_re = re.compile(r"^([DES]RR)(\d{3})(\d{3,4})$")

    m = sra_run_id_re.search(term)
    if m is None:
        logger.error(
            "Incorrect SRA identifier format "
            + "[should be like SRR126150, SRR1001901. "
            + "len(identifier) should be between 9-10 characters]"
        )
        # A bare sys.exit() reported success (status 0) for this error path.
        sys.exit(1)

    prefix, subprefix = m.group(1), "{0}{1}".format(m.group(1), m.group(2))
    download_url = urljoin(
        sra_base_url, prefix, subprefix, term, "{0}.sra".format(term)
    )

    logger.debug("Downloading file: {0}".format(download_url))
    return download(download_url)
def fcs(args):
    """
    %prog fcs fcsfile

    Process the results from Genbank contaminant screen. An example of the file
    looks like:

    contig name, length, span(s), apparent source
    contig0746 11760 1..141 vector
    contig0751 14226 13476..14226 vector
    contig0800 124133 30512..30559 primer/adapter
    """
    # NOTE: the docstring above doubles as the command-line help text, so
    # implementation notes live in comments instead.
    #
    # For every contaminated span, one tab-separated line is printed:
    #   contig, 0-based start, end, label
    # Spans that start (or end) within `cutoff` bases of a contig end are
    # stretched to that end, so no tiny leftover piece survives trimming.
    p = OptionParser(fcs.__doc__)
    p.add_argument(
        "--cutoff",
        default=200,
        # Without an explicit type a user-supplied value stays a string and the
        # `ahang < cutoff` comparisons below raise TypeError.
        type=int,
        help="Skip small components less than",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fcsfile,) = args
    cutoff = opts.cutoff
    # Context manager so the report file is closed even if a row fails to parse
    with open(fcsfile) as fp:
        for row in fp:
            if row[0] == "#":
                continue

            # Tab-separated when tabs are present, otherwise any whitespace
            sep = "\t" if "\t" in row else None
            atoms = row.rstrip().split(sep, 3)
            contig, length = atoms[:2]
            length = int(length)
            label = atoms[-1]
            label = label.replace(" ", "_")

            if len(atoms) == 3:
                # No span column: the whole contig is flagged
                ranges = "{0}..{1}".format(1, length)
            else:
                assert len(atoms) == 4
                ranges = atoms[2]

            for ab in ranges.split(","):
                a, b = ab.split("..")
                a, b = int(a), int(b)
                assert a <= b
                ahang = a - 1  # bases left of the span
                bhang = length - b  # bases right of the span
                if ahang < cutoff:
                    a = 1
                if bhang < cutoff:
                    b = length
                print("\t".join(str(x) for x in (contig, a - 1, b, label)))
def verify_sqn(sqndir, accession):
    """
    Check the validation report of one record and export it as FASTA.

    Reads `<sqndir>/<accession>.val`; any non-whitespace content is treated as
    a validation failure and raises AssertionError carrying the report text.
    Otherwise runs `gb2fasta` on `<sqndir>/<accession>.gbf`, writing the output
    to `<sqndir>/<accession>.fasta`.
    """
    valfile = "{0}/{1}.val".format(sqndir, accession)
    # Close the report promptly; this runs once per record inside a loop
    with open(valfile) as fp:
        contents = fp.read().strip()
    assert not contents, "Validation error:\n{0}".format(contents)

    cmd = "gb2fasta {0}/{1}.gbf".format(sqndir, accession)
    outfile = "{0}/{1}.fasta".format(sqndir, accession)
    sh(cmd, outfile=outfile)
op.basename(splitfile) + accession = accession_nv = name + + phase = int(phase) + assert phase in (1, 2, 3) + + cmd = acmd.format( + accession_nv=accession_nv, + sqndir=sqndir, + sbtfile=sbtfile, + splitfile=splitfile, + phase=phase, + comment=comment, + ) + sh(cmd) + + verify_sqn(sqndir, accession) + nupdated += 1 + + print("A total of {0} records updated.".format(nupdated), file=sys.stderr) + + +def htg(args): + """ + %prog htg fastafile template.sbt + + Prepare sqnfiles for Genbank HTG submission to update existing records. + + `fastafile` contains the records to update, multiple records are allowed + (with each one generating separate sqn file in the sqn/ folder). The record + defline has the accession ID. For example, + >AC148290.3 + + Internally, this generates two additional files (phasefile and namesfile) + and download records from Genbank. Below is implementation details: + + `phasefile` contains, for each accession, phase information. For example: + AC148290.3 3 HTG 2 mth2-45h12 + + which means this is a Phase-3 BAC. Record with only a single contig will be + labeled as Phase-3 regardless of the info in the `phasefile`. Template file + is the Genbank sbt template. See jcvi.formats.sbt for generation of such + files. + + Another problem is that Genbank requires the name of the sequence to stay + the same when updating and will kick back with a table of name conflicts. + For example: + + We are unable to process the updates for these entries + for the following reason: + + Seqname has changed + + Accession Old seq_name New seq_name + --------- ------------ ------------ + AC239792 mtg2_29457 AC239792.1 + + To prepare a submission, this script downloads genbank and asn.1 format, + and generate the phase file and the names file (use formats.agp.phase() and + apps.gbsubmit.asn(), respectively). These get automatically run. + + However, use --phases if the genbank files contain outdated information. + For example, the clone name changes or phase upgrades. 
In this case, run + formats.agp.phase() manually, modify the phasefile and use --phases to override. + """ + from jcvi.formats.fasta import sequin, ids + from jcvi.formats.agp import phase + from jcvi.apps.fetch import entrez + + p = OptionParser(htg.__doc__) + p.add_argument( + "--phases", + default=None, + help="Use another phasefile to override", + ) + p.add_argument("--comment", default="", help="Comments for this update") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, sbtfile = args + pf = fastafile.rsplit(".", 1)[0] + + idsfile = pf + ".ids" + phasefile = pf + ".phases" + namesfile = pf + ".names" + + ids([fastafile, "--outfile={0}".format(idsfile)]) + + asndir = "asn.1" + mkdir(asndir) + entrez([idsfile, "--format=asn.1", "--outdir={0}".format(asndir)]) + asn(glob("{0}/*".format(asndir)) + ["--outfile={0}".format(namesfile)]) + + if opts.phases is None: + gbdir = "gb" + mkdir(gbdir) + entrez([idsfile, "--format=gb", "--outdir={0}".format(gbdir)]) + phase(glob("{0}/*".format(gbdir)) + ["--outfile={0}".format(phasefile)]) + else: + phasefile = opts.phases + + assert op.exists(namesfile) and op.exists(phasefile) + + newphasefile = phasefile + ".new" + newphasefw = open(newphasefile, "w") + comment = opts.comment + + fastadir = "fasta" + sqndir = "sqn" + mkdir(fastadir) + mkdir(sqndir) + + from jcvi.graphics.histogram import stem_leaf_plot + + names = DictFile(namesfile) + assert len(set(names.keys())) == len(set(names.values())) + + phases = DictFile(phasefile) + ph = [int(x) for x in phases.values()] + # vmin 1, vmax 4, bins 3 + stem_leaf_plot(ph, 1, 4, 3, title="Counts of phases before updates") + logger.debug("Information loaded for {0} records.".format(len(phases))) + assert len(names) == len(phases) + + newph = [] + + cmd = "faSplit byname {0} {1}/".format(fastafile, fastadir) + sh(cmd, outfile="/dev/null", errfile="/dev/null") + + acmd = "tbl2asn -a z -p fasta -r {sqndir}" + acmd += " -i {splitfile} 
@lru_cache(maxsize=None)
def get_rows_cols(nrows=Nrows, ncols=Ncols):
    """
    Return the row letters (first `nrows` uppercase letters) and the 1-based
    column numbers (`range(1, ncols + 1)`) of a plate.
    """
    rows, cols = string.ascii_uppercase[:nrows], range(1, ncols + 1)
    return rows, cols


@lru_cache(maxsize=None)
def get_plate(nrows=Nrows, ncols=Ncols):
    """
    Build both directions of the 384-well <=> (96-well number + quadrant) map.

    Returns `(plate, splate)`:
      - `plate[i][j]` is the label, e.g. "14A", of the well at 0-based row `i`
        and column `j`. Each 2x2 tile of wells shares one two-digit running
        number; the letters A, B, C, D mark top-left, top-right, bottom-left
        and bottom-right.
      - `splate` maps such a label back to the well name, e.g. "14A" -> "C3".
    """
    rows, _ = get_rows_cols(nrows, ncols)
    plate = [[""] * ncols for _ in range(nrows)]
    n = 0
    # 384 to (96+quadrant)
    for i in range(0, nrows, 2):
        for j in range(0, ncols, 2):
            n += 1
            prefix = "{0:02d}".format(n)
            plate[i][j] = prefix + "A"
            plate[i][j + 1] = prefix + "B"
            plate[i + 1][j] = prefix + "C"
            plate[i + 1][j + 1] = prefix + "D"

    # (96+quadrant) to 384
    splate = {}
    for i in range(nrows):
        for j in range(ncols):
            c = plate[i][j]
            splate[c] = "{0}{1}".format(rows[i], j + 1)

    return plate, splate


def convert_96_to_384(c96, quad, ncols=Ncols):
    """
    Convert the 96-well number and quad number to 384-well number

    `c96` is a row letter followed by a column number (e.g. "B02"), `quad` is
    the 1-based quadrant, and `ncols` is the column count of the target plate.

    >>> convert_96_to_384("B02", 1)
    'C3'
    >>> convert_96_to_384("H09", 4)
    'P18'
    """
    # Look the well up in a plate of the SAME width that the arithmetic below
    # uses; previously a non-default `ncols` was applied to the running number
    # but resolved against the default-width plate.
    rows, _ = get_rows_cols(ncols=ncols)
    _, splate = get_plate(ncols=ncols)

    n96 = rows.index(c96[0]) * ncols // 2 + int(c96[1:])
    q = "{0:02d}{1}".format(n96, "ABCD"[quad - 1])
    return splate[q]
+ The FASTA file is assumed to be exported from the JCVI data delivery folder + which looks like: + + >1127963806024 /library_name=SIL1T054-B-01-120KB /clear_start=0 + /clear_end=839 /primer_id=1049000104196 /trace_id=1064147620169 + /trace_file_id=1127963805941 /clone_insert_id=1061064364776 + /direction=reverse /sequencer_run_id=1064147620155 + /sequencer_plate_barcode=B906423 /sequencer_plate_well_coordinates=C3 + /sequencer_plate_96well_quadrant=1 /sequencer_plate_96well_coordinates=B02 + /template_plate_barcode=CC0251602AB /growth_plate_barcode=BB0273005AB + AGCTTTAGTTTCAAGGATACCTTCATTGTCATTCCCGGTTATGATGATATCATCAAGATAAACAAGAATG + ACAATGATACCTGTTTGGTTCTGAAGTGTAAAGAGGGTATGTTCAGCTTCAGATCTTCTAAACCCTTTGT + CTAGTAAGCTGGCACTTAGCTTCCTATACCAAACCCTTTGTGATTGCTTCAGTCCATAAATTGCCTTTTT + + Plate mapping file maps the JTC `sequencer_plate_barcode` to external IDs. + For example: + B906423 SIL-001 + """ + p = OptionParser(gss.__doc__) + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(p.print_help()) + + fastafile, mappingfile = args + seen = defaultdict(int) + clone = defaultdict(set) + + plateMapping = DictFile(mappingfile) + + fw = open("MetaData.txt", "w") + print(PublicationTemplate.format(**vars), file=fw) + print(LibraryTemplate.format(**vars), file=fw) + print(ContactTemplate.format(**vars), file=fw) + logger.debug("Meta data written to `{0}`".format(fw.name)) + + fw = open("GSS.txt", "w") + fw_log = open("GSS.log", "w") + for rec in SeqIO.parse(fastafile, "fasta"): + # First pass just check well number matchings and populate sequences in + # the same clone + description = rec.description + a = parse_description(description) + direction = a["direction"][0] + sequencer_plate_barcode = a["sequencer_plate_barcode"][0] + sequencer_plate_well_coordinates = a["sequencer_plate_well_coordinates"][0] + sequencer_plate_96well_quadrant = a["sequencer_plate_96well_quadrant"][0] + sequencer_plate_96well_coordinates = a["sequencer_plate_96well_coordinates"][0] + + 
# Check the 96-well ID is correctly converted to 384-well ID + w96 = sequencer_plate_96well_coordinates + w96quad = int(sequencer_plate_96well_quadrant) + w384 = sequencer_plate_well_coordinates + assert convert_96_to_384(w96, w96quad) == w384 + + plate = sequencer_plate_barcode + assert plate in plateMapping, "{0} not found in `{1}` !".format( + plate, mappingfile + ) + + plate = plateMapping[plate] + d = Directions[direction] + + cloneID = "{0}{1}".format(plate, w384) + gssID = "{0}{1}".format(cloneID, d) + seen[gssID] += 1 + + if seen[gssID] > 1: + gssID = "{0}{1}".format(gssID, seen[gssID]) + + seen[gssID] += 1 + clone[cloneID].add(gssID) + + seen = defaultdict(int) + for rec in SeqIO.parse(fastafile, "fasta"): + # need to populate gssID, mateID, cloneID, seq, plate, row, column + description = rec.description + a = parse_description(description) + direction = a["direction"][0] + sequencer_plate_barcode = a["sequencer_plate_barcode"][0] + sequencer_plate_well_coordinates = a["sequencer_plate_well_coordinates"][0] + w384 = sequencer_plate_well_coordinates + + plate = sequencer_plate_barcode + plate = plateMapping[plate] + d = Directions[direction] + + cloneID = "{0}{1}".format(plate, w384) + gssID = "{0}{1}".format(cloneID, d) + seen[gssID] += 1 + + if seen[gssID] > 1: + logger.error("duplicate key {0} found".format(gssID)) + gssID = "{0}{1}".format(gssID, seen[gssID]) + + othergss = clone[cloneID] - {gssID} + othergss = ", ".join(sorted(othergss)) + vars.update(locals()) + + print(GSSTemplate.format(**vars), file=fw) + + # Write conversion logs to log file + print("{0}\t{1}".format(gssID, description), file=fw_log) + print("=" * 60, file=fw_log) + + logger.debug("A total of {0} seqs written to `{1}`".format(len(seen), fw.name)) + fw.close() + fw_log.close() + + +if __name__ == "__main__": + import doctest + + doctest.testmod() + main() diff --git a/jcvi/apps/gmap.py b/jcvi/apps/gmap.py new file mode 100644 index 00000000..000e800f --- /dev/null +++ 
def bam(args):
    """
    %prog bam input.gsnap ref.fasta

    Convert GSNAP output to BAM.
    """
    # NOTE: the docstring above is the help text; the usage line must name the
    # action as registered in main() ("bam").
    from jcvi.formats.sizes import Sizes
    from jcvi.formats.sam import index

    p = OptionParser(bam.__doc__)
    p.set_home("eddyyeh")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    gsnapfile, fastafile = args
    EYHOME = opts.eddyyeh_home
    pf = gsnapfile.rsplit(".", 1)[0]
    uniqsam = pf + ".unique.sam"
    samstats = uniqsam + ".stats"
    sizesfile = Sizes(fastafile).filename
    # Re-run the converter only when the inputs are newer than the stats file
    if need_update((gsnapfile, sizesfile), samstats):
        cmd = op.join(EYHOME, "gsnap2gff3.pl")
        cmd += " --format sam -i {0} -o {1}".format(gsnapfile, uniqsam)
        cmd += " -u -l {0} -p {1}".format(sizesfile, opts.cpus)
        sh(cmd)

    # `index` here is the jcvi.formats.sam helper imported above, not this
    # module's own `index` action
    index([uniqsam])

    return uniqsam
+ dbname = filename.rsplit(".", 1)[0] + safile = op.join(dbdir, "{0}/{0}.genomecomp".format(dbname)) + if dbname == filename: + dbname = filename + ".db" + + if not go: + return dbdir, dbname + + if need_update(dbfile, safile): + cmd = "gmap_build -D {0} -d {1} {2}".format(dbdir, dbname, filename) + sh(cmd) + else: + logger.error("`{0}` exists. `gmap_build` already run.".format(safile)) + + if go and supercat and updated: + sh("mv {0} {1}".format(coordsbak, coordsfile)) + + return dbdir, dbname + + +def index(args): + """ + %prog index database.fasta + ` + Wrapper for `gmap_build`. Same interface. + """ + p = OptionParser(index.__doc__) + p.add_argument( + "--supercat", + default=False, + action="store_true", + help="Concatenate reference to speed up alignment", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (dbfile,) = args + check_index(dbfile, supercat=opts.supercat) + + +def gmap(args): + """ + %prog gmap database.fasta fastafile + + Wrapper for `gmap`. + """ + p = OptionParser(gmap.__doc__) + p.add_argument( + "--cross", default=False, action="store_true", help="Cross-species alignment" + ) + p.add_argument( + "--npaths", + default=0, + type=int, + help="Maximum number of paths to show." + " If set to 0, prints two paths if chimera" + " detected, else one.", + ) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + dbfile, fastafile = args + assert op.exists(dbfile) and op.exists(fastafile) + prefix = get_prefix(fastafile, dbfile) + logfile = prefix + ".log" + gmapfile = prefix + ".gmap.gff3" + + if not need_update((dbfile, fastafile), gmapfile): + logger.error("`{0}` exists. 
`gmap` already run.".format(gmapfile)) + else: + dbdir, dbname = check_index(dbfile) + cmd = "gmap -D {0} -d {1}".format(dbdir, dbname) + cmd += " -f 2 --intronlength=100000" # Output format 2 + cmd += " -t {0}".format(opts.cpus) + cmd += " --npaths {0}".format(opts.npaths) + if opts.cross: + cmd += " --cross-species" + cmd += " " + fastafile + + sh(cmd, outfile=gmapfile, errfile=logfile) + + return gmapfile, logfile + + +def align(args): + """ + %prog align database.fasta read1.fq read2.fq + + Wrapper for `gsnap` single-end or paired-end, depending on the number of + args. + """ + from jcvi.formats.fastq import guessoffset + + p = OptionParser(align.__doc__) + p.add_argument( + "--rnaseq", + default=False, + action="store_true", + help="Input is RNA-seq reads, turn splicing on", + ) + p.add_argument( + "--native", + default=False, + action="store_true", + help="Convert GSNAP output to NATIVE format", + ) + p.set_home("eddyyeh") + p.set_outdir() + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) == 2: + logger.debug("Single-end alignment") + elif len(args) == 3: + logger.debug("Paired-end alignment") + else: + sys.exit(not p.print_help()) + + dbfile, readfile = args[:2] + outdir = opts.outdir + assert op.exists(dbfile) and op.exists(readfile) + prefix = get_prefix(readfile, dbfile) + logfile = op.join(outdir, prefix + ".log") + gsnapfile = op.join(outdir, prefix + ".gsnap") + nativefile = gsnapfile.rsplit(".", 1)[0] + ".unique.native" + + if not need_update((dbfile, readfile), gsnapfile): + logger.error("`{0}` exists. 
class SharedCounter(object):
    """A synchronized shared counter.

    The locking done by multiprocessing.Value ensures that only a single
    process or thread may read or write the in-memory ctypes object. However,
    in order to do n += 1, Python performs a read followed by a write, so a
    second process may read the old value before the new one is written by the
    first process. The solution is to use a multiprocessing.Lock to guarantee
    the atomicity of the modifications to Value.

    This class comes almost entirely from Eli Bendersky's blog:
    http://eli.thegreenplace.net/2012/01/04/shared-counter-with-pythons-multiprocessing/
    """

    def __init__(self, n=0):
        self.count = Value("i", n)

    def increment(self, n=1):
        """Increment the counter by n (default = 1)"""
        with self.count.get_lock():
            self.count.value += n

    @property
    def value(self):
        """Return the value of the counter"""
        return self.count.value


class Queue(Queue):
    """A portable implementation of multiprocessing.Queue.

    Because of multithreading / multiprocessing semantics, Queue.qsize() may
    raise the NotImplementedError exception on Unix platforms like Mac OS X
    where sem_getvalue() is not implemented. This subclass addresses this
    problem by using a synchronized shared counter (initialized to zero) and
    increasing / decreasing its value every time the put() and get() methods
    are called, respectively. This not only prevents NotImplementedError from
    being raised, but also allows us to implement a reliable version of both
    qsize() and empty().

    The counter only moves for operations that actually succeed: a get() that
    raises (e.g. on an empty queue) or a put() that raises (e.g. on a full
    queue) leaves qsize() unchanged.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, ctx=get_context())
        self.size = SharedCounter(0)

    def put(self, *args, **kwargs):
        # Count first so a fast consumer can never see the item before the
        # counter knows about it, then roll back if the put did not happen.
        self.size.increment(1)
        try:
            super().put(*args, **kwargs)
        except Exception:
            self.size.increment(-1)
            raise

    def get(self, *args, **kwargs):
        # Only count the item as removed once it has really been received;
        # a timed-out or non-blocking get must not drive the counter negative.
        item = super().get(*args, **kwargs)
        self.size.increment(-1)
        return item

    def qsize(self):
        """Reliable implementation of multiprocessing.Queue.qsize()"""
        return self.size.value

    def empty(self):
        """Reliable implementation of multiprocessing.Queue.empty()"""
        return not self.qsize()
class Dependency(object):
    """
    One makefile rule (sources, targets and recipe lines); used by MakeManager.
    """

    def __init__(self, source, target, cmds, id, remove=False):
        self.id = id
        self.source, self.target = listify(source), listify(target)
        commands = listify(cmds)
        if remove:
            # Clear stale targets before the real recipe runs
            commands = ["rm -f {0}".format(" ".join(self.target))] + commands
        self.cmds = commands

    def __str__(self):
        sources = " ".join(self.source)
        targets = " ".join(self.target)

        if len(self.target) > 1:
            # Several targets are funnelled through one .INTERMEDIATE
            # pseudo-target named after this rule's id
            stamp = "{0}.intermediate".format(self.id)
            lines = [
                "{0} : {1}".format(targets, stamp),
                ".INTERMEDIATE: {0}".format(stamp),
                "{0} : {1}".format(stamp, sources),
            ]
        else:
            lines = ["{0} : {1}".format(targets, sources)]

        # Recipe lines are tab-indented; "$" is doubled so make passes it on
        lines.extend("\t" + command.replace("$", "$$") for command in self.cmds)
        return "".join(line + "\n" for line in lines)
class Jobs(list):
    """
    Runs multiple function calls on the SAME computer, using multiprocessing.

    The list holds one Process per entry of `args`.
    """

    def __init__(self, target, args):
        self.extend(Process(target=target, args=listify(x)) for x in args)

    def start(self):
        """Start every process, in list order."""
        for proc in self:
            proc.start()

    def join(self):
        """Wait for every process, in list order."""
        for proc in self:
            proc.join()

    def run(self):
        """Start all processes, then wait for all of them."""
        self.start()
        self.join()
class GridProcess(object):
    """
    One `qsub` submission: assembles the command line, submits it and records
    the job ID announced by the scheduler.
    """

    # The scheduler's submission banner carries the job ID; start() reads it
    # back through the named group `id`, so both patterns must define it.
    pat1 = re.compile(r"Your job (?P<id>[0-9]*) ")
    pat2 = re.compile(r"Your job-array (?P<id>\S*) ")

    def __init__(
        self,
        cmd,
        jobid="",
        pcode="99999",
        queue="default",
        threaded=None,
        infile=None,
        outfile=None,
        errfile=None,
        arr=None,
        concurrency=None,
        outdir=".",
        name=None,
        hold_jid=None,
        extra_opts=None,
        grid_opts=None,
    ):
        """
        Store the submission settings. When `grid_opts` is given, the options
        exported by GridOpts override the matching attributes set here.
        """
        self.cmd = cmd
        self.jobid = jobid
        self.queue = queue
        self.threaded = threaded
        self.infile = infile
        self.outfile = outfile or ""
        self.errfile = errfile or ""
        self.arr = arr
        self.concurrency = concurrency
        self.outdir = outdir
        self.name = name
        self.pcode = pcode
        self.hold_jid = hold_jid
        # Array jobs are announced with a different banner
        self.pat = self.pat2 if arr else self.pat1
        self.extra = extra_opts if extra_opts else None
        if grid_opts:
            self.__dict__.update(GridOpts(grid_opts))

    def __str__(self):
        return "\t".join((x for x in (self.jobid, self.cmd, self.outfile) if x))

    def build(self):
        """
        Return the full `qsub` command line.

        Side effects: wraps `self.cmd` in `sh -c` when it contains shell
        operators, creates `self.outdir`, and rewrites `self.outfile` /
        `self.errfile` to live under `self.outdir`.
        """
        # Shell commands
        if "|" in self.cmd or "&&" in self.cmd or "||" in self.cmd:
            # Pick the quote character the command itself does not use
            quote = '"' if "'" in self.cmd else "'"
            self.cmd = "sh -c {1}{0}{1}".format(self.cmd, quote)

        # qsub command (the project code is specific to jcvi)
        qsub = "qsub -P {0} -cwd".format(self.pcode)
        if self.queue != "default":
            qsub += " -l {0}".format(self.queue)
        if self.threaded:
            qsub += " -pe threaded {0}".format(self.threaded)
        if self.arr:
            assert 1 <= self.arr < 100000
            qsub += " -t 1-{0}".format(self.arr)
        if self.concurrency:
            qsub += " -tc {0}".format(self.concurrency)
        if self.name:
            qsub += ' -N "{0}"'.format(self.name)
        if self.hold_jid:
            param = "-hold_jid_ad" if self.arr else "-hold_jid"
            qsub += " {0} {1}".format(param, self.hold_jid)
        if self.extra:
            qsub += " {0}".format(self.extra)

        # I/O
        infile = self.infile
        outfile = self.outfile
        errfile = self.errfile
        outdir = self.outdir
        mkdir(outdir)
        # Same name for both streams: merge stderr into stdout with `-j y`
        redirect_same = outfile and (outfile == errfile)

        if infile:
            qsub += " -i {0}".format(infile)
        if outfile:
            self.outfile = op.join(outdir, outfile)
            qsub += " -o {0}".format(self.outfile)
        if errfile:
            if redirect_same:
                qsub += " -j y"
            else:
                self.errfile = op.join(outdir, errfile)
                qsub += " -e {0}".format(self.errfile)

        cmd = " ".join((qsub, self.cmd))
        return cmd

    def start(self):
        """
        Submit the job and store the scheduler's job ID in `self.jobid`
        ("-1" when no ID could be read from the submission output).
        """
        cmd = self.build()
        # run the command and get the job-ID (important)
        output = popen(cmd, debug=False).read()
        # get_grid_engine() in this module decodes popen output, so the stream
        # may hand back bytes; the str patterns above need text.
        if isinstance(output, bytes):
            output = output.decode("utf-8")

        match = self.pat.search(output)
        if match:
            self.jobid = match.group("id")
        else:
            # Empty or unrecognised output: keep going with a sentinel ID
            # instead of failing on a missing match
            if output.strip() != "":
                logger.error(
                    "Cannot read job ID from qsub output: {0}".format(output.strip())
                )
            self.jobid = "-1"

        msg = "[{0}] {1}".format(self.jobid, self.cmd)
        if self.infile:
            msg += " < {0} ".format(self.infile)
        if self.outfile:
            backup(self.outfile)
            msg += " > {0} ".format(self.outfile)
        if self.errfile:
            backup(self.errfile)
            msg += " 2> {0} ".format(self.errfile)

        logger.debug(msg)
+ """ + p = OptionParser(array.__doc__) + p.set_grid_opts(array=True) + p.set_params(prog="grid") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (cmds,) = args + fp = open(cmds) + N = sum(1 for _ in fp) + fp.close() + + pf = cmds.rsplit(".", 1)[0] + runfile = pf + ".sh" + assert runfile != cmds, "Commands list file should not have a `.sh` extension" + + engine = get_grid_engine() + threaded = opts.threaded or 1 + contents = ( + arraysh.format(cmds) + if engine == "SGE" + else arraysh_ua.format(opts.queue, N, threaded, cmds) + ) + write_file(runfile, contents) + + if engine == "PBS": + return + + outfile = "{0}.{1}.out".format(pf, r"\$TASK_ID") + errfile = "{0}.{1}.err".format(pf, r"\$TASK_ID") + p = GridProcess( + "sh {0}".format(runfile), + outfile=outfile, + errfile=errfile, + arr=N, + extra_opts=opts.extra, + grid_opts=opts, + ) + p.start() + + +def run(args): + """ + %prog run command ::: file1 file2 + + Parallelize a set of commands on grid. The syntax is modeled after GNU + parallel + + {} - input line + {.} - input line without extension + {_} - input line first part + {/} - basename of input line + {/.} - basename of input line without extension + {/_} - basename of input line first part + {#} - sequence number of job to run + ::: - Use arguments from the command line as input source instead of stdin + (standard input). 
def guess_method(tag):
    """
    Return "jobid" when every comma-separated piece of `tag` passes
    `is_number`, otherwise "pattern".
    """
    from jcvi.formats.base import is_number

    if all(is_number(piece) for piece in tag.split(",")):
        return "jobid"
    return "pattern"
# conversion between blastz and ncbi is taken from Kent src
# src/lib/blastOut.c
# this is not rigorous definition of e-value (assumes human genome) !!
def blastz_score_to_ncbi_bits(bz_score):
    """Scale a blastz score by 0.0205 to obtain the bit score."""
    return bz_score * 0.0205
def add_mask(ref_tags, qry_tags, mask=False):
    """
    Join the reference and query tag lists into comma-separated strings.

    When `mask` is false the `Unmask` tag is added to the end of both tag
    sets. The input lists are left untouched: the tag is added to copies,
    not appended in place.

    Returns a `(ref_tags, qry_tags)` tuple of strings.
    """
    suffix = [] if mask else [Unmask]

    ref_joined = ",".join([*ref_tags, *suffix])
    qry_joined = ",".join([*qry_tags, *suffix])

    return ref_joined, qry_joined
def main():
    """
    %prog database.fa query.fa [options]

    Run LASTZ similar to the BLAST interface, and generates -m8 tabular format
    """
    p = OptionParser(main.__doc__)

    supported_formats = tuple(
        x.strip()
        for x in "lav, lav+text, axt, axt+, maf, maf+, maf-, sam, softsam, "
        "sam-, softsam-, cigar, BLASTN, BLASTN-, differences, rdotplot, text".split(",")
    )

    p.add_argument(
        "--format",
        default="BLASTN-",
        choices=supported_formats,
        help="Output format",
    )
    p.add_argument("--path", dest="lastz_path", default=None, help="specify LASTZ path")
    p.add_argument(
        "--mask",
        dest="mask",
        default=False,
        action="store_true",
        help="treat lower-case letters as mask info",
    )
    p.add_argument(
        "--similar",
        default=False,
        action="store_true",
        help="Use options tuned for close comparison",
    )
    p.set_cpus(cpus=32)
    p.set_params()
    p.set_outfile()
    opts, args = p.parse_args()

    if len(args) != 2:
        # same usage-error exit idiom as the other commands in this package
        sys.exit(not p.print_help())

    bfasta_fn, afasta_fn = args
    for fn in (afasta_fn, bfasta_fn):
        assert op.exists(fn), "File `{0}` not found".format(fn)

    afasta_fn = op.abspath(afasta_fn)
    bfasta_fn = op.abspath(bfasta_fn)
    out_fh = must_open(opts.outfile, "w")

    # tolerate a missing --extra value before appending the tuned options
    extra = opts.extra or ""
    if opts.similar:
        extra += similarOptions

    lastz_bin = opts.lastz_path or "lastz"
    assert lastz_bin.endswith("lastz"), "You need to include lastz in your path"

    mask = opts.mask
    cpus = opts.cpus
    logger.debug("Dispatch job to %d cpus" % cpus)
    fmt = opts.format
    blastline = fmt == "BLASTN-"

    # The axt, maf, etc. format can only be run on a split database (i.e. one
    # FASTA record per file). The split files are then parallelized for the
    # computation, as opposed to splitting queries through "subsample".
    outdir = "outdir"
    if not blastline:
        from jcvi.formats.fasta import Fasta
        from jcvi.formats.chain import faToTwoBit

        mkdir(outdir)

        bfasta_2bit = faToTwoBit(bfasta_fn)
        bids = list(Fasta(bfasta_fn, lazy=True).iterkeys_ordered())

        apf = op.basename(afasta_fn).split(".")[0]
        # one task per database record, in the tuple layout lastz_2bit expects:
        # bfasta_fn, afasta_fn, outfile, lastz_bin, extra, mask, format
        tasks = []
        for seqid in bids:
            bfasta = "/".join((bfasta_2bit, seqid))
            outfile = op.join(outdir, "{0}.{1}.{2}".format(apf, seqid, fmt))
            tasks.append((bfasta, afasta_fn, outfile, lastz_bin, extra, mask, fmt))

        # the context manager releases the worker processes once map() is done
        with Pool(cpus) as pool:
            pool.map(lastz_2bit, tasks)

        return

    lock = Lock()

    # BLASTN- output: each job handles subsample k of `cpus` of the query and
    # all jobs write to the shared out_fh under `lock`
    jobs_args = [
        (k + 1, cpus, bfasta_fn, afasta_fn, out_fh, lock, lastz_bin, extra, mask)
        for k in range(cpus)
    ]
    g = Jobs(target=lastz, args=jobs_args)
    g.run()
def summary(args):
    """
    %prog summary fastafile

    Report the number of bases and sequences masked.
    """
    p = OptionParser(summary.__doc__)

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastafile,) = args
    f = Fasta(fastafile, index=False)

    halfmaskedseqs = set()
    allmasked = 0
    allbases = 0
    cutoff = 50  # percent of masked bases above which a sequence is reported
    for key, seq in f.iteritems():
        # anything other than upper-case A/G/C/T counts as masked
        masked = sum(1 for base in seq if base not in "AGCT")
        seqlen = len(seq)
        # a zero-length record has no masked fraction; skip it instead of
        # dividing by zero
        if seqlen and masked * 100.0 / seqlen > cutoff:
            halfmaskedseqs.add(key)
        allmasked += masked
        allbases += seqlen

    seqnum = len(f)
    maskedseqnum = len(halfmaskedseqs)

    print(
        "Total masked bases: {0}".format(percentage(allmasked, allbases)),
        file=sys.stderr,
    )
    print(
        "Total masked sequences (contain > {0}% masked): {1}".format(
            cutoff, percentage(maskedseqnum, seqnum)
        ),
        file=sys.stderr,
    )
Masked fasta file + will have an appended suffix of .mask with all the low-complexity bases masked + (default to lower case, set --hard for hardmasking). + """ + p = OptionParser(mask.__doc__) + p.add_argument( + "--hard", + dest="hard", + default=False, + action="store_true", + help="Hard mask the low-complexity bases", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (genomefile,) = args + + # entire pipeline + countsfile = genomefile + ".counts" + wm_mk_counts(infile=genomefile, outfile=countsfile) + + maskedfastafile = "%s.masked%s" % op.splitext(genomefile) + wm_mk_masks(infile=countsfile, outfile=maskedfastafile, genomefile=genomefile) + + if opts.hard: + hardmask(maskedfastafile) + + +if __name__ == "__main__": + main() diff --git a/jcvi/apps/phylo.py b/jcvi/apps/phylo.py new file mode 100644 index 00000000..e93485e7 --- /dev/null +++ b/jcvi/apps/phylo.py @@ -0,0 +1,1204 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Construct and visualize phylogenetic trees from: +1. MCSCAN output +2. CDS sequences in FASTA format + +Options are provided for each step: +1. sequence alignment: + ClustalW2 or MUSCLE (wrapped on Biopython) +2. alignment editting: + GBlocks (optional) +3. build trees: + NJ: PHYLIP + ML: RAxML or PHYML + +Optional steps: +- reroot tree +- alternative topology test (SH test) +- TreeFix + +The external software needs be installed first. 
+""" +import sys +import os +import os.path as op +import re +import warnings + +from math import ceil +from itertools import chain +from functools import partial + +import numpy as np +from ete3 import Tree +from Bio import SeqIO, AlignIO +from Bio.Data import CodonTable +from Bio.Emboss.Applications import ( + FSeqBootCommandline, + FDNADistCommandline, + FNeighborCommandline, + FConsenseCommandline, +) +from Bio.Phylo.Applications import PhymlCommandline, RaxmlCommandline + +from ..compara.ks import ( + AbstractCommandline, + find_first_isoform, + run_mrtrans, + clustal_align_protein, + muscle_align_protein, +) +from ..formats.base import must_open, DictFile, LineFile +from ..formats.fasta import Fasta +from ..utils.orderedcollections import OrderedDict +from ..graphics.base import plt, savefig + +from .base import ActionDispatcher, OptionParser, getpath, logger, mkdir, sh + + +GBLOCKS_BIN = partial(getpath, name="GBLOCKS", warn="warn") +PHYML_BIN = partial(getpath, name="PHYML", warn="warn") +RAXML_BIN = partial(getpath, name="RAXML", warn="warn") +FPHYLIP_BIN = partial(getpath, name="FPHYLIP", warn="warn") +TREEFIX_BIN = partial(getpath, name="TREEFIX", warn="warn") + + +class GblocksCommandline(AbstractCommandline): + """Little commandline for Gblocks + (http://molevol.cmima.csic.es/castresana/Gblocks.html). + + Accepts alignment in FASTA or NBRF/PIR format. 
class FfitchCommandline(AbstractCommandline):
    """Little commandline for ffitch in EMBOSS
    (http://www.molgen.mpg.de/~beck/embassy/phylipnew/ffitch.html).

    Infer branch lengths of tree.
    """

    def __init__(
        self,
        datafile,
        outtreefile,
        command=FPHYLIP_BIN("ffitch"),
        intreefile=None,
        **kwargs
    ):
        # report file sits next to the data file, extension swapped for .ffitch
        stem = datafile.rsplit(".", 1)[0]

        self.datafile = datafile
        self.outtreefile = outtreefile
        self.outfile = stem + ".ffitch"
        self.command = command
        # a literal pair of double quotes stands in for "no input tree"
        self.intreefile = intreefile or '""'

        # remaining keyword arguments become `-key value` switches
        self.parameters = [f"-{key} {value}" for key, value in kwargs.items()]

    def __str__(self):
        fixed_part = (
            f" -datafile {self.datafile} -intreefile {self.intreefile}"
            f" -outfile {self.outfile} -outtreefile {self.outtreefile} "
        )
        return self.command + fixed_part + " ".join(self.parameters)
def run_treefix(
    input,
    stree_file,
    smap_file,
    a_ext=".fasta",
    o_ext=".dnd",
    n_ext=".treefix.dnd",
    **kwargs
):
    """
    get the ML tree closest to the species tree

    Returns the expected output tree path (`input` with `o_ext` replaced by
    `n_ext`), or None when the command reports an error.
    """
    treefix_cl = TreeFixCommandline(
        input=input,
        stree_file=stree_file,
        smap_file=smap_file,
        a_ext=a_ext,
        o=o_ext,
        n=n_ext,
        **kwargs
    )
    stem = input.rsplit(o_ext, 1)[0]
    outtreefile = stem + n_ext
    print("TreeFix:", treefix_cl, file=sys.stderr)
    _, err = treefix_cl.run()

    if err:
        print("***TreeFix could not run", file=sys.stderr)
        return None

    logger.debug("new tree written to {0}".format(outtreefile))
    return outtreefile
def smart_reroot(treefile, outgroupfile, outfile, format=0):
    """
    simple function to reroot Newick format tree using ete3

    Each non-blank line of `outgroupfile` is a leaf-name prefix. Lines are
    tried in file order and the first one matching at least one leaf defines
    the outgroup. The tree is rooted on the common ancestor of the matching
    leaves, falling back to the first matching leaf when ete3 rejects that
    ancestor with a ValueError.

    Returns `outfile` on success, or `treefile` unchanged when no prefix
    matches any leaf.

    Tree reading format options see here:
    http://packages.python.org/ete2/tutorial/tutorial_trees.html#reading-newick-trees
    """
    tree = Tree(treefile, format=format)
    leaves = [t.name for t in tree.get_leaves()][::-1]

    fp = must_open(outgroupfile)
    try:
        prefixes = [line.strip() for line in fp]
    finally:
        fp.close()

    outgroup = []
    for prefix in prefixes:
        # a blank line is an empty prefix, which would match every leaf
        if not prefix:
            continue
        outgroup = [leaf for leaf in leaves if leaf.startswith(prefix)]
        if outgroup:
            break

    if not outgroup:
        print(
            "Outgroup not found. Tree {0} cannot be rerooted.".format(treefile),
            file=sys.stderr,
        )
        return treefile

    try:
        tree.set_outgroup(tree.get_common_ancestor(*outgroup))
    except ValueError:
        # fall back to rooting on the first matching leaf alone
        tree.set_outgroup(outgroup[0])
    tree.write(outfile=outfile, format=format)

    logger.debug("Rerooted tree printed to {0}".format(outfile))
    return outfile
NJ tree not built.", + file=sys.stderr, + ) + return None + + seqboot_out = phy_file.rsplit(".", 1)[0] + ".fseqboot" + seqboot_cl = FSeqBootCommandline( + FPHYLIP_BIN("fseqboot"), + sequence=phy_file, + outfile=seqboot_out, + seqtype="d", + reps=100, + seed=12345, + ) + stdout, stderr = seqboot_cl() + logger.debug("Resampling alignment: %s" % seqboot_cl) + + dnadist_out = phy_file.rsplit(".", 1)[0] + ".fdnadist" + dnadist_cl = FDNADistCommandline( + FPHYLIP_BIN("fdnadist"), sequence=seqboot_out, outfile=dnadist_out, method="f" + ) + stdout, stderr = dnadist_cl() + logger.debug("Calculating distance for bootstrapped alignments: %s" % dnadist_cl) + + neighbor_out = phy_file.rsplit(".", 1)[0] + ".njtree" + e = phy_file.rsplit(".", 1)[0] + ".fneighbor" + neighbor_cl = FNeighborCommandline( + FPHYLIP_BIN("fneighbor"), + datafile=dnadist_out, + outfile=e, + outtreefile=neighbor_out, + ) + stdout, stderr = neighbor_cl() + logger.debug("Building Neighbor Joining tree: %s" % neighbor_cl) + + consense_out = phy_file.rsplit(".", 1)[0] + ".consensustree.nodesupport" + e = phy_file.rsplit(".", 1)[0] + ".fconsense" + consense_cl = FConsenseCommandline( + FPHYLIP_BIN("fconsense"), + intreefile=neighbor_out, + outfile=e, + outtreefile=consense_out, + ) + stdout, stderr = consense_cl() + logger.debug("Building consensus tree: %s" % consense_cl) + + # distance without bootstrapping + dnadist_out0 = phy_file.rsplit(".", 1)[0] + ".fdnadist0" + dnadist_cl0 = FDNADistCommandline( + FPHYLIP_BIN("fdnadist"), sequence=phy_file, outfile=dnadist_out0, method="f" + ) + stdout, stderr = dnadist_cl0() + logger.debug("Calculating distance for original alignment: %s" % dnadist_cl0) + + # infer branch length on consensus tree + consensustree1 = phy_file.rsplit(".", 1)[0] + ".consensustree.branchlength" + run_ffitch( + distfile=dnadist_out0, outtreefile=consensustree1, intreefile=consense_out + ) + + # write final tree + ct_s = Tree(consense_out) + + if outgroup: + t1 = consensustree1 + ".rooted" + 
def build_ml_phyml(alignment, outfile, work_dir=".", **kwargs):
    """
    build maximum likelihood tree of DNA seqs with PhyML

    The alignment is written to `<work_dir>/work/aln.phy` in relaxed PHYLIP
    format and handed to PhyML; extra keyword arguments are forwarded to
    PhymlCommandline.

    Returns `(outfile, phy_file)` on success, or None when PhyML did not
    produce `<phy_file>_phyml_tree.txt`.
    """
    phy_dir = op.join(work_dir, "work")
    # make sure the scratch directory exists, as build_ml_raxml does
    os.makedirs(phy_dir, exist_ok=True)
    phy_file = op.join(phy_dir, "aln.phy")
    # close the handle explicitly so the alignment is fully on disk before
    # PhyML reads it
    with open(phy_file, "w") as phy_fh:
        AlignIO.write(alignment, phy_fh, "phylip-relaxed")

    phyml_cl = PhymlCommandline(cmd=PHYML_BIN("phyml"), input=phy_file, **kwargs)
    logger.debug("Building ML tree using PhyML: %s" % phyml_cl)
    stdout, stderr = phyml_cl()

    tree_file = phy_file + "_phyml_tree.txt"
    if not op.exists(tree_file):
        print("***PhyML failed.", file=sys.stderr)
        return None
    sh("cp {0} {1}".format(tree_file, outfile), log=False)

    logger.debug("ML tree printed to %s" % outfile)

    return outfile, phy_file
def SH_raxml(reftree, querytree, phy_file, shout="SH_out.txt"):
    """
    SH test using RAxML

    querytree can be a single tree or a bunch of trees (eg. from bootstrapping)

    The first line of RAxML's output matching "Significantly...:..." is
    appended to `shout`, prefixed with the query tree's base name. When no
    such line is found, "SH test failed." is printed to stderr and nothing
    is appended.

    Returns the name of the `shout` file.
    """
    assert op.isfile(reftree)
    shout = must_open(shout, "a")

    try:
        raxml_work = op.abspath(op.join(op.dirname(phy_file), "raxml_work"))
        mkdir(raxml_work)
        raxml_cl = RaxmlCommandline(
            cmd=RAXML_BIN("raxmlHPC"),
            sequences=phy_file,
            algorithm="h",
            model="GTRGAMMA",
            name="SH",
            starting_tree=reftree,
            bipartition_filename=querytree,
            working_dir=raxml_work,
        )

        logger.debug("Running SH test in RAxML: %s" % raxml_cl)
        o, stderr = raxml_cl()
        # hard coded
        try:
            match = re.search("(Significantly.*:.*)", o)
        except TypeError:
            # output that is not a string is treated as "no result"
            match = None

        if match is None:
            print("SH test failed.", file=sys.stderr)
        else:
            # "\\%" is the same backslash-percent text the old "\%" produced,
            # spelled without the invalid escape sequence
            pval = match.group(0).strip().replace("\t", " ").replace("%", "\\%")
            print("{0}\t{1}".format(op.basename(querytree), pval), file=shout)
            logger.debug("SH p-value appended to %s" % shout.name)
    finally:
        # always release the output handle, even if RAxML raises
        shout.close()

    return shout.name
def subalignment(alnfle, subtype, alntype="fasta"):
    """
    Subset synonymous or fourfold degenerate sites from an alignment

    input should be a codon alignment

    Returns the path of the written sub-alignment, or None when no codon
    column is selected.
    """

    def _all_synonymous(codons):
        # every codon must be translatable and give the same amino acid
        shared_aa = None
        for codon in codons:
            if codon not in CODON_TRANSLATION:
                return False
            residue = CODON_TRANSLATION[codon]
            if shared_aa and residue != shared_aa:
                return False
            shared_aa = residue
        return True

    def _all_fourfold(codons):
        return all(codon in FOURFOLD for codon in codons)

    aln = AlignIO.read(alnfle, alntype)
    alnlen = aln.get_alignment_length()
    nseq = len(aln)
    subalnfile = alnfle.rsplit(".", 1)[0] + "_{0}.{1}".format(subtype, alntype)

    keepers = {"synonymous": _all_synonymous, "fourfold": _all_fourfold}
    keep = keepers.get(subtype)

    subaln = None
    codon_starts = range(0, alnlen, 3) if keep is not None else ()
    for j in codon_starts:
        # lazily walk the column so checking stops at the first failing codon
        column = (str(aln[i, j : j + 3].seq) for i in range(nseq))
        if not keep(column):
            continue
        block = aln[:, j : j + 3]
        if subaln is None:
            subaln = block
        else:
            subaln += block

    if subaln:
        AlignIO.write(subaln, subalnfile, alntype)
        return subalnfile

    print("No sites {0} selected.".format(subtype), file=sys.stderr)
    return None
def add_tandems(mcscanfile, tandemfile):
    """
    add tandem genes to anchor genes in mcscan file

    Each line of `tandemfile` is a comma-separated tandem group. Every anchor
    that belongs to a group is replaced by the whole group, rows whose anchors
    were all emitted already are dropped, and the result is passed through
    merge_rows_local.

    Returns the name of the merged file.
    """
    with open(tandemfile) as tandem_fh:
        tandems = [line.strip().split(",") for line in tandem_fh]

    # gene -> first tandem group listing it (same match the linear scan found)
    group_of = {}
    for group in tandems:
        for gene in group:
            group_of.setdefault(gene, group)

    fw = must_open(mcscanfile + ".withtandems", "w")
    fp = must_open(mcscanfile)
    seen = set()
    nrows = 0
    try:
        for row in fp:
            nrows += 1
            if row[0] == "#" or not row.strip():
                continue
            anchorslist = row.strip().split("\t")
            anchors = {a.split(",")[0] for a in anchorslist}
            # rows without a "." placeholder are valid; discard() does not
            # raise KeyError for them the way remove() did
            anchors.discard(".")
            if anchors & seen == anchors:
                continue

            newanchors = []
            for a in anchorslist:
                if a == ".":
                    newanchors.append(a)
                    continue
                group = group_of.get(a)
                if group is not None:
                    newanchors.append(",".join(group))
                    seen.update(group)
                else:
                    newanchors.append(a)
                    seen.add(a)
            print("\t".join(newanchors), file=fw)
    finally:
        fp.close()
        fw.close()

    newmcscanfile = merge_rows_local(fw.name)

    logger.debug(
        "Tandems added to `{0}`. Results in `{1}`".format(mcscanfile, newmcscanfile)
    )
    with open(newmcscanfile) as merged_fh:
        nmerged = sum(1 for _ in merged_fh)
    logger.debug("{0} rows merged to {1} rows".format(nrows, nmerged))
    sh("rm %s" % fw.name)

    return newmcscanfile
+ The tandemfile must be provided with *ALL* genomes involved, otherwise + result will be incomplete and redundant. + """ + from jcvi.graphics.base import discrete_rainbow + + p = OptionParser(prepare.__doc__) + p.add_argument("--addtandem", help="path to tandemfile") + p.add_argument( + "--writecolors", + default=False, + action="store_true", + help="generate a gene_name to color mapping file which will be taken " + "by jcvi.apps.phylo.draw", + ) + p.set_outdir(outdir="sequences") + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + mcscanfile, cdsfile = args + + if opts.addtandem: + tandemfile = opts.addtandem + mcscanfile_with_tandems = add_tandems(mcscanfile, tandemfile) + mcscanfile = mcscanfile_with_tandems + + seqdir = opts.outdir + mkdir(seqdir) + f = Fasta(cdsfile) + fp = must_open(mcscanfile) + if opts.writecolors: + fc = must_open("leafcolors.txt", "w") + + n = 0 + for i, row in enumerate(fp): + row = row.strip().split("\t") + if i == 0: + l = len(row) + if l <= 20: + colors = discrete_rainbow(l, shuffle=False)[1] + else: + colors = discrete_rainbow(l, usepreset=False, shuffle=False)[1] + warnings.warn( + "*** WARNING ***\n" + "Too many columns. Colors may not be all distinctive." + ) + + assert len(row) == l, "All rows should have same number of fields." 
+ + anchors = set() + for j, atom in enumerate(row): + color = "%s,%s,%s" % colors[j] + if atom == ".": + continue + elif "," in atom: + atom = atom.split(",") + for a in atom: + fc.write("{0}\t{1}\n".format(a, color)) + anchors.add(a) + else: + fc.write("{0}\t{1}\n".format(atom, color)) + anchors.add(atom) + + if len(anchors) <= 3: + print( + "Not enough seqs to build trees for {0}".format(anchors), + file=sys.stderr, + ) + continue + + pivot = row[0] + fw = must_open("%s/%s.cds" % (seqdir, pivot), "w") + for a in anchors: + if a not in f: + print(a) + a = find_first_isoform(a, f) + assert a, a + arec = f[a] + SeqIO.write(arec, fw, "fasta") + fw.close() + n += 1 + + if opts.writecolors: + fc.close() + logger.debug("leaf colors written to `{0}`".format(fc.name)) + + logger.debug("cds of {0} syntelog groups written to {1}/".format(n, seqdir)) + + return seqdir + + +def build(args): + """ + %prog build [prot.fasta] cds.fasta [options] --outdir=outdir + + This function wraps on the following steps: + 1. msa using ClustalW2 or MUSCLE(default) + 2. (optional) alignment editing using Gblocks + 3. build NJ tree using PHYLIP in EMBOSS package + seq names should be unique by first 10 chars (restriction of PHYLIP) + 4. build ML tree using RAxML(default) or PHYML, use keywords raxml or phyml, + *WARNING* maybe slow with large dataset + + If an outgroup file is provided, the result tree will be rooted on the + outgroup according to order in the file, i.e. the name in row1 will be + tried first. If not found, row2 will be used, etc. + Tail truncated names can be provided so long as it is unique among the seqs. + If not uniq, the first occurrence will be used. For example, if you have + two moss sequences in your input, then the tree will be rooted on the + first moss sequence encountered by the program, unless they are monophylic, + in which case the root will be their common ancestor. + + --stree and --smap are required if --treefix is set. 
+ + Trees can be edited again using an editor such as Dendroscope. This + is the recommended way to get highly customized trees. + + Newick format trees will be deposited into outdir (. by default). + """ + from jcvi.formats.fasta import translate + + p = OptionParser(build.__doc__) + p.add_argument( + "--longest", + action="store_true", + help="Get longest ORF, only works if no pep file, e.g. ESTs", + ) + p.add_argument( + "--nogblocks", + action="store_true", + help="don't use Gblocks to edit alignment", + ) + p.add_argument( + "--synonymous", + action="store_true", + help="extract synonymous sites of the alignment", + ) + p.add_argument( + "--fourfold", + action="store_true", + help="extract fourfold degenerate sites of the alignment", + ) + p.add_argument( + "--msa", + default="muscle", + choices=("clustalw", "muscle"), + help="software used to align the proteins", + ) + p.add_argument( + "--noneighbor", + action="store_true", + help="don't build NJ tree", + ) + p.add_argument( + "--ml", + default=None, + choices=("raxml", "phyml"), + help="software used to build ML tree", + ) + p.add_argument("--outgroup", help="path to file containing outgroup orders") + p.add_argument("--SH", help="path to reference Newick tree") + p.add_argument("--shout", default="SH_out.txt", help="SH output file name") + p.add_argument( + "--treefix", + action="store_true", + help="use TreeFix to rearrange ML tree", + ) + p.add_argument("--stree", help="path to species Newick tree") + p.add_argument( + "--smap", + help="path to smap file: gene_name_patternspecies_name", + ) + p.set_outdir() + + opts, args = p.parse_args(args) + gblocks = not opts.nogblocks + synonymous = opts.synonymous + fourfold = opts.fourfold + neighbor = not opts.noneighbor + outgroup = opts.outgroup + outdir = opts.outdir + + if len(args) == 1: + protein_file, dna_file = None, args[0] + elif len(args) == 2: + protein_file, dna_file = args + else: + print("Incorrect arguments", file=sys.stderr) + sys.exit(not 
p.print_help()) + + if opts.treefix: + stree = opts.stree + smap = opts.smap + assert stree and smap, "TreeFix requires stree and smap files." + opts.ml = "raxml" + + treedir = op.join(outdir, "tree") + mkdir(treedir) + + if not protein_file: + protein_file = dna_file + ".pep" + translate_args = [dna_file, "--outfile=" + protein_file] + if opts.longest: + translate_args += ["--longest"] + dna_file, protein_file = translate(translate_args) + + work_dir = op.join(outdir, "alignment") + mkdir(work_dir) + p_recs = list(SeqIO.parse(open(protein_file), "fasta")) + if opts.msa == "clustalw": + align_fasta = clustal_align_protein(p_recs, work_dir) + elif opts.msa == "muscle": + align_fasta = muscle_align_protein(p_recs, work_dir) + + n_recs = list(SeqIO.parse(open(dna_file), "fasta")) + mrtrans_fasta = run_mrtrans(align_fasta, n_recs, work_dir, outfmt="fasta") + + if not mrtrans_fasta: + logger.debug("pal2nal aborted. Cannot reliably build tree for %s", dna_file) + return + + codon_aln_fasta = mrtrans_fasta + if gblocks: + gb_fasta = run_gblocks(mrtrans_fasta) + codon_aln_fasta = gb_fasta if gb_fasta else codon_aln_fasta + + else: + if synonymous: + codon_aln_fasta = subalignment(mrtrans_fasta, "synonymous") + + if fourfold: + codon_aln_fasta = subalignment(mrtrans_fasta, "fourfold") + + if not neighbor and not opts.ml: + return codon_aln_fasta + + alignment = AlignIO.read(codon_aln_fasta, "fasta") + if len(alignment) <= 3: + raise ValueError("Too few seqs to build tree.") + + mkdir(op.join(treedir, "work")) + if neighbor: + out_file = op.join( + treedir, op.basename(dna_file).rsplit(".", 1)[0] + ".NJ.unrooted.dnd" + ) + try: + outfile, phy_file = build_nj_phylip( + alignment, outfile=out_file, outgroup=outgroup, work_dir=treedir + ) + except: + print("NJ tree cannot be built for {0}".format(dna_file)) + + if opts.SH: + reftree = opts.SH + querytree = outfile + SH_raxml(reftree, querytree, phy_file, shout=opts.shout) + + if opts.ml: + out_file = op.join( + treedir, 
op.basename(dna_file).rsplit(".", 1)[0] + ".ML.unrooted.dnd" + ) + + if opts.ml == "phyml": + try: + outfile, phy_file = build_ml_phyml( + alignment, outfile=out_file, work_dir=treedir + ) + except: + print("ML tree cannot be built for {0}".format(dna_file)) + + elif opts.ml == "raxml": + try: + outfile, phy_file = build_ml_raxml( + alignment, outfile=out_file, work_dir=treedir + ) + except: + print("ML tree cannot be built for {0}".format(dna_file)) + + if outgroup: + new_out_file = out_file.replace(".unrooted", "") + t = smart_reroot( + treefile=out_file, outgroupfile=outgroup, outfile=new_out_file + ) + if t == new_out_file: + sh("rm %s" % out_file) + outfile = new_out_file + + if opts.SH: + reftree = opts.SH + querytree = outfile + SH_raxml(reftree, querytree, phy_file, shout=opts.shout) + + if opts.treefix: + treefix_dir = op.join(treedir, "treefix") + assert mkdir(treefix_dir, overwrite=True) + + sh("cp {0} {1}/".format(outfile, treefix_dir)) + input = op.join(treefix_dir, op.basename(outfile)) + aln_file = input.rsplit(".", 1)[0] + ".fasta" + SeqIO.write(alignment, aln_file, "fasta") + + outfile = run_treefix( + input=input, + stree_file=stree, + smap_file=smap, + a_ext=".fasta", + o_ext=".dnd", + n_ext=".treefix.dnd", + ) + + return outfile + + +def _draw_trees( + trees, nrow=1, ncol=1, rmargin=0.3, iopts=None, outdir=".", shfile=None, **kwargs +): + """ + Draw one or multiple trees on one plot. 
+ """ + from jcvi.graphics.tree import draw_tree + + if shfile: + SHs = DictFile(shfile, delimiter="\t") + + ntrees = len(trees) + n = nrow * ncol + for x in range(int(ceil(float(ntrees) / n))): + fig = plt.figure(1, (iopts.w, iopts.h)) if iopts else plt.figure(1, (5, 5)) + root = fig.add_axes([0, 0, 1, 1]) + + xiv = 1.0 / ncol + yiv = 1.0 / nrow + xstart = list(np.arange(0, 1, xiv)) * nrow + ystart = list(chain(*zip(*[list(np.arange(0, 1, yiv))[::-1]] * ncol))) + for i in range(n * x, n * (x + 1)): + if i == ntrees: + break + ax = fig.add_axes([xstart[i % n], ystart[i % n], xiv, yiv]) + f = trees.keys()[i] + tree = trees[f] + try: + SH = SHs[f] + except: + SH = None + draw_tree( + ax, + tree, + rmargin=rmargin, + reroot=False, + supportcolor="r", + SH=SH, + **kwargs + ) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + format = iopts.format if iopts else "pdf" + dpi = iopts.dpi if iopts else 300 + if n == 1: + image_name = f.rsplit(".", 1)[0] + "." + format + else: + image_name = "trees{0}.{1}".format(x, format) + image_name = op.join(outdir, image_name) + savefig(image_name, dpi=dpi, iopts=iopts) + plt.clf() + + +def draw(args): + """ + %prog draw --input newicktrees [options] + + Draw phylogenetic trees into single or combined plots. + Input trees should be one of the following: + 1. single Newick format tree file + 2. a dir containing *ONLY* the tree files to be drawn + + Newick format: + http://evolution.genetics.washington.edu/phylip/newicktree.html + + This function wraps on jcvi.graphics.tree + This function is better used for trees generated by jcvi.apps.phylo (rooted + if possible). For drawing general Newick trees from external sources invoke + jcvi.graphics.tree directly, which also gives more drawing options. 
+ """ + trunc_name_options = ["headn", "oheadn", "tailn", "otailn"] + p = OptionParser(draw.__doc__) + p.add_argument( + "--input", + help="path to single input tree file or a dir " + "containing ONLY the input tree files", + ) + p.add_argument( + "--combine", + type=str, + default="1x1", + help="combine multiple trees into one plot in nrowxncol", + ) + p.add_argument( + "--trunc_name", + default=None, + help="Options are: {0}. " + "truncate first n chars, retains only first n chars, " + "truncate last n chars, retain only last chars. " + "n=1~99.".format(trunc_name_options), + ) + p.add_argument( + "--SH", + default=None, + help="path to a file containing SH test p-values in format:" + "tree_file_namep-values " + "This file can be generated with jcvi.apps.phylo build", + ) + p.add_argument( + "--scutoff", + default=50, + type=int, + help="cutoff for displaying node support, 0-100", + ) + p.add_argument( + "--barcode", + default=None, + help="path to seq/taxon name barcode mapping file: " + "barcodenew_name " + "This option is downstream of `--trunc_name`", + ) + p.add_argument( + "--leafcolorfile", + default=None, + help="path to a mapping file containing font colors " + "for the OTUs: leafnamecolor", + ) + p.set_outdir() + opts, args, iopts = p.set_image_options(figsize="8x6") + input = opts.input + outdir = opts.outdir + combine = opts.combine.split("x") + trunc_name = opts.trunc_name + SH = opts.SH + + mkdir(outdir) + if not input: + sys.exit(not p.print_help()) + elif op.isfile(input): + trees_file = input + treenames = [op.basename(input)] + elif op.isdir(input): + trees_file = op.join(outdir, "alltrees.dnd") + treenames = [] + for f in sorted(os.listdir(input)): + sh("cat {0}/{1} >> {2}".format(input, f, trees_file), log=False) + treenames.append(f) + else: + sys.exit(not p.print_help()) + + trees = OrderedDict() + tree = "" + i = 0 + for row in LineFile(trees_file, comment="#", load=True).lines: + if i == len(treenames): + break + if not len(row): + 
continue + + if ";" in row: + # sanity check + if row.index(";") != len(row) - 1: + ts = row.split(";") + for ii in range(len(ts) - 1): + ts[ii] += ";" + else: + ts = [row] + for t in ts: + if ";" in t: + tree += t + if tree: + trees[treenames[i]] = tree + tree = "" + i += 1 + else: + tree += t + else: + tree += row + + logger.debug("A total of {0} trees imported.".format(len(trees))) + sh("rm {0}".format(op.join(outdir, "alltrees.dnd"))) + + _draw_trees( + trees, + nrow=int(combine[0]), + ncol=int(combine[1]), + rmargin=0.3, + iopts=iopts, + outdir=outdir, + shfile=SH, + trunc_name=trunc_name, + scutoff=opts.scutoff, + barcodefile=opts.barcode, + leafcolorfile=opts.leafcolorfile, + ) + + +if __name__ == "__main__": + main() diff --git a/jcvi/apps/r.py b/jcvi/apps/r.py new file mode 100644 index 00000000..c4b60f59 --- /dev/null +++ b/jcvi/apps/r.py @@ -0,0 +1,82 @@ +""" +uses R for statistics and graphics +""" + +import sys + +from string import Template + +from ..formats.base import must_open + +from .base import ActionDispatcher, OptionParser, cleanup, sh + + +class RTemplate(object): + """ + Creates a R script and runs it + """ + + def __init__(self, template, parameters): + + self.template = Template(template) + self.parameters = parameters + + def run(self, clean=True): + """ + Create a temporary file and run it + """ + template = self.template + parameters = self.parameters + # write to a temporary R script + fw = must_open("tmp", "w") + path = fw.name + + fw.write(template.safe_substitute(**parameters)) + fw.close() + + sh("Rscript %s" % path) + if clean: + cleanup(path) + # I have no idea why using ggsave, there is one extra image + # generated, but here I remove it + rplotspdf = "Rplots.pdf" + cleanup(rplotspdf) + + +def main(): + + actions = (("rdotplot", "dot plot based on lastz rdotplot output"),) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def rdotplot(args): + """ + %prog rdotplotfile + + Dot plot to visualize relationship between two 
sequences, by plotting + .rdotplot file (often generated by LASTZ) + """ + p = OptionParser(rdotplot.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + dotplot_template = """ + dots <- read.table('$rdotplotfile', header=T) + png('$pngfile') + plot(dots, type='l') + dev.off() + """ + + (rdotplotfile,) = args + assert rdotplotfile.endswith(".rdotplot") + pngfile = rdotplotfile.replace(".rdotplot", ".png") + + rtemplate = RTemplate(dotplot_template, locals()) + rtemplate.run() + + +if __name__ == "__main__": + main() diff --git a/jcvi/apps/restriction.py b/jcvi/apps/restriction.py new file mode 100644 index 00000000..4ecb4bf0 --- /dev/null +++ b/jcvi/apps/restriction.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Procedure to cut genome using restriction enzymes. +""" +import sys + +from Bio.Restriction.Restriction import AllEnzymes, Analysis + +from ..formats.base import must_open +from ..formats.fasta import Fasta, SeqRecord, SeqIO + +from .base import ActionDispatcher, OptionParser, logger + + +def main(): + + actions = ( + ("fragment", "extract upstream and downstream seq of particular RE"), + ("digest", "digest FASTA file to map restriction site positions"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def digest(args): + """ + %prog digest fastafile NspI,BfuCI + + Digest fasta sequences to map restriction site positions. 
+ """ + p = OptionParser(digest.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, enzymes = args + enzymes = enzymes.split(",") + enzymes = [x for x in AllEnzymes if str(x) in enzymes] + f = Fasta(fastafile, lazy=True) + fw = must_open(opts.outfile, "w") + + header = ["Contig", "Length"] + [str(x) for x in enzymes] + print("\t".join(header), file=fw) + for name, rec in f.iteritems_ordered(): + row = [name, len(rec)] + for e in enzymes: + pos = e.search(rec.seq) + pos = "na" if not pos else "|".join(str(x) for x in pos) + row.append(pos) + print("\t".join(str(x) for x in row), file=fw) + + +def extract_full(rec, sites, flank, fw): + """ + Full extraction of seq flanking the sites. + """ + for s in sites: + newid = "{0}:{1}".format(rec.name, s) + left = max(s - flank, 0) + right = min(s + flank, len(rec)) + frag = rec.seq[left:right].strip("Nn") + newrec = SeqRecord(frag, id=newid, description="") + SeqIO.write([newrec], fw, "fasta") + + +def extract_ends(rec, sites, flank, fw, maxfragsize=800): + """ + Extraction of ends of fragments above certain size. 
+ """ + nsites = len(sites) + size = len(rec) + for i, s in enumerate(sites): + newid = "{0}:{1}".format(rec.name, s) + recs = [] + + if i == 0 or s - sites[i - 1] <= maxfragsize: + newidL = newid + "L" + left = max(s - flank, 0) + right = s + frag = rec.seq[left:right].strip("Nn") + recL = SeqRecord(frag, id=newidL, description="") + if i == 0 and s > maxfragsize: # Contig L-end + pass + else: + recs.append(recL) + + if i == nsites - 1 or sites[i + 1] - s <= maxfragsize: + newidR = newid + "R" + left = s + right = min(s + flank, size) + frag = rec.seq[left:right].strip("Nn") + recR = SeqRecord(frag, id=newidR, description="") + if i == nsites - 1 and size - s > maxfragsize: # Contig R-end + pass + else: + recs.append(recR) + + SeqIO.write(recs, fw, "fasta") + + +def fragment(args): + """ + %prog fragment fastafile enzyme + + Cut the fastafile using the specified enzyme, and grab upstream and + downstream nucleotide sequence along with the cut site. In this case, the + sequences extracted are: + + |- PstI + ============|=========== + (-------) + + Sometimes we need to limit the size of the restriction fragments, for + example the GBS protocol does not allow fragments larger than 800bp. + + |-PstI |- PstI |- PstI + ~~~====|=============|==========~~~~~~~===|============ + (---) (---) + + In this case, the second fragment is longer than 800bp, therefore the two + ends are NOT extracted, as in the first fragment. 
+ """ + p = OptionParser(fragment.__doc__) + p.add_argument( + "--flank", + default=150, + type=int, + help="Extract flanking bases of the cut sites", + ) + p.add_argument( + "--full", + default=False, + action="store_true", + help="The full extraction mode", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, enzyme = args + flank = opts.flank + assert flank > 0 + extract = extract_full if opts.full else extract_ends + tag = "full" if opts.full else "ends" + + assert enzyme in set(str(x) for x in AllEnzymes) + fragfastafile = fastafile.split(".")[0] + ".{0}.flank{1}.{2}.fasta".format( + enzyme, flank, tag + ) + enzyme = [x for x in AllEnzymes if str(x) == enzyme][0] + + f = Fasta(fastafile, lazy=True) + fw = open(fragfastafile, "w") + for name, rec in f.iteritems_ordered(): + a = Analysis([enzyme], rec.seq) + sites = a.full()[enzyme] + extract(rec, sites, flank, fw) + + logger.debug("Fragments written to `%s`.", fragfastafile) + + +if __name__ == "__main__": + main() diff --git a/jcvi/apps/softlink.py b/jcvi/apps/softlink.py new file mode 100644 index 00000000..ad055c0a --- /dev/null +++ b/jcvi/apps/softlink.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Procedure to touch and copy softlinks +""" +import os +import os.path as op +import sys + +from .base import ActionDispatcher, OptionParser, get_abs_path, logger + + +def main(): + + actions = ( + ("touch", "touch all the symlinks"), + ("cp", "cp all the symlinks to current folder"), + ("clean", "removes all the symlinks in current folder"), + ("size", "print the file sizes for the files pointed by symlinks"), + ("link", "link source to target based on a tabular file"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def lnsf(source, target, log=False): + # re-link the symlinks (similar to `ln -sf`) + if op.lexists(target): + os.unlink(target) + os.symlink(source, target) + if log: + logger.debug("{0} => 
{1}".format(source, target)) + + +def link(args): + """ + %prog link metafile + + Link source to target based on a tabular file. + """ + from jcvi.apps.base import mkdir + + p = OptionParser(link.__doc__) + p.add_argument("--dir", help="Place links in a subdirectory") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (meta,) = args + d = opts.dir + if d: + mkdir(d) + + fp = open(meta) + cwd = op.dirname(get_abs_path(meta)) + for row in fp: + source, target = row.split() + source = op.join(cwd, source) + if d: + target = op.join(d, target) + lnsf(source, target, log=True) + + +def touch(args): + """ + find . -type l | %prog touch + + Linux commands `touch` wouldn't modify mtime for links, this script can. + Use find to pipe in all the symlinks. + """ + p = OptionParser(touch.__doc__) + opts, args = p.parse_args(args) + fp = sys.stdin + + for link_name in fp: + link_name = link_name.strip() + if not op.islink(link_name): + continue + if not op.exists(link_name): + continue + + source = get_abs_path(link_name) + lnsf(source, link_name) + + +def clean(args): + """ + %prog clean + + Removes all symlinks from current folder + """ + p = OptionParser(clean.__doc__) + opts, args = p.parse_args(args) + + for link_name in os.listdir(os.getcwd()): + if not op.islink(link_name): + continue + logger.debug("remove symlink `{0}`".format(link_name)) + os.unlink(link_name) + + +def cp(args): + """ + find folder -type l | %prog cp + + Copy all the softlinks to the current folder, using absolute paths + """ + p = OptionParser(cp.__doc__) + fp = sys.stdin + + for link_name in fp: + link_name = link_name.strip() + if not op.exists(link_name): + continue + + source = get_abs_path(link_name) + link_name = op.basename(link_name) + if not op.exists(link_name): + os.symlink(source, link_name) + logger.debug(" => ".join((source, link_name))) + + +def size(args): + """ + find folder -type l | %prog size + + Get the size for all the paths that are 
pointed by the links + """ + from jcvi.utils.cbook import human_size + + p = OptionParser(size.__doc__) + fp = sys.stdin + + results = [] + for link_name in fp: + link_name = link_name.strip() + if not op.islink(link_name): + continue + + source = get_abs_path(link_name) + + link_name = op.basename(link_name) + filesize = op.getsize(source) + results.append((filesize, link_name)) + + # sort by descending file size + for filesize, link_name in sorted(results, reverse=True): + filesize = human_size(filesize, a_kilobyte_is_1024_bytes=True) + print("%10s\t%s" % (filesize, link_name), file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/jcvi/apps/uclust.py b/jcvi/apps/uclust.py new file mode 100644 index 00000000..1da53f33 --- /dev/null +++ b/jcvi/apps/uclust.py @@ -0,0 +1,1106 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Using VCLUST to derep, cluster, and make consensus from duplicate reads. +The VCLUST implementation borrows ideas and code from PyRAD. PyRAD link: + + +""" +import os.path as op +import sys + +from collections import defaultdict +from copy import deepcopy +from functools import partial +from itertools import groupby +from subprocess import Popen, PIPE, STDOUT +from tempfile import mkdtemp + +import numpy as np +import scipy +import scipy.stats +import scipy.optimize + +from more_itertools import grouper + +from ..formats.base import BaseFile, FileMerger, must_open, split +from ..formats.fasta import parse_fasta +from ..formats.fastq import fasta +from ..utils.orderedcollections import DefaultOrderedDict +from ..utils.table import write_csv + +from .base import ( + OptionParser, + ActionDispatcher, + cleanup, + datadir, + iglob, + listify, + logger, + mkdir, + need_update, + sh, +) + + +SEP = "//" +CONSTAG = ">CONSENS0" +BASES = "ACTGN_-" # CAUTION: DO NOT CHANGE THIS LINE +REAL = BASES[:4] +GAPS = BASES[-2:] +NBASES = len(BASES) +ACHEADER = """ +TAXON CHR POS REF_NT REF_ALLELE ALT_ALLELE REF_COUNT +ALT_COUNT OTHER_COUNT 
TOTAL_READS A G C T +READ_INS READ_DEL TOTAL_READS +""".split() +ACHEADER_NO_TAXON = ACHEADER[1:] + + +alleles = lambda x: (",".join(x).replace("-", "*") if x else "N") +getsize = lambda name: ( + 0 if ";" not in name else int(name.split(";")[1].replace("size=", "")) +) + + +class ClustFile(BaseFile): + def __init__(self, filename): + super().__init__(filename) + + def __iter__(self): + nstacks = 0 + fp = must_open(self.filename) + for tag, contents in groupby(fp, lambda row: row[0] == "/"): + if tag: + continue + data = Clust() + for name, seq in grouper(contents, 2): + name, seq = name.strip(), seq.strip() + nrep = getsize(name) + data.append((name, seq, nrep)) + yield data + nstacks += 1 + if nstacks % 10000 == 0: + logger.debug("{0} stacks parsed".format(nstacks)) + + +class Clust(list): + def __init__(self): + super().__init__(self) + + def __str__(self): + s = [] + for d in self: + s.append("\n".join(d[:2])) + return "\n".join(s) + "\n" + SEP + + +class ClustStore(BaseFile): + def __init__(self, consensfile): + super().__init__(consensfile) + binfile = consensfile + ".bin" + idxfile = consensfile + ".idx" + self.bin = np.fromfile(binfile, dtype=np.uint16) + assert self.bin.size % NBASES == 0 + + self.bin = self.bin.reshape((self.bin.size / NBASES, NBASES)) + self.index = {} + fp = open(idxfile) + for row in fp: + name, start, end = row.split() + start, end = int(start), int(end) + self.index[name.strip(">")] = (start, end) + + def __getitem__(self, name): + start, end = self.index[name] + return self.bin[start:end, :] + + +class AlleleCount(object): + """ + Each record represents a line in the .allele_count file + + Fields are: + # CHR POS REF_NT REF_ALLELE ALT_ALLELE REF_COUNT + # ALT_COUNT OTHER_COUNT TOTAL_READS A G C T + # READ_INS READ_DEL TOTAL_READS + """ + + def __init__(self, taxon, chr, pos, ref_allele, alt_allele, profile): + self.taxon = taxon + self.chr = chr + self.pos = pos + self.ref_nt = listify(ref_allele) + self.ref_allele = 
listify(ref_allele) + self.alt_allele = listify(alt_allele) + self.update(profile) + + def tostring(self, taxon=False): + ref_allele = alleles(self.ref_allele) + ar = [ + self.chr, + self.pos, + ref_allele, + ref_allele, + alleles(self.alt_allele), + self.ref_count, + self.alt_count, + self.other_count, + self.total_count, + self.A, + self.G, + self.C, + self.T, + self.read_ins, + self.read_del, + self.total_count, + ] + if taxon: + ar = [self.taxon] + ar + return "\t".join(str(x) for x in ar) + + def update(self, profile): + self.ref_count = sum(profile[BASES.index(x)] for x in self.ref_allele) + self.alt_count = sum(profile[BASES.index(x)] for x in self.alt_allele) + self.A, self.C, self.T, self.G, N, tgaps, gaps = profile + self.total_count = sum(profile) - tgaps + others = set(BASES) - set(self.ref_allele) - set(self.alt_allele) + self.other_count = sum(profile[BASES.index(x)] for x in others) - tgaps + self.read_ins = self.total_count if "-" in self.ref_allele else 0 + self.read_del = gaps + + def clear(self): + self.update([0] * NBASES) + + +class ClustStores(dict): + """ + ClustStores provides random access to any consensus read + """ + + def __init__(self, consensfiles): + super().__init__(self) + for cs in consensfiles: + name = op.basename(cs).split(".")[0] + self[name] = ClustStore(cs) + + +def main(): + + actions = ( + ("align", "align clustfile to clustSfile"), + ("estimateHE", "estimate heterozygosity and error rate for stacks"), + ("cluster", "cluster within samples"), + ("consensus", "call consensus bases within samples"), + ("mcluster", "cluster across samples"), + ("mconsensus", "call consensus bases across samples"), + ("stats", "generate table summarizing .stats files"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def stats(args): + """ + %prog stats folder + + Generate table summarizing .stats files. 
+ """ + p = OptionParser(stats.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (folder,) = args + statsfiles = iglob(folder, "*.stats") + after_equal = lambda x: x.split("=")[-1] + header = "Library Assembled_reads Contigs".split() + contents = [] + # label=M0096 total=7443 cnts=948 mean=7.851 std=35.96 + for statsfile in statsfiles: + fp = open(statsfile) + for row in fp: + if row.startswith("label="): + break + label, total, cnts = row.split()[:3] + label = after_equal(label) + reads = int(after_equal(total)) + contigs = int(after_equal(cnts)) + contents.append((label, reads, contigs)) + + all_labels, all_reads, all_contigs = zip(*contents) + contents.append(("SUM", sum(all_reads), sum(all_contigs))) + contents.append( + ("AVERAGE (per sample)", int(np.mean(all_reads)), int(np.mean(all_contigs))) + ) + contents.append( + ("MEDIAN (per sample)", int(np.median(all_reads)), int(np.median(all_contigs))) + ) + write_csv(header, contents, filename=opts.outfile) + + +def add_consensus_options(p): + p.add_argument("--prefix", default="mcluster", help="Output prefix") + p.add_argument("--minlength", default=30, type=int, help="Min contig length") + p.add_argument("--mindepth", default=3, type=int, help="Min depth for each stack") + p.add_argument("--minsamp", default=3, type=int, help="Min number of samples") + + +def find_pctid(consensusfiles): + pctid = min( + [int(op.basename(x).split(".")[-2].replace("P", "")) for x in consensusfiles] + ) + logger.debug("Set pctid={0}".format(pctid)) + return pctid + + +def mcluster(args): + """ + %prog mcluster *.consensus + + Cluster across samples using consensus sequences. 
+ """ + p = OptionParser(mcluster.__doc__) + add_consensus_options(p) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + consensusfiles = args + minlength = opts.minlength + cpus = opts.cpus + pf = opts.prefix + pctid = find_pctid(consensusfiles) + + pf += ".P{0}".format(pctid) + consensusfile = pf + ".consensus.fasta" + if need_update(consensusfiles, consensusfile): + fw_cons = must_open(consensusfile, "w") + totalseqs = 0 + for cf in consensusfiles: + nseqs = 0 + s = op.basename(cf).split(".")[0] + for name, seq in parse_fasta(cf): + name = ".".join((s, name)) + print(">{0}\n{1}".format(name, seq), file=fw_cons) + nseqs += 1 + logger.debug("Read `{0}`: {1} seqs".format(cf, nseqs)) + totalseqs += nseqs + logger.debug("Total: {0} seqs".format(totalseqs)) + fw_cons.close() + + userfile = pf + ".u" + notmatchedfile = pf + ".notmatched" + if need_update(consensusfile, userfile): + cluster_smallmem( + consensusfile, userfile, notmatchedfile, minlength, pctid, cpus + ) + + clustfile = pf + ".clust" + if need_update((consensusfile, userfile, notmatchedfile), clustfile): + makeclust(consensusfile, userfile, notmatchedfile, clustfile) + + clustSfile = pf + ".clustS" + if need_update(clustfile, clustSfile): + parallel_musclewrap(clustfile, cpus, minsamp=opts.minsamp) + + +def makeloci(clustSfile, store, prefix, minsamp=3, pctid=95): + C = ClustFile(clustSfile) + pf = clustSfile.rsplit(".", 1)[0] + locifile = pf + ".loci" + finalfastafile = pf + ".final.fasta" + fw = open(locifile, "w") + fw_finalfasta = open(finalfastafile, "w") + locid = 0 + AC = [] + diffratio = 1 - pctid / 100.0 + for data in C: + names, seqs, nreps = zip(*data) + # Strip off cut site + seqs = [x.upper() for x in seqs] + fname = "{0}_{1}".format(prefix, locid) + ntaxa = sum(1 for s, nrep in zip(seqs, nreps) if nrep) + + # Record variable sites + cons_name, cons_seq, cons_nrep = get_seed(data) + ncols = len(cons_seq) + snpsite = [" "] * ncols + 
seed_ungapped_pos = [] + ref_alleles = [] + alt_alleles = [] + ungapped_i = 0 + for i in range(ncols): + ref_allele = cons_seq[i] + ref_alleles.append(ref_allele) + seed_ungapped_pos.append(ungapped_i) + if ref_allele in GAPS: # Skip if reference is a deletion + alt_alleles.append([]) + continue + else: + ungapped_i += 1 + + site = [s[i] for s, nrep in zip(seqs, nreps) if nrep] # Column slice in MSA + reals = [x for x in site if x in REAL] + + realcounts = sorted([(reals.count(x), x) for x in REAL], reverse=True) + nreals = sum(x[0] for x in realcounts) + refcount = realcounts[0][0] + altcount = realcounts[1][0] + # Select SNP column + if ( + altcount >= minsamp + and nreals >= ntaxa / 2 + and (refcount + altcount) >= nreals * 0.9 + ): + snpsite[i] = "*" + if snpsite.count("*") > ncols * diffratio: + snpsite = [" "] * ncols + nonzeros = [x for c, x in realcounts if (c and x != ref_allele)] + alt_alleles.append(nonzeros[:1]) # Keep only two alleles + + assert len(seed_ungapped_pos) == ncols + assert len(ref_alleles) == ncols + assert len(alt_alleles) == ncols + cons_seq = cons_seq.strip("_N").replace("-", "") + + for name, seq in zip(names, seqs): + name = name.strip(">") + if "." 
not in name: # CONSENS0 + continue + taxon, readname = name.split(".", 1) + profile = store[taxon][readname] + assert len(seq) == ncols + + ungapped_i = 0 + gap_p = [0, 0, 0, 0, 0, 0, sum(profile[0])] + for pos, ref_allele, alt_allele, r, ispoly in zip( + seed_ungapped_pos, ref_alleles, alt_alleles, seq, snpsite + ): + if r in GAPS: # insertion in ref, deletion in read + p = gap_p + else: + p = profile[ungapped_i] + ungapped_i += 1 + + if ispoly != "*": + continue + + assert cons_seq[pos] == ref_allele # Sanity check + ac = AlleleCount( + taxon, + fname, + pos + 1, # 1-based coordinate + ref_allele, + alt_allele, + p, + ) + AC.append(ac) + + longname = max(len(x) for x in names) + longname = max(len(fname) + 3, longname) + 1 + print("// {0}".format(fname).ljust(longname) + "".join(snpsite) + "|", file=fw) + for name, seq, nrep in data: + print(name.ljust(longname) + seq, file=fw) + + print( + ">{0} with {1} sequences\n{2}".format(fname, sum(nreps), cons_seq), + file=fw_finalfasta, + ) + locid += 1 + + logger.debug("Stacks written to `{0}`".format(locifile)) + logger.debug( + "Final consensus sequences written to `{0}` (n={1})".format( + finalfastafile, locid + ) + ) + fw.close() + fw_finalfasta.close() + + return AC + + +def mconsensus(args): + """ + %prog mconsensus *.consensus + + Call consensus along the stacks from cross-sample clustering. 
+ """ + p = OptionParser(mconsensus.__doc__) + p.add_argument( + "--allele_counts", + default="allele_counts", + help="Directory to generate allele counts", + ) + add_consensus_options(p) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + consensusfiles = args + prefix = opts.prefix + acdir = opts.allele_counts + store = ClustStores(consensusfiles) + pctid = find_pctid(consensusfiles) + pf = prefix + ".P{0}".format(pctid) + + clustSfile = pf + ".clustS" + AC = makeloci(clustSfile, store, prefix, minsamp=opts.minsamp, pctid=pctid) + + mkdir(acdir) + acfile = pf + ".allele_counts" + fw = open(acfile, "w") + seen = DefaultOrderedDict(list) # chr, pos => taxa + print("# " + "\t".join(ACHEADER), file=fw) + # Sort allele counts into separate files + for ac in AC: + chrpos = ac.chr, ac.pos + seen[chrpos].append(ac) + print(ac.tostring(taxon=True), file=fw) + fw.close() + + logger.debug("Populate all taxa and instantiate empty vector if missing") + all_taxa = set([op.basename(x).split(".")[0] for x in consensusfiles]) + taxon_to_ac = defaultdict(list) + for chrpos, aclist in seen.items(): + included_taxa = set([x.taxon for x in aclist]) + missing_taxa = all_taxa - included_taxa + template = deepcopy(aclist[0]) + template.clear() + for ac in aclist: + taxon_to_ac[ac.taxon].append(ac) + for tx in missing_taxa: + taxon_to_ac[tx].append(template) + + logger.debug("Write allele counts for all taxa") + for tx, aclist in sorted(taxon_to_ac.items()): + tx_acfile = op.join(acdir, tx + ".allele_counts") + fw = open(tx_acfile, "w") + print("# " + "\t".join(ACHEADER_NO_TAXON), file=fw) + for ac in aclist: + print(ac.tostring(), file=fw) + fw.close() + logger.debug("Written {0} sites in `{1}`".format(len(aclist), tx_acfile)) + + +def get_seed(data): + if len(data) == 1: + return data[0] + + for name, seq, nrep in data[::-1]: + if name == CONSTAG: + break + return name, seq, nrep + + +def compute_consensus(fname, cons_seq, RAD, S, totalsize, 
mindepth=3, verbose=False): + # Strip N's from either end and gaps + gaps = set() + fixed = set() + assert len(cons_seq) == len(RAD) + + # Correct consensus by converting to top voting bases + shortcon = "" + for i, (base, site) in enumerate(zip(cons_seq, RAD)): + good = site[:4] + [site[-1]] + # Handles terminal regions delete columns if consensus is a terminal gap, + # or bases plus 'internal' gaps not covering half of the total abundance + if base == "_" or sum(good) < max(mindepth, totalsize / 2): + gaps.add(i) + continue + # Check count for original base for possible ties + n0 = site[BASES.index(base)] + n1 = max(good) # Base with highest count + if n1 > n0: + base = BASES[site.index(n1)] + fixed.add(i) + if base in GAPS: + gaps.add(i) + continue + shortcon += base + + shortRAD = [j for (i, j) in enumerate(RAD) if i not in gaps] + assert len(shortcon) == len(shortRAD) + + if verbose: + print(fname) + print("\n".join(["{0} {1}".format(*x) for x in S])) + display = "" + basecounts = [""] * NBASES + for i, (b, p) in enumerate(zip(cons_seq, RAD)): + display += ("+" if i in fixed else b) if i not in gaps else " " + for j, k in enumerate(p): + basecounts[j] += (str(k) if k < 10 else "#") if k else "." + print("=" * len(cons_seq)) + print(cons_seq) + print(display) + print("=" * len(cons_seq)) + for j, k in enumerate(basecounts): + if BASES[j] == "N": + continue + print("".join(k)) + print("=" * len(cons_seq)) + + return shortcon, shortRAD + + +def consensus(args): + """ + %prog consensus clustSfile + + Call consensus along the stacks. Tabulate bases at each site, tests for + errors according to error rate, calls consensus. 
+ """ + p = OptionParser(consensus.__doc__) + p.add_argument( + "--ploidy", default=2, type=int, help="Number of haplotypes per locus" + ) + add_consensus_options(p) + p.set_verbose() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (clustSfile,) = args + pf = clustSfile.rsplit(".", 1)[0] + mindepth = opts.mindepth + minlength = opts.minlength + verbose = opts.verbose + + C = ClustFile(clustSfile) + output = [] + bins = [] + indices = [] + start = end = 0 # Index into base count array + for data in C: + names, seqs, nreps = zip(*data) + total_nreps = sum(nreps) + # Depth filter + if total_nreps < mindepth: + continue + + first_name, first_seq, first_nrep = data[0] + fname = first_name.split(";")[0] + ";size={0};".format(total_nreps) + cons_name, cons_seq, cons_nrep = get_seed(data) + if len(data) > 1 and cons_name != CONSTAG: + logger.debug("Tag {0} not found in cluster {1}".format(CONSTAG, cons_name)) + + # List for sequence data + S = [(seq, nrep) for name, seq, nrep in data if nrep] + # Pileups for base counting + RAD = stack(S) + + if len(data) == 1: # No computation needed + output.append((fname, seq)) + bins.extend(RAD) + start = end + end += len(seq) + indices.append((fname, start, end)) + continue + + shortcon, shortRAD = compute_consensus( + fname, cons_seq, RAD, S, total_nreps, mindepth=mindepth, verbose=verbose + ) + if len(shortcon) < minlength: + shortcon, shortRAD = compute_consensus( + fname, + first_seq, + RAD, + S, + total_nreps, + mindepth=mindepth, + verbose=verbose, + ) + + if len(shortcon) < minlength: # Stop trying + continue + + output.append((fname, shortcon)) + bins.extend(shortRAD) + + start = end + end += len(shortcon) + indices.append((fname, start, end)) + + consensfile = pf + ".consensus" + consens = open(consensfile, "w") + for k, v in output: + print("\n".join((k, v)), file=consens) + consens.close() + logger.debug("Consensus sequences written to `{0}`".format(consensfile)) + + binfile = 
consensfile + ".bin" + bins = np.array(bins, dtype=np.uint32) + ulimit = 65535 + bins[bins > ulimit] = ulimit + bins = np.array(bins, dtype=np.uint16) # Compact size + bins.tofile(binfile) + logger.debug("Allele counts written to `{0}`".format(binfile)) + + idxfile = consensfile + ".idx" + fw = open(idxfile, "w") + for fname, start, end in indices: + print("\t".join(str(x) for x in (fname, start, end)), file=fw) + fw.close() + logger.debug("Serializing indices to `{0}`".format(idxfile)) + + return consensfile, binfile, idxfile + + +def stack(S): + """ + From list of bases at a site D, make counts of bases + """ + S, nreps = zip(*S) + S = np.array([list(x) for x in S]) + rows, cols = S.shape + counts = [] + for c in range(cols): + freq = [0] * NBASES + for b, nrep in zip(S[:, c], nreps): + freq[BASES.index(b)] += nrep + counts.append(freq) + return counts + + +def get_left_right(seq): + """ + Find position of the first and last base + """ + cseq = seq.strip(GAPS) + leftjust = seq.index(cseq[0]) + rightjust = seq.rindex(cseq[-1]) + + return leftjust, rightjust + + +def cons(f, mindepth): + """ + Makes a list of lists of reads at each site + """ + C = ClustFile(f) + for data in C: + names, seqs, nreps = zip(*data) + total_nreps = sum(nreps) + # Depth filter + if total_nreps < mindepth: + continue + + S = [] + for name, seq, nrep in data: + # Append sequence * number of dereps + S.append([seq, nrep]) + + # Make list for each site in sequences + res = stack(S) + yield [x[:4] for x in res if sum(x[:4]) >= mindepth] + + +def makeP(N): + # Make list of freq. 
for BASES + sump = float(sum([sum(i) for i in N])) + if sump: + p1 = sum([i[0] for i in N]) / sump + p2 = sum([i[1] for i in N]) / sump + p3 = sum([i[2] for i in N]) / sump + p4 = sum([i[3] for i in N]) / sump + else: + p1 = p2 = p3 = p4 = 0.0 + return [p1, p2, p3, p4] + + +def makeC(N): + """ + Makes a dictionary with counts of base counts [x,x,x,x]:x, + speeds up Likelihood calculation + """ + C = defaultdict(int) + for d in N: + C[tuple(d)] += 1 + + return [i for i in C.items() if (0, 0, 0, 0) not in i] + + +def L1(E, P, N): + # Probability of homozygous + h = [] + s = sum(N) + for i, l in enumerate(N): + p = P[i] + b = scipy.stats.binom.pmf(s - l, s, E) + h.append(p * b) + return sum(h) + + +def L2(E, P, N): + # Probability of heterozygous + h = [] + s = sum(N) + for l, i in enumerate(N): + for j, k in enumerate(N): + if j > l: + one = 2.0 * P[l] * P[j] + two = scipy.stats.binom.pmf(s - i - k, s, (2.0 * E) / 3.0) + three = scipy.stats.binom.pmf(i, k + i, 0.5) + four = 1.0 - (sum([q**2.0 for q in P])) + h.append(one * two * (three / four)) + return sum(h) + + +def totlik(E, P, H, N): + # Total probability + lik = ((1 - H) * L1(E, P, N)) + (H * L2(E, P, N)) + return lik + + +def LL(x0, P, C): + # Log likelihood score given values [H, E] + H, E = x0 + L = [] + if H <= 0.0 or E <= 0.0: + r = np.exp(100) + else: + for i in C: + ll = totlik(E, P, H, i[0]) + if ll > 0: + L.append(i[1] * np.log(ll)) + r = -sum(L) + return r + + +def estimateHE(args): + """ + %prog estimateHE clustSfile + + Estimate heterozygosity (H) and error rate (E). Idea borrowed heavily from + the PyRad paper. 
+ """ + p = OptionParser(estimateHE.__doc__) + add_consensus_options(p) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (clustSfile,) = args + HEfile = clustSfile.rsplit(".", 1)[0] + ".HE" + if not need_update(clustSfile, HEfile, warn=True): + return HEfile + + D = [] + for d in cons(clustSfile, opts.mindepth): + D.extend(d) + + logger.debug("Computing base frequencies ...") + P = makeP(D) + C = makeC(D) + logger.debug("Solving log-likelihood function ...") + x0 = [0.01, 0.001] # initital values + H, E = scipy.optimize.fmin(LL, x0, args=(P, C)) + + fw = must_open(HEfile, "w") + print(H, E, file=fw) + fw.close() + + return HEfile + + +def alignfast(names, seqs): + """ + Performs MUSCLE alignments on cluster and returns output as string + """ + matfile = op.join(datadir, "blosum80.mat") + cmd = "poa -read_fasta - -pir stdout {0} -tolower -silent -hb -fuse_all".format( + matfile + ) + p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True) + s = "" + for i, j in zip(names, seqs): + s += "\n".join((i, j)) + "\n" + return p.communicate(s)[0] + + +def replace_terminal(seq): + leftjust, rightjust = get_left_right(seq) + seq = ( + "_" * leftjust + + seq[leftjust : rightjust + 1] + + "_" * (len(seq) - rightjust - 1) + ) + return seq + + +def sortalign(stringnames): + G = stringnames.split("\n>") + aligned = [ + ( + ">" + i.split("\n")[0].strip(">"), + replace_terminal("".join(i.split("\n")[1:]).upper()), + ) + for i in G + ] + return aligned + + +def parallel_musclewrap(clustfile, cpus, minsamp=0): + musclewrap_minsamp = partial(musclewrap, minsamp=minsamp) + if cpus == 1: + return musclewrap_minsamp(clustfile) + + from jcvi.apps.grid import Jobs + + outdir = mkdtemp(dir=".") + fs = split([clustfile, outdir, str(cpus), "--format=clust"]) + g = Jobs(musclewrap_minsamp, fs.names) + g.run() + + clustnames = [x.replace(".clust", ".clustS") for x in fs.names] + clustSfile = clustfile.replace(".clust", 
".clustS") + FileMerger(clustnames, outfile=clustSfile).merge() + cleanup(outdir) + + +def filter_samples(names, seqs, sep="."): + """ + When there are uncollapsed contigs within the same sample, only retain the + first seq, or the seq that is most abundant (with cluster_size). + """ + seen = set() + filtered_names, filtered_seqs = [], [] + for name, seq in zip(names, seqs): + samp = name.split(sep, 1)[0] + if samp in seen: + continue + seen.add(samp) + filtered_names.append(name) + filtered_seqs.append(seq) + + nfiltered, nnames = len(filtered_names), len(names) + assert nfiltered == len(seen) + + return filtered_names, filtered_seqs, seen + + +def musclewrap(clustfile, minsamp=0): + cnts = 0 + C = ClustFile(clustfile) + clustSfile = clustfile.replace(".clust", ".clustS") + fw = open(clustSfile, "w") + for data in C: + STACK = Clust() + names = [] + seqs = [] + names, seqs, nreps = zip(*data) + if minsamp: # Filter based on samples, applicable in mcluster() + names, seqs, samples = filter_samples(names, seqs) + if len(samples) < minsamp: + continue + else: + names, seqs = names[:256], seqs[:256] # Reduce high coverage data + + if len(names) == 1: + STACK.append((names[0], seqs[0])) + else: + stringnames = alignfast(names, seqs) + aligned = sortalign(stringnames) + # Reorder keys by derep number + D1 = [(getsize(name), name, seq) for name, seq in aligned] + D1.sort(key=lambda x: (-x[0], x[1])) + for size, name, seq in D1: + STACK.append((name, seq)) + + if STACK: + print(STACK, file=fw) + cnts += 1 + + fw.close() + + +def makestats(clustSfile, statsfile, mindepth): + C = ClustFile(clustSfile) + depth = [] + for data in C: + d = 0 + for name, seq, nrep in data: + d += nrep + depth.append(d) + namecheck = op.basename(clustSfile).split(".")[0] + if depth: + me = round(np.mean(depth), 3) + std = round(np.std(depth), 3) + else: + me = std = 0.0 + out = dict(label=namecheck, total=sum(depth), cnts=len(depth), mean=me, std=std) + header = "label total cnts mean 
std".split() + + bins = [0, 5, 10, 15, 20, 25, 30, 35, 40, 50, 100, 250, 500, 99999] + ohist, edges = np.histogram(depth, bins) + hist = [float(i) / sum(ohist) for i in ohist] + hist = [int(round(i * 30)) for i in hist] + + logger.debug("Sample {0} finished, {1} loci".format(clustSfile, len(depth))) + + fw = open(statsfile, "w") + print("# Params: mindepth={0}".format(mindepth), file=fw) + print(" ".join("{0}={1}".format(k, out[k]) for k in header), file=fw) + print("\nbins\tdepth_histogram\tcnts", file=fw) + print(" :\t0------------50-------------100%", file=fw) + + for i, j, k in zip(edges, hist, ohist): + firststar = " " + if k > 0: + firststar = "*" + print(i, "\t", firststar + "*" * j + " " * (34 - j), k, file=fw) + fw.close() + + +def makeclust(derepfile, userfile, notmatchedfile, clustfile, mindepth=3): + D = dict(parse_fasta(derepfile)) + U = defaultdict(list) # Clusters + fp = open(userfile) + for row in fp: + query, target, id, qcov, tcov = row.rstrip().split("\t") + U[target].append((query, getsize(query), float(id) * float(qcov) * float(tcov))) + + fw = open(clustfile, "w") + for key, members in U.items(): + keysize = getsize(key) + members.sort(key=lambda x: (-x[1], -x[2])) + totalsize = keysize + sum(x[1] for x in members) + if totalsize < mindepth: + continue + + # Recruit cluster members + seqs = [(">" + key, D[key])] + for name, size, id in members: + seqs.append((">" + name, D[name])) + + seq = "\n".join("\n".join(x) for x in seqs) + print("\n".join((seq, SEP)), file=fw) + + I = dict(parse_fasta(notmatchedfile)) + singletons = set(I.keys()) - set(U.keys()) + for key in singletons: + if getsize(key) < mindepth: + continue + print("\n".join((">" + key, I[key], SEP)), file=fw) + fw.close() + + +def derep(fastafile, derepfile, minlength, cpus, usearch="vsearch"): + cmd = usearch + " -minseqlength {0}".format(minlength) + cmd += " -derep_fulllength {0}".format(fastafile) + cmd += " -output {0} -sizeout".format(derepfile) + cmd += " -threads 
{0}".format(cpus) + sh(cmd) + + +def cluster_smallmem( + derepfile, + userfile, + notmatchedfile, + minlength, + pctid, + cpus, + cov=0.8, + usearch="vsearch", +): + identity = pctid / 100.0 + cmd = usearch + " -minseqlength {0}".format(minlength) + cmd += " -cluster_size {0}".format(derepfile) + cmd += " -id {0}".format(identity) + cmd += " -mincols {0}".format(minlength) + cmd += " -query_cov {0}".format(cov) + cmd += " -target_cov {0}".format(cov) + cmd += " -userout {0}".format(userfile) + cmd += " -userfields query+target+id+qcov+tcov" + cmd += " -maxaccepts 1 -maxrejects 16" # Decrease maxrejects for speed + cmd += " -usersort -sizein" + cmd += " -notmatched {0}".format(notmatchedfile) + cmd += " -threads {0}".format(cpus) + sh(cmd) + + +def cluster(args): + """ + %prog cluster prefix fastqfiles + + Use `vsearch` to remove duplicate reads. This routine is heavily influenced + by PyRAD: . + """ + p = OptionParser(cluster.__doc__) + add_consensus_options(p) + p.set_align(pctid=95) + p.set_outdir() + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + prefix = args[0] + fastqfiles = args[1:] + cpus = opts.cpus + pctid = opts.pctid + mindepth = opts.mindepth + minlength = opts.minlength + fastafile, qualfile = fasta( + fastqfiles + + [ + "--seqtk", + "--outdir={0}".format(opts.outdir), + "--outfile={0}".format(prefix + ".fasta"), + ] + ) + + prefix = op.join(opts.outdir, prefix) + pf = prefix + ".P{0}".format(pctid) + derepfile = prefix + ".derep" + if need_update(fastafile, derepfile): + derep(fastafile, derepfile, minlength, cpus) + + userfile = pf + ".u" + notmatchedfile = pf + ".notmatched" + if need_update(derepfile, userfile): + cluster_smallmem(derepfile, userfile, notmatchedfile, minlength, pctid, cpus) + + clustfile = pf + ".clust" + if need_update((derepfile, userfile, notmatchedfile), clustfile): + makeclust(derepfile, userfile, notmatchedfile, clustfile, mindepth=mindepth) + + clustSfile = pf + 
".clustS" + if need_update(clustfile, clustSfile): + parallel_musclewrap(clustfile, cpus) + + statsfile = pf + ".stats" + if need_update(clustSfile, statsfile): + makestats(clustSfile, statsfile, mindepth=mindepth) + + +def align(args): + """ + %prog align clustfile + + Align clustfile to clustSfile. Useful for benchmarking aligners. + """ + p = OptionParser(align.__doc__) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (clustfile,) = args + parallel_musclewrap(clustfile, opts.cpus) + + +if __name__ == "__main__": + main() diff --git a/jcvi/apps/uniprot.py b/jcvi/apps/uniprot.py new file mode 100644 index 00000000..c1ea5668 --- /dev/null +++ b/jcvi/apps/uniprot.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Programatically accessing UniprotKB to get data from a list of queries +""" +import os.path as op +import sys +import time + +from urllib.parse import urlencode +from urllib.request import Request, urlopen +from urllib.error import HTTPError, URLError + +from ..formats.base import must_open + +from .base import ActionDispatcher, OptionParser, logger + + +uniprot_url = "http://www.uniprot.org/uniprot/" + +valid_formats = [ + "html", + "tab", + "xls", + "fasta", + "gff", + "txt", + "xml", + "rdf", + "list", + "rss", +] +valid_columns = [ + "citation", + "clusters", + "comments", + "database", + "domains", + "domain", + "ec", + "id", + "entry name", + "existence", + "families", + "features", + "genes", + "go", + "go-id", + "interpro", + "interactor", + "keywords", + "keyword-id", + "last-modified", + "length", + "organism", + "organism-id", + "pathway", + "protein names", + "reviewed", + "score", + "sequence", + "3d", + "subcellular locations", + "taxon", + "tools", + "version", + "virus hosts", +] + +valid_column_formats = ["tab", "xls"] +valid_include_formats = ["fasta", "rdf"] + + +def main(): + + actions = (("fetch", "fetch records from uniprot. 
input is a list of query terms"),) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def fetch(args): + """ + %prog fetch "query" + OR + %prog fetch queries.txt + + Please provide a UniProt compatible `query` to retrieve data. If `query` contains + spaces, please remember to "quote" it. + + You can also specify a `filename` which contains queries, one per line. + + Follow this syntax + to query any of the documented fields + """ + import re + import csv + + p = OptionParser(fetch.__doc__) + + p.add_argument( + "--format", + default="tab", + choices=valid_formats, + help="download format", + ) + p.add_argument( + "--columns", + default="entry name, protein names, genes,organism", + help="columns to download, if --format is `tab` or `xls`", + ) + p.add_argument( + "--include", + default=False, + action="store_true", + help="Include isoforms when --format is `fasta` or include `description` when --format is `rdf`.", + ) + p.add_argument( + "--limit", + default=10, + type=int, + help="Max number of results to retrieve", + ) + p.add_argument( + "--offset", + default=0, + type=int, + help="Offset of first result, used with --limit", + ) + p.add_argument( + "--skipcheck", + default=False, + action="store_true", + help="Turn off prompt to check file existence", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (query,) = args + url_params = {} + if op.exists(query): + pf = query.rsplit(".", 1)[0] + list_of_queries = [row.strip() for row in open(query)] + else: + # the query is the search term + pf = query.strip().strip('"') + list_of_queries = [pf] + pf = re.sub(r"\s+", "_", pf) + + assert len(list_of_queries) > 0, "Please provide atleast one input query" + + url_params["format"] = opts.format + + if opts.columns and opts.format in valid_column_formats: + reader = csv.reader([opts.columns], skipinitialspace=True) + cols = [col for r in reader for col in r] + for col in cols: + assert ( + col in valid_columns + ), 
"Column '{0}' is not a valid. Allowed options are {1}".format( + col, valid_columns + ) + url_params["columns"] = ",".join(cols) + + if opts.include and opts.format in valid_include_formats: + url_params["include"] = "yes" + + url_params["limit"] = opts.limit + url_params["offset"] = opts.offset + + outfile = "{0}.{1}".format(pf, opts.format) + + # If noprompt, will not check file existence + fw = must_open(outfile, "w", checkexists=True, skipcheck=opts.skipcheck) + if fw is None: + return + + seen = set() + for query in list_of_queries: + if query in seen: + logger.error("Duplicate query ({0}) found".format(query)) + continue + + url_params["query"] = query + + data = urlencode(url_params) + try: + request = Request(uniprot_url, data) + response = urlopen(request) + except (HTTPError, URLError, RuntimeError, KeyError) as e: + logger.error(e) + logger.debug("wait 5 seconds to reconnect...") + time.sleep(5) + + page = response.read() + if not page: + logger.error("query `{0}` yielded no results".format(query)) + continue + + print(page, file=fw) + + seen.add(query) + + if seen: + print( + "A total of {0} out of {1} queries returned results.".format( + len(seen), len(list_of_queries) + ), + file=sys.stderr, + ) + + +if __name__ == "__main__": + main() diff --git a/jcvi/apps/vecscreen.py b/jcvi/apps/vecscreen.py new file mode 100644 index 00000000..a1c97b62 --- /dev/null +++ b/jcvi/apps/vecscreen.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Run through NCBI vecscreen on a local machine. 
+""" +import os.path as op +import sys + +from ..formats.base import must_open +from ..formats.blast import BlastLine +from ..formats.fasta import tidy +from ..utils.range import range_merge + +from .align import run_vecscreen, run_megablast +from .base import ActionDispatcher, OptionParser, download, sh + +ECOLI_URL = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/019/425/GCF_000019425.1_ASM1942v1/GCF_000019425.1_ASM1942v1_genomic.fna.gz" +UNIVEC_URL = "ftp://ftp.ncbi.nih.gov/pub/UniVec/UniVec_Core" + + +def main(): + + actions = (("mask", "mask the contaminants"),) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def is_internet_file(url): + """Return if url starts with http://, https://, or ftp://. + + Args: + url (str): URL of the link + """ + return ( + url.startswith("http://") + or url.startswith("https://") + or url.startswith("ftp://") + ) + + +def mask(args): + """ + %prog mask fastafile + + Mask the contaminants. By default, this will compare against UniVec_Core and + Ecoli.fasta. Merge the contaminant results, and use `maskFastaFromBed`. Can + perform FASTA tidy if requested. 
+ """ + p = OptionParser(mask.__doc__) + p.add_argument( + "--db", + default=ECOLI_URL, + help="Contaminant db other than Ecoli K12, will download if file starts with http://, https://, or ftp://", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + db = opts.db + assert op.exists(fastafile) + + outfastafile = fastafile.rsplit(".", 1)[0] + ".masked.fasta" + vecbedfile = blast([fastafile]) + ecolifile = ( + download(db, filename="Ecoli.fasta", handle_gzip=True) + if is_internet_file(db) + else db + ) + assert op.exists(ecolifile) + ecolibedfile = blast([fastafile, "--db={0}".format(ecolifile)]) + + cmd = "cat {0} {1}".format(vecbedfile, ecolibedfile) + cmd += " | sort -k1,1 -k2,2n" + cmd += " | mergeBed -c 4 -o distinct -d 100 -i stdin" + cmd += " | maskFastaFromBed -fi {0} -bed stdin -fo {1}".format( + fastafile, outfastafile + ) + sh(cmd) + + return tidy([outfastafile]) + + +def blast(args): + """ + %prog blast fastafile + + Run BLASTN against database (default is UniVec_Core). Output .bed format + on the vector/contaminant ranges. 
+ """ + p = OptionParser(blast.__doc__) + p.add_argument( + "--dist", + default=100, + type=int, + help="Merge adjacent HSPs separated by", + ) + p.add_argument("--db", help="Use a different database rather than UniVec_Core") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + fastaprefix = fastafile.split(".", 1)[0] + + univec = opts.db or download(UNIVEC_URL) + uniprefix = univec.split(".", 1)[0] + + fastablast = fastaprefix + ".{0}.blast".format(uniprefix) + + prog = run_megablast if opts.db else run_vecscreen + prog(infile=fastafile, outfile=fastablast, db=univec, pctid=95, hitlen=50) + + fp = open(fastablast) + ranges = [] + for row in fp: + b = BlastLine(row) + ranges.append((b.query, b.qstart, b.qstop)) + + merged_ranges = range_merge(ranges, dist=opts.dist) + bedfile = fastaprefix + ".{0}.bed".format(uniprefix) + fw = must_open(bedfile, "w") + for seqid, start, end in merged_ranges: + print("\t".join(str(x) for x in (seqid, start - 1, end, uniprefix)), file=fw) + + return bedfile + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/__init__.py b/jcvi/assembly/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/jcvi/assembly/__main__.py b/jcvi/assembly/__main__.py new file mode 100644 index 00000000..e71fb0f9 --- /dev/null +++ b/jcvi/assembly/__main__.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +Assemblage of genome-assembly related scripts: ALLMAPS algorithm, scaffolding, k-mer analysis, QC, tool wrappers, etc. +""" + +from ..apps.base import dmain + + +if __name__ == "__main__": + dmain(__file__) diff --git a/jcvi/assembly/allmaps.py b/jcvi/assembly/allmaps.py new file mode 100644 index 00000000..575554ce --- /dev/null +++ b/jcvi/assembly/allmaps.py @@ -0,0 +1,2018 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Scaffold Ordering with Weighted Maps. 
+""" +import os.path as op +import os +import sys + +from collections import Counter, defaultdict +from functools import partial +from itertools import combinations, product +from typing import Optional + +import numpy as np +import networkx as nx + +from cmmodule.utils import read_chain_file +from cmmodule.mapbed import crossmap_bed_file +from more_itertools import pairwise + +from ..algorithms.ec import GA_setup, GA_run +from ..algorithms.formula import reject_outliers, spearmanr +from ..algorithms.lis import ( + longest_monotonic_subseq_length_loose as lms, + longest_monotonic_subsequence_loose as lmseq, +) +from ..algorithms.matrix import determine_signs +from ..apps.base import ( + ActionDispatcher, + OptionParser, + SUPPRESS, + cleanup, + flatten, + get_today, + logger, + mkdir, + need_update, + sh, + version, +) +from ..formats.agp import AGP, order_to_agp, build as agp_build, reindex +from ..formats.base import DictFile, FileMerger, must_open, read_block +from ..formats.bed import Bed, BedLine, natsorted, sort +from ..formats.chain import fromagp +from ..formats.sizes import Sizes +from ..graphics.landscape import draw_gauge +from ..utils.cbook import human_size, percentage +from ..utils.grouper import Grouper +from ..utils.table import tabulate + + +START, END = "START", "END" +distance_choices = ("cM", "rank") +linkage_choices = ("single", "double", "complete", "average", "median") +np.seterr(invalid="ignore") + + +class Scaffold(object): + def __init__(self, seqid, mapc): + self.markers = mapc.extract(seqid) + self.seqid = seqid + self.mapc = mapc + + @property + def mlg_counts(self): + return Counter([x.mlg for x in self.markers]) + + def add_LG_pairs(self, G, mappair): + # Computes co-occurrences of LG pairs + cc = self.mlg_counts.items() + mappair = sorted(mappair) + for (ak, av), (bk, bv) in combinations(cc, 2): + aks, bks = ak.split("-")[0], bk.split("-")[0] + if sorted((aks, bks)) != mappair: + continue + weight = min(av, bv) + G[ak, bk] += weight 
+ G[bk, ak] += weight + + +class LinkageGroup(object): + def __init__(self, lg, length, markers, function=(lambda x: x.rank), linkage=min): + self.lg = lg + self.length = length + self.markers = markers + self.function = f = function + self.linkage = linkage + + self.mapname = lg.split("-")[0] + self.series = {} + self.nmarkers = {} + self.oo = {} + self.position = {} + self.guide = {} + for k, v in markers.items(): # keyed by scaffold ids + self.series[k] = xs = [f(x) for x in v] + self.nmarkers[k] = len(v) + physical_to_cm = [(x.pos, f(x)) for x in v] + self.oo[k] = get_rho(physical_to_cm) + self.position[k] = np.median(xs) + self.guide[k] = np.median([x.cm for x in v]) + + path = sorted((v, self.guide[k], k) for k, v in self.position.items()) + vv, gg, path = zip(*path) + self.path = path + self.rho = 0 + + def populate_pairwise_distance(self): + distances = {} + series = self.series + linkage = self.linkage + for a, b in combinations(self.path, 2): + d = linkage_distance(series[a], series[b], linkage=linkage) + distances[a, b] = distances[b, a] = d + + for p in self.path: + adist = linkage_distance([0], series[p], linkage=linkage) + bdist = linkage_distance(series[p], [self.length], linkage=linkage) + if self.rho < 0: + adist, bdist = bdist, adist + distances[START, p] = distances[p, START] = adist + distances[END, p] = distances[p, END] = bdist + + self.distances = distances + + return distances + + +class ScaffoldOO(object): + """ + This contains the routine to construct order and orientation for the + scaffolds per partition. 
+ """ + + def __init__( + self, + lgs, + scaffolds, + mapc, + pivot, + weights, + sizes, + function=(lambda x: x.rank), + linkage=min, + fwtour=None, + ngen=500, + npop=100, + cpus=8, + seed=666, + ): + + self.lgs = lgs + self.lengths = mapc.lengths + self.bins = mapc.bins + self.sizes = sizes + self.scaffolds = scaffolds + self.pivot = pivot + self.weights = weights + self.function = function + self.linkage = linkage + + self.prepare_linkage_groups() # populate all data + for mlg in self.lgs: + mapname, lg = mlg.rsplit("-", 1) + if mapname == pivot: + self.object = "chr{0}".format(lg) + break + + tag = "|".join(lgs) + tour = zip(scaffolds, len(scaffolds) * [1]) + print_tour(fwtour, self.object, tag, "INIT", tour, recode=True) + signs = self.assign_orientation() + assert len(signs) == len(scaffolds) + tour = list(zip(scaffolds, signs)) + scaffolds_oo = dict(tour) + print_tour(fwtour, self.object, tag, "FLIP", tour, recode=True) + tour = self.assign_order() + tour = [(x, scaffolds_oo[x]) for x in tour] + print_tour(fwtour, self.object, tag, "TSP", tour, recode=True) + + def callback(tour, gen, i=0): + fitness = tour.fitness if hasattr(tour, "fitness") else None + tour = [scaffolds[x] for x in tour] + tour = [(x, scaffolds_oo[x]) for x in tour] + label = "GA{0}-{1}".format(i, gen) + if fitness: + fitness = "{0}".format(fitness).split(".")[0].replace("(", "") + label += "-" + fitness + print_tour(fwtour, self.object, tag, label, tour, recode=True) + return tour + + i = 0 + best_fitness = None + while True: # Multiple EC rounds due to orientation fixes + logger.debug("Start EC round %d", i) + scaffolds_oo = dict(tour) + scfs, tour, ww = self.prepare_ec(scaffolds, tour, weights) + callbacki = partial(callback, i=i) + toolbox = GA_setup(tour) + toolbox.register("evaluate", colinear_evaluate_multi, scfs=scfs, weights=ww) + tour, fitness = GA_run( + toolbox, ngen=ngen, npop=npop, cpus=cpus, seed=seed, callback=callbacki + ) + tour = callbacki(tour, "FIN") + if best_fitness 
and fitness <= best_fitness: + logger.debug("No fitness improvement: %s. Exit EC.", best_fitness) + break + tour = self.fix_orientation(tour) + best_fitness = fitness + print_tour( + fwtour, self.object, tag, "GA{0}-FIXORI".format(i), tour, recode=True + ) + logger.debug("Current best fitness: %s", best_fitness) + i += 1 + + tour = self.fix_tour(tour) + self.tour = recode_tour(tour) + for fw in (sys.stderr, fwtour): + print_tour(fw, self.object, tag, "FINAL", self.tour) + + def prepare_ec(self, scaffolds, tour, weights): + """ + Prepare Evolutionary Computation. This converts scaffold names into + indices (integer) in the scaffolds array. + """ + scaffolds_ii = dict((s, i) for i, s in enumerate(scaffolds)) + scfs = [] + ww = [] + for mlg in self.linkage_groups: + w = float(weights[mlg.mapname]) + scf = {} + for s, o in tour: + si = scaffolds_ii[s] + scf[si] = self.get_series(mlg.lg, s, orientation=o) + scfs.append(scf) + ww.append(w) + tour = [scaffolds_ii[x] for x, o in tour] + + return scfs, tour, ww + + def weighted_mean(self, a): + a, w = zip(*a) + w = [self.weights[x] for x in w] + return np.average(a, weights=w) + + def get_markers(self, lg, scaffold, orientation=0): + xs = self.bins.get((lg, scaffold), []) + if orientation < 0: + xs = xs[::-1] + return xs + + def get_series(self, lg, scaffold, orientation=0): + xs = self.get_markers(lg, scaffold, orientation=orientation) + return [self.function(x) for x in xs] + + def prepare_linkage_groups(self): + self.linkage_groups = [] + for lg in self.lgs: + length = self.lengths[lg] + markers = {} + for s in self.scaffolds: + xs = self.get_markers(lg, s) + if xs: + markers[s] = xs + if not markers: + continue + LG = LinkageGroup( + lg, length, markers, function=self.function, linkage=self.linkage + ) + self.linkage_groups.append(LG) + + def distances_to_tour(self): + scaffolds = self.scaffolds + distances = self.distances + G = nx.DiGraph() + for (a, b), v in distances.items(): + d = self.weighted_mean(v) + 
G.add_edge(a, b, weight=d) + if a == START or b == END: + continue + G.add_edge(b, a, weight=d) + + logger.debug("Graph size: |V|=%d, |E|=%d", len(G), G.size()) + + L = dict(nx.all_pairs_dijkstra_path_length(G)) + for a, b in combinations(scaffolds, 2): + if G.has_edge(a, b): + continue + if a in L and b in L[a]: + l = L[a][b] + G.add_edge(a, b, weight=l) + G.add_edge(b, a, weight=l) + + edges = [] + for a, b, d in G.edges(data=True): + edges.append((a, b, d["weight"])) + + return scaffolds[:] + + def assign_order(self): + """ + The goal is to assign scaffold orders. To help order the scaffolds, two + dummy node, START and END, mark the ends of the chromosome. We connect + START to each scaffold (directed), and each scaffold to END. + """ + linkage_groups = self.linkage_groups + for mlg in linkage_groups: + mapname = mlg.mapname + if mapname == self.pivot: + pivot_position = mlg.position + + for mlg in linkage_groups: + position = mlg.position + # Flip order if path goes in the opposite direction to the pivot + common = [] + for a, ap in position.items(): + if a not in pivot_position: + continue + pp = pivot_position[a] + common.append((ap, pp)) + + mlg.rho = get_rho(common) + if mlg.rho < 0: + mlg.path = mlg.path[::-1] + + mlg.populate_pairwise_distance() + + # Preparation of TSP + distances = defaultdict(list) + for mlg in linkage_groups: + mapname = mlg.mapname + position = mlg.position + length = mlg.length + path = mlg.path + rho = mlg.rho + dd = mlg.distances + for a, b in combinations(path, 2): + d = dd[a, b] + distances[a, b].append((d, mapname)) + for p in path: + adist, bdist = position[p], length - position[p] + if rho < 0: + adist, bdist = bdist, adist + distances[START, p].append((adist, mapname)) + distances[p, END].append((bdist, mapname)) + + self.distances = distances + tour = self.distances_to_tour() + return tour + + def get_orientation(self, si, sj): + """ + si, sj are two number series. 
To compute whether these two series have + same orientation or not. We combine them in the two orientation + configurations and compute length of the longest monotonic series. + """ + if not si or not sj: + return 0 + # Same orientation configuration + a = lms(si + sj) + b = lms(sj + si) + # Opposite orientation configuration + c = lms(si + sj[::-1]) + d = lms(sj[::-1] + si) + return max(a, b)[0] - max(c, d)[0] + + def assign_orientation(self): + signs = defaultdict(list) + scaffolds = self.scaffolds + for mlg in self.linkage_groups: + mapname = mlg.mapname + series = mlg.series + if mapname == self.pivot: + pivot_oo = mlg.oo + pivot_nmarkers = mlg.nmarkers + + for i, j in combinations(range(len(scaffolds)), 2): + si, sj = scaffolds[i], scaffolds[j] + si, sj = series.get(si, []), series.get(sj, []) + d = self.get_orientation(si, sj) + if not d: + continue + signs[i, j].append((d, mapname)) + + for e, v in signs.items(): + signs[e] = self.weighted_mean(v) + + signs_edges = sorted((a, b, w) for (a, b), w in signs.items()) + signs = determine_signs(scaffolds, signs_edges) + + # Finally flip this according to pivot map, then weight by #_markers + pivot_oo = [pivot_oo.get(x, 0) for x in scaffolds] + nmarkers = [pivot_nmarkers.get(x, 0) for x in scaffolds] + flipr = signs * np.sign(np.array(pivot_oo)) * nmarkers + if sum(flipr) < 0: + signs = -signs + return signs + + def fix_tour(self, tour): + """ + Test each scaffold if dropping does not decrease LMS. 
+ """ + scaffolds, oos = zip(*tour) + keep = set() + for mlg in self.linkage_groups: + lg = mlg.lg + for s, o in tour: + i = scaffolds.index(s) + L = [self.get_series(lg, x, xo) for x, xo in tour[:i]] + U = [self.get_series(lg, x, xo) for x, xo in tour[i + 1 :]] + L, U = list(flatten(L)), list(flatten(U)) + M = self.get_series(lg, s, o) + score_with = lms(L + M + U)[0] + score_without = lms(L + U)[0] + assert score_with >= score_without + if score_with > score_without: + keep.add(s) + dropped = len(tour) - len(keep) + logger.debug("Dropped %d minor scaffolds", dropped) + return [(s, o) for (s, o) in tour if s in keep] + + def fix_orientation(self, tour): + """ + Test each scaffold if flipping will increass longest monotonic chain + length. + """ + orientations = dict(tour) # old configuration here + scaffold_oo = defaultdict(list) + scaffolds, oos = zip(*tour) + for mlg in self.linkage_groups: + lg = mlg.lg + mapname = mlg.mapname + for s, o in tour: + i = scaffolds.index(s) + L = [self.get_series(lg, x, xo) for x, xo in tour[:i]] + U = [self.get_series(lg, x, xo) for x, xo in tour[i + 1 :]] + L, U = list(flatten(L)), list(flatten(U)) + M = self.get_series(lg, s) + plus = lms(L + M + U) + minus = lms(L + M[::-1] + U) + d = plus[0] - minus[0] + if not d: + continue + scaffold_oo[s].append((d, mapname)) # reset orientation + + fixed = 0 + for s, v in scaffold_oo.items(): + d = self.weighted_mean(v) + old_d = orientations[s] + new_d = np.sign(d) + if new_d != old_d: + orientations[s] = new_d + fixed += 1 + + tour = [(x, orientations[x]) for x in scaffolds] + logger.debug("Fixed orientations for %d scaffolds.", fixed) + return tour + + +class CSVMapLine(object): + def __init__(self, row, sep=",", mapname=None): + # ScaffoldID,ScaffoldPosition,LinkageGroup,GeneticPosition + args = [x.strip() for x in row.split(sep)] + self.seqid = args[0] + self.pos = int(args[1]) + self.lg = args[2] + self.cm = float(args[3]) + self.mapname = mapname + + @property + def bedline(self): 
+ marker = "{0}-{1}:{2:.6f}".format(self.mapname, self.lg, self.cm) + track = "{0}:{1}".format(self.seqid, self.pos) + return "\t".join( + str(x) for x in (self.seqid, self.pos - 1, self.pos, marker, track) + ) + + +class Marker(object): + def __init__(self, b): + self.seqid = b.seqid + self.pos = b.start + self.mlg, cm = b.accn.split(":") + try: + self.mapname, self.lg = b.accn.split("-", 1) + except ValueError: + logger.error("Malformed marker name: %s", b.accn) + sys.exit(1) + self.cm = float(cm) + self.accn = b.accn + self.args = b.args + self.rank = -1 + + def parse_scaffold_info(self): + self.scaffoldaccn = self.args[-1] + self.scaffoldid, scaffoldpos = self.scaffoldaccn.split(":") + self.scaffoldpos = int(scaffoldpos) + + def __str__(self): + return "\t".join( + str(x) for x in (self.seqid, self.pos - 1, self.pos, self.accn, self.rank) + ) + + __repr__ = __str__ + + +class Map(list): + def __init__( + self, + filename, + scaffold_info=False, + compress=1e-6, + remove_outliers=False, + function=(lambda x: x.rank), + ): + super().__init__() + bed = Bed(filename) + for b in bed: + self.append(Marker(b)) + self.report() + self.ranks = self.compute_ranks(compress) + self.lengths = self.compute_lengths(function) + self.bins = self.get_bins(function, remove_outliers) + if scaffold_info: + for b in self: + b.parse_scaffold_info() + + def report(self): + self.nmarkers = len(self) + self.seqids = sorted(set(x.seqid for x in self)) + self.mapnames = sorted(set(x.mapname for x in self)) + self.mlgs = sorted(set(x.mlg for x in self)) + logger.debug( + "Map contains %d markers in %d linkage groups.", + self.nmarkers, + len(self.mlgs), + ) + + def extract(self, seqid): + r = [x for x in self if x.seqid == seqid] + return sorted(r, key=lambda x: x.pos) + + def extract_mlg(self, mlg): + r = [x for x in self if x.mlg == mlg] + return sorted(r, key=lambda x: x.cm) + + def compute_ranks(self, compress): + ranks = {} # Store the length for each linkage group + for mlg in 
self.mlgs: + rank = 0 + mlg_set = self.extract_mlg(mlg) + for i, marker in enumerate(mlg_set): + if i == 0: + marker.rank = rank + continue + if marker.cm - mlg_set[i - 1].cm > compress: + rank += 1 + marker.rank = rank + ranks[mlg] = mlg_set + return ranks + + def compute_lengths(self, function): + lengths = {} + for mlg, v in self.ranks.items(): + lengths[mlg] = max(function(x) for x in v) + return lengths + + def get_bins(self, function, remove_outliers): + s = defaultdict(list) + for m in self: + s[(m.mlg, m.seqid)].append(m) + + if remove_outliers: + original = clean = 0 + for pair, markers in s.items(): + cm = self.remove_outliers(markers, function) + s[pair] = cm + original += len(markers) + clean += len(cm) + logger.debug("Retained %s clean markers.", percentage(clean, original)) + return s + + def remove_outliers(self, markers, function): + data = [function(x) for x in markers] + reject = reject_outliers(data) + clean_markers = [m for m, r in zip(markers, reject) if not r] + return clean_markers + + +class MapSummary(object): + def __init__(self, markers, l50, s, scaffolds=None): + markers = self.unique_markers(markers) + self.num_markers = len(markers) + self.num_lgs = len(set(x.mlg for x in markers)) + scaffolds = scaffolds or set(x.seqid for x in markers) + n50_scaffolds = [x for x in scaffolds if s.mapping[x] >= l50] + self.num_scaffolds = len(scaffolds) + self.num_n50_scaffolds = len(n50_scaffolds) + self.total_bases = sum(s.mapping[x] for x in scaffolds) + self.tally_markers(markers) + + def unique_markers(self, markers): + umarkers = [] + seen = set() + for m in markers: + mt = (m.seqid, m.pos) + if mt in seen: + continue + umarkers.append(m) + seen.add(mt) + return umarkers + + def tally_markers(self, markers): + counter = Counter([x.seqid for x in markers]) + self.scaffold_1m = len([x for x in counter.values() if x == 1]) + self.scaffold_2m = len([x for x in counter.values() if x == 2]) + self.scaffold_3m = len([x for x in counter.values() if x == 
3]) + self.scaffold_4m = len([x for x in counter.values() if x >= 4]) + + def export_table(self, r, mapname, total): + r["Markers (unique)", mapname] = self.num_markers + r["Markers per Mb", mapname] = ( + self.num_markers * 1e6 / self.total_bases if self.total_bases else 0 + ) + r["Scaffolds", mapname] = self.num_scaffolds + r["N50 Scaffolds", mapname] = self.num_n50_scaffolds + r["Total bases", mapname] = percentage(self.total_bases, total, mode=1) + r["Scaffolds with 1 marker", mapname] = self.scaffold_1m + r["Scaffolds with 2 markers", mapname] = self.scaffold_2m + r["Scaffolds with 3 markers", mapname] = self.scaffold_3m + r["Scaffolds with >=4 markers", mapname] = self.scaffold_4m + + +class Weights(DictFile): + def __init__(self, filename, mapnames, cast=int): + super().__init__(filename, cast=cast) + self.maps = [x.split()[0] for x in must_open(filename)] + self.update_maps(mapnames) + pivot_weight, o, pivot = self.get_pivot(mapnames) + ref = self.maps[0] + self.pivot = pivot + self.ref = ref + + logger.debug("Map weights: %s", self.items()) + + def update_maps(self, mapnames, default=1): + keys = list(self.keys()) + for m in keys: + if m not in mapnames: + del self[m] + for m in mapnames: + if m in self: + continue + self[m] = default + logger.debug("Weight for `%s` set to %d.", m, default) + + def get_pivot(self, mapnames): + # Break ties by occurence in file + common_mapnames = set(self.maps) & set(mapnames) + if not common_mapnames: + logger.error("No common names found between %s and %s", self.maps, mapnames) + sys.exit(1) + return max( + (w, -self.maps.index(m), m) for m, w in self.items() if m in common_mapnames + ) + + +class Layout(object): + def __init__(self, mlgsizes): + + self.mlgsizes = mlgsizes + self.partition() + self.calculate_coords() + + def partition(self, N=2): + # Partition LGs into two sides with approximately similar sum of sizes + endtime = [0] * N + parts = [] + for i in range(N): + parts.append([]) + # LPT greedy algorithm, sort 
by LG size decreasing + for mlg, mlgsize in sorted(self.mlgsizes.items(), key=lambda x: -x[-1]): + mt, mi = min((x, i) for (i, x) in enumerate(endtime)) + endtime[mi] += mlgsize + parts[mi].append((mlg, mlgsize)) + self.parts = parts + + def calculate_coords(self, r=0.8, gapsize=0.1): + # Find the larger partition + part_sizes = [] + for p in self.parts: + ps = sum(ms for m, ms in p) + part_sizes.append((ps, len(p) - 1)) + max_part_size, ngaps = max(part_sizes) + gaps = gapsize * ngaps + ratio = (r - gaps) / max_part_size + self.ratio = ratio + + coords = {} + for x, p, (ps, ngaps) in zip((0.25, 0.75), self.parts, part_sizes): + gaps = gapsize * ngaps + ystart = (1 + ratio * ps + gaps) / 2 + for m, ms in p: + mlen = ratio * ms + coords[m] = (x, ystart - mlen, ystart) + ystart -= mlen + gapsize + self.coords = coords + + +class GapEstimator(object): + def __init__(self, mapc, agp, seqid, mlg, function=lambda x: x.cm): + mm = mapc.extract_mlg(mlg) + logger.debug("Extracted %d markers for %s-%s", len(mm), seqid, mlg) + self.mlgsize = max(function(x) for x in mm) + + self.agp = [x for x in agp if x.object == seqid] + self.scaffolds = [x.component_id for x in self.agp if not x.is_gap] + self.pp = [x.object_beg for x in self.agp if x.is_gap] + self.chrsize = max(x.object_end for x in self.agp) + + s = Scaffold(seqid, mapc) + self.scatter_data = [] + self.scaffold_markers = defaultdict(list) + for x in s.markers: + if x.mlg != mlg: + continue + self.scaffold_markers[x.scaffoldid].append(x) + self.scatter_data.append((x.pos, function(x))) + self.scatter_data.sort() + self.get_splines() + + def get_gapsize(self, scaffold): + # Find the gap size right after a query scaffold + i = self.scaffolds.index(scaffold) + return self.gapsizes[i] + + def get_splines(self, floor=25 * 1e-9, ceil=25 * 1e-6): + from scipy.interpolate import UnivariateSpline + + mx, my = zip(*self.scatter_data) + yy, xx = zip(*lmseq(zip(my, mx))) # filter with LMS + spl = UnivariateSpline(xx, yy) + spld = 
spl.derivative() + + def spl_derivative(x): + s = abs(spld(x)) + s[s < floor] = floor + s[s > ceil] = ceil + return s + + self.spl = spl + self.spld = spl_derivative + + def compute_one_gap(self, a, b, gappos, minsize, maxsize, verbose=False): + ma, mb = self.scaffold_markers[a], self.scaffold_markers[b] + all_marker_pairs = [] + for x, y in product(ma, mb): + cm_dist = abs(x.cm - y.cm) + (ratio,) = self.spld([gappos]) + converted_dist = int(round(cm_dist / ratio)) + overhang_x = abs(x.pos - gappos) + overhang_y = abs(y.pos - gappos) - minsize + estimated = converted_dist - overhang_x - overhang_y + if estimated < minsize: + estimated = minsize + if estimated > maxsize: + estimated = maxsize + if verbose: + print("=" * 10) + print(x) + print(y) + print(x.scaffoldaccn, y.scaffoldaccn) + print("Converted dist:", cm_dist, ratio, converted_dist) + print("Overhangs:", overhang_x, overhang_y) + print("Estimated", estimated) + all_marker_pairs.append(estimated) + + gapsize = min(all_marker_pairs) if all_marker_pairs else None + if verbose: + print("*" * 5, a, b, gapsize) + return gapsize + + def compute_all_gaps(self, minsize=100, maxsize=500000, verbose=False): + self.gapsizes = [] + for (a, b), gappos in zip(pairwise(self.scaffolds), self.pp): + gapsize = self.compute_one_gap( + a, b, gappos, minsize, maxsize, verbose=verbose + ) + self.gapsizes.append(gapsize) + + +def colinear_evaluate_multi(tour, scfs, weights): + weighted_score = 0 + for scf, w in zip(scfs, weights): + subtour = [x for x in tour if x in scf] + series = [] + for t in subtour: + series.extend(scf[t]) + score, diff = lms(series) + weighted_score += score * w + return (weighted_score,) + + +def get_rho(xy): + if not xy: + return 0 + x, y = zip(*xy) + rho = spearmanr(x, y) + if np.isnan(rho): + rho = 0 + return rho + + +def linkage_distance(a, b, linkage=min): + return linkage([abs(i - j) for i, j in product(a, b)]) + + +def double_linkage(L): + if len(L) == 1: + return L[0] + L.sort() + a, b = L[:2] + 
return (a + b) / 2.0 + + +def main(): + + actions = ( + ("fake", "make fake scaffolds.fasta"), + ("merge", "merge csv maps and convert to bed format"), + ("mergebed", "merge maps in bed format"), + ("path", "construct golden path given a set of genetic maps"), + ("estimategaps", "estimate sizes of inter-scaffold gaps"), + ("build", "build associated FASTA and CHAIN file"), + ("split", "split suspicious scaffolds"), + ("summary", "report summary stats for maps and final consensus"), + # Visualization + ("plot", "plot matches between goldenpath and maps for single object"), + ("plotall", "plot matches between goldenpath and maps for all objects"), + ("plotratio", "illustrate physical vs map distance ratio"), + ("movie", "visualize history of scaffold OO"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def normalize_lms_axis( + ax, xlim=None, ylim=None, xfactor=1e-6, yfactor=1, xlabel=None, ylabel="Map (cM)" +): + """Normalize the axis limits and labels to beautify axis.""" + if xlim: + ax.set_xlim(0, xlim) + if ylim: + ax.set_ylim(0, ylim) + if xlabel: + xticklabels = [int(round(x * xfactor)) for x in ax.get_xticks()] + ax.set_xticklabels(xticklabels, family="Helvetica") + ax.set_xlabel(xlabel) + else: + ax.set_xticks([]) + if ylabel: + yticklabels = [int(round(x * yfactor)) for x in ax.get_yticks()] + ax.set_yticklabels(yticklabels, family="Helvetica") + ax.set_ylabel(ylabel) + else: + ax.set_yticks([]) + + +def plotratio(args): + """ + %prog plotratio JM-2 chr23 JMMale-23 + + Illustrate physical vs map distance ratio, that were used in the gap estimation algorithm. 
+ """ + from ..graphics.base import plt, savefig, normalize_axes, panel_labels, set2 + + p = OptionParser(estimategaps.__doc__) + _, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300) + + if len(args) != 3: + sys.exit(not p.print_help()) + + pf, seqid, mlg = args + bedfile = pf + ".lifted.bed" + agpfile = pf + ".agp" + + function = lambda x: x.cm + cc = Map(bedfile, scaffold_info=True, function=function) + agp = AGP(agpfile) + + g = GapEstimator(cc, agp, seqid, mlg, function=function) + pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize + spl, spld = g.spl, g.spld + g.compute_all_gaps(verbose=False) + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + # Panel A + xstart, ystart = 0.15, 0.55 + w, h = 0.7, 0.4 + t = np.linspace(0, chrsize, 1000) + ax = fig.add_axes([xstart, ystart, w, h]) + mx, my = zip(*g.scatter_data) + rho = spearmanr(mx, my) + + dsg = "g" + ax.vlines(pp, 0, mlgsize, colors="beige") + ax.plot(mx, my, ".", color=set2[3]) + ax.plot(t, spl(t), "-", color=dsg) + ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes) + normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize, ylabel="Genetic distance (cM)") + if rho < 0: + ax.invert_yaxis() + + # Panel B + ystart = 0.1 + ax = fig.add_axes([xstart, ystart, w, h]) + ax.vlines(pp, 0, mlgsize, colors="beige") + ax.plot(t, spld(t), "-", lw=2, color=dsg) + ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5) + normalize_lms_axis( + ax, + xlim=chrsize, + ylim=25 * 1e-6, + xfactor=1e-6, + xlabel="Physical position (Mb) on {}".format(seqid), + yfactor=1000000, + ylabel="Recomb. rate\n(cM / Mb)", + ) + ax.xaxis.grid(False) + + labels = ((0.05, 0.95, "A"), (0.05, 0.5, "B")) + panel_labels(root, labels) + normalize_axes(root) + + pf = "plotratio" + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def fake(args): + """ + %prog fake input.bed + + Make fake `scaffolds.fasta`. 
Use case for this is that sometimes I would + receive just the csv/bed file and I'd like to use path() out of the box. + """ + from math import ceil + from random import choice + + from Bio import SeqIO + from Bio.Seq import Seq + from Bio.SeqRecord import SeqRecord + + p = OptionParser(fake.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (inputbed,) = args + bed = Bed(inputbed) + recs = [] + for seqid, sb in bed.sub_beds(): + maxend = max(x.end for x in sb) + size = int(ceil(maxend / 1000.0) * 1000) + seq = "".join([choice("ACGT") for x in range(size)]) + rec = SeqRecord(Seq(seq), id=seqid, description="") + recs.append(rec) + + fw = must_open(opts.outfile, "w") + SeqIO.write(recs, fw, "fasta") + + +def compute_score(markers, bonus, penalty): + """ + Compute chain score using dynamic programming. If a marker is the same + linkage group as a previous one, we add bonus; otherwise, we penalize the + chain switching. + """ + nmarkers = len(markers) + s = [bonus] * nmarkers # score + f = [-1] * nmarkers # from + for i in range(1, nmarkers): + for j in range(i): + mi, mj = markers[i], markers[j] + t = bonus if mi.mlg == mj.mlg else penalty + bonus + if s[i] < s[j] + t: + s[i] = s[j] + t + f[i] = j + # Recover the highest scoring chain + highest_score = max(s) + si = s.index(highest_score) + onchain = set() + while True: + if si < 0: + break + si = f[si] + onchain.add(si) + return [x for i, x in enumerate(markers) if i in onchain] + + +def split(args): + """ + %prog split input.bed + + Split suspicious scaffolds. Suspicious scaffolds are those that contain + chunks that map to more than one linkage group. The chunk size can be + modified through --chunk option. 
+ """ + p = OptionParser(split.__doc__) + p.add_argument( + "--chunk", default=4, type=int, help="Split chunks of at least N markers" + ) + p.add_argument( + "--splitsingle", + default=False, + action="store_true", + help="Split breakpoint range right in the middle", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (inputbed,) = args + bonus = 2 + nchunk = opts.chunk + nbreaks = 0 + penalty = -(nchunk * bonus - 1) + bed = Bed(inputbed) + for seqid, bb in bed.sub_beds(): + markers = [Marker(x) for x in bb] + markers = compute_score(markers, bonus, penalty) + for mi, mj in pairwise(markers): + if mi.mlg == mj.mlg: + continue + assert mi.seqid == mj.seqid + start, end = mi.pos, mj.pos + if start > end: + start, end = end, start + if opts.splitsingle: + start = end = (start + end) / 2 + print("\t".join(str(x) for x in (mi.seqid, start - 1, end))) + nbreaks += 1 + logger.debug("A total of %d breakpoints inferred (--chunk=%d)", nbreaks, nchunk) + + +def movie(args): + """ + %prog movie input.bed scaffolds.fasta chr1 + + Visualize history of scaffold OO. The history is contained within the + tourfile, generated by path(). For each historical scaffold OO, the program + plots a separate PDF file. The plots can be combined to show the progression + as a little animation. The third argument limits the plotting to a + specific pseudomolecule, for example `chr1`. 
+ """ + p = OptionParser(movie.__doc__) + p.add_argument( + "--gapsize", + default=100, + type=int, + help="Insert gaps of size between scaffolds", + ) + add_allmaps_plot_options(p) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + inputbed, scaffoldsfasta, seqid = args + gapsize = opts.gapsize + pf = inputbed.rsplit(".", 1)[0] + agpfile = pf + ".chr.agp" + tourfile = pf + ".tour" + + fp = open(tourfile) + sizes = Sizes(scaffoldsfasta).mapping + ffmpeg = "ffmpeg" + mkdir(ffmpeg) + score = None + i = 1 + for header, block in read_block(fp, ">"): + s, tag, label = header[1:].split() + if s != seqid: + continue + tour = block[0].split() + tour = [(x[:-1], x[-1]) for x in tour] + if label.startswith("GA"): + cur_score = label.split("-")[-1] + if cur_score == score: + i += 1 + continue + score = cur_score + + image_name = ".".join((seqid, "{0:04d}".format(i), label, "pdf")) + if need_update(tourfile, image_name): + fwagp = must_open(agpfile, "w") + order_to_agp(seqid, tour, sizes, fwagp, gapsize=gapsize, evidence="map") + fwagp.close() + logger.debug("%s written to `%s`.", header, agpfile) + build([inputbed, scaffoldsfasta, "--cleanup"]) + pdf_name = plot([inputbed, seqid, "--title={0}".format(label)]) + sh("mv {0} {1}".format(pdf_name, image_name)) + if label in ("INIT", "FLIP", "TSP", "FINAL"): + for j in range(5): # Delay for 5 frames + image_delay = image_name.rsplit(".", 1)[0] + ".d{0}.pdf".format(j) + sh("cp {0} {1}/{2}".format(image_name, ffmpeg, image_delay)) + else: + sh("cp {0} {1}/".format(image_name, ffmpeg)) + i += 1 + + make_movie(ffmpeg, pf) + + +def make_movie(workdir, pf, dpi=120, fps=1, format="pdf", engine="ffmpeg"): + """Make the movie using either ffmpeg or gifsicle.""" + os.chdir(workdir) + if format != "png": + cmd = "parallel convert -density {}".format(dpi) + cmd += " {} {.}.png ::: " + "*.{}".format(format) + sh(cmd) + + assert engine in ( + "ffmpeg", + "gifsicle", + ), "Only ffmpeg or gifsicle is 
currently supported" + if engine == "ffmpeg": + cmd = "ffmpeg -framerate {} -pattern_type glob -i '*.png' {}.mp4".format( + fps, pf + ) + elif engine == "gifsicle": + cmd = "convert *.png gif:- |" + cmd += " gifsicle --delay {} --loop --optimize=3".format(100 // fps) + cmd += " --colors=256 --multifile - > {}.gif".format(pf) + + sh(cmd) + + +def estimategaps(args): + """ + %prog estimategaps input.bed + + Estimate sizes of inter-scaffold gaps. The AGP file generated by path() + command has unknown gap sizes with a generic number of Ns (often 100 Ns). + The AGP file `input.chr.agp` will be modified in-place. + """ + p = OptionParser(estimategaps.__doc__) + p.add_argument("--minsize", default=100, type=int, help="Minimum gap size") + p.add_argument("--maxsize", default=500000, type=int, help="Maximum gap size") + p.add_argument( + "--links", + default=10, + type=int, + help="Only use linkage grounds with matchings more than", + ) + p.set_verbose(help="Print details for each gap calculation") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (inputbed,) = args + pf = inputbed.rsplit(".", 1)[0] + agpfile = pf + ".chr.agp" + bedfile = pf + ".lifted.bed" + + cc = Map(bedfile, scaffold_info=True) + agp = AGP(agpfile) + minsize, maxsize = opts.minsize, opts.maxsize + links = opts.links + verbose = opts.verbose + + outagpfile = pf + ".estimategaps.agp" + fw = must_open(outagpfile, "w") + + for ob, components in agp.iter_object(): + components = list(components) + s = Scaffold(ob, cc) + mlg_counts = s.mlg_counts + gaps = [x for x in components if x.is_gap] + gapsizes = [None] * len(gaps) # master + for mlg, count in mlg_counts.items(): + if count < links: + continue + g = GapEstimator(cc, agp, ob, mlg) + g.compute_all_gaps(minsize=minsize, maxsize=maxsize, verbose=verbose) + # Merge evidence from this mlg into master + assert len(g.gapsizes) == len(gaps) + for i, gs in enumerate(gapsizes): + gg = g.gapsizes[i] + if gs is None: + 
gapsizes[i] = gg + elif gg: + gapsizes[i] = min(gs, gg) + + print(gapsizes) + # Modify AGP + i = 0 + for x in components: + if x.is_gap: + x.gap_length = gapsizes[i] or minsize + x.component_type = "U" if x.gap_length == 100 else "N" + i += 1 + print(x, file=fw) + + fw.close() + reindex([outagpfile, "--inplace"]) + + +def filename_to_mapname(filename): + # Infer map name based on file name + mapname = op.basename(filename).rsplit(".", 1)[0] + return mapname.replace("-", "_").replace(":", "_").replace(".", "_") + + +def merge(args): + """ + %prog merge map1 map2 map3 ... + + Convert csv maps to bed format. + + Each input map is csv formatted, for example: + + ScaffoldID,ScaffoldPosition,LinkageGroup,GeneticPosition + scaffold_2707,11508,1,0 + scaffold_2707,11525,1,1.2 + scaffold_759,81336,1,9.7 + """ + p = OptionParser(merge.__doc__) + p.add_argument( + "-w", "--weightsfile", default="weights.txt", help="Write weights to file" + ) + p.set_outfile("out.bed") + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + maps = args + outfile = opts.outfile + fp = must_open(maps) + b = Bed() + mapnames = set() + for row in fp: + mapname = filename_to_mapname(fp.filename()) + mapnames.add(mapname) + try: + m = CSVMapLine(row, mapname=mapname) + if m.cm < 0: + logger.error("Ignore marker with negative genetic distance") + print(row.strip(), file=sys.stderr) + else: + b.append(BedLine(m.bedline)) + except (IndexError, ValueError): # header or mal-formed line + continue + + b.print_to_file(filename=outfile, sorted=True) + logger.debug("A total of %d markers written to `%s`.", len(b), outfile) + + assert len(maps) == len(mapnames), "You have a collision in map names" + write_weightsfile(mapnames, weightsfile=opts.weightsfile) + + +def mergebed(args): + """ + %prog mergebed map1.bed map2.bed map3.bed ... + + Combine bed maps to bed format, adding the map name. 
+ """ + p = OptionParser(mergebed.__doc__) + p.add_argument( + "-w", "--weightsfile", default="weights.txt", help="Write weights to file" + ) + p.set_outfile("out.bed") + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + maps = args + outfile = opts.outfile + fp = must_open(maps) + b = Bed() + mapnames = set() + for row in fp: + mapname = filename_to_mapname(fp.filename()) + mapnames.add(mapname) + try: + m = BedLine(row) + m.accn = "{0}-{1}".format(mapname, m.accn) + m.extra = ["{0}:{1}".format(m.seqid, m.start)] + b.append(m) + except (IndexError, ValueError): # header or mal-formed line + continue + + b.print_to_file(filename=outfile, sorted=True) + logger.debug("A total of %d markers written to `%s`.", len(b), outfile) + + assert len(maps) == len(mapnames), "You have a collision in map names" + write_weightsfile(mapnames, weightsfile=opts.weightsfile) + + +def write_weightsfile(mapnames, weightsfile="weights.txt"): + if op.exists(weightsfile): + logger.debug("Weights file `%s` found. 
Will not overwrite.", weightsfile) + return + + fw = open(weightsfile, "w") + for mapname in sorted(mapnames): + weight = 1 + print(mapname, weight, file=fw) + logger.debug("Weights file written to `%s`.", weightsfile) + + +def best_no_ambiguous(d, label): + best, best_value = max(d.items(), key=lambda x: x[1]) + if list(d.values()).count(best_value) > 1: # tie + print("AMBIGUOUS", label, d, file=sys.stderr) + return None, None + return best, best_value + + +def get_function(field): + assert field in distance_choices + return (lambda x: x.cm) if field == "cM" else (lambda x: x.rank) + + +def print_tour(fw, object, tag, label, tour, recode=False): + if recode: + tour = recode_tour(tour) + if fw: + print(">{0} ({1}) {2}".format(object, tag, label), file=fw) + print(" ".join("".join(x) for x in tour), file=fw) + + +def recode_tour(tour): + recode = {0: "?", 1: "+", -1: "-"} + return [(x, recode[o]) for x, o in tour] + + +def path(args): + """ + %prog path input.bed scaffolds.fasta + + Construct golden path given a set of genetic maps. The respective weight for + each map is given in file `weights.txt`. The map with the highest weight is + considered the pivot map. The final output is an AGP file that contains + ordered scaffolds. + + Please note that BED file and FASTA file cannot share the same prefix. 
+ """ + oargs = args + p = OptionParser(path.__doc__) + p.add_argument("-b", "--bedfile", help=SUPPRESS) + p.add_argument("-s", "--fastafile", help=SUPPRESS) + p.add_argument( + "-w", "--weightsfile", default="weights.txt", help="Use weights from file" + ) + p.add_argument( + "--compress", + default=1e-6, + type=float, + help="Compress markers with distance <=", + ) + p.add_argument( + "--noremoveoutliers", + default=False, + action="store_true", + help="Don't remove outlier markers", + ) + p.add_argument( + "--distance", + default="rank", + choices=distance_choices, + help="Distance function when building initial consensus", + ) + p.add_argument( + "--linkage", + default="double", + choices=linkage_choices, + help="Linkage function when building initial consensus", + ) + p.add_argument( + "--gapsize", + default=100, + type=int, + help="Insert gaps of size between scaffolds", + ) + p.add_argument("--seqid", help="Only run partition with this seqid") + p.add_argument("--partitions", help="Use predefined partitions of LGs") + p.add_argument( + "--links", default=10, type=int, help="Only plot matchings more than" + ) + p.add_argument( + "--mincount", default=1, type=int, help="Minimum markers on a contig" + ) + p.add_argument( + "--noplot", + default=False, + action="store_true", + help="Do not visualize the alignments", + ) + p.add_argument( + "--renumber", + default=False, + action="store_true", + help="Renumber chromosome based on decreasing sizes", + ) + p.set_cpus(cpus=16) + + q = p.add_argument_group("Genetic algorithm options") + q.add_argument( + "--ngen", default=500, type=int, help="Iterations in GA, higher ~ slower" + ) + q.add_argument( + "--npop", default=100, type=int, help="Population size in GA, higher ~ slower" + ) + q.add_argument("--seed", default=666, type=int, help="Random seed number") + opts, args, iopts = p.set_image_options(args, figsize="10x6") + + if len(args) != 2: + sys.exit(not p.print_help()) + + inputbed, fastafile = args + inputbed = 
opts.bedfile or inputbed + fastafile = opts.fastafile or fastafile + + pf = inputbed.rsplit(".", 1)[0] + if op.basename(fastafile).split(".")[0] == pf: + print( + "ERROR: Filename collision `{}`. We suggest to rename `{}`".format( + pf, inputbed + ), + file=sys.stderr, + ) + sys.exit(1) + + bedfile = pf + ".bed" + weightsfile = opts.weightsfile + partitionsfile = opts.partitions + gapsize = opts.gapsize + mincount = opts.mincount + ngen = opts.ngen + npop = opts.npop + cpus = opts.cpus + seed = opts.seed + if sys.version_info[:2] < (2, 7): + logger.debug( + "Python version: %s. CPUs set to 1.", sys.version.splitlines()[0].strip() + ) + cpus = 1 + + function = get_function(opts.distance) + cc = Map( + bedfile, + function=function, + compress=opts.compress, + remove_outliers=(not opts.noremoveoutliers), + ) + mapnames = cc.mapnames + allseqids = cc.seqids + weights = Weights(weightsfile, mapnames) + pivot = weights.pivot + ref = weights.ref + linkage = opts.linkage + oseqid = opts.seqid + logger.debug("Linkage function: %s-linkage", linkage) + linkage = { + "single": min, + "double": double_linkage, + "complete": max, + "average": np.mean, + "median": np.median, + }[linkage] + + # Partition the linkage groups into consensus clusters + C = Grouper() + # Initialize the partitions + for mlg in cc.mlgs: + C.join(mlg) + + if partitionsfile: + logger.debug("Partition LGs based on `%s`", partitionsfile) + fp = open(partitionsfile) + for row in fp: + C.join(*row.strip().split(",")) + else: + logger.debug("Partition LGs based on %s", ref) + for mapname in mapnames: + if mapname == ref: + continue + # Compute co-occurrence between LG pairs + G = defaultdict(int) + for s in allseqids: + s = Scaffold(s, cc) + s.add_LG_pairs(G, (ref, mapname)) + # Convert edge list to adj list + nodes = defaultdict(list) + for (a, b), w in G.items(): + nodes[a].append((b, w)) + # Find the best ref LG every non-ref LG matches to + for n, neighbors in nodes.items(): + if n.split("-")[0] == ref: + 
continue + neighbors = dict(neighbors) + best_neighbor, best_value = best_no_ambiguous(neighbors, n) + if best_neighbor is None: + continue + C.join(n, best_neighbor) + + partitions = defaultdict(list) + # Partition the scaffolds and assign them to one consensus + for s in allseqids: + s = Scaffold(s, cc) + seqid = s.seqid + counts = {} + for mlg, count in s.mlg_counts.items(): + consensus = C[mlg] + mapname = mlg.split("-")[0] + mw = weights[mapname] + if consensus not in counts: + counts[consensus] = 0 + if count < mincount: + continue + counts[consensus] += count * mw + best_consensus, best_value = best_no_ambiguous(counts, seqid) + if best_consensus is None: + continue + partitions[best_consensus].append(seqid) + + # Perform OO within each partition + agpfile = pf + ".chr.agp" + tourfile = pf + ".tour" + sizes = Sizes(fastafile).mapping + fwagp = must_open(agpfile, "w") + fwtour = must_open(tourfile, "w") + solutions = [] + for lgs, scaffolds in natsorted(partitions.items()): + if oseqid and oseqid not in lgs: + continue + tag = "|".join(lgs) + lgs_maps = set(x.split("-")[0] for x in lgs) + if pivot not in lgs_maps: + logger.debug("Skipping %s ...", tag) + continue + logger.debug("Working on %s ...", tag) + s = ScaffoldOO( + lgs, + scaffolds, + cc, + pivot, + weights, + sizes, + function=function, + linkage=linkage, + fwtour=fwtour, + ngen=ngen, + npop=npop, + cpus=cpus, + seed=seed, + ) + + solutions.append(s) + fwtour.close() + + # Renumber chromosome based on decreasing size + if opts.renumber: + chrsizes = {} + conversion = {} + for s in solutions: + chrsizes[s.object] = ( + sum(sizes[x] for (x, o) in s.tour) + (len(s.tour) - 1) * gapsize + ) + for i, (c, size) in enumerate(sorted(chrsizes.items(), key=lambda x: -x[1])): + newc = "chr{0}".format(i + 1) + logger.debug("%s: %d => %d", c, size, newc) + conversion[c] = newc + for s in solutions: + s.object = conversion[s.object] + + # meta-data about the run parameters + command = "# COMMAND: python -m 
jcvi.assembly.allmaps path {0}".format( + " ".join(oargs) + ) + comment = "Generated by ALLMAPS {} ({})\n{}".format(version, get_today(), command) + AGP.print_header(fwagp, comment=comment) + + for s in natsorted(solutions, key=lambda x: x.object): + order_to_agp(s.object, s.tour, sizes, fwagp, gapsize=gapsize, evidence="map") + fwagp.close() + + logger.debug("AGP file written to `%s`.", agpfile) + logger.debug("Tour file written to `%s`.", tourfile) + + build([inputbed, fastafile]) + + summaryfile = pf + ".summary.txt" + summary([inputbed, fastafile, "--outfile={0}".format(summaryfile)]) + + if not opts.noplot: + plotall( + [ + inputbed, + "-w", + opts.weightsfile, + "--links={0}".format(opts.links), + "--figsize={0}".format(opts.figsize), + ] + ) + + +def write_unplaced_agp(agpfile, scaffolds, unplaced_agp): + agp = AGP(agpfile) + scaffolds_seen = set(x.component_id for x in agp) + sizes = Sizes(scaffolds).mapping + fwagp = must_open(unplaced_agp, "w") + for s in natsorted(sizes.keys()): + if s in scaffolds_seen: + continue + order_to_agp(s, [(s, "?")], sizes, fwagp) + logger.debug("Write unplaced AGP to `%s`", unplaced_agp) + + +def summary(args): + """ + %prog summary input.bed scaffolds.fasta + + Print out summary statistics per map, followed by consensus summary of + scaffold anchoring based on multiple maps. 
+ """ + p = OptionParser(summary.__doc__) + p.set_table(sep="|", align=True) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + inputbed, scaffolds = args + pf = inputbed.rsplit(".", 1)[0] + mapbed = pf + ".bed" + chr_agp = pf + ".chr.agp" + sep = opts.sep + align = opts.align + cc = Map(mapbed) + mapnames = cc.mapnames + s = Sizes(scaffolds) + total, l50, n50 = s.summary + r = {} + maps = [] + + fw = must_open(opts.outfile, "w") + print("*** Summary for each individual map ***", file=fw) + for mapname in mapnames: + markers = [x for x in cc if x.mapname == mapname] + ms = MapSummary(markers, l50, s) + r["Linkage Groups", mapname] = ms.num_lgs + ms.export_table(r, mapname, total) + maps.append(ms) + print(tabulate(r, sep=sep, align=align), file=fw) + + r = {} + agp = AGP(chr_agp) + print("*** Summary for consensus map ***", file=fw) + consensus_scaffolds = set(x.component_id for x in agp if not x.is_gap) + oriented_scaffolds = set( + x.component_id for x in agp if (not x.is_gap) and x.orientation != "?" + ) + unplaced_scaffolds = set(s.mapping.keys()) - consensus_scaffolds + + for mapname, sc in ( + ("Anchored", consensus_scaffolds), + ("Oriented", oriented_scaffolds), + ("Unplaced", unplaced_scaffolds), + ): + markers = [x for x in cc if x.seqid in sc] + ms = MapSummary(markers, l50, s, scaffolds=sc) + ms.export_table(r, mapname, total) + print(tabulate(r, sep=sep, align=align), file=fw) + + +def liftover( + chain_file: str, + in_file: str, + out_file: str, + unmapfile: Optional[str], + cstyle: str = "l", +): + """ + Lifts over a bed file from one assembly to another using a chain file. + """ + mapTree, _, _ = read_chain_file(chain_file) + crossmap_bed_file(mapTree, in_file, out_file, unmapfile=unmapfile, cstyle=cstyle) + + +def build(args): + """ + %prog build input.bed scaffolds.fasta + + Build associated genome FASTA file and CHAIN file that can be used to lift + old coordinates to new coordinates. 
The CHAIN file will be used to lift the + original marker positions to new positions in the reconstructed genome. The + new positions of the markers will be reported in *.lifted.bed. + """ + p = OptionParser(build.__doc__) + p.add_argument( + "--cleanup", + default=False, + action="store_true", + help="Clean up bulky FASTA files, useful for plotting", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + inputbed, scaffolds = args + pf = inputbed.rsplit(".", 1)[0] + mapbed = pf + ".bed" + chr_agp = pf + ".chr.agp" + chr_fasta = pf + ".chr.fasta" + if need_update((chr_agp, scaffolds), chr_fasta): + agp_build([chr_agp, scaffolds, chr_fasta]) + + unplaced_agp = pf + ".unplaced.agp" + if need_update((chr_agp, scaffolds), unplaced_agp): + write_unplaced_agp(chr_agp, scaffolds, unplaced_agp) + + unplaced_fasta = pf + ".unplaced.fasta" + if need_update((unplaced_agp, scaffolds), unplaced_fasta): + agp_build([unplaced_agp, scaffolds, unplaced_fasta]) + + combined_agp = pf + ".agp" + if need_update((chr_agp, unplaced_agp), combined_agp): + FileMerger((chr_agp, unplaced_agp), combined_agp).merge() + + combined_fasta = pf + ".fasta" + if need_update((chr_fasta, unplaced_fasta), combined_fasta): + FileMerger((chr_fasta, unplaced_fasta), combined_fasta).merge() + + chainfile = pf + ".chain" + if need_update((combined_agp, scaffolds, combined_fasta), chainfile): + fromagp([combined_agp, scaffolds, combined_fasta]) + + liftedbed = mapbed.rsplit(".", 1)[0] + ".lifted.bed" + if need_update((mapbed, chainfile), liftedbed): + logger.debug( + "Lifting markers from positions in `%s` to new positions in `%s`", + mapbed, + liftedbed, + ) + liftover(chainfile, mapbed, liftedbed, unmapfile="unmapped", cstyle="l") + + if opts.cleanup: + cleanup( + chr_fasta, + unplaced_fasta, + combined_fasta, + chainfile, + unplaced_agp, + combined_fasta + ".sizes", + "unmapped", + ) + + sort([liftedbed, "-i"]) # Sort bed in place + + +def 
add_allmaps_plot_options(p): + p.add_argument( + "-w", "--weightsfile", default="weights.txt", help="Use weights from file" + ) + p.add_argument( + "--distance", + default="cM", + choices=distance_choices, + help="Plot markers based on distance", + ) + p.add_argument( + "--links", default=10, type=int, help="Only plot matchings more than" + ) + p.add_argument( + "--panels", default=False, action="store_true", help="Add panel labels A/B" + ) + + +def plot(args): + """ + %prog plot input.bed seqid + + Plot the matchings between the reconstructed pseudomolecules and the maps. + Two types of visualizations are available in one canvas: + + 1. Parallel axes, and matching markers are shown in connecting lines; + 2. Scatter plot. + """ + from ..graphics.base import ( + plt, + savefig, + normalize_axes, + set2, + panel_labels, + shorten, + ) + from ..graphics.chromosome import Chromosome, GeneticMap, HorizontalChromosome + + p = OptionParser(plot.__doc__) + p.add_argument("--title", help="Title of the plot") + add_allmaps_plot_options(p) + opts, args, iopts = p.set_image_options(args, figsize="10x6") + + if len(args) != 2: + sys.exit(not p.print_help()) + + inputbed, seqid = args + pf = inputbed.rsplit(".", 1)[0] + bedfile = pf + ".lifted.bed" + agpfile = pf + ".agp" + weightsfile = opts.weightsfile + links = opts.links + + function = get_function(opts.distance) + cc = Map(bedfile, function=function) + allseqids = cc.seqids + mapnames = cc.mapnames + weights = Weights(weightsfile, mapnames) + assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids) + + s = Scaffold(seqid, cc) + mlgs = [k for k, v in s.mlg_counts.items() if v >= links] + while not mlgs: + links //= 2 + logger.error("No markers to plot, --links reset to %d", links) + mlgs = [k for k, v in s.mlg_counts.items() if v >= links] + + mlgsizes = {} + for mlg in mlgs: + mm = cc.extract_mlg(mlg) + mlgsize = max(function(x) for x in mm) + mlgsizes[mlg] = mlgsize + + fig = plt.figure(1, (iopts.w, iopts.h)) + 
root = fig.add_axes([0, 0, 1, 1]) + bbox = dict(boxstyle="round", fc="darkslategray", ec="darkslategray") + if opts.title: + root.text(0.5, 0.95, opts.title, color="w", bbox=bbox, size=16) + ax1 = fig.add_axes([0, 0, 0.5, 1]) + ax2 = fig.add_axes([0.5, 0, 0.5, 1]) + + # Find the layout first + ystart, ystop = 0.9, 0.1 + L = Layout(mlgsizes) + coords = L.coords + + tip = 0.02 + marker_pos = {} + # Palette + colors = dict((mapname, set2[i % len(set2)]) for i, mapname in enumerate(mapnames)) + colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs) + + rhos = {} + # Parallel coordinates + for mlg, (x, y1, y2) in coords.items(): + mm = cc.extract_mlg(mlg) + markers = [(m.accn, function(m)) for m in mm] # exhaustive marker list + xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid] + mx, my = zip(*xy) + rho = spearmanr(mx, my) + rhos[mlg] = rho + flip = rho < 0 + + g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip) + extra = -3 * tip if x < 0.5 else 3 * tip + ha = "right" if x < 0.5 else "left" + mapname = mlg.split("-")[0] + tlg = shorten(mlg.replace("_", ".")) # Latex does not like underscore char + label = "{0} (w={1})".format(tlg, weights[mapname]) + ax1.text( + x + extra, + (y1 + y2) / 2, + label, + color=colors[mlg], + ha=ha, + va="center", + rotation=90, + ) + marker_pos.update(g.marker_pos) + + agp = AGP(agpfile) + agp = [x for x in agp if x.object == seqid] + chrsize = max(x.object_end for x in agp) + + # Pseudomolecules in the center + r = ystart - ystop + ratio = r / chrsize + f = lambda x: (ystart - ratio * x) + patchstart = [f(x.object_beg) for x in agp if not x.is_gap] + Chromosome(ax1, 0.5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2) + + label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0)) + ax1.text(0.5, ystart + tip, label, ha="center") + + scatter_data = defaultdict(list) + # Connecting lines + for b in s.markers: + marker_name = b.accn + if marker_name not in marker_pos: + continue + + cx = 0.5 + cy = 
f(b.pos) + mx = coords[b.mlg][0] + my = marker_pos[marker_name] + + extra = -tip if mx < cx else tip + extra *= 1.25 # leave boundaries for aesthetic reasons + cx += extra + mx -= extra + ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg]) + scatter_data[b.mlg].append((b.pos, function(b))) + + # Scatter plot, same data as parallel coordinates + xstart, xstop = sorted((ystart, ystop)) + f = lambda x: (xstart + ratio * x) + pp = [x.object_beg for x in agp if not x.is_gap] + patchstart = [f(x) for x in pp] + HorizontalChromosome( + ax2, xstart, xstop, ystop, height=2 * tip, patch=patchstart, lw=2 + ) + draw_gauge(ax2, xstart, chrsize) + + gap = 0.03 + ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values()) + + tlgs = [] + for mlg, mlgsize in sorted(mlgsizes.items()): + height = ratio * mlgsize + ystart -= height + xx = 0.5 + xstart / 2 + width = r / 2 + color = colors[mlg] + ax = fig.add_axes([xx, ystart, width, height]) + ypos = ystart + height / 2 + ystart -= gap + sd = scatter_data[mlg] + xx, yy = zip(*sd) + ax.vlines(pp, 0, 2 * mlgsize, colors="beige") + ax.plot(xx, yy, ".", color=color) + rho = rhos[mlg] + ax.text( + 0.5, + 1 - 0.4 * gap / height, + r"$\rho$={0:.3f}".format(rho), + ha="center", + va="top", + transform=ax.transAxes, + color="gray", + ) + tlg = shorten(mlg.replace("_", ".")) + tlgs.append((tlg, ypos, color)) + ax.set_xlim(0, chrsize) + ax.set_ylim(0, mlgsize) + ax.set_xticks([]) + while height / len(ax.get_yticks()) < 0.03 and len(ax.get_yticks()) >= 2: + ax.set_yticks(ax.get_yticks()[::2]) # Sparsify the ticks + yticklabels = [int(x) for x in ax.get_yticks()] + ax.set_yticks(yticklabels) + ax.set_yticklabels(yticklabels, family="Helvetica") + if rho < 0: + ax.invert_yaxis() + + for i, (tlg, ypos, color) in enumerate(tlgs): + ha = "center" + if len(tlgs) > 4: + ha = "right" if i % 2 else "left" + root.text(0.5, ypos, tlg, color=color, rotation=90, ha=ha, va="center") + + if opts.panels: + labels = ((0.04, 0.96, "A"), (0.48, 0.96, "B")) + 
panel_labels(root, labels) + + normalize_axes(ax1, ax2, root) + image_name = seqid + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + plt.close(fig) + return image_name + + +def plotall(xargs): + """ + %prog plotall input.bed + + Plot the matchings between the reconstructed pseudomolecules and the maps. + This command will plot each reconstructed object (non-singleton). + """ + p = OptionParser(plotall.__doc__) + add_allmaps_plot_options(p) + opts, args, iopts = p.set_image_options(xargs, figsize="10x6") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (inputbed,) = args + pf = inputbed.rsplit(".", 1)[0] + agpfile = pf + ".chr.agp" + agp = AGP(agpfile) + objects = [ob for ob, lines in agp.iter_object()] + for seqid in natsorted(objects): + plot(xargs + [seqid]) + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/allpaths.py b/jcvi/assembly/allpaths.py new file mode 100644 index 00000000..e95c3e18 --- /dev/null +++ b/jcvi/assembly/allpaths.py @@ -0,0 +1,530 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Subroutines to aid ALLPATHS-LG assembly. 
+""" +import os.path as op +import sys + +from struct import pack, unpack +from itertools import islice + +import numpy as np + +from ..formats.base import BaseFile +from ..apps.grid import Jobs +from ..apps.base import ( + ActionDispatcher, + OptionParser, + cleanup, + glob, + logger, + need_update, + sh, +) + +from .base import FastqNamings, Library + + +class PairsFile(BaseFile): + def __init__(self, filename): + super().__init__(filename) + + fp = open(filename, "rb") + (binwrite,) = unpack("8s", fp.read(8)) + assert binwrite == "BINWRITE" + + (self.version,) = unpack("i", fp.read(4)) + assert self.version == 1 + + (self.nreads,) = unpack("Q", fp.read(8)) + (self.nlibs,) = unpack("Q", fp.read(8)) + self.libstats = [] + self.libnames = [] + + for i in range(self.nlibs): + self.libstats.append(unpack("ii", fp.read(8))) + + (nlibs,) = unpack("Q", fp.read(8)) + assert nlibs == self.nlibs + + for i in range(self.nlibs): + (slen,) = unpack("i", fp.read(4)) + libname, nul = unpack("{0}sc".format(slen - 1), fp.read(slen)) + self.libnames.append(libname) + + (npairs,) = unpack("Q", fp.read(8)) + self.r1 = np.fromfile(fp, dtype=np.int64, count=npairs) + + (npairs2,) = unpack("Q", fp.read(8)) + assert npairs2 == npairs + self.r2 = np.fromfile(fp, dtype=np.int64, count=npairs) + + (npairsl,) = unpack("Q", fp.read(8)) + assert npairsl == npairs + self.libs = np.fromfile(fp, dtype=np.int8, count=npairs) + + assert len(fp.read()) == 0 # EOF + self.npairs = npairs + + @property + def header(self): + from jcvi.utils.cbook import percentage + + s = "Number of paired reads: {0}\n".format( + percentage(self.npairs * 2, self.nreads) + ) + s += "Libraries: {0}\n".format(", ".join(self.libnames)) + s += "LibraryStats: {0}\n".format(self.libstats) + s += "r1: {0}\n".format(self.r1) + s += "r2: {0}\n".format(self.r2) + s += "libs: {0}".format(self.libs) + return s + + def fixLibraryStats(self, sep, sd): + libstat = (sep, sd) + logger.debug("New library stat: {0}".format(libstat)) + 
self.libstats = [libstat] * self.nlibs + + def write(self, filename): + fw = open(filename, "wb") + fw.write(pack("8s", "BINWRITE")) + fw.write(pack("i", self.version)) + fw.write(pack("Q", self.nreads)) + fw.write(pack("Q", self.nlibs)) + for a, b in self.libstats: + fw.write(pack("ii", a, b)) + fw.write(pack("Q", self.nlibs)) + for name in self.libnames: + slen = len(name) + 1 + fw.write(pack("i", slen)) + fw.write(pack("{0}s".format(slen), name)) + fw.write(pack("Q", self.npairs)) + self.r1.tofile(fw) + fw.write(pack("Q", self.npairs)) + self.r2.tofile(fw) + fw.write(pack("Q", self.npairs)) + self.libs.tofile(fw) + logger.debug("New pairs file written to `{0}`.".format(filename)) + + +def main(): + + actions = ( + ("prepare", "prepare ALLPATHS csv files and run script"), + ("log", "prepare a log of created files"), + ("pairs", "parse ALLPATHS pairs file"), + ("dump", "export ALLPATHS fastb file to fastq"), + ("fixpairs", "fix pairs library stats"), + ("fill", "run FillFragments on `frag_reads_corr.fastb`"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def dump(args): + """ + %prog dump fastbfile + + Export ALLPATHS fastb file to fastq file. Use --dir to indicate a previously + run allpaths folder. 
+ """ + p = OptionParser(dump.__doc__) + p.add_argument("--dir", help="Working directory") + p.add_argument( + "--nosim", + default=False, + action="store_true", + help="Do not simulate qual to 50", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastbfile,) = args + d = opts.dir + if d: + from jcvi.assembly.preprocess import export_fastq + + rc = "jump" in fastbfile + export_fastq(d, fastbfile, rc=rc) + return + + sim = not opts.nosim + pf = "j" if "jump" in fastbfile else "f" + + statsfile = "{0}.lib_stats".format(pf) + cleanup(statsfile) + + cmd = "SplitReadsByLibrary READS_IN={0}".format(fastbfile) + cmd += " READS_OUT={0} QUALS=True".format(pf) + sh(cmd) + + libs = [] + fp = open(statsfile) + next(fp) + next(fp) # skip two rows + for row in fp: + if row.strip() == "": + continue + + libname = row.split()[0] + if libname == "Unpaired": + continue + + libs.append(libname) + + logger.debug("Found libraries: {0}".format(",".join(libs))) + + cmds = [] + for libname in libs: + cmd = "FastbQualbToFastq" + cmd += " HEAD_IN={0}.{1}.AB HEAD_OUT={1}".format(pf, libname) + cmd += " PAIRED=True PHRED_OFFSET=33" + if sim: + cmd += " SIMULATE_QUALS=True" + if pf == "j": + cmd += " FLIP=True" + + cmds.append((cmd,)) + + m = Jobs(target=sh, args=cmds) + m.run() + + for libname in libs: + cmd = "mv {0}.A.fastq {0}.1.fastq".format(libname) + sh(cmd) + cmd = "mv {0}.B.fastq {0}.2.fastq".format(libname) + sh(cmd) + + +def fixpairs(args): + """ + %prog fixpairs pairsfile sep sd + + Fix pairs library stats. This is sometime useful to modify library stats, + for example, the separation between paired reads after importing the data. 
+ """ + p = OptionParser(fixpairs.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + pairsfile, sep, sd = args + newpairsfile = pairsfile.rsplit(".", 1)[0] + ".new.pairs" + sep = int(sep) + sd = int(sd) + + p = PairsFile(pairsfile) + p.fixLibraryStats(sep, sd) + p.write(newpairsfile) + + +def fill(args): + """ + %prog fill frag_reads_corr.fastb + + Run FillFragments on `frag_reads_corr.fastb`. + """ + p = OptionParser(fill.__doc__) + p.add_argument( + "--stretch", + default=3, + type=int, + help="MAX_STRETCH to pass to FillFragments", + ) + p.set_cpus() + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastb,) = args + assert fastb == "frag_reads_corr.fastb" + + pcfile = "frag_reads_corr.k28.pc.info" + nthreads = " NUM_THREADS={0}".format(opts.cpus) + maxstretch = " MAX_STRETCH={0}".format(opts.stretch) + if need_update(fastb, pcfile): + cmd = "PathReads READS_IN=frag_reads_corr" + cmd += nthreads + sh(cmd) + + filledfastb = "filled_reads.fastb" + if need_update(pcfile, filledfastb): + cmd = "FillFragments PAIRS_OUT=frag_reads_corr_cpd" + cmd += " PRECORRECT_LIBSTATS=True" + cmd += maxstretch + cmd += nthreads + sh(cmd) + + filledfasta = "filled_reads.fasta" + if need_update(filledfastb, filledfasta): + cmd = "Fastb2Fasta IN=filled_reads.fastb OUT=filled_reads.fasta" + sh(cmd) + + +def extract_pairs(fastqfile, p1fw, p2fw, fragsfw, p, suffix=False): + """ + Take fastqfile and array of pair ID, extract adjacent pairs to outfile. + Perform check on numbers when done. p1fw, p2fw is a list of file handles, + each for one end. p is a Pairs instance. 
+ """ + fp = open(fastqfile) + current_id = 0 + npairs = nfrags = 0 + for x, lib in zip(p.r1, p.libs): + while current_id != x: + fragsfw.writelines(islice(fp, 4)) # Exhaust the iterator + current_id += 1 + nfrags += 1 + a = list(islice(fp, 4)) + b = list(islice(fp, 4)) + if suffix: + name = a[0].rstrip() + a[0] = name + "/1\n" + b[0] = name + "/2\n" + else: + b[0] = a[0] # Keep same read ID for pairs + + p1fw[lib].writelines(a) + p2fw[lib].writelines(b) + current_id += 2 + npairs += 2 + + # Write the remaining single reads + while True: + contents = list(islice(fp, 4)) + if not contents: + break + fragsfw.writelines(contents) + nfrags += 1 + + logger.debug( + "A total of {0} paired reads written to `{1}`.".format( + npairs, ",".join(x.name for x in p1fw + p2fw) + ) + ) + logger.debug( + "A total of {0} single reads written to `{1}`.".format(nfrags, fragsfw.name) + ) + + # Validate the numbers + expected_pairs = 2 * p.npairs + expected_frags = p.nreads - 2 * p.npairs + assert npairs == expected_pairs, "Expect {0} paired reads, got {1} instead".format( + expected_pairs, npairs + ) + assert nfrags == expected_frags, "Expect {0} single reads, got {1} instead".format( + expected_frags, nfrags + ) + + +def pairs(args): + """ + %prog pairs pairsfile + + Parse ALLPATHS pairs file, and write pairs IDs and single read IDs in + respective ids files: e.g. `lib1.pairs.fastq`, `lib2.pairs.fastq`, + and single `frags.fastq` (with single reads from lib1/2). 
+ """ + from jcvi.assembly.preprocess import run_FastbAndQualb2Fastq + + p = OptionParser(pairs.__doc__) + p.add_argument( + "--header", + default=False, + action="store_true", + help="Print header only", + ) + p.add_argument( + "--suffix", + default=False, + action="store_true", + help="Add suffix /1, /2 to read names", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + pairsfile, fastqfile = args + pf = op.basename(fastqfile).split(".")[0] + p = PairsFile(pairsfile) + print(p.header, file=sys.stderr) + + if opts.header: + return + + if fastqfile.endswith(".fastb"): + fastbfile = fastqfile + fastqfile = fastbfile.replace(".fastb", ".fastq") + run_FastbAndQualb2Fastq(infile=fastbfile, outfile=fastqfile) + + p1file = "{0}.1.corr.fastq" + p2file = "{0}.2.corr.fastq" + fragsfile = "{0}.corr.fastq" + p1fw = [open(p1file.format(x), "w") for x in p.libnames] + p2fw = [open(p2file.format(x), "w") for x in p.libnames] + fragsfw = open(fragsfile.format(pf), "w") + + extract_pairs(fastqfile, p1fw, p2fw, fragsfw, p, suffix=opts.suffix) + + +ALLPATHSRUN = r""" +ulimit -s 100000 + +if [ -f frag_reads_orig.fastb ] +then + echo "'frag_reads_orig.fastb' exists. Skip loading reads." +else + mkdir -p $PWD/read_cache + echo "Load reads ..." + CacheLibs.pl CACHE_DIR=$PWD/read_cache \ + ACTION=Add IN_LIBS_CSV=in_libs.csv + if [ -f in_groups_33.csv ] + then + CacheGroups.pl CACHE_DIR=$PWD/read_cache \ + ACTION=Add IN_GROUP_CSV=in_groups_33.csv PHRED_64=0 HOSTS='{1}' + fi + if [ -f in_groups_64.csv ] + then + CacheGroups.pl CACHE_DIR=$PWD/read_cache \ + ACTION=Add IN_GROUP_CSV=in_groups_64.csv PHRED_64=1 HOSTS='{1}' + fi + PrepareAllPathsInputs.pl DATA_DIR=$PWD PLOIDY={0} HOSTS='{1}' +fi + +RunAllPathsLG PRE=. REFERENCE_NAME=. OVERWRITE=True HAPLOIDIFY=False \ + DATA_SUBDIR=. RUN=allpaths SUBDIR=run THREADS={1} MIN_CONTIG=200 \ + {2} | tee allpaths.log""" + + +def prepare(args): + """ + %prog prepare "B. 
oleracea" *.fastq + + Scan input fastq files (see below) and create `in_groups.csv` and + `in_libs.csv`. The species name does not really matter. + """ + from jcvi.utils.table import write_csv + from jcvi.formats.base import write_file + from jcvi.formats.fastq import guessoffset, readlen + + p = OptionParser(prepare.__doc__ + FastqNamings) + p.add_argument( + "--corr", + default=False, + action="store_true", + help="Extra parameters for corrected data", + ) + p.add_argument( + "--norun", + default=False, + action="store_true", + help="Don't write `run.sh` script", + ) + p.add_argument("--ploidy", default="2", choices=("1", "2"), help="Ploidy") + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + organism_name = args[0] + project_name = "".join(x[0] for x in organism_name.split()).upper() + fnames = sorted(glob("*.fastq*") if len(args) == 1 else args[1:]) + for x in fnames: + assert op.exists(x), "File `{0}` not found.".format(x) + + groupheader = "group_name library_name file_name".split() + libheader = ( + "library_name project_name organism_name type paired " + "frag_size frag_stddev insert_size insert_stddev read_orientation " + "genomic_start genomic_end".split() + ) + groups_33 = [] + groups_64 = [] + libs = [] + for file_name in fnames: + offset = guessoffset([file_name]) + group_name = op.basename(file_name).split(".")[0] + library_name = "-".join(group_name.split("-")[:2]) + + # Handle paired files and convert to wildcard + if ".1." in file_name: + file_name = file_name.replace(".1.", ".?.") + elif ".2." 
in file_name: + continue + + groupscontents = groups_64 if offset == 64 else groups_33 + groupscontents.append((group_name, library_name, file_name)) + if library_name not in libs: + libs.append(library_name) + + libcontents = [] + for library_name in libs: + L = Library(library_name) + size = L.size + stddev = L.stddev + type = L.type + paired = L.paired + read_orientation = L.read_orientation + + size = size or "" + stddev = stddev or "" + frag_size = size if type == "fragment" else "" + frag_stddev = stddev if type == "fragment" else "" + insert_size = size if type != "fragment" else "" + insert_stddev = stddev if type != "fragment" else "" + genomic_start, genomic_end = "", "" + libcontents.append( + ( + library_name, + project_name, + organism_name, + type, + paired, + frag_size, + frag_stddev, + insert_size, + insert_stddev, + read_orientation, + genomic_start, + genomic_end, + ) + ) + + for groups, csvfile in ( + (groups_33, "in_groups_33.csv"), + (groups_64, "in_groups_64.csv"), + (groups_33 + groups_64, "in_groups.csv"), + ): + if not groups: + continue + write_csv(groupheader, groups, filename=csvfile, tee=True) + logger.debug("`{0}` created (# of groups = {1}).".format(csvfile, len(groups))) + + write_csv(libheader, libcontents, filename="in_libs.csv", tee=True) + logger.debug("`in_libs.csv` created (# of libs = {0}).".format(len(libcontents))) + + runfile = "run.sh" + + # ALLPATHS stalls on reads over 250bp + max_rd_len = max(readlen([f]) for f in fnames) + extra = "CLOSE_UNIPATH_GAPS=False " if max_rd_len > 200 else "" + if opts.corr: + extra += "FE_NUM_CYCLES=1 EC_K=28 FE_QUAL_CEIL_RADIUS=0" + extra += " REMOVE_DODGY_READS_FRAG=False FE_MAX_KMER_FREQ_TO_MARK=1" + + if not opts.norun: + contents = ALLPATHSRUN.format(opts.ploidy, opts.cpus, extra) + write_file(runfile, contents) + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/automaton.py b/jcvi/assembly/automaton.py new file mode 100644 index 00000000..08ca0153 --- /dev/null +++ 
class Meta(object):
    """
    Library metadata guessed from a single FASTQ file name.

    Attributes set by the constructor:
      fastq  -- the file name with surrounding whitespace removed
      suffix -- last extension of the file name (e.g. ".fastq" or ".gz")
      paired -- ".1" / ".2" for the two mates of a pair, "" otherwise

    `guess()` additionally sets `genome` and `tag`; when the object is built
    with `guess=False` the caller is expected to assign them.
    """

    def __init__(self, fastq, guess=True):
        # Note the guesswork is largely based on JIRA LIMS naming convention.
        # Strip once and derive every attribute from the cleaned name, so a
        # trailing newline (e.g. a raw line read from a list file) cannot end
        # up inside `suffix` and therefore inside the link name.
        fastq = fastq.strip()
        self.fastq = fastq
        self.suffix = op.splitext(fastq)[-1]
        if ".1." in fastq or ".2." in fastq:
            paired = ".1" if ".1." in fastq else ".2"
        elif "_R1_" in fastq or "_R2_" in fastq:
            paired = ".1" if "_R1_" in fastq else ".2"
        else:
            paired = ""
        self.paired = paired
        if guess:
            self.guess()

    def __str__(self):
        # One tab-separated row: genome, tag, fastq
        return "\t".join((self.genome, self.tag, self.fastq))

    @property
    def link(self):
        """Path `<genome>/<tag><paired><suffix>` used for the link or subset."""
        linkname = "{0}{1}{2}".format(self.tag, self.paired, self.suffix)
        return op.join(self.genome, linkname)

    def make_link(self, firstN=0):
        """
        Populate `self.link` inside the genome folder.

        With `firstN > 0` the first N reads are written to `self.link` via
        `first()`; otherwise `self.link` becomes a symlink to the absolute
        path of the FASTQ file, replacing an existing symlink of that name.
        """
        mkdir(self.genome)
        if firstN > 0:
            first([str(firstN), self.fastq, "--outfile={0}".format(self.link)])
            return

        if op.islink(self.link):
            os.unlink(self.link)
        os.symlink(get_abs_path(self.fastq), self.link)

    def guess(self):
        """
        Set `genome` and `tag` from the first two "-"-separated fields of the
        file's base name. Raises IndexError if the name contains no "-".
        """
        # Try to guess library info based on file name
        # SUBAC47-MP-IL73-1_CGGAAT_L001_R1_filtered.fastq
        basename = op.basename(self.fastq)
        baseparts = basename.split("-")
        self.genome = baseparts[0]
        self.tag = baseparts[1]

        if self.genome.endswith("BP"):
            # The last five characters are split off the genome name; the
            # three before "BP" are appended to the tag.
            self.genome, bp = self.genome[:-5], self.genome[-5:-2]
            self.tag = "-".join((self.tag, bp))  # 500BP
def main():
    """Dispatch the requested sub-command to the same-named function here."""
    actions = (
        ("prepare", "parse list of FASTQ files and prepare input"),
        ("pairs", "estimate insert sizes for input files"),
        ("contamination", "remove contaminated reads"),
        ("allpaths", "run automated ALLPATHS"),
        ("spades", "run automated SPADES assembly"),
        ("allpathsX", "run automated ALLPATHS on list of files"),
        ("soapX", "run automated SOAP on list of files"),
        ("correctX", "run automated ALLPATHS correction on list of files"),
    )
    p = ActionDispatcher(actions)
    p.dispatch(globals())


def _spades_kmers(rl):
    """Return the `-k` list for read length `rl`, or None to use the default."""
    # The larger threshold must be tested first: `rl >= 150` is also true for
    # every `rl >= 250`, which made the long-read k-mer list unreachable.
    if rl >= 250:
        return "21,33,55,77,99,127"
    if rl >= 150:
        return "21,33,55,77"
    return None


def spades(args):
    """
    %prog spades folder

    Run automated SPADES.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    from jcvi.formats.fastq import readlen

    p = OptionParser(spades.__doc__)
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())

    (folder,) = args
    # `reads` is one pair of FASTQ files, `pf` the prefix for the output name
    for reads, pf in iter_project(folder):
        rl = readlen([reads[0], "--silent"])
        kmers = _spades_kmers(rl)

        cmd = "spades.py"
        if kmers:
            cmd += " -k {0}".format(kmers)
        cmd += " --careful"
        cmd += " --pe1-1 {0} --pe1-2 {1}".format(*reads)
        cmd += " -o {0}_spades".format(pf)
        # The command is only printed, not executed
        print(cmd)
+ """ + from jcvi.apps.bowtie import align + + p = OptionParser(contamination.__doc__) + p.add_argument( + "--mapped", + default=False, + action="store_true", + help="Retain contaminated reads instead", + ) + p.set_cutoff(cutoff=800) + p.set_mateorientation(mateorientation="+-") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + folder, ecoli = args + ecoli = get_abs_path(ecoli) + tag = "--mapped" if opts.mapped else "--unmapped" + for p, pf in iter_project(folder): + align_opts = [ecoli] + p + [tag] + align_opts += ["--cutoff={0}".format(opts.cutoff), "--null"] + if opts.mateorientation: + align_opts += ["--mateorientation={0}".format(opts.mateorientation)] + align(align_opts) + + +def pairs(args): + """ + %prog pairs folder reference.fasta + + Estimate insert size distribution. Compatible with a variety of aligners, + including BOWTIE and BWA. + """ + p = OptionParser(pairs.__doc__) + p.set_firstN() + p.set_mates() + p.set_aligner() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + cwd = os.getcwd() + aligner = opts.aligner + work = "-".join(("pairs", aligner)) + mkdir(work) + + from jcvi.formats.sam import pairs as ps + + if aligner == "bowtie": + from jcvi.apps.bowtie import align + elif aligner == "bwa": + from jcvi.apps.bwa import align + + folder, ref = args + ref = get_abs_path(ref) + messages = [] + for p, prefix in iter_project(folder): + samplefq = [] + for i in range(2): + samplefq.append(op.join(work, prefix + "_{0}.first.fastq".format(i + 1))) + first([str(opts.firstN)] + [p[i]] + ["-o", samplefq[i]]) + + os.chdir(work) + align_args = [ref] + [op.basename(fq) for fq in samplefq] + outfile, logfile = align(align_args) + bedfile, stats = ps([outfile, "--rclip={0}".format(opts.rclip)]) + os.chdir(cwd) + + median = stats.median + tag = "MP" if median > 1000 else "PE" + median = str(median) + pf, sf = median[:2], median[2:] + if sf and int(sf) != 0: + pf = str(int(pf) + 1) # 
def allpaths(args):
    """
    %prog allpaths folder1 folder2 ...

    Run automated ALLPATHS on list of dirs.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    parser = OptionParser(allpaths.__doc__)
    parser.add_argument("--ploidy", default="1", choices=("1", "2"), help="Ploidy")
    opts, args = parser.parse_args(args)

    if not args:
        sys.exit(not parser.print_help())

    # Arguments that are not existing directories are skipped silently
    for folder in (x for x in args if op.isdir(x)):
        assemble_dir(
            folder,
            target=["final.contigs.fasta", "final.assembly.fasta"],
            ploidy=opts.ploidy,
        )
def slink(p, pf, tag, extra=None):
    """
    Create folder `pf` and fill it with symlinks to the input files.

    p     -- input file names; each is linked as `../<name>` from inside `pf`
    pf    -- working folder, created with `mkdir(pf, overwrite=True)`
    tag   -- list of library tags; each file gets one link per tag, named
             `<tag>.<i>.fastq[.gz]`, where `i` counts the non-"PE-0" files
    extra -- optional extra paths, each linked with `ln -sf <path>`
    """
    mkdir(pf, overwrite=True)
    cwd = os.getcwd()
    os.chdir(pf)
    # Always return to the starting directory, even if linking raises;
    # otherwise every later relative path in the caller would be wrong.
    try:
        # Create sym-links for the input files
        i = 1
        for f in sorted(p):
            gz = ".gz" if f.endswith(".gz") else ""
            if "PE-0" in f:
                # These files get a fixed link name and do not advance `i`
                sh("ln -sf ../{0} PE-0.fastq{1}".format(f, gz))
                continue
            for t in tag:
                sh("ln -sf ../{0} {1}.{2}.fastq{3}".format(f, t, i, gz))
            # NOTE(review): nesting of this increment is reconstructed from
            # flattened text; per-file numbering (.1/.2) is assumed - confirm.
            i += 1

        if extra:
            for e in extra:
                sh("ln -sf {0}".format(e))
    finally:
        os.chdir(cwd)
def assemble_dir(pf, target, ploidy="1"):
    """
    Run the ALLPATHS `prepare` step and `./run.sh` inside folder `pf`.

    pf     -- folder holding the FASTQ files; also replaces "final" in each
              target name to form the output file names
    target -- assembly file names to copy out of `allpaths/ASSEMBLIES/run/`
    ploidy -- passed to `prepare` as `--ploidy=<ploidy>`

    Returns early when `need_update` reports the outputs as up to date.
    """
    from jcvi.assembly.allpaths import prepare

    logger.debug("Work on {0}".format(pf))
    asm = [x.replace("final", pf) for x in target]
    if not need_update(pf, asm, warn=True):
        return

    cwd = os.getcwd()
    os.chdir(pf)
    # Restore the working directory even if `prepare` raises or exits, so a
    # failure here cannot leave the caller inside `pf`.
    try:
        prepare(
            [pf]
            + sorted(glob("*.fastq") + glob("*.fastq.gz"))
            + ["--ploidy={0}".format(ploidy)]
        )
        sh("./run.sh")

        for a, t in zip(asm, target):
            sh("cp allpaths/ASSEMBLIES/run/{0} ../{1}".format(t, a))

        logger.debug("Assembly finished: {0}".format(asm))
    finally:
        os.chdir(cwd)


def correct_pairs(p, pf, tag):
    """
    Take one pair of reads and correct to generate *.corr.fastq.

    p   -- input FASTQ file names
    pf  -- prefix; used as the working folder and in the output names
    tag -- list of library tags; only the first one names the outputs
    """
    from jcvi.assembly.preprocess import correct as cr

    logger.debug("Work on {0} ({1})".format(pf, ",".join(p)))
    itag = tag[0]
    cm = ".".join((pf, itag))
    targets = (cm + ".1.corr.fastq", cm + ".2.corr.fastq", pf + ".PE-0.corr.fastq")
    if not need_update(p, targets, warn=True):
        return

    slink(p, pf, tag)

    cwd = os.getcwd()
    os.chdir(pf)
    # Restore the working directory even if the correction step raises
    try:
        cr(sorted(glob("*.fastq") + glob("*.fastq.gz")) + ["--nofragsdedup"])
        sh("mv {0}.1.corr.fastq ../{1}".format(itag, targets[0]))
        sh("mv {0}.2.corr.fastq ../{1}".format(itag, targets[1]))
        sh("mv frag_reads_corr.corr.fastq ../{0}".format(targets[2]))

        logger.debug("Correction finished: {0}".format(targets))
    finally:
        os.chdir(cwd)
def iter_project(
    folder, pattern="*.fq,*.fq.gz,*.fastq,*.fastq.gz", n=2, commonprefix=True
):
    """
    Yield `(sorted_files, prefix)` for each consecutive group of `n` files
    that `iglob(folder, pattern)` returns. A group that is short or padded
    with None is dropped.
    """
    matched = list(iglob(folder, pattern))
    for group in grouper(matched, n):
        if len(group) == n and None not in group:
            # The project id is derived from the base names only
            names = [op.basename(path) for path in group]
            prefix = pairspf(names, commonprefix=commonprefix)
            yield sorted(group), prefix


def soapX(args):
    """
    %prog soapX folder tag [*.fastq]

    Run SOAP on a folder of paired reads and apply tag before assembly.
    Optional *.fastq in the argument list will be symlinked in each folder and
    co-assembled.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    parser = OptionParser(soapX.__doc__)
    opts, args = parser.parse_args(args)

    if len(args) < 2:
        sys.exit(not parser.print_help())

    folder, tag = args[:2]
    # Extra files are resolved now because the work happens in sub-folders
    extra = [get_abs_path(x) for x in args[2:]]
    tags = tag.split(",")
    for files, prefix in iter_project(folder, n=3):
        soap_trios(files, prefix, tags, extra)


def correctX(args):
    """
    %prog correctX folder tag

    Run ALLPATHS correction on a folder of paired reads and apply tag.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    parser = OptionParser(correctX.__doc__)
    opts, args = parser.parse_args(args)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    folder, tag = args
    tags = tag.split(",")
    for files, prefix in iter_project(folder):
        correct_pairs(files, prefix, tags)
PE-350,TT-1050 + """ + p = OptionParser(allpathsX.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + folder, tag = args + tag = tag.split(",") + for p, pf in iter_project(folder): + assemble_pairs(p, pf, tag) + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/base.py b/jcvi/assembly/base.py new file mode 100644 index 00000000..77ff98b6 --- /dev/null +++ b/jcvi/assembly/base.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Base utilties for genome assembly related calculations and manipulations +""" +import os.path as op +import sys + +from math import log +from bisect import bisect + +import numpy as np + +from ..formats.base import must_open +from ..formats.fasta import Fasta +from ..apps.base import ActionDispatcher, OptionParser, glob + +ln2 = log(2) + + +types = {"PE": "fragment", "MP": "jumping", "TT": "jumping", "LL": "long"} +header = ("Length", "L50", "N50", "Min", "Max", "N") + +FastqNamings = """ + The naming schemes for the fastq files are. + + PE-376.fastq (paired end) + MP-3000.fastq (mate pairs) + TT-3000.fastq (mate pairs, but from 454 data, so expected to be +-) + LL-0.fastq (long reads) + + Paired reads in different files must be in the form of (note the .1. and .2.): + PE-376.1.fastq and PE-376.2.fastq to be considered + + The reads are assumed to be NOT paired if the number after the PE-, MP-, + etc. is 0. Otherwise, they are considered paired at the given distance. +""" + + +class Library(object): + """ + The sequence files define a library. 
def get_libs(args):
    """
    Group FASTQ files into libraries.

    args -- file names; when empty, `glob("*.fastq*")` is used instead

    Returns a list of `(Library, sorted_file_names)` tuples ordered by
    library size. The library name is the first two "-"-separated fields of
    the base name up to its first ".".
    """
    from itertools import groupby

    def library_name(path):
        return "-".join(op.basename(path).split(".")[0].split("-")[:2])

    fnames = args or glob("*.fastq*")
    for x in fnames:
        assert op.exists(x), "File `{0}` not found.".format(x)

    # groupby() only merges *adjacent* items, so sort by the grouping key
    # itself. Sorting by full path could split one library into several
    # groups when its files live in different directories.
    fnames = sorted(fnames, key=lambda x: (library_name(x), x))
    libs = [(Library(x), sorted(fs)) for x, fs in groupby(fnames, key=library_name)]

    libs.sort(key=lambda x: x[0].size)
    return libs


def calculate_A50(ctgsizes, cutoff=0, percent=50):
    """
    Given an array of contig sizes, produce A50, N50, and L50 values

    ctgsizes -- iterable of contig sizes
    cutoff   -- sizes below this value are discarded
    percent  -- fraction of the total length, in percent, to look up

    Returns `(a50, l50, n50)`: `a50` is the cumulative sum of the sizes in
    descending order, `l50` the size of the contig at which that sum passes
    `percent` of the total, and `n50` the 1-based rank of that contig. When
    no contig remains after the cutoff, `l50` and `n50` are both 0.
    """

    ctgsizes = np.array(ctgsizes, dtype=int)
    ctgsizes = np.sort(ctgsizes)[::-1]
    ctgsizes = ctgsizes[ctgsizes >= cutoff]

    a50 = np.cumsum(ctgsizes)
    if ctgsizes.size == 0:
        # Nothing to index into; report zeros instead of raising IndexError
        return a50, 0, 0

    total = np.sum(ctgsizes)
    idx = bisect(a50, total * percent / 100.0)
    # bisect() returns len(a50) when the threshold equals the grand total
    # (percent=100, or all sizes zero); clamp to the last contig.
    idx = min(idx, len(ctgsizes) - 1)
    l50 = ctgsizes[idx]
    n50 = idx + 1

    return a50, l50, n50
def Astat(delta, k, G, n):
    """
    Discriminator A-statistic: n * delta / G - k * ln2

    delta: contig size
    k: reads mapped in contig
    G: total genome size
    n: total reads mapped to genome
    """
    return n * delta * 1.0 / G - k * ln2


def main():
    """Dispatch the requested sub-command to the same-named function here."""
    actions = (("n50", "Given FASTA or a list of contig sizes, calculate N50"),)
    p = ActionDispatcher(actions)
    p.dispatch(globals())


def n50(args):
    """
    %prog n50 filename

    Given a file with a list of numbers denoting contig lengths, calculate N50.
    Input file can be both FASTA or a list of sizes.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    from jcvi.graphics.histogram import loghistogram

    p = OptionParser(n50.__doc__)
    p.add_argument(
        "--print0",
        default=False,
        action="store_true",
        help="Print size and L50 to stdout",
    )

    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    ctgsizes = []

    # Guess file format from the first character of the first file. The
    # handle is closed right away, and slicing (not indexing) makes an empty
    # file read as "not FASTA" instead of raising IndexError.
    with open(args[0]) as fp:
        probe = fp.readline()[:1]
    isFasta = probe == ">"
    if isFasta:
        for filename in args:
            f = Fasta(filename)
            ctgsizes += [size for _, size in f.itersizes()]

    else:
        for row in must_open(args):
            try:
                # The size is taken from the last whitespace-separated field
                ctgsize = int(float(row.split()[-1]))
            except (ValueError, IndexError):
                # ValueError: non-numeric field; IndexError: blank line
                continue
            ctgsizes.append(ctgsize)

    a50, l50, nn50 = calculate_A50(ctgsizes)
    sumsize = sum(ctgsizes)
    minsize = min(ctgsizes)
    maxsize = max(ctgsizes)
    n = len(ctgsizes)
    print(", ".join(args), file=sys.stderr)

    summary = (sumsize, l50, nn50, minsize, maxsize, n)
    print(
        " ".join("{0}={1}".format(a, b) for a, b in zip(header, summary)),
        file=sys.stderr,
    )
    loghistogram(ctgsizes)

    if opts.print0:
        print("\t".join(str(x) for x in (",".join(args), sumsize, l50)))

    return zip(header, summary)
b/jcvi/assembly/chic.c new file mode 100644 index 00000000..96c3e78f --- /dev/null +++ b/jcvi/assembly/chic.c @@ -0,0 +1,14222 @@ +/* Generated by Cython 3.0.11 */ + +/* BEGIN: Cython Metadata +{ + "distutils": { + "depends": [ + "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include/numpy/arrayobject.h", + "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include/numpy/arrayscalars.h", + "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include/numpy/ndarrayobject.h", + "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include/numpy/ndarraytypes.h", + "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include/numpy/ufuncobject.h" + ], + "extra_compile_args": [ + "-O3" + ], + "include_dirs": [ + "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include" + ], + "name": "jcvi.assembly.chic", + "sources": [ + "src/jcvi/assembly/chic.pyx" + ] + }, + "module_name": "jcvi.assembly.chic" +} +END: Cython Metadata */ + +#ifndef PY_SSIZE_T_CLEAN +#define PY_SSIZE_T_CLEAN +#endif /* PY_SSIZE_T_CLEAN */ +#if defined(CYTHON_LIMITED_API) && 0 + #ifndef Py_LIMITED_API + #if CYTHON_LIMITED_API+0 > 0x03030000 + #define Py_LIMITED_API CYTHON_LIMITED_API + #else + #define Py_LIMITED_API 0x03030000 + #endif + #endif +#endif + +#include "Python.h" + + #if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyFloat_FromString(obj) PyFloat_FromString(obj) + #else + #define __Pyx_PyFloat_FromString(obj) PyFloat_FromString(obj, NULL) + #endif + + + #if PY_MAJOR_VERSION <= 2 + #define PyDict_GetItemWithError 
_PyDict_GetItemWithError + #endif + + + #if (PY_VERSION_HEX < 0x030700b1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030600)) && !defined(PyContextVar_Get) + #define PyContextVar_Get(var, d, v) ((d) ? ((void)(var), Py_INCREF(d), (v)[0] = (d), 0) : ((v)[0] = NULL, 0) ) + #endif + +#ifndef Py_PYTHON_H + #error Python headers needed to compile C extensions, please install development version of Python. +#elif PY_VERSION_HEX < 0x02070000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) + #error Cython requires Python 2.7+ or Python 3.3+. +#else +#if defined(CYTHON_LIMITED_API) && CYTHON_LIMITED_API +#define __PYX_EXTRA_ABI_MODULE_NAME "limited" +#else +#define __PYX_EXTRA_ABI_MODULE_NAME "" +#endif +#define CYTHON_ABI "3_0_11" __PYX_EXTRA_ABI_MODULE_NAME +#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI +#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "." +#define CYTHON_HEX_VERSION 0x03000BF0 +#define CYTHON_FUTURE_DIVISION 1 +#include +#ifndef offsetof + #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(_WIN32) && !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#define __PYX_COMMA , +#ifndef HAVE_LONG_LONG + #define HAVE_LONG_LONG +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#define __PYX_LIMITED_VERSION_HEX PY_VERSION_HEX +#if defined(GRAALVM_PYTHON) + /* For very preliminary testing purposes. Most variables are set the same as PyPy. 
+ The existence of this section does not imply that anything works or is even tested */ + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 1 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + 
#undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(PYPY_VERSION) + #define CYTHON_COMPILING_IN_PYPY 1 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + #endif + #if PY_VERSION_HEX < 0x03090000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00) + #undef 
CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(CYTHON_LIMITED_API) + #ifdef Py_LIMITED_API + #undef __PYX_LIMITED_VERSION_HEX + #define __PYX_LIMITED_VERSION_HEX Py_LIMITED_API + #endif + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 1 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_CLINE_IN_TRACEBACK + #define CYTHON_CLINE_IN_TRACEBACK 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 1 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #endif + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_MODULE_STATE + #define 
CYTHON_USE_MODULE_STATE 1 + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(Py_GIL_DISABLED) || defined(Py_NOGIL) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 1 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #ifndef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 1 + #endif + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #ifndef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define 
CYTHON_USE_MODULE_STATE 0 + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 + #endif +#else + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 1 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #ifndef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 1 + #endif + #if PY_MAJOR_VERSION < 3 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 1 + #endif + #ifndef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 1 + #endif + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #elif !defined(CYTHON_USE_UNICODE_WRITER) + #define CYTHON_USE_UNICODE_WRITER 1 + #endif + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #ifndef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 1 + #endif + #ifndef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL (PY_MAJOR_VERSION < 3 || PY_VERSION_HEX >= 0x03060000 && PY_VERSION_HEX < 0x030C00A6) + #endif + 
#ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL (PY_VERSION_HEX >= 0x030700A1) + #endif + #ifndef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 1 + #endif + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #endif + #if PY_VERSION_HEX < 0x030400a1 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #elif !defined(CYTHON_USE_TP_FINALIZE) + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #if PY_VERSION_HEX < 0x030600B1 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #elif !defined(CYTHON_USE_DICT_VERSIONS) + #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX < 0x030C00A5) + #endif + #if PY_VERSION_HEX < 0x030700A3 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #elif !defined(CYTHON_USE_EXC_INFO_STACK) + #define CYTHON_USE_EXC_INFO_STACK 1 + #endif + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 1 + #endif +#endif +#if !defined(CYTHON_FAST_PYCCALL) +#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) +#endif +#if !defined(CYTHON_VECTORCALL) +#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1) +#endif +#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1) +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_MAJOR_VERSION < 3 + #include "longintrepr.h" + #endif + #undef SHIFT + #undef BASE + #undef MASK + #ifdef SIZEOF_VOID_P + enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; + #endif +#endif +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#ifndef 
__has_cpp_attribute + #define __has_cpp_attribute(x) 0 +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#ifndef CYTHON_UNUSED + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(maybe_unused) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(maybe_unused) + #define CYTHON_UNUSED [[maybe_unused]] + #endif + #endif + #endif +#endif +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_UNUSED_VAR +# if defined(__cplusplus) + template void CYTHON_UNUSED_VAR( const T& ) { } +# else +# define CYTHON_UNUSED_VAR(x) (void)(x) +# endif +#endif +#ifndef CYTHON_MAYBE_UNUSED_VAR + #define CYTHON_MAYBE_UNUSED_VAR(x) CYTHON_UNUSED_VAR(x) +#endif +#ifndef CYTHON_NCP_UNUSED +# if CYTHON_COMPILING_IN_CPYTHON +# define CYTHON_NCP_UNUSED +# else +# define CYTHON_NCP_UNUSED CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_USE_CPP_STD_MOVE + #if defined(__cplusplus) && (\ + __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1600)) + #define CYTHON_USE_CPP_STD_MOVE 1 + #else + #define CYTHON_USE_CPP_STD_MOVE 0 + #endif +#endif +#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) +#ifdef _MSC_VER + #ifndef _MSC_STDINT_H_ + #if _MSC_VER < 
1300 + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + #else + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + #endif + #endif + #if _MSC_VER < 1300 + #ifdef _WIN64 + typedef unsigned long long __pyx_uintptr_t; + #else + typedef unsigned int __pyx_uintptr_t; + #endif + #else + #ifdef _WIN64 + typedef unsigned __int64 __pyx_uintptr_t; + #else + typedef unsigned __int32 __pyx_uintptr_t; + #endif + #endif +#else + #include + typedef uintptr_t __pyx_uintptr_t; +#endif +#ifndef CYTHON_FALLTHROUGH + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(fallthrough) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(fallthrough) + #define CYTHON_FALLTHROUGH [[fallthrough]] + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_cpp_attribute(clang::fallthrough) + #define CYTHON_FALLTHROUGH [[clang::fallthrough]] + #elif __has_cpp_attribute(gnu::fallthrough) + #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] + #endif + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_attribute(fallthrough) + #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) + #else + #define CYTHON_FALLTHROUGH + #endif + #endif + #if defined(__clang__) && defined(__apple_build_version__) + #if __apple_build_version__ < 7000000 + #undef CYTHON_FALLTHROUGH + #define CYTHON_FALLTHROUGH + #endif + #endif +#endif +#ifdef __cplusplus + template + struct __PYX_IS_UNSIGNED_IMPL {static const bool value = T(0) < T(-1);}; + #define __PYX_IS_UNSIGNED(type) (__PYX_IS_UNSIGNED_IMPL::value) +#else + #define __PYX_IS_UNSIGNED(type) (((type)-1) > 0) +#endif +#if CYTHON_COMPILING_IN_PYPY == 1 + #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x030A0000) +#else + #define __PYX_NEED_TP_PRINT_SLOT 
(PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000) +#endif +#define __PYX_REINTERPRET_FUNCION(func_pointer, other_pointer) ((func_pointer)(void(*)(void))(other_pointer)) + +#ifndef CYTHON_INLINE + #if defined(__clang__) + #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) + #elif defined(__GNUC__) + #define CYTHON_INLINE __inline__ + #elif defined(_MSC_VER) + #define CYTHON_INLINE __inline + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_INLINE inline + #else + #define CYTHON_INLINE + #endif +#endif + +#define __PYX_BUILD_PY_SSIZE_T "n" +#define CYTHON_FORMAT_SSIZE_T "z" +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_DefaultClassType PyClass_Type + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_BUILTIN_MODULE_NAME "builtins" + #define __Pyx_DefaultClassType PyType_Type +#if CYTHON_COMPILING_IN_LIMITED_API + static CYTHON_INLINE PyObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyObject *exception_table = NULL; + PyObject *types_module=NULL, *code_type=NULL, *result=NULL; + #if __PYX_LIMITED_VERSION_HEX < 0x030B0000 + PyObject *version_info; + PyObject *py_minor_version = NULL; + #endif + long minor_version = 0; + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000 + minor_version = 11; + #else + if (!(version_info = PySys_GetObject("version_info"))) goto end; + if (!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end; + minor_version = PyLong_AsLong(py_minor_version); + Py_DECREF(py_minor_version); + if (minor_version == -1 && PyErr_Occurred()) goto end; + #endif + if 
(!(types_module = PyImport_ImportModule("types"))) goto end; + if (!(code_type = PyObject_GetAttrString(types_module, "CodeType"))) goto end; + if (minor_version <= 7) { + (void)p; + result = PyObject_CallFunction(code_type, "iiiiiOOOOOOiOO", a, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else if (minor_version <= 10) { + result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOiOO", a,p, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else { + if (!(exception_table = PyBytes_FromStringAndSize(NULL, 0))) goto end; + result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOOiOO", a,p, k, l, s, f, code, + c, n, v, fn, name, name, fline, lnos, exception_table, fv, cell); + } + end: + Py_XDECREF(code_type); + Py_XDECREF(exception_table); + Py_XDECREF(types_module); + if (type) { + PyErr_Restore(type, value, traceback); + } + return result; + } + #ifndef CO_OPTIMIZED + #define CO_OPTIMIZED 0x0001 + #endif + #ifndef CO_NEWLOCALS + #define CO_NEWLOCALS 0x0002 + #endif + #ifndef CO_VARARGS + #define CO_VARARGS 0x0004 + #endif + #ifndef CO_VARKEYWORDS + #define CO_VARKEYWORDS 0x0008 + #endif + #ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x0200 + #endif + #ifndef CO_GENERATOR + #define CO_GENERATOR 0x0020 + #endif + #ifndef CO_COROUTINE + #define CO_COROUTINE 0x0080 + #endif +#elif PY_VERSION_HEX >= 0x030B0000 + static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyCodeObject *result; + PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); + if (!empty_bytes) return NULL; + result = + #if PY_VERSION_HEX >= 0x030C0000 + PyUnstable_Code_NewWithPosOnlyArgs + #else + PyCode_NewWithPosOnlyArgs + #endif + (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, empty_bytes); + Py_DECREF(empty_bytes); + return result; + } 
+#elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_NewWithPosOnlyArgs(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#endif +#endif +#if PY_VERSION_HEX >= 0x030900A4 || defined(Py_IS_TYPE) + #define __Pyx_IS_TYPE(ob, type) Py_IS_TYPE(ob, type) +#else + #define __Pyx_IS_TYPE(ob, type) (((const PyObject*)ob)->ob_type == (type)) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_Is) + #define __Pyx_Py_Is(x, y) Py_Is(x, y) +#else + #define __Pyx_Py_Is(x, y) ((x) == (y)) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsNone) + #define __Pyx_Py_IsNone(ob) Py_IsNone(ob) +#else + #define __Pyx_Py_IsNone(ob) __Pyx_Py_Is((ob), Py_None) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsTrue) + #define __Pyx_Py_IsTrue(ob) Py_IsTrue(ob) +#else + #define __Pyx_Py_IsTrue(ob) __Pyx_Py_Is((ob), Py_True) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsFalse) + #define __Pyx_Py_IsFalse(ob) Py_IsFalse(ob) +#else + #define __Pyx_Py_IsFalse(ob) __Pyx_Py_Is((ob), Py_False) +#endif +#define __Pyx_NoneAsNull(obj) (__Pyx_Py_IsNone(obj) ? 
NULL : (obj)) +#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o) +#else + #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o) +#endif +#ifndef CO_COROUTINE + #define CO_COROUTINE 0x80 +#endif +#ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x200 +#endif +#ifndef Py_TPFLAGS_CHECKTYPES + #define Py_TPFLAGS_CHECKTYPES 0 +#endif +#ifndef Py_TPFLAGS_HAVE_INDEX + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#ifndef Py_TPFLAGS_HAVE_NEWBUFFER + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#ifndef Py_TPFLAGS_HAVE_FINALIZE + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#ifndef Py_TPFLAGS_SEQUENCE + #define Py_TPFLAGS_SEQUENCE 0 +#endif +#ifndef Py_TPFLAGS_MAPPING + #define Py_TPFLAGS_MAPPING 0 +#endif +#ifndef METH_STACKLESS + #define METH_STACKLESS 0 +#endif +#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) + #ifndef METH_FASTCALL + #define METH_FASTCALL 0x80 + #endif + typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); + typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames); +#else + #if PY_VERSION_HEX >= 0x030d00A4 + # define __Pyx_PyCFunctionFast PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords PyCFunctionFastWithKeywords + #else + # define __Pyx_PyCFunctionFast _PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords + #endif +#endif +#if CYTHON_METH_FASTCALL + #define __Pyx_METH_FASTCALL METH_FASTCALL + #define __Pyx_PyCFunction_FastCall __Pyx_PyCFunctionFast + #define __Pyx_PyCFunction_FastCallWithKeywords __Pyx_PyCFunctionFastWithKeywords +#else + #define __Pyx_METH_FASTCALL METH_VARARGS + #define __Pyx_PyCFunction_FastCall PyCFunction + #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords +#endif +#if CYTHON_VECTORCALL + #define __pyx_vectorcallfunc vectorcallfunc + 
#define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET + #define __Pyx_PyVectorcall_NARGS(n) PyVectorcall_NARGS((size_t)(n)) +#elif CYTHON_BACKPORT_VECTORCALL + typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args, + size_t nargsf, PyObject *kwnames); + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1)) + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(((size_t)(n)) & ~__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)) +#else + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET 0 + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(n)) +#endif +#if PY_MAJOR_VERSION >= 0x030900B1 +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_CheckExact(func) +#else +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_Check(func) +#endif +#define __Pyx_CyOrPyCFunction_Check(func) PyCFunction_Check(func) +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) (((PyCFunctionObject*)(func))->m_ml->ml_meth) +#elif !CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) PyCFunction_GET_FUNCTION(func) +#endif +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CyOrPyCFunction_GET_FLAGS(func) (((PyCFunctionObject*)(func))->m_ml->ml_flags) +static CYTHON_INLINE PyObject* __Pyx_CyOrPyCFunction_GET_SELF(PyObject *func) { + return (__Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_STATIC) ? 
NULL : ((PyCFunctionObject*)func)->m_self; +} +#endif +static CYTHON_INLINE int __Pyx__IsSameCFunction(PyObject *func, void *cfunc) { +#if CYTHON_COMPILING_IN_LIMITED_API + return PyCFunction_Check(func) && PyCFunction_GetFunction(func) == (PyCFunction) cfunc; +#else + return PyCFunction_Check(func) && PyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +#endif +} +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCFunction(func, cfunc) +#if __PYX_LIMITED_VERSION_HEX < 0x030900B1 + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) ((void)m, PyType_FromSpecWithBases(s, b)) + typedef PyObject *(*__Pyx_PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, size_t, PyObject *); +#else + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) PyType_FromModuleAndSpec(m, s, b) + #define __Pyx_PyCMethod PyCMethod +#endif +#ifndef METH_METHOD + #define METH_METHOD 0x200 +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) + #define PyObject_Malloc(s) PyMem_Malloc(s) + #define PyObject_Free(p) PyMem_Free(p) + #define PyObject_Realloc(p) PyMem_Realloc(p) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) +#else + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyThreadState_Current PyThreadState_Get() +#elif !CYTHON_FAST_THREAD_STATE + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#elif PY_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyThreadState_Current PyThreadState_GetUnchecked() +#elif PY_VERSION_HEX >= 0x03060000 + #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() +#elif PY_VERSION_HEX >= 0x03000000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#else + #define __Pyx_PyThreadState_Current _PyThreadState_Current +#endif +#if 
CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_INLINE void *__Pyx_PyModule_GetState(PyObject *op) +{ + void *result; + result = PyModule_GetState(op); + if (!result) + Py_FatalError("Couldn't find the module state"); + return result; +} +#endif +#define __Pyx_PyObject_GetSlot(obj, name, func_ctype) __Pyx_PyType_GetSlot(Py_TYPE(obj), name, func_ctype) +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((func_ctype) PyType_GetSlot((type), Py_##name)) +#else + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((type)->name) +#endif +#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) +#include "pythread.h" +#define Py_tss_NEEDS_INIT 0 +typedef int Py_tss_t; +static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { + *key = PyThread_create_key(); + return 0; +} +static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { + Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); + *key = Py_tss_NEEDS_INIT; + return key; +} +static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { + PyObject_Free(key); +} +static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { + return *key != Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { + PyThread_delete_key(*key); + *key = Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { + return PyThread_set_key_value(*key, value); +} +static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { + return PyThread_get_key_value(*key); +} +#endif +#if PY_MAJOR_VERSION < 3 + #if CYTHON_COMPILING_IN_PYPY + #if PYPY_VERSION_NUM < 0x07030600 + #if defined(__cplusplus) && __cplusplus >= 201402L + [[deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")]] + #elif defined(__GNUC__) || defined(__clang__) + __attribute__ ((__deprecated__("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6"))) + 
#elif defined(_MSC_VER) + __declspec(deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")) + #endif + static CYTHON_INLINE int PyGILState_Check(void) { + return 0; + } + #else // PYPY_VERSION_NUM < 0x07030600 + #endif // PYPY_VERSION_NUM < 0x07030600 + #else + static CYTHON_INLINE int PyGILState_Check(void) { + PyThreadState * tstate = _PyThreadState_Current; + return tstate && (tstate == PyGILState_GetThisThreadState()); + } + #endif +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 || defined(_PyDict_NewPresized) +#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) +#else +#define __Pyx_PyDict_NewPresized(n) PyDict_New() +#endif +#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX > 0x030600B4 && PY_VERSION_HEX < 0x030d0000 && CYTHON_USE_UNICODE_INTERNALS +#define __Pyx_PyDict_GetItemStrWithError(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStr(PyObject *dict, PyObject *name) { + PyObject *res = __Pyx_PyDict_GetItemStrWithError(dict, name); + if (res == NULL) PyErr_Clear(); + return res; +} +#elif PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000) +#define __Pyx_PyDict_GetItemStrWithError PyDict_GetItemWithError +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#else +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStrWithError(PyObject *dict, PyObject *name) { +#if CYTHON_COMPILING_IN_PYPY + return PyDict_GetItem(dict, name); +#else + PyDictEntry *ep; + PyDictObject *mp = (PyDictObject*) dict; + long hash = 
((PyStringObject *) name)->ob_shash; + assert(hash != -1); + ep = (mp->ma_lookup)(mp, name, hash); + if (ep == NULL) { + return NULL; + } + return ep->me_value; +#endif +} +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#endif +#if CYTHON_USE_TYPE_SLOTS + #define __Pyx_PyType_GetFlags(tp) (((PyTypeObject *)tp)->tp_flags) + #define __Pyx_PyType_HasFeature(type, feature) ((__Pyx_PyType_GetFlags(type) & (feature)) != 0) + #define __Pyx_PyObject_GetIterNextFunc(obj) (Py_TYPE(obj)->tp_iternext) +#else + #define __Pyx_PyType_GetFlags(tp) (PyType_GetFlags((PyTypeObject *)tp)) + #define __Pyx_PyType_HasFeature(type, feature) PyType_HasFeature(type, feature) + #define __Pyx_PyObject_GetIterNextFunc(obj) PyIter_Next +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyObject_GenericSetAttr((PyObject*)tp, k, v) +#else + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyDict_SetItem(tp->tp_dict, k, v) +#endif +#if CYTHON_USE_TYPE_SPECS && PY_VERSION_HEX >= 0x03080000 +#define __Pyx_PyHeapTypeObject_GC_Del(obj) {\ + PyTypeObject *type = Py_TYPE((PyObject*)obj);\ + assert(__Pyx_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE));\ + PyObject_GC_Del(obj);\ + Py_DECREF(type);\ +} +#else +#define __Pyx_PyHeapTypeObject_GC_Del(obj) PyObject_GC_Del(obj) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define CYTHON_PEP393_ENABLED 1 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GetLength(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_ReadChar(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((void)u, 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((void)u, (0)) + #define __Pyx_PyUnicode_DATA(u) ((void*)u) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)k, PyUnicode_ReadChar((PyObject*)(d), i)) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GetLength(u)) +#elif PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #if PY_VERSION_HEX >= 0x030C0000 + #define 
__Pyx_PyUnicode_READY(op) (0) + #else + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #endif + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) + #define __Pyx_PyUnicode_KIND(u) ((int)PyUnicode_KIND(u)) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, (Py_UCS4) ch) + #if PY_VERSION_HEX >= 0x030C0000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) + #else + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) + #else + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) + #endif + #endif +#else + #define CYTHON_PEP393_ENABLED 0 + #define PyUnicode_1BYTE_KIND 1 + #define PyUnicode_2BYTE_KIND 2 + #define PyUnicode_4BYTE_KIND 4 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 
65535U : 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((int)sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = (Py_UNICODE) ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #if !defined(PyUnicode_DecodeUnicodeEscape) + #define PyUnicode_DecodeUnicodeEscape(s, size, errors) PyUnicode_Decode(s, size, "unicode_escape", errors) + #endif + #if !defined(PyUnicode_Contains) || (PY_MAJOR_VERSION == 2 && PYPY_VERSION_NUM < 0x07030500) + #undef PyUnicode_Contains + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) + #endif + #if !defined(PyByteArray_Check) + #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) + #endif + #if !defined(PyObject_Format) + #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) + #endif +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? 
PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) + #define PyObject_ASCII(o) PyObject_Repr(o) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#ifndef PyObject_Unicode + #define PyObject_Unicode PyObject_Str +#endif +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#if CYTHON_COMPILING_IN_CPYTHON + #define __Pyx_PySequence_ListKeepNew(obj)\ + (likely(PyList_CheckExact(obj) && Py_REFCNT(obj) == 1) ? 
__Pyx_NewRef(obj) : PySequence_List(obj)) +#else + #define __Pyx_PySequence_ListKeepNew(obj) PySequence_List(obj) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) __Pyx_IS_TYPE(obj, &PySet_Type) +#endif +#if PY_VERSION_HEX >= 0x030900A4 + #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) +#else + #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) +#endif +#if CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_PySequence_ITEM(o, i) PySequence_ITEM(o, i) + #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) (PyTuple_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyList_SET_ITEM(o, i, v) (PyList_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_GET_SIZE(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_GET_SIZE(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_GET_SIZE(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_GET_SIZE(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_GET_SIZE(o) +#else + #define __Pyx_PySequence_ITEM(o, i) PySequence_GetItem(o, i) + #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) PyTuple_SetItem(o, i, v) + #define __Pyx_PyList_SET_ITEM(o, i, v) PyList_SetItem(o, i, v) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_Size(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_Size(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_Size(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_Size(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_Size(o) +#endif +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyImport_AddModuleRef(name) PyImport_AddModuleRef(name) +#else + static CYTHON_INLINE PyObject *__Pyx_PyImport_AddModuleRef(const char *name) { + PyObject *module = PyImport_AddModule(name); + Py_XINCREF(module); + return module; + } +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define 
PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define __Pyx_Py3Int_Check(op) PyLong_Check(op) + #define __Pyx_Py3Int_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#else + #define __Pyx_Py3Int_Check(op) (PyLong_Check(op) || PyInt_Check(op)) + #define __Pyx_Py3Int_CheckExact(op) (PyLong_CheckExact(op) || PyInt_CheckExact(op)) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t +#endif +#if CYTHON_USE_ASYNC_SLOTS + #if PY_VERSION_HEX >= 0x030500B1 + #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods + #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) + #else + #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) + #endif +#else + #define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef __Pyx_PyAsyncMethodsStruct + typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; + } __Pyx_PyAsyncMethodsStruct; +#endif + +#if defined(_WIN32) || defined(WIN32) || 
defined(MS_WINDOWS) + #if !defined(_USE_MATH_DEFINES) + #define _USE_MATH_DEFINES + #endif +#endif +#include +#ifdef NAN +#define __PYX_NAN() ((float) NAN) +#else +static CYTHON_INLINE float __PYX_NAN() { + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif +#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) +#define __Pyx_truncl trunc +#else +#define __Pyx_truncl truncl +#endif + +#define __PYX_MARK_ERR_POS(f_index, lineno) \ + { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } +#define __PYX_ERR(f_index, lineno, Ln_error) \ + { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } + +#ifdef CYTHON_EXTERN_C + #undef __PYX_EXTERN_C + #define __PYX_EXTERN_C CYTHON_EXTERN_C +#elif defined(__PYX_EXTERN_C) + #ifdef _MSC_VER + #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") + #else + #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. + #endif +#else + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#define __PYX_HAVE__jcvi__assembly__chic +#define __PYX_HAVE_API__jcvi__assembly__chic +/* Early includes */ +#include +#include + + /* Using NumPy API declarations from "numpy/__init__.cython-30.pxd" */ + +#include "numpy/arrayobject.h" +#include "numpy/ndarrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/arrayscalars.h" +#include "numpy/ufuncobject.h" +#include +#include "pythread.h" + + #if CYTHON_COMPILING_IN_PYPY + #ifdef _MSC_VER + #pragma message ("This module uses CPython specific internals of 'array.array', which are not available in PyPy.") + #else + #warning This module uses CPython specific internals of 'array.array', which are not available in PyPy. 
+ #endif + #endif + +#ifdef _OPENMP +#include +#endif /* _OPENMP */ + +#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define __Pyx_uchar_cast(c) ((unsigned char)c) +#define __Pyx_long_cast(x) ((long)x) +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ + (sizeof(type) < sizeof(Py_ssize_t)) ||\ + (sizeof(type) > sizeof(Py_ssize_t) &&\ + likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX) &&\ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ + v == (type)PY_SSIZE_T_MIN))) ||\ + (sizeof(type) == sizeof(Py_ssize_t) &&\ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX))) ) +static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { + return (size_t) i < (size_t) limit; +} +#if defined (__cplusplus) && __cplusplus >= 201103L + #include + #define __Pyx_sst_abs(value) std::abs(value) +#elif SIZEOF_INT >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) abs(value) +#elif SIZEOF_LONG >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) labs(value) +#elif defined (_MSC_VER) + #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define __Pyx_sst_abs(value) llabs(value) +#elif defined (__GNUC__) + #define __Pyx_sst_abs(value) __builtin_llabs(value) +#else + #define __Pyx_sst_abs(value) ((value<0) ? 
-value : value) +#endif +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s); +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char*); +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyObject_AsWritableString(s) ((char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableSString(s) ((signed char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define 
__Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +#define __Pyx_PyUnicode_FromOrdinal(o) PyUnicode_FromOrdinal((int)o) +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +#define __Pyx_PySequence_Tuple(obj)\ + (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); +#if CYTHON_ASSUME_SAFE_MACROS +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) +#else +#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? 
__Pyx_NewRef(x) : PyNumber_Int(x)) +#endif +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_VERSION_HEX >= 0x030C00A7 + #ifndef _PyLong_SIGN_MASK + #define _PyLong_SIGN_MASK 3 + #endif + #ifndef _PyLong_NON_SIZE_BITS + #define _PyLong_NON_SIZE_BITS 3 + #endif + #define __Pyx_PyLong_Sign(x) (((PyLongObject*)x)->long_value.lv_tag & _PyLong_SIGN_MASK) + #define __Pyx_PyLong_IsNeg(x) ((__Pyx_PyLong_Sign(x) & 2) != 0) + #define __Pyx_PyLong_IsNonNeg(x) (!__Pyx_PyLong_IsNeg(x)) + #define __Pyx_PyLong_IsZero(x) (__Pyx_PyLong_Sign(x) & 1) + #define __Pyx_PyLong_IsPos(x) (__Pyx_PyLong_Sign(x) == 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) (__Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) ((Py_ssize_t) (((PyLongObject*)x)->long_value.lv_tag >> _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_SignedDigitCount(x)\ + ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * __Pyx_PyLong_DigitCount(x)) + #if defined(PyUnstable_Long_IsCompact) && defined(PyUnstable_Long_CompactValue) + #define __Pyx_PyLong_IsCompact(x) PyUnstable_Long_IsCompact((PyLongObject*) x) + #define __Pyx_PyLong_CompactValue(x) PyUnstable_Long_CompactValue((PyLongObject*) x) + #else + #define __Pyx_PyLong_IsCompact(x) (((PyLongObject*)x)->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_CompactValue(x) ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * (Py_ssize_t) __Pyx_PyLong_Digits(x)[0]) + #endif + typedef Py_ssize_t __Pyx_compact_pylong; + typedef size_t __Pyx_compact_upylong; + #else + #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0) + #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0) + #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0) + #define __Pyx_PyLong_IsPos(x) (Py_SIZE(x) > 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) ((Py_SIZE(x) == 0) ? 
0 : __Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) __Pyx_sst_abs(Py_SIZE(x)) + #define __Pyx_PyLong_SignedDigitCount(x) Py_SIZE(x) + #define __Pyx_PyLong_IsCompact(x) (Py_SIZE(x) == 0 || Py_SIZE(x) == 1 || Py_SIZE(x) == -1) + #define __Pyx_PyLong_CompactValue(x)\ + ((Py_SIZE(x) == 0) ? (sdigit) 0 : ((Py_SIZE(x) < 0) ? -(sdigit)__Pyx_PyLong_Digits(x)[0] : (sdigit)__Pyx_PyLong_Digits(x)[0])) + typedef sdigit __Pyx_compact_pylong; + typedef digit __Pyx_compact_upylong; + #endif + #if PY_VERSION_HEX >= 0x030C00A5 + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit) + #else + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->ob_digit) + #endif +#endif +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +#include +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = (char) c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default 
encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#include +static char* __PYX_DEFAULT_STRING_ENCODING; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); + if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ +static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } + +#if !CYTHON_USE_MODULE_STATE +static PyObject *__pyx_m = NULL; +#endif +static int __pyx_lineno; +static int __pyx_clineno = 0; 
+static const char * __pyx_cfilenm = __FILE__; +static const char *__pyx_filename; + +/* Header.proto */ +#if !defined(CYTHON_CCOMPLEX) + #if defined(__cplusplus) + #define CYTHON_CCOMPLEX 1 + #elif (defined(_Complex_I) && !defined(_MSC_VER)) || ((defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_COMPLEX__) && !defined(_MSC_VER)) + #define CYTHON_CCOMPLEX 1 + #else + #define CYTHON_CCOMPLEX 0 + #endif +#endif +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #include + #else + #include + #endif +#endif +#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__) + #undef _Complex_I + #define _Complex_I 1.0fj +#endif + +/* #### Code section: filename_table ### */ + +static const char *__pyx_f[] = { + "src/jcvi/assembly/chic.pyx", + "__init__.cython-30.pxd", + "contextvars.pxd", + "array.pxd", + "type.pxd", + "bool.pxd", + "complex.pxd", +}; +/* #### Code section: utility_code_proto_before_types ### */ +/* ForceInitThreads.proto */ +#ifndef __PYX_FORCE_INIT_THREADS + #define __PYX_FORCE_INIT_THREADS 0 +#endif + +/* BufferFormatStructs.proto */ +struct __Pyx_StructField_; +#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0) +typedef struct { + const char* name; + struct __Pyx_StructField_* fields; + size_t size; + size_t arraysize[8]; + int ndim; + char typegroup; + char is_unsigned; + int flags; +} __Pyx_TypeInfo; +typedef struct __Pyx_StructField_ { + __Pyx_TypeInfo* type; + const char* name; + size_t offset; +} __Pyx_StructField; +typedef struct { + __Pyx_StructField* field; + size_t parent_offset; +} __Pyx_BufFmt_StackElem; +typedef struct { + __Pyx_StructField root; + __Pyx_BufFmt_StackElem* head; + size_t fmt_offset; + size_t new_count, enc_count; + size_t struct_alignment; + int is_complex; + char enc_type; + char new_packmode; + char enc_packmode; + char is_valid_array; +} __Pyx_BufFmt_Context; + +/* #### Code section: numeric_typedefs ### */ + +/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":787 + * # in Cython to enable them only on the right systems. + * + * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + */ +typedef npy_int8 __pyx_t_5numpy_int8_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":788 + * + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t + */ +typedef npy_int16 __pyx_t_5numpy_int16_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":789 + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< + * ctypedef npy_int64 int64_t + * #ctypedef npy_int96 int96_t + */ +typedef npy_int32 __pyx_t_5numpy_int32_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":790 + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< + * #ctypedef npy_int96 int96_t + * #ctypedef npy_int128 int128_t + */ +typedef npy_int64 __pyx_t_5numpy_int64_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":794 + * #ctypedef npy_int128 int128_t + * + * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + */ +typedef npy_uint8 __pyx_t_5numpy_uint8_t; + +/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":795 + * + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t + */ +typedef npy_uint16 __pyx_t_5numpy_uint16_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":796 + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< + * ctypedef npy_uint64 uint64_t + * #ctypedef npy_uint96 uint96_t + */ +typedef npy_uint32 __pyx_t_5numpy_uint32_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":797 + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< + * #ctypedef npy_uint96 uint96_t + * #ctypedef npy_uint128 uint128_t + */ +typedef npy_uint64 __pyx_t_5numpy_uint64_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":801 + * #ctypedef npy_uint128 uint128_t + * + * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< + * ctypedef npy_float64 float64_t + * #ctypedef npy_float80 float80_t + */ +typedef npy_float32 __pyx_t_5numpy_float32_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":802 + * + * ctypedef npy_float32 float32_t + * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< + * #ctypedef npy_float80 float80_t + * #ctypedef npy_float128 float128_t + */ +typedef npy_float64 __pyx_t_5numpy_float64_t; + +/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":809 + * ctypedef double complex complex128_t + * + * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< + * ctypedef npy_ulonglong ulonglong_t + * + */ +typedef npy_longlong __pyx_t_5numpy_longlong_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":810 + * + * ctypedef npy_longlong longlong_t + * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_intp intp_t + */ +typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":812 + * ctypedef npy_ulonglong ulonglong_t + * + * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< + * ctypedef npy_uintp uintp_t + * + */ +typedef npy_intp __pyx_t_5numpy_intp_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":813 + * + * ctypedef npy_intp intp_t + * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< + * + * ctypedef npy_double float_t + */ +typedef npy_uintp __pyx_t_5numpy_uintp_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":815 + * ctypedef npy_uintp uintp_t + * + * ctypedef npy_double float_t # <<<<<<<<<<<<<< + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t + */ +typedef npy_double __pyx_t_5numpy_float_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":816 + * + * ctypedef npy_double 
float_t + * ctypedef npy_double double_t # <<<<<<<<<<<<<< + * ctypedef npy_longdouble longdouble_t + * + */ +typedef npy_double __pyx_t_5numpy_double_t; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":817 + * ctypedef npy_double float_t + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< + * + * ctypedef float complex cfloat_t + */ +typedef npy_longdouble __pyx_t_5numpy_longdouble_t; +/* #### Code section: complex_type_declarations ### */ +/* Declarations.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + typedef ::std::complex< float > __pyx_t_float_complex; + #else + typedef float _Complex __pyx_t_float_complex; + #endif +#else + typedef struct { float real, imag; } __pyx_t_float_complex; +#endif +static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float); + +/* Declarations.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + typedef ::std::complex< double > __pyx_t_double_complex; + #else + typedef double _Complex __pyx_t_double_complex; + #endif +#else + typedef struct { double real, imag; } __pyx_t_double_complex; +#endif +static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double); + +/* Declarations.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + typedef ::std::complex< long double > __pyx_t_long_double_complex; + #else + typedef long double _Complex __pyx_t_long_double_complex; + #endif +#else + typedef struct { long double real, imag; } __pyx_t_long_double_complex; +#endif +static CYTHON_INLINE __pyx_t_long_double_complex __pyx_t_long_double_complex_from_parts(long double, long double); + +/* #### Code section: type_declarations ### */ + +/*--- Type declarations ---*/ +#ifndef _ARRAYARRAY_H +struct arrayobject; +typedef struct 
arrayobject arrayobject; +#endif +struct __pyx_opt_args_7cpython_11contextvars_get_value; +struct __pyx_opt_args_7cpython_11contextvars_get_value_no_default; + +/* "cpython/contextvars.pxd":112 + * + * + * cdef inline object get_value(var, default_value=None): # <<<<<<<<<<<<<< + * """Return a new reference to the value of the context variable, + * or the default value of the context variable, + */ +struct __pyx_opt_args_7cpython_11contextvars_get_value { + int __pyx_n; + PyObject *default_value; +}; + +/* "cpython/contextvars.pxd":129 + * + * + * cdef inline object get_value_no_default(var, default_value=None): # <<<<<<<<<<<<<< + * """Return a new reference to the value of the context variable, + * or the provided default value if no such value was found. + */ +struct __pyx_opt_args_7cpython_11contextvars_get_value_no_default { + int __pyx_n; + PyObject *default_value; +}; + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1113 + * + * # Iterator API added in v1.6 + * ctypedef int (*NpyIter_IterNextFunc)(NpyIter* it) noexcept nogil # <<<<<<<<<<<<<< + * ctypedef void (*NpyIter_GetMultiIndexFunc)(NpyIter* it, npy_intp* outcoords) noexcept nogil + * + */ +typedef int (*__pyx_t_5numpy_NpyIter_IterNextFunc)(NpyIter *); + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1114 + * # Iterator API added in v1.6 + * ctypedef int (*NpyIter_IterNextFunc)(NpyIter* it) noexcept nogil + * ctypedef void (*NpyIter_GetMultiIndexFunc)(NpyIter* it, npy_intp* outcoords) noexcept nogil # <<<<<<<<<<<<<< + * + * cdef extern from "numpy/arrayobject.h": + */ +typedef void (*__pyx_t_5numpy_NpyIter_GetMultiIndexFunc)(NpyIter *, npy_intp *); + +/* "jcvi/assembly/chic.pyx":25 + * + * + * ctypedef np.int INT # <<<<<<<<<<<<<< + * DEF LIMIT = 10000000 + * DEF BB 
= 12 + */ +typedef PyObject *__pyx_t_4jcvi_8assembly_4chic_INT; +/* #### Code section: utility_code_proto ### */ + +/* --- Runtime support code (head) --- */ +/* Refnanny.proto */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, Py_ssize_t); + void (*DECREF)(void*, PyObject*, Py_ssize_t); + void (*GOTREF)(void*, PyObject*, Py_ssize_t); + void (*GIVEREF)(void*, PyObject*, Py_ssize_t); + void* (*SetupContext)(const char*, Py_ssize_t, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + } + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__)) + #define __Pyx_RefNannyFinishContextNogil() __Pyx_RefNannyFinishContext() +#endif + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define 
__Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_XINCREF(r) do { if((r) == NULL); else {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) == NULL); else {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) == NULL); else {__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) == NULL); else {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContextNogil() + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + #define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_Py_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; Py_XDECREF(tmp);\ + } while (0) +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +/* PyErrExceptionMatches.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) +static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); +#else +#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) +#endif 
+ +/* PyThreadStateGet.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; +#if PY_VERSION_HEX >= 0x030C00A6 +#define __Pyx_PyErr_Occurred() (__pyx_tstate->current_exception != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->current_exception ? (PyObject*) Py_TYPE(__pyx_tstate->current_exception) : (PyObject*) NULL) +#else +#define __Pyx_PyErr_Occurred() (__pyx_tstate->curexc_type != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->curexc_type) +#endif +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#define __Pyx_PyErr_Occurred() (PyErr_Occurred() != NULL) +#define __Pyx_PyErr_CurrentExceptionType() PyErr_Occurred() +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A6 +#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) +#else +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#endif +#else +#define __Pyx_PyErr_Clear() PyErr_Clear() +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#define __Pyx_ErrRestoreWithState(type, value, tb) 
PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) +#endif + +/* PyObjectGetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +/* PyObjectGetAttrStrNoError.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name); + +/* GetBuiltinName.proto */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + +/* GetTopmostException.proto */ +#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE +static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate); +#endif + +/* SaveResetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +#else +#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) +#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) +#endif + +/* GetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb) +static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject 
**value, PyObject **tb); +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); +#endif + +/* PyObjectCall.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +/* RaiseException.proto */ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); + +/* TupleAndListFromArray.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n); +static CYTHON_INLINE PyObject* __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n); +#endif + +/* IncludeStringH.proto */ +#include + +/* BytesEquals.proto */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); + +/* UnicodeEquals.proto */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); + +/* fastcall.proto */ +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_VARARGS(args, i) PySequence_GetItem(args, i) +#elif CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GET_ITEM(args, i) +#else + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GetItem(args, i) +#endif +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg) + #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg) +#else + #define __Pyx_Arg_NewRef_VARARGS(arg) arg + #define __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds) +#define __Pyx_KwValues_VARARGS(args, nargs) NULL +#define __Pyx_GetKwValue_VARARGS(kw, kwvalues, s) __Pyx_PyDict_GetItemStrWithError(kw, s) +#define __Pyx_KwargsAsDict_VARARGS(kw, kwvalues) PyDict_Copy(kw) +#if CYTHON_METH_FASTCALL + #define __Pyx_Arg_FASTCALL(args, i) args[i] + #define __Pyx_NumKwargs_FASTCALL(kwds) PyTuple_GET_SIZE(kwds) + #define 
__Pyx_KwValues_FASTCALL(args, nargs) ((args) + (nargs)) + static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 + CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues); + #else + #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw) + #endif + #define __Pyx_Arg_NewRef_FASTCALL(arg) arg /* no-op, __Pyx_Arg_FASTCALL is direct and this needs + to have the same reference counting */ + #define __Pyx_Arg_XDECREF_FASTCALL(arg) +#else + #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS + #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS + #define __Pyx_KwValues_FASTCALL __Pyx_KwValues_VARARGS + #define __Pyx_GetKwValue_FASTCALL __Pyx_GetKwValue_VARARGS + #define __Pyx_KwargsAsDict_FASTCALL __Pyx_KwargsAsDict_VARARGS + #define __Pyx_Arg_NewRef_FASTCALL(arg) __Pyx_Arg_NewRef_VARARGS(arg) + #define __Pyx_Arg_XDECREF_FASTCALL(arg) __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_VARARGS(args, start), stop - start) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_FASTCALL(args, start), stop - start) +#else +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) PyTuple_GetSlice(args, start, stop) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) PyTuple_GetSlice(args, start, stop) +#endif + +/* RaiseDoubleKeywords.proto */ +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, + const char* function_name); + +/* RaiseArgTupleInvalid.proto */ 
+static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* ArgTypeTest.proto */ +#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ + ((likely(__Pyx_IS_TYPE(obj, type) | (none_allowed && (obj == Py_None)))) ? 1 :\ + __Pyx__ArgTypeTest(obj, type, name, exact)) +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); + +/* IsLittleEndian.proto */ +static CYTHON_INLINE int __Pyx_Is_Little_Endian(void); + +/* BufferFormatCheck.proto */ +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts); +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type); + +/* BufferGetAndValidate.proto */ +#define __Pyx_GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)\ + ((obj == Py_None || obj == NULL) ?\ + (__Pyx_ZeroBuffer(buf), 0) :\ + __Pyx__GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)) +static int __Pyx__GetBufferAndValidate(Py_buffer* buf, PyObject* obj, + __Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack); +static void __Pyx_ZeroBuffer(Py_buffer* buf); +static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info); +static Py_ssize_t __Pyx_minusones[] = { -1, -1, -1, -1, -1, -1, -1, -1 }; +static Py_ssize_t __Pyx_zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +/* GetItemInt.proto */ +#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ + (is_list ? 
(PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ + __Pyx_GetItemInt_Generic(o, to_py_func(i)))) +#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, + int is_list, int wraparound, int boundscheck); + +/* PyFunctionFastCall.proto */ +#if CYTHON_FAST_PYCALL +#if !CYTHON_VECTORCALL +#define __Pyx_PyFunction_FastCall(func, args, nargs)\ + __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); +#endif +#define __Pyx_BUILD_ASSERT_EXPR(cond)\ + (sizeof(char [1 - 2*!(cond)]) - 1) +#ifndef Py_MEMBER_SIZE +#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) +#endif +#if !CYTHON_VECTORCALL +#if PY_VERSION_HEX >= 0x03080000 + #include "frameobject.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif + #define __Pxy_PyFrame_Initialize_Offsets() + 
#define __Pyx_PyFrame_GetLocalsplus(frame) ((frame)->f_localsplus) +#else + static size_t __pyx_pyframe_localsplus_offset = 0; + #include "frameobject.h" + #define __Pxy_PyFrame_Initialize_Offsets()\ + ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ + (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) + #define __Pyx_PyFrame_GetLocalsplus(frame)\ + (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) +#endif +#endif +#endif + +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +/* PyObjectFastCall.proto */ +#define __Pyx_PyObject_FastCall(func, args, nargs) __Pyx_PyObject_FastCallDict(func, args, (size_t)(nargs), NULL) +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs); + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + +/* ObjectGetItem.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject *key); +#else +#define __Pyx_PyObject_GetItem(obj, key) PyObject_GetItem(obj, key) +#endif + +/* ExtTypeTest.proto */ +static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); + +/* PyDictVersioning.proto */ +#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) +#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ + (version_var) = __PYX_GET_DICT_VERSION(dict);\ + (cache_var) = (value); +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ + static PY_UINT64_T 
__pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ + (VAR) = __pyx_dict_cached_value;\ + } else {\ + (VAR) = __pyx_dict_cached_value = (LOOKUP);\ + __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ + }\ +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); +#else +#define __PYX_GET_DICT_VERSION(dict) (0) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); +#endif + +/* GetModuleGlobalName.proto */ +#if CYTHON_USE_DICT_VERSIONS +#define __Pyx_GetModuleGlobalName(var, name) do {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\ + (likely(__pyx_dict_cached_value) ? 
__Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\ + __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} while(0) +#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\ + PY_UINT64_T __pyx_dict_version;\ + PyObject *__pyx_dict_cached_value;\ + (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} while(0) +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value); +#else +#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name) +#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name) +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name); +#endif + +#define __Pyx_BufPtrFull1d(type, buf, i0, s0, o0) (type)(__Pyx_BufPtrFull1d_imp(buf, i0, s0, o0)) +static CYTHON_INLINE void* __Pyx_BufPtrFull1d_imp(void* buf, Py_ssize_t i0, Py_ssize_t s0, Py_ssize_t o0); +#define __Pyx_BufPtrStrided2d(type, buf, i0, s0, i1, s1) (type)((char*)buf + i0 * s0 + i1 * s1) +#define __Pyx_BufPtrStrided1d(type, buf, i0, s0) (type)((char*)buf + i0 * s0) +#define __Pyx_BufPtrStrided3d(type, buf, i0, s0, i1, s1, i2, s2) (type)((char*)buf + i0 * s0 + i1 * s1 + i2 * s2) +/* TypeImport.proto */ +#ifndef __PYX_HAVE_RT_ImportType_proto_3_0_11 +#define __PYX_HAVE_RT_ImportType_proto_3_0_11 +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +#include +#endif +#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __cplusplus >= 201103L +#define __PYX_GET_STRUCT_ALIGNMENT_3_0_11(s) alignof(s) +#else +#define __PYX_GET_STRUCT_ALIGNMENT_3_0_11(s) sizeof(void*) +#endif +enum __Pyx_ImportType_CheckSize_3_0_11 { + __Pyx_ImportType_CheckSize_Error_3_0_11 = 0, + __Pyx_ImportType_CheckSize_Warn_3_0_11 = 1, + __Pyx_ImportType_CheckSize_Ignore_3_0_11 = 2 +}; +static PyTypeObject *__Pyx_ImportType_3_0_11(PyObject* module, const char *module_name, const 
char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_0_11 check_size); +#endif + +/* Import.proto */ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); + +/* ImportDottedModule.proto */ +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple); +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple); +#endif + +/* ImportDottedModuleRelFirst.proto */ +static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject *name, PyObject *parts_tuple); + +/* IncludeStructmemberH.proto */ +#include + +/* FixUpExtensionType.proto */ +#if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type); +#endif + +/* FetchSharedCythonModule.proto */ +static PyObject *__Pyx_FetchSharedCythonABIModule(void); + +/* FetchCommonType.proto */ +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type); +#else +static PyTypeObject* __Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases); +#endif + +/* PyMethodNew.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + PyObject *typesModule=NULL, *methodType=NULL, *result=NULL; + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + typesModule = PyImport_ImportModule("types"); + if (!typesModule) return NULL; + methodType = PyObject_GetAttrString(typesModule, "MethodType"); + Py_DECREF(typesModule); + if (!methodType) return NULL; + result = PyObject_CallFunctionObjArgs(methodType, func, self, NULL); + Py_DECREF(methodType); + return result; +} +#elif PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + return PyMethod_New(func, self); +} +#else + #define 
__Pyx_PyMethod_New PyMethod_New +#endif + +/* PyVectorcallFastCallDict.proto */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw); +#endif + +/* CythonFunctionShared.proto */ +#define __Pyx_CyFunction_USED +#define __Pyx_CYFUNCTION_STATICMETHOD 0x01 +#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02 +#define __Pyx_CYFUNCTION_CCLASS 0x04 +#define __Pyx_CYFUNCTION_COROUTINE 0x08 +#define __Pyx_CyFunction_GetClosure(f)\ + (((__pyx_CyFunctionObject *) (f))->func_closure) +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_CyFunction_GetClassObj(f)\ + (((__pyx_CyFunctionObject *) (f))->func_classobj) +#else + #define __Pyx_CyFunction_GetClassObj(f)\ + ((PyObject*) ((PyCMethodObject *) (f))->mm_class) +#endif +#define __Pyx_CyFunction_SetClassObj(f, classobj)\ + __Pyx__CyFunction_SetClassObj((__pyx_CyFunctionObject *) (f), (classobj)) +#define __Pyx_CyFunction_Defaults(type, f)\ + ((type *)(((__pyx_CyFunctionObject *) (f))->defaults)) +#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\ + ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g) +typedef struct { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject_HEAD + PyObject *func; +#elif PY_VERSION_HEX < 0x030900B1 + PyCFunctionObject func; +#else + PyCMethodObject func; +#endif +#if CYTHON_BACKPORT_VECTORCALL + __pyx_vectorcallfunc func_vectorcall; +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_weakreflist; +#endif + PyObject *func_dict; + PyObject *func_name; + PyObject *func_qualname; + PyObject *func_doc; + PyObject *func_globals; + PyObject *func_code; + PyObject *func_closure; +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_classobj; +#endif + void *defaults; + int defaults_pyobjects; + size_t defaults_size; + int flags; + PyObject *defaults_tuple; + PyObject *defaults_kwdict; + 
PyObject *(*defaults_getter)(PyObject *); + PyObject *func_annotations; + PyObject *func_is_coroutine; +} __pyx_CyFunctionObject; +#undef __Pyx_CyOrPyCFunction_Check +#define __Pyx_CyFunction_Check(obj) __Pyx_TypeCheck(obj, __pyx_CyFunctionType) +#define __Pyx_CyOrPyCFunction_Check(obj) __Pyx_TypeCheck2(obj, __pyx_CyFunctionType, &PyCFunction_Type) +#define __Pyx_CyFunction_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_CyFunctionType) +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc); +#undef __Pyx_IsSameCFunction +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCyOrCFunction(func, cfunc) +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj); +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m, + size_t size, + int pyobjects); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m, + PyObject *tuple); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m, + PyObject *dict); +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m, + PyObject *dict); +static int __pyx_CyFunction_init(PyObject *module); +#if CYTHON_METH_FASTCALL +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +#if CYTHON_BACKPORT_VECTORCALL +#define 
__Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall) +#else +#define __Pyx_CyFunction_func_vectorcall(f) (((PyCFunctionObject*)f)->vectorcall) +#endif +#endif + +/* CythonFunction.proto */ +static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); + +/* CLineInTraceback.proto */ +#ifdef CYTHON_CLINE_IN_TRACEBACK +#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0) +#else +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); +#endif + +/* CodeObjectCache.proto */ +#if !CYTHON_COMPILING_IN_LIMITED_API +typedef struct { + PyCodeObject* code_object; + int code_line; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); +#endif + +/* AddTraceback.proto */ +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +/* ArrayAPI.proto */ +#ifndef _ARRAYARRAY_H +#define _ARRAYARRAY_H +typedef struct arraydescr { + int typecode; + int itemsize; + PyObject * (*getitem)(struct arrayobject *, Py_ssize_t); + int (*setitem)(struct arrayobject *, Py_ssize_t, PyObject *); +#if PY_MAJOR_VERSION >= 3 + char *formats; +#endif +} arraydescr; +struct arrayobject { + PyObject_HEAD + Py_ssize_t ob_size; + union { + char *ob_item; + float *as_floats; + double *as_doubles; + int *as_ints; + unsigned int *as_uints; + unsigned char *as_uchars; + signed char *as_schars; + char *as_chars; + unsigned long *as_ulongs; + long *as_longs; +#if PY_MAJOR_VERSION >= 3 + 
unsigned long long *as_ulonglongs; + long long *as_longlongs; +#endif + short *as_shorts; + unsigned short *as_ushorts; + Py_UNICODE *as_pyunicodes; + void *as_voidptr; + } data; + Py_ssize_t allocated; + struct arraydescr *ob_descr; + PyObject *weakreflist; +#if PY_MAJOR_VERSION >= 3 + int ob_exports; +#endif +}; +#ifndef NO_NEWARRAY_INLINE +static CYTHON_INLINE PyObject * newarrayobject(PyTypeObject *type, Py_ssize_t size, + struct arraydescr *descr) { + arrayobject *op; + size_t nbytes; + if (size < 0) { + PyErr_BadInternalCall(); + return NULL; + } + nbytes = size * descr->itemsize; + if (nbytes / descr->itemsize != (size_t)size) { + return PyErr_NoMemory(); + } + op = (arrayobject *) type->tp_alloc(type, 0); + if (op == NULL) { + return NULL; + } + op->ob_descr = descr; + op->allocated = size; + op->weakreflist = NULL; + __Pyx_SET_SIZE(op, size); + if (size <= 0) { + op->data.ob_item = NULL; + } + else { + op->data.ob_item = PyMem_NEW(char, nbytes); + if (op->data.ob_item == NULL) { + Py_DECREF(op); + return PyErr_NoMemory(); + } + } + return (PyObject *) op; +} +#else +PyObject* newarrayobject(PyTypeObject *type, Py_ssize_t size, + struct arraydescr *descr); +#endif +static CYTHON_INLINE int resize(arrayobject *self, Py_ssize_t n) { + void *items = (void*) self->data.ob_item; + PyMem_Resize(items, char, (size_t)(n * self->ob_descr->itemsize)); + if (items == NULL) { + PyErr_NoMemory(); + return -1; + } + self->data.ob_item = (char*) items; + __Pyx_SET_SIZE(self, n); + self->allocated = n; + return 0; +} +static CYTHON_INLINE int resize_smart(arrayobject *self, Py_ssize_t n) { + void *items = (void*) self->data.ob_item; + Py_ssize_t newsize; + if (n < self->allocated && n*4 > self->allocated) { + __Pyx_SET_SIZE(self, n); + return 0; + } + newsize = n + (n / 2) + 1; + if (newsize <= n) { + PyErr_NoMemory(); + return -1; + } + PyMem_Resize(items, char, (size_t)(newsize * self->ob_descr->itemsize)); + if (items == NULL) { + PyErr_NoMemory(); + return -1; + } + 
self->data.ob_item = (char*) items; + __Pyx_SET_SIZE(self, n); + self->allocated = newsize; + return 0; +} +#endif + +/* BufferStructDeclare.proto */ +typedef struct { + Py_ssize_t shape, strides, suboffsets; +} __Pyx_Buf_DimInfo; +typedef struct { + size_t refcount; + Py_buffer pybuffer; +} __Pyx_Buffer; +typedef struct { + __Pyx_Buffer *rcbuffer; + char *data; + __Pyx_Buf_DimInfo diminfo[8]; +} __Pyx_LocalBuf_ND; + +#if PY_MAJOR_VERSION < 3 + static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags); + static void __Pyx_ReleaseBuffer(Py_buffer *view); +#else + #define __Pyx_GetBuffer PyObject_GetBuffer + #define __Pyx_ReleaseBuffer PyBuffer_Release +#endif + + +/* GCCDiagnostics.proto */ +#if !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) +#define __Pyx_HAS_GCC_DIAGNOSTIC +#endif + +/* RealImag.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #define __Pyx_CREAL(z) ((z).real()) + #define __Pyx_CIMAG(z) ((z).imag()) + #else + #define __Pyx_CREAL(z) (__real__(z)) + #define __Pyx_CIMAG(z) (__imag__(z)) + #endif +#else + #define __Pyx_CREAL(z) ((z).real) + #define __Pyx_CIMAG(z) ((z).imag) +#endif +#if defined(__cplusplus) && CYTHON_CCOMPLEX\ + && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103) + #define __Pyx_SET_CREAL(z,x) ((z).real(x)) + #define __Pyx_SET_CIMAG(z,y) ((z).imag(y)) +#else + #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x) + #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y) +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #define __Pyx_c_eq_float(a, b) ((a)==(b)) + #define __Pyx_c_sum_float(a, b) ((a)+(b)) + #define __Pyx_c_diff_float(a, b) ((a)-(b)) + #define __Pyx_c_prod_float(a, b) ((a)*(b)) + #define __Pyx_c_quot_float(a, b) ((a)/(b)) + #define __Pyx_c_neg_float(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_float(z) 
((z)==(float)0) + #define __Pyx_c_conj_float(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_float(z) (::std::abs(z)) + #define __Pyx_c_pow_float(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_float(z) ((z)==0) + #define __Pyx_c_conj_float(z) (conjf(z)) + #if 1 + #define __Pyx_c_abs_float(z) (cabsf(z)) + #define __Pyx_c_pow_float(a, b) (cpowf(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex); + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex, __pyx_t_float_complex); + #endif +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #define __Pyx_c_eq_double(a, b) ((a)==(b)) + #define __Pyx_c_sum_double(a, b) ((a)+(b)) + #define __Pyx_c_diff_double(a, b) ((a)-(b)) + #define __Pyx_c_prod_double(a, b) ((a)*(b)) + #define __Pyx_c_quot_double(a, b) ((a)/(b)) + #define __Pyx_c_neg_double(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_double(z) ((z)==(double)0) + #define __Pyx_c_conj_double(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (::std::abs(z)) + #define __Pyx_c_pow_double(a, b) (::std::pow(a, b)) + #endif + #else + #define 
__Pyx_c_is_zero_double(z) ((z)==0) + #define __Pyx_c_conj_double(z) (conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (cabs(z)) + #define __Pyx_c_pow_double(a, b) (cpow(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex); + #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex, __pyx_t_double_complex); + #endif +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #define __Pyx_c_eq_long__double(a, b) ((a)==(b)) + #define __Pyx_c_sum_long__double(a, b) ((a)+(b)) + #define __Pyx_c_diff_long__double(a, b) ((a)-(b)) + #define __Pyx_c_prod_long__double(a, b) ((a)*(b)) + #define __Pyx_c_quot_long__double(a, b) ((a)/(b)) + #define __Pyx_c_neg_long__double(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_long__double(z) ((z)==(long double)0) + #define __Pyx_c_conj_long__double(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_long__double(z) (::std::abs(z)) + #define __Pyx_c_pow_long__double(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_long__double(z) ((z)==0) + #define __Pyx_c_conj_long__double(z) (conjl(z)) + #if 1 + 
#define __Pyx_c_abs_long__double(z) (cabsl(z)) + #define __Pyx_c_pow_long__double(a, b) (cpowl(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_sum_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_diff_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_prod_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_quot_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_neg_long__double(__pyx_t_long_double_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_long__double(__pyx_t_long_double_complex); + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_conj_long__double(__pyx_t_long_double_complex); + #if 1 + static CYTHON_INLINE long double __Pyx_c_abs_long__double(__pyx_t_long_double_complex); + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_pow_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); + #endif +#endif + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +/* FormatTypeName.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +typedef PyObject *__Pyx_TypeName; +#define __Pyx_FMT_TYPENAME "%U" +static __Pyx_TypeName __Pyx_PyType_GetName(PyTypeObject* tp); +#define __Pyx_DECREF_TypeName(obj) Py_XDECREF(obj) +#else +typedef const char *__Pyx_TypeName; +#define __Pyx_FMT_TYPENAME "%.200s" +#define __Pyx_PyType_GetName(tp) ((tp)->tp_name) +#define 
__Pyx_DECREF_TypeName(obj) +#endif + +/* CIntFromPy.proto */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +/* FastTypeChecks.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) __Pyx_IsAnySubtype2(Py_TYPE(obj), (PyTypeObject *)type1, (PyTypeObject *)type2) +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); +#else +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) (PyObject_TypeCheck(obj, (PyTypeObject *)type1) || PyObject_TypeCheck(obj, (PyTypeObject *)type2)) +#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) +#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) +#endif +#define __Pyx_PyErr_ExceptionMatches2(err1, err2) __Pyx_PyErr_GivenExceptionMatches2(__Pyx_PyErr_CurrentExceptionType(), err1, err2) +#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) + +/* CheckBinaryVersion.proto */ +static unsigned long __Pyx_get_runtime_version(void); +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer); + +/* InitStrings.proto */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + +/* #### Code section: module_declarations ### */ +static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_Descr *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp 
__pyx_f_5numpy_5dtype_9alignment_alignment(PyArray_Descr *__pyx_v_self); /* proto*/ +static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_6fields_fields(PyArray_Descr *__pyx_v_self); /* proto*/ +static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr *__pyx_v_self); /* proto*/ +static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarray(PyArray_Descr *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr *__pyx_v_self); /* proto*/ +static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE void **__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__pyx_v_self); 
/* proto*/ +static CYTHON_INLINE double __pyx_f_7cpython_7complex_7complex_4real_real(PyComplexObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE double __pyx_f_7cpython_7complex_7complex_4imag_imag(PyComplexObject *__pyx_v_self); /* proto*/ + +/* Module declarations from "libc.string" */ + +/* Module declarations from "libc.stdio" */ + +/* Module declarations from "__builtin__" */ + +/* Module declarations from "cpython.type" */ + +/* Module declarations from "cpython.version" */ + +/* Module declarations from "cpython.exc" */ + +/* Module declarations from "cpython.module" */ + +/* Module declarations from "cpython.mem" */ + +/* Module declarations from "cpython.tuple" */ + +/* Module declarations from "cpython.list" */ + +/* Module declarations from "cpython.sequence" */ + +/* Module declarations from "cpython.mapping" */ + +/* Module declarations from "cpython.iterator" */ + +/* Module declarations from "cpython.number" */ + +/* Module declarations from "cpython.int" */ + +/* Module declarations from "__builtin__" */ + +/* Module declarations from "cpython.bool" */ + +/* Module declarations from "cpython.long" */ + +/* Module declarations from "cpython.float" */ + +/* Module declarations from "__builtin__" */ + +/* Module declarations from "cpython.complex" */ + +/* Module declarations from "cpython.string" */ + +/* Module declarations from "libc.stddef" */ + +/* Module declarations from "cpython.unicode" */ + +/* Module declarations from "cpython.pyport" */ + +/* Module declarations from "cpython.dict" */ + +/* Module declarations from "cpython.instance" */ + +/* Module declarations from "cpython.function" */ + +/* Module declarations from "cpython.method" */ + +/* Module declarations from "cpython.weakref" */ + +/* Module declarations from "cpython.getargs" */ + +/* Module declarations from "cpython.pythread" */ + +/* Module declarations from "cpython.pystate" */ + +/* Module declarations from "cpython.cobject" */ + +/* Module declarations from 
"cpython.oldbuffer" */ + +/* Module declarations from "cpython.set" */ + +/* Module declarations from "cpython.buffer" */ + +/* Module declarations from "cpython.bytes" */ + +/* Module declarations from "cpython.pycapsule" */ + +/* Module declarations from "cpython.contextvars" */ + +/* Module declarations from "cpython" */ + +/* Module declarations from "cpython.object" */ + +/* Module declarations from "cpython.ref" */ + +/* Module declarations from "numpy" */ + +/* Module declarations from "numpy" */ + +/* Module declarations from "cython" */ + +/* Module declarations from "array" */ + +/* Module declarations from "cpython.array" */ +static CYTHON_INLINE int __pyx_f_7cpython_5array_extend_buffer(arrayobject *, char *, Py_ssize_t); /*proto*/ + +/* Module declarations from "jcvi.assembly.chic" */ +static int *__pyx_v_4jcvi_8assembly_4chic_GR; +/* #### Code section: typeinfo ### */ +static __Pyx_TypeInfo __Pyx_TypeInfo_int = { "int", NULL, sizeof(int), { 0 }, 0, __PYX_IS_UNSIGNED(int) ? 
'U' : 'I', __PYX_IS_UNSIGNED(int), 0 }; +static __Pyx_TypeInfo __Pyx_TypeInfo_object = { "INT", NULL, sizeof(__pyx_t_4jcvi_8assembly_4chic_INT), { 0 }, 0, 'O', 0, 0 }; +/* #### Code section: before_global_var ### */ +#define __Pyx_MODULE_NAME "jcvi.assembly.chic" +extern int __pyx_module_is_main_jcvi__assembly__chic; +int __pyx_module_is_main_jcvi__assembly__chic = 0; + +/* Implementation of "jcvi.assembly.chic" */ +/* #### Code section: global_var ### */ +static PyObject *__pyx_builtin_range; +static PyObject *__pyx_builtin_ImportError; +static PyObject *__pyx_builtin_MemoryError; +/* #### Code section: string_decls ### */ +static const char __pyx_k_a[] = "a"; +static const char __pyx_k_b[] = "b"; +static const char __pyx_k_c[] = "c"; +static const char __pyx_k_s[] = "s"; +static const char __pyx_k__3[] = "*"; +static const char __pyx_k_ia[] = "ia"; +static const char __pyx_k_ib[] = "ib"; +static const char __pyx_k_ic[] = "ic"; +static const char __pyx_k_np[] = "np"; +static const char __pyx_k__11[] = "?"; +static const char __pyx_k_dist[] = "dist"; +static const char __pyx_k_main[] = "__main__"; +static const char __pyx_k_name[] = "__name__"; +static const char __pyx_k_size[] = "size"; +static const char __pyx_k_spec[] = "__spec__"; +static const char __pyx_k_test[] = "__test__"; +static const char __pyx_k_tour[] = "tour"; +static const char __pyx_k_array[] = "array"; +static const char __pyx_k_links[] = "links"; +static const char __pyx_k_numpy[] = "numpy"; +static const char __pyx_k_range[] = "range"; +static const char __pyx_k_cumsum[] = "cumsum"; +static const char __pyx_k_import[] = "__import__"; +static const char __pyx_k_tour_M[] = "tour_M"; +static const char __pyx_k_tour_P[] = "tour_P"; +static const char __pyx_k_tour_Q[] = "tour_Q"; +static const char __pyx_k_sizes_oo[] = "sizes_oo"; +static const char __pyx_k_sizes_cum[] = "sizes_cum"; +static const char __pyx_k_tour_sizes[] = "tour_sizes"; +static const char __pyx_k_ImportError[] = "ImportError"; 
+static const char __pyx_k_MemoryError[] = "MemoryError"; +static const char __pyx_k_initializing[] = "_initializing"; +static const char __pyx_k_is_coroutine[] = "_is_coroutine"; +static const char __pyx_k_class_getitem[] = "__class_getitem__"; +static const char __pyx_k_score_evaluate_M[] = "score_evaluate_M"; +static const char __pyx_k_score_evaluate_P[] = "score_evaluate_P"; +static const char __pyx_k_score_evaluate_Q[] = "score_evaluate_Q"; +static const char __pyx_k_asyncio_coroutines[] = "asyncio.coroutines"; +static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; +static const char __pyx_k_jcvi_assembly_chic[] = "jcvi.assembly.chic"; +static const char __pyx_k_src_jcvi_assembly_chic_pyx[] = "src/jcvi/assembly/chic.pyx"; +static const char __pyx_k_Cythonized_version_of_score_eva[] = "\nCythonized version of score_evaluate() in hic.py.\n\nSupport three versions with different objective functions:\n- score_evaluate_M: distance is defined as the distance between mid-points\n between contigs. Maximize Sum(n_links / distance).\n- score_evaluate_P: distance is defined as the sizes of interleaving contigs\n plus the harmonic mean of all link distances. Maximize Sum(n_links / distance).\n- score_evaluate_Q: distance is defined as the sizes of interleaving contigs\n plus the actual link distances. Maximize Sum(1 / distance) for all links.\n For performance consideration, we actually use a histogram to approximate\n all link distances. 
See golden_array() in hic for details.\n"; +static const char __pyx_k_numpy__core_multiarray_failed_to[] = "numpy._core.multiarray failed to import"; +static const char __pyx_k_numpy__core_umath_failed_to_impo[] = "numpy._core.umath failed to import"; +/* #### Code section: decls ### */ +static int __pyx_pf_7cpython_5array_5array___getbuffer__(arrayobject *__pyx_v_self, Py_buffer *__pyx_v_info, CYTHON_UNUSED int __pyx_v_flags); /* proto */ +static void __pyx_pf_7cpython_5array_5array_2__releasebuffer__(CYTHON_UNUSED arrayobject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ +static PyObject *__pyx_pf_4jcvi_8assembly_4chic_score_evaluate_M(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_M); /* proto */ +static PyObject *__pyx_pf_4jcvi_8assembly_4chic_2score_evaluate_P(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_P); /* proto */ +static PyObject *__pyx_pf_4jcvi_8assembly_4chic_4score_evaluate_Q(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_Q); /* proto */ +/* #### Code section: late_includes ### */ +/* #### Code section: module_state ### */ +typedef struct { + PyObject *__pyx_d; + PyObject *__pyx_b; + PyObject *__pyx_cython_runtime; + PyObject *__pyx_empty_tuple; + PyObject *__pyx_empty_bytes; + PyObject *__pyx_empty_unicode; + #ifdef __Pyx_CyFunction_USED + PyTypeObject *__pyx_CyFunctionType; + #endif + #ifdef __Pyx_FusedFunction_USED + PyTypeObject *__pyx_FusedFunctionType; + #endif + #ifdef __Pyx_Generator_USED + PyTypeObject *__pyx_GeneratorType; + #endif + #ifdef __Pyx_IterableCoroutine_USED + PyTypeObject *__pyx_IterableCoroutineType; + #endif + #ifdef __Pyx_Coroutine_USED + PyTypeObject *__pyx_CoroutineAwaitType; + #endif + #ifdef __Pyx_Coroutine_USED + PyTypeObject *__pyx_CoroutineType; + #endif + #if 
CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + PyTypeObject *__pyx_ptype_7cpython_4type_type; + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + PyTypeObject *__pyx_ptype_7cpython_4bool_bool; + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + PyTypeObject *__pyx_ptype_7cpython_7complex_complex; + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + PyTypeObject *__pyx_ptype_5numpy_dtype; + PyTypeObject *__pyx_ptype_5numpy_flatiter; + 
PyTypeObject *__pyx_ptype_5numpy_broadcast; + PyTypeObject *__pyx_ptype_5numpy_ndarray; + PyTypeObject *__pyx_ptype_5numpy_generic; + PyTypeObject *__pyx_ptype_5numpy_number; + PyTypeObject *__pyx_ptype_5numpy_integer; + PyTypeObject *__pyx_ptype_5numpy_signedinteger; + PyTypeObject *__pyx_ptype_5numpy_unsignedinteger; + PyTypeObject *__pyx_ptype_5numpy_inexact; + PyTypeObject *__pyx_ptype_5numpy_floating; + PyTypeObject *__pyx_ptype_5numpy_complexfloating; + PyTypeObject *__pyx_ptype_5numpy_flexible; + PyTypeObject *__pyx_ptype_5numpy_character; + PyTypeObject *__pyx_ptype_5numpy_ufunc; + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + PyTypeObject *__pyx_ptype_7cpython_5array_array; + #if CYTHON_USE_MODULE_STATE + #endif + PyObject *__pyx_n_s_ImportError; + PyObject *__pyx_n_s_MemoryError; + PyObject *__pyx_n_s__11; + PyObject *__pyx_n_s__3; + PyObject *__pyx_n_s_a; + PyObject *__pyx_n_s_array; + PyObject *__pyx_n_s_asyncio_coroutines; + PyObject *__pyx_n_s_b; + PyObject *__pyx_n_s_c; + PyObject *__pyx_n_s_class_getitem; + PyObject *__pyx_n_s_cline_in_traceback; + PyObject *__pyx_n_s_cumsum; + PyObject *__pyx_n_s_dist; + PyObject *__pyx_n_s_ia; + PyObject *__pyx_n_s_ib; + PyObject *__pyx_n_s_ic; + PyObject *__pyx_n_s_import; + PyObject *__pyx_n_s_initializing; + PyObject *__pyx_n_s_is_coroutine; + PyObject *__pyx_n_s_jcvi_assembly_chic; + PyObject *__pyx_n_s_links; + PyObject *__pyx_n_s_main; + PyObject *__pyx_n_s_name; + PyObject *__pyx_n_s_np; + PyObject *__pyx_n_s_numpy; + PyObject *__pyx_kp_s_numpy__core_multiarray_failed_to; + PyObject *__pyx_kp_s_numpy__core_umath_failed_to_impo; + PyObject *__pyx_n_s_range; + PyObject *__pyx_n_s_s; + PyObject *__pyx_n_s_score_evaluate_M; + PyObject *__pyx_n_s_score_evaluate_P; + PyObject *__pyx_n_s_score_evaluate_Q; + PyObject *__pyx_n_s_size; + PyObject *__pyx_n_s_sizes_cum; + PyObject *__pyx_n_s_sizes_oo; + PyObject *__pyx_n_s_spec; + PyObject 
*__pyx_kp_s_src_jcvi_assembly_chic_pyx; + PyObject *__pyx_n_s_test; + PyObject *__pyx_n_s_tour; + PyObject *__pyx_n_s_tour_M; + PyObject *__pyx_n_s_tour_P; + PyObject *__pyx_n_s_tour_Q; + PyObject *__pyx_n_s_tour_sizes; + PyObject *__pyx_int_2; + PyObject *__pyx_int_neg_1; + PyObject *__pyx_tuple_; + PyObject *__pyx_tuple__2; + PyObject *__pyx_tuple__4; + PyObject *__pyx_tuple__6; + PyObject *__pyx_tuple__7; + PyObject *__pyx_tuple__9; + PyObject *__pyx_codeobj__5; + PyObject *__pyx_codeobj__8; + PyObject *__pyx_codeobj__10; +} __pyx_mstate; + +#if CYTHON_USE_MODULE_STATE +#ifdef __cplusplus +namespace { + extern struct PyModuleDef __pyx_moduledef; +} /* anonymous namespace */ +#else +static struct PyModuleDef __pyx_moduledef; +#endif + +#define __pyx_mstate(o) ((__pyx_mstate *)__Pyx_PyModule_GetState(o)) + +#define __pyx_mstate_global (__pyx_mstate(PyState_FindModule(&__pyx_moduledef))) + +#define __pyx_m (PyState_FindModule(&__pyx_moduledef)) +#else +static __pyx_mstate __pyx_mstate_global_static = +#ifdef __cplusplus + {}; +#else + {0}; +#endif +static __pyx_mstate *__pyx_mstate_global = &__pyx_mstate_global_static; +#endif +/* #### Code section: module_state_clear ### */ +#if CYTHON_USE_MODULE_STATE +static int __pyx_m_clear(PyObject *m) { + __pyx_mstate *clear_module_state = __pyx_mstate(m); + if (!clear_module_state) return 0; + Py_CLEAR(clear_module_state->__pyx_d); + Py_CLEAR(clear_module_state->__pyx_b); + Py_CLEAR(clear_module_state->__pyx_cython_runtime); + Py_CLEAR(clear_module_state->__pyx_empty_tuple); + Py_CLEAR(clear_module_state->__pyx_empty_bytes); + Py_CLEAR(clear_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_CLEAR(clear_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_CLEAR(clear_module_state->__pyx_FusedFunctionType); + #endif + Py_CLEAR(clear_module_state->__pyx_ptype_7cpython_4type_type); + Py_CLEAR(clear_module_state->__pyx_ptype_7cpython_4bool_bool); + 
Py_CLEAR(clear_module_state->__pyx_ptype_7cpython_7complex_complex); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_dtype); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_flatiter); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_broadcast); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_ndarray); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_generic); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_number); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_integer); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_signedinteger); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_unsignedinteger); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_inexact); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_floating); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_complexfloating); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_flexible); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_character); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_ufunc); + Py_CLEAR(clear_module_state->__pyx_ptype_7cpython_5array_array); + Py_CLEAR(clear_module_state->__pyx_n_s_ImportError); + Py_CLEAR(clear_module_state->__pyx_n_s_MemoryError); + Py_CLEAR(clear_module_state->__pyx_n_s__11); + Py_CLEAR(clear_module_state->__pyx_n_s__3); + Py_CLEAR(clear_module_state->__pyx_n_s_a); + Py_CLEAR(clear_module_state->__pyx_n_s_array); + Py_CLEAR(clear_module_state->__pyx_n_s_asyncio_coroutines); + Py_CLEAR(clear_module_state->__pyx_n_s_b); + Py_CLEAR(clear_module_state->__pyx_n_s_c); + Py_CLEAR(clear_module_state->__pyx_n_s_class_getitem); + Py_CLEAR(clear_module_state->__pyx_n_s_cline_in_traceback); + Py_CLEAR(clear_module_state->__pyx_n_s_cumsum); + Py_CLEAR(clear_module_state->__pyx_n_s_dist); + Py_CLEAR(clear_module_state->__pyx_n_s_ia); + Py_CLEAR(clear_module_state->__pyx_n_s_ib); + Py_CLEAR(clear_module_state->__pyx_n_s_ic); + Py_CLEAR(clear_module_state->__pyx_n_s_import); + Py_CLEAR(clear_module_state->__pyx_n_s_initializing); + 
Py_CLEAR(clear_module_state->__pyx_n_s_is_coroutine); + Py_CLEAR(clear_module_state->__pyx_n_s_jcvi_assembly_chic); + Py_CLEAR(clear_module_state->__pyx_n_s_links); + Py_CLEAR(clear_module_state->__pyx_n_s_main); + Py_CLEAR(clear_module_state->__pyx_n_s_name); + Py_CLEAR(clear_module_state->__pyx_n_s_np); + Py_CLEAR(clear_module_state->__pyx_n_s_numpy); + Py_CLEAR(clear_module_state->__pyx_kp_s_numpy__core_multiarray_failed_to); + Py_CLEAR(clear_module_state->__pyx_kp_s_numpy__core_umath_failed_to_impo); + Py_CLEAR(clear_module_state->__pyx_n_s_range); + Py_CLEAR(clear_module_state->__pyx_n_s_s); + Py_CLEAR(clear_module_state->__pyx_n_s_score_evaluate_M); + Py_CLEAR(clear_module_state->__pyx_n_s_score_evaluate_P); + Py_CLEAR(clear_module_state->__pyx_n_s_score_evaluate_Q); + Py_CLEAR(clear_module_state->__pyx_n_s_size); + Py_CLEAR(clear_module_state->__pyx_n_s_sizes_cum); + Py_CLEAR(clear_module_state->__pyx_n_s_sizes_oo); + Py_CLEAR(clear_module_state->__pyx_n_s_spec); + Py_CLEAR(clear_module_state->__pyx_kp_s_src_jcvi_assembly_chic_pyx); + Py_CLEAR(clear_module_state->__pyx_n_s_test); + Py_CLEAR(clear_module_state->__pyx_n_s_tour); + Py_CLEAR(clear_module_state->__pyx_n_s_tour_M); + Py_CLEAR(clear_module_state->__pyx_n_s_tour_P); + Py_CLEAR(clear_module_state->__pyx_n_s_tour_Q); + Py_CLEAR(clear_module_state->__pyx_n_s_tour_sizes); + Py_CLEAR(clear_module_state->__pyx_int_2); + Py_CLEAR(clear_module_state->__pyx_int_neg_1); + Py_CLEAR(clear_module_state->__pyx_tuple_); + Py_CLEAR(clear_module_state->__pyx_tuple__2); + Py_CLEAR(clear_module_state->__pyx_tuple__4); + Py_CLEAR(clear_module_state->__pyx_tuple__6); + Py_CLEAR(clear_module_state->__pyx_tuple__7); + Py_CLEAR(clear_module_state->__pyx_tuple__9); + Py_CLEAR(clear_module_state->__pyx_codeobj__5); + Py_CLEAR(clear_module_state->__pyx_codeobj__8); + Py_CLEAR(clear_module_state->__pyx_codeobj__10); + return 0; +} +#endif +/* #### Code section: module_state_traverse ### */ +#if CYTHON_USE_MODULE_STATE +static 
int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) { + __pyx_mstate *traverse_module_state = __pyx_mstate(m); + if (!traverse_module_state) return 0; + Py_VISIT(traverse_module_state->__pyx_d); + Py_VISIT(traverse_module_state->__pyx_b); + Py_VISIT(traverse_module_state->__pyx_cython_runtime); + Py_VISIT(traverse_module_state->__pyx_empty_tuple); + Py_VISIT(traverse_module_state->__pyx_empty_bytes); + Py_VISIT(traverse_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_VISIT(traverse_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_VISIT(traverse_module_state->__pyx_FusedFunctionType); + #endif + Py_VISIT(traverse_module_state->__pyx_ptype_7cpython_4type_type); + Py_VISIT(traverse_module_state->__pyx_ptype_7cpython_4bool_bool); + Py_VISIT(traverse_module_state->__pyx_ptype_7cpython_7complex_complex); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_dtype); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_flatiter); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_broadcast); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_ndarray); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_generic); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_number); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_integer); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_signedinteger); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_unsignedinteger); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_inexact); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_floating); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_complexfloating); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_flexible); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_character); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_ufunc); + Py_VISIT(traverse_module_state->__pyx_ptype_7cpython_5array_array); + Py_VISIT(traverse_module_state->__pyx_n_s_ImportError); + 
Py_VISIT(traverse_module_state->__pyx_n_s_MemoryError); + Py_VISIT(traverse_module_state->__pyx_n_s__11); + Py_VISIT(traverse_module_state->__pyx_n_s__3); + Py_VISIT(traverse_module_state->__pyx_n_s_a); + Py_VISIT(traverse_module_state->__pyx_n_s_array); + Py_VISIT(traverse_module_state->__pyx_n_s_asyncio_coroutines); + Py_VISIT(traverse_module_state->__pyx_n_s_b); + Py_VISIT(traverse_module_state->__pyx_n_s_c); + Py_VISIT(traverse_module_state->__pyx_n_s_class_getitem); + Py_VISIT(traverse_module_state->__pyx_n_s_cline_in_traceback); + Py_VISIT(traverse_module_state->__pyx_n_s_cumsum); + Py_VISIT(traverse_module_state->__pyx_n_s_dist); + Py_VISIT(traverse_module_state->__pyx_n_s_ia); + Py_VISIT(traverse_module_state->__pyx_n_s_ib); + Py_VISIT(traverse_module_state->__pyx_n_s_ic); + Py_VISIT(traverse_module_state->__pyx_n_s_import); + Py_VISIT(traverse_module_state->__pyx_n_s_initializing); + Py_VISIT(traverse_module_state->__pyx_n_s_is_coroutine); + Py_VISIT(traverse_module_state->__pyx_n_s_jcvi_assembly_chic); + Py_VISIT(traverse_module_state->__pyx_n_s_links); + Py_VISIT(traverse_module_state->__pyx_n_s_main); + Py_VISIT(traverse_module_state->__pyx_n_s_name); + Py_VISIT(traverse_module_state->__pyx_n_s_np); + Py_VISIT(traverse_module_state->__pyx_n_s_numpy); + Py_VISIT(traverse_module_state->__pyx_kp_s_numpy__core_multiarray_failed_to); + Py_VISIT(traverse_module_state->__pyx_kp_s_numpy__core_umath_failed_to_impo); + Py_VISIT(traverse_module_state->__pyx_n_s_range); + Py_VISIT(traverse_module_state->__pyx_n_s_s); + Py_VISIT(traverse_module_state->__pyx_n_s_score_evaluate_M); + Py_VISIT(traverse_module_state->__pyx_n_s_score_evaluate_P); + Py_VISIT(traverse_module_state->__pyx_n_s_score_evaluate_Q); + Py_VISIT(traverse_module_state->__pyx_n_s_size); + Py_VISIT(traverse_module_state->__pyx_n_s_sizes_cum); + Py_VISIT(traverse_module_state->__pyx_n_s_sizes_oo); + Py_VISIT(traverse_module_state->__pyx_n_s_spec); + 
Py_VISIT(traverse_module_state->__pyx_kp_s_src_jcvi_assembly_chic_pyx); + Py_VISIT(traverse_module_state->__pyx_n_s_test); + Py_VISIT(traverse_module_state->__pyx_n_s_tour); + Py_VISIT(traverse_module_state->__pyx_n_s_tour_M); + Py_VISIT(traverse_module_state->__pyx_n_s_tour_P); + Py_VISIT(traverse_module_state->__pyx_n_s_tour_Q); + Py_VISIT(traverse_module_state->__pyx_n_s_tour_sizes); + Py_VISIT(traverse_module_state->__pyx_int_2); + Py_VISIT(traverse_module_state->__pyx_int_neg_1); + Py_VISIT(traverse_module_state->__pyx_tuple_); + Py_VISIT(traverse_module_state->__pyx_tuple__2); + Py_VISIT(traverse_module_state->__pyx_tuple__4); + Py_VISIT(traverse_module_state->__pyx_tuple__6); + Py_VISIT(traverse_module_state->__pyx_tuple__7); + Py_VISIT(traverse_module_state->__pyx_tuple__9); + Py_VISIT(traverse_module_state->__pyx_codeobj__5); + Py_VISIT(traverse_module_state->__pyx_codeobj__8); + Py_VISIT(traverse_module_state->__pyx_codeobj__10); + return 0; +} +#endif +/* #### Code section: module_state_defines ### */ +#define __pyx_d __pyx_mstate_global->__pyx_d +#define __pyx_b __pyx_mstate_global->__pyx_b +#define __pyx_cython_runtime __pyx_mstate_global->__pyx_cython_runtime +#define __pyx_empty_tuple __pyx_mstate_global->__pyx_empty_tuple +#define __pyx_empty_bytes __pyx_mstate_global->__pyx_empty_bytes +#define __pyx_empty_unicode __pyx_mstate_global->__pyx_empty_unicode +#ifdef __Pyx_CyFunction_USED +#define __pyx_CyFunctionType __pyx_mstate_global->__pyx_CyFunctionType +#endif +#ifdef __Pyx_FusedFunction_USED +#define __pyx_FusedFunctionType __pyx_mstate_global->__pyx_FusedFunctionType +#endif +#ifdef __Pyx_Generator_USED +#define __pyx_GeneratorType __pyx_mstate_global->__pyx_GeneratorType +#endif +#ifdef __Pyx_IterableCoroutine_USED +#define __pyx_IterableCoroutineType __pyx_mstate_global->__pyx_IterableCoroutineType +#endif +#ifdef __Pyx_Coroutine_USED +#define __pyx_CoroutineAwaitType __pyx_mstate_global->__pyx_CoroutineAwaitType +#endif +#ifdef 
__Pyx_Coroutine_USED +#define __pyx_CoroutineType __pyx_mstate_global->__pyx_CoroutineType +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_ptype_7cpython_4type_type __pyx_mstate_global->__pyx_ptype_7cpython_4type_type +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_ptype_7cpython_4bool_bool __pyx_mstate_global->__pyx_ptype_7cpython_4bool_bool +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_ptype_7cpython_7complex_complex __pyx_mstate_global->__pyx_ptype_7cpython_7complex_complex +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if 
CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_ptype_5numpy_dtype __pyx_mstate_global->__pyx_ptype_5numpy_dtype +#define __pyx_ptype_5numpy_flatiter __pyx_mstate_global->__pyx_ptype_5numpy_flatiter +#define __pyx_ptype_5numpy_broadcast __pyx_mstate_global->__pyx_ptype_5numpy_broadcast +#define __pyx_ptype_5numpy_ndarray __pyx_mstate_global->__pyx_ptype_5numpy_ndarray +#define __pyx_ptype_5numpy_generic __pyx_mstate_global->__pyx_ptype_5numpy_generic +#define __pyx_ptype_5numpy_number __pyx_mstate_global->__pyx_ptype_5numpy_number +#define __pyx_ptype_5numpy_integer __pyx_mstate_global->__pyx_ptype_5numpy_integer +#define __pyx_ptype_5numpy_signedinteger __pyx_mstate_global->__pyx_ptype_5numpy_signedinteger +#define __pyx_ptype_5numpy_unsignedinteger __pyx_mstate_global->__pyx_ptype_5numpy_unsignedinteger +#define __pyx_ptype_5numpy_inexact __pyx_mstate_global->__pyx_ptype_5numpy_inexact +#define __pyx_ptype_5numpy_floating __pyx_mstate_global->__pyx_ptype_5numpy_floating +#define __pyx_ptype_5numpy_complexfloating __pyx_mstate_global->__pyx_ptype_5numpy_complexfloating +#define __pyx_ptype_5numpy_flexible __pyx_mstate_global->__pyx_ptype_5numpy_flexible +#define __pyx_ptype_5numpy_character __pyx_mstate_global->__pyx_ptype_5numpy_character +#define __pyx_ptype_5numpy_ufunc __pyx_mstate_global->__pyx_ptype_5numpy_ufunc +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_ptype_7cpython_5array_array __pyx_mstate_global->__pyx_ptype_7cpython_5array_array +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_n_s_ImportError __pyx_mstate_global->__pyx_n_s_ImportError +#define __pyx_n_s_MemoryError __pyx_mstate_global->__pyx_n_s_MemoryError +#define __pyx_n_s__11 __pyx_mstate_global->__pyx_n_s__11 +#define __pyx_n_s__3 __pyx_mstate_global->__pyx_n_s__3 +#define __pyx_n_s_a __pyx_mstate_global->__pyx_n_s_a +#define __pyx_n_s_array 
__pyx_mstate_global->__pyx_n_s_array +#define __pyx_n_s_asyncio_coroutines __pyx_mstate_global->__pyx_n_s_asyncio_coroutines +#define __pyx_n_s_b __pyx_mstate_global->__pyx_n_s_b +#define __pyx_n_s_c __pyx_mstate_global->__pyx_n_s_c +#define __pyx_n_s_class_getitem __pyx_mstate_global->__pyx_n_s_class_getitem +#define __pyx_n_s_cline_in_traceback __pyx_mstate_global->__pyx_n_s_cline_in_traceback +#define __pyx_n_s_cumsum __pyx_mstate_global->__pyx_n_s_cumsum +#define __pyx_n_s_dist __pyx_mstate_global->__pyx_n_s_dist +#define __pyx_n_s_ia __pyx_mstate_global->__pyx_n_s_ia +#define __pyx_n_s_ib __pyx_mstate_global->__pyx_n_s_ib +#define __pyx_n_s_ic __pyx_mstate_global->__pyx_n_s_ic +#define __pyx_n_s_import __pyx_mstate_global->__pyx_n_s_import +#define __pyx_n_s_initializing __pyx_mstate_global->__pyx_n_s_initializing +#define __pyx_n_s_is_coroutine __pyx_mstate_global->__pyx_n_s_is_coroutine +#define __pyx_n_s_jcvi_assembly_chic __pyx_mstate_global->__pyx_n_s_jcvi_assembly_chic +#define __pyx_n_s_links __pyx_mstate_global->__pyx_n_s_links +#define __pyx_n_s_main __pyx_mstate_global->__pyx_n_s_main +#define __pyx_n_s_name __pyx_mstate_global->__pyx_n_s_name +#define __pyx_n_s_np __pyx_mstate_global->__pyx_n_s_np +#define __pyx_n_s_numpy __pyx_mstate_global->__pyx_n_s_numpy +#define __pyx_kp_s_numpy__core_multiarray_failed_to __pyx_mstate_global->__pyx_kp_s_numpy__core_multiarray_failed_to +#define __pyx_kp_s_numpy__core_umath_failed_to_impo __pyx_mstate_global->__pyx_kp_s_numpy__core_umath_failed_to_impo +#define __pyx_n_s_range __pyx_mstate_global->__pyx_n_s_range +#define __pyx_n_s_s __pyx_mstate_global->__pyx_n_s_s +#define __pyx_n_s_score_evaluate_M __pyx_mstate_global->__pyx_n_s_score_evaluate_M +#define __pyx_n_s_score_evaluate_P __pyx_mstate_global->__pyx_n_s_score_evaluate_P +#define __pyx_n_s_score_evaluate_Q __pyx_mstate_global->__pyx_n_s_score_evaluate_Q +#define __pyx_n_s_size __pyx_mstate_global->__pyx_n_s_size +#define __pyx_n_s_sizes_cum 
__pyx_mstate_global->__pyx_n_s_sizes_cum +#define __pyx_n_s_sizes_oo __pyx_mstate_global->__pyx_n_s_sizes_oo +#define __pyx_n_s_spec __pyx_mstate_global->__pyx_n_s_spec +#define __pyx_kp_s_src_jcvi_assembly_chic_pyx __pyx_mstate_global->__pyx_kp_s_src_jcvi_assembly_chic_pyx +#define __pyx_n_s_test __pyx_mstate_global->__pyx_n_s_test +#define __pyx_n_s_tour __pyx_mstate_global->__pyx_n_s_tour +#define __pyx_n_s_tour_M __pyx_mstate_global->__pyx_n_s_tour_M +#define __pyx_n_s_tour_P __pyx_mstate_global->__pyx_n_s_tour_P +#define __pyx_n_s_tour_Q __pyx_mstate_global->__pyx_n_s_tour_Q +#define __pyx_n_s_tour_sizes __pyx_mstate_global->__pyx_n_s_tour_sizes +#define __pyx_int_2 __pyx_mstate_global->__pyx_int_2 +#define __pyx_int_neg_1 __pyx_mstate_global->__pyx_int_neg_1 +#define __pyx_tuple_ __pyx_mstate_global->__pyx_tuple_ +#define __pyx_tuple__2 __pyx_mstate_global->__pyx_tuple__2 +#define __pyx_tuple__4 __pyx_mstate_global->__pyx_tuple__4 +#define __pyx_tuple__6 __pyx_mstate_global->__pyx_tuple__6 +#define __pyx_tuple__7 __pyx_mstate_global->__pyx_tuple__7 +#define __pyx_tuple__9 __pyx_mstate_global->__pyx_tuple__9 +#define __pyx_codeobj__5 __pyx_mstate_global->__pyx_codeobj__5 +#define __pyx_codeobj__8 __pyx_mstate_global->__pyx_codeobj__8 +#define __pyx_codeobj__10 __pyx_mstate_global->__pyx_codeobj__10 +/* #### Code section: module_code ### */ + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":287 + * + * @property + * cdef inline npy_intp itemsize(self) noexcept nogil: # <<<<<<<<<<<<<< + * return PyDataType_ELSIZE(self) + * + */ + +static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_Descr *__pyx_v_self) { + npy_intp __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":288 + * @property + * 
cdef inline npy_intp itemsize(self) noexcept nogil: + * return PyDataType_ELSIZE(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyDataType_ELSIZE(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":287 + * + * @property + * cdef inline npy_intp itemsize(self) noexcept nogil: # <<<<<<<<<<<<<< + * return PyDataType_ELSIZE(self) + * + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":291 + * + * @property + * cdef inline npy_intp alignment(self) noexcept nogil: # <<<<<<<<<<<<<< + * return PyDataType_ALIGNMENT(self) + * + */ + +static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_9alignment_alignment(PyArray_Descr *__pyx_v_self) { + npy_intp __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":292 + * @property + * cdef inline npy_intp alignment(self) noexcept nogil: + * return PyDataType_ALIGNMENT(self) # <<<<<<<<<<<<<< + * + * # Use fields/names with care as they may be NULL. 
You must check + */ + __pyx_r = PyDataType_ALIGNMENT(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":291 + * + * @property + * cdef inline npy_intp alignment(self) noexcept nogil: # <<<<<<<<<<<<<< + * return PyDataType_ALIGNMENT(self) + * + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":297 + * # for this using PyDataType_HASFIELDS. + * @property + * cdef inline object fields(self): # <<<<<<<<<<<<<< + * return PyDataType_FIELDS(self) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_6fields_fields(PyArray_Descr *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1; + __Pyx_RefNannySetupContext("fields", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":298 + * @property + * cdef inline object fields(self): + * return PyDataType_FIELDS(self) # <<<<<<<<<<<<<< + * + * @property + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyDataType_FIELDS(__pyx_v_self); + __Pyx_INCREF(((PyObject *)__pyx_t_1)); + __pyx_r = ((PyObject *)__pyx_t_1); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":297 + * # for this using PyDataType_HASFIELDS. 
+ * @property + * cdef inline object fields(self): # <<<<<<<<<<<<<< + * return PyDataType_FIELDS(self) + * + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":301 + * + * @property + * cdef inline tuple names(self): # <<<<<<<<<<<<<< + * return PyDataType_NAMES(self) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1; + __Pyx_RefNannySetupContext("names", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":302 + * @property + * cdef inline tuple names(self): + * return PyDataType_NAMES(self) # <<<<<<<<<<<<<< + * + * # Use PyDataType_HASSUBARRAY to test whether this field is + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyDataType_NAMES(__pyx_v_self); + __Pyx_INCREF(((PyObject*)__pyx_t_1)); + __pyx_r = ((PyObject*)__pyx_t_1); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":301 + * + * @property + * cdef inline tuple names(self): # <<<<<<<<<<<<<< + * return PyDataType_NAMES(self) + * + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":308 + * # this field via the inline helper method PyDataType_SHAPE. 
+ * @property + * cdef inline PyArray_ArrayDescr* subarray(self) noexcept nogil: # <<<<<<<<<<<<<< + * return PyDataType_SUBARRAY(self) + * + */ + +static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarray(PyArray_Descr *__pyx_v_self) { + PyArray_ArrayDescr *__pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":309 + * @property + * cdef inline PyArray_ArrayDescr* subarray(self) noexcept nogil: + * return PyDataType_SUBARRAY(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyDataType_SUBARRAY(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":308 + * # this field via the inline helper method PyDataType_SHAPE. + * @property + * cdef inline PyArray_ArrayDescr* subarray(self) noexcept nogil: # <<<<<<<<<<<<<< + * return PyDataType_SUBARRAY(self) + * + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":312 + * + * @property + * cdef inline npy_uint64 flags(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The data types flags.""" + * return PyDataType_FLAGS(self) + */ + +static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr *__pyx_v_self) { + npy_uint64 __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":314 + * cdef inline npy_uint64 flags(self) noexcept nogil: + * """The data types flags.""" + * return PyDataType_FLAGS(self) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyDataType_FLAGS(__pyx_v_self); + goto __pyx_L0; + + /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":312 + * + * @property + * cdef inline npy_uint64 flags(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The data types flags.""" + * return PyDataType_FLAGS(self) + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":324 + * + * @property + * cdef inline int numiter(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The number of arrays that need to be broadcast to the same shape.""" + * return PyArray_MultiIter_NUMITER(self) + */ + +static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMultiIterObject *__pyx_v_self) { + int __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":326 + * cdef inline int numiter(self) noexcept nogil: + * """The number of arrays that need to be broadcast to the same shape.""" + * return PyArray_MultiIter_NUMITER(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_MultiIter_NUMITER(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":324 + * + * @property + * cdef inline int numiter(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The number of arrays that need to be broadcast to the same shape.""" + * return PyArray_MultiIter_NUMITER(self) + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":329 + * + * @property 
+ * cdef inline npy_intp size(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The total broadcasted size.""" + * return PyArray_MultiIter_SIZE(self) + */ + +static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiIterObject *__pyx_v_self) { + npy_intp __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":331 + * cdef inline npy_intp size(self) noexcept nogil: + * """The total broadcasted size.""" + * return PyArray_MultiIter_SIZE(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_MultiIter_SIZE(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":329 + * + * @property + * cdef inline npy_intp size(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The total broadcasted size.""" + * return PyArray_MultiIter_SIZE(self) + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":334 + * + * @property + * cdef inline npy_intp index(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The current (1-d) index into the broadcasted result.""" + * return PyArray_MultiIter_INDEX(self) + */ + +static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMultiIterObject *__pyx_v_self) { + npy_intp __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":336 + * cdef inline npy_intp index(self) noexcept nogil: + * """The current (1-d) index into the broadcasted result.""" + * return PyArray_MultiIter_INDEX(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = 
PyArray_MultiIter_INDEX(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":334 + * + * @property + * cdef inline npy_intp index(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The current (1-d) index into the broadcasted result.""" + * return PyArray_MultiIter_INDEX(self) + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":339 + * + * @property + * cdef inline int nd(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The number of dimensions in the broadcasted result.""" + * return PyArray_MultiIter_NDIM(self) + */ + +static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject *__pyx_v_self) { + int __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":341 + * cdef inline int nd(self) noexcept nogil: + * """The number of dimensions in the broadcasted result.""" + * return PyArray_MultiIter_NDIM(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_MultiIter_NDIM(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":339 + * + * @property + * cdef inline int nd(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The number of dimensions in the broadcasted result.""" + * return PyArray_MultiIter_NDIM(self) + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":344 + * + * @property + * cdef inline npy_intp* dimensions(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The shape of the broadcasted result.""" + * return PyArray_MultiIter_DIMS(self) + */ + +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions(PyArrayMultiIterObject *__pyx_v_self) { + npy_intp *__pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":346 + * cdef inline npy_intp* dimensions(self) noexcept nogil: + * """The shape of the broadcasted result.""" + * return PyArray_MultiIter_DIMS(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_MultiIter_DIMS(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":344 + * + * @property + * cdef inline npy_intp* dimensions(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The shape of the broadcasted result.""" + * return PyArray_MultiIter_DIMS(self) + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":349 + * + * @property + * cdef inline void** iters(self) noexcept nogil: # <<<<<<<<<<<<<< + * """An array of iterator objects that holds the iterators for the arrays to be broadcast together. 
+ * On return, the iterators are adjusted for broadcasting.""" + */ + +static CYTHON_INLINE void **__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiIterObject *__pyx_v_self) { + void **__pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":352 + * """An array of iterator objects that holds the iterators for the arrays to be broadcast together. + * On return, the iterators are adjusted for broadcasting.""" + * return PyArray_MultiIter_ITERS(self) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyArray_MultiIter_ITERS(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":349 + * + * @property + * cdef inline void** iters(self) noexcept nogil: # <<<<<<<<<<<<<< + * """An array of iterator objects that holds the iterators for the arrays to be broadcast together. + * On return, the iterators are adjusted for broadcasting.""" + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":367 + * + * @property + * cdef inline PyObject* base(self) noexcept nogil: # <<<<<<<<<<<<<< + * """Returns a borrowed reference to the object owning the data/memory. + * """ + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject *__pyx_v_self) { + PyObject *__pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":370 + * """Returns a borrowed reference to the object owning the data/memory. 
+ * """ + * return PyArray_BASE(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_BASE(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":367 + * + * @property + * cdef inline PyObject* base(self) noexcept nogil: # <<<<<<<<<<<<<< + * """Returns a borrowed reference to the object owning the data/memory. + * """ + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":373 + * + * @property + * cdef inline dtype descr(self): # <<<<<<<<<<<<<< + * """Returns an owned reference to the dtype of the array. + * """ + */ + +static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArrayObject *__pyx_v_self) { + PyArray_Descr *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyArray_Descr *__pyx_t_1; + __Pyx_RefNannySetupContext("descr", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":376 + * """Returns an owned reference to the dtype of the array. + * """ + * return PyArray_DESCR(self) # <<<<<<<<<<<<<< + * + * @property + */ + __Pyx_XDECREF((PyObject *)__pyx_r); + __pyx_t_1 = PyArray_DESCR(__pyx_v_self); + __Pyx_INCREF((PyObject *)((PyArray_Descr *)__pyx_t_1)); + __pyx_r = ((PyArray_Descr *)__pyx_t_1); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":373 + * + * @property + * cdef inline dtype descr(self): # <<<<<<<<<<<<<< + * """Returns an owned reference to the dtype of the array. 
+ * """ + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF((PyObject *)__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":379 + * + * @property + * cdef inline int ndim(self) noexcept nogil: # <<<<<<<<<<<<<< + * """Returns the number of dimensions in the array. + * """ + */ + +static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx_v_self) { + int __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":382 + * """Returns the number of dimensions in the array. + * """ + * return PyArray_NDIM(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_NDIM(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":379 + * + * @property + * cdef inline int ndim(self) noexcept nogil: # <<<<<<<<<<<<<< + * """Returns the number of dimensions in the array. + * """ + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":385 + * + * @property + * cdef inline npy_intp *shape(self) noexcept nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the dimensions/shape of the array. + * The number of elements matches the number of dimensions of the array (ndim). 
+ */ + +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObject *__pyx_v_self) { + npy_intp *__pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":390 + * Can return NULL for 0-dimensional arrays. + * """ + * return PyArray_DIMS(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_DIMS(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":385 + * + * @property + * cdef inline npy_intp *shape(self) noexcept nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the dimensions/shape of the array. + * The number of elements matches the number of dimensions of the array (ndim). + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":393 + * + * @property + * cdef inline npy_intp *strides(self) noexcept nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the strides of the array. + * The number of elements matches the number of dimensions of the array (ndim). + */ + +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayObject *__pyx_v_self) { + npy_intp *__pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":397 + * The number of elements matches the number of dimensions of the array (ndim). 
+ * """ + * return PyArray_STRIDES(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_STRIDES(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":393 + * + * @property + * cdef inline npy_intp *strides(self) noexcept nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the strides of the array. + * The number of elements matches the number of dimensions of the array (ndim). + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":400 + * + * @property + * cdef inline npy_intp size(self) noexcept nogil: # <<<<<<<<<<<<<< + * """Returns the total size (in number of elements) of the array. + * """ + */ + +static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject *__pyx_v_self) { + npy_intp __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":403 + * """Returns the total size (in number of elements) of the array. + * """ + * return PyArray_SIZE(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_SIZE(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":400 + * + * @property + * cdef inline npy_intp size(self) noexcept nogil: # <<<<<<<<<<<<<< + * """Returns the total size (in number of elements) of the array. 
+ * """ + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":406 + * + * @property + * cdef inline char* data(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The pointer to the data buffer as a char*. + * This is provided for legacy reasons to avoid direct struct field access. + */ + +static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__pyx_v_self) { + char *__pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":412 + * of `PyArray_DATA()` instead, which returns a 'void*'. + * """ + * return PyArray_BYTES(self) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyArray_BYTES(__pyx_v_self); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":406 + * + * @property + * cdef inline char* data(self) noexcept nogil: # <<<<<<<<<<<<<< + * """The pointer to the data buffer as a char*. + * This is provided for legacy reasons to avoid direct struct field access. 
+ */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":824 + * ctypedef long double complex clongdouble_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":825 + * + * cdef inline object PyArray_MultiIterNew1(a): + * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew2(a, b): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 825, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":824 + * ctypedef long double complex clongdouble_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":827 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":828 + * + * cdef inline object PyArray_MultiIterNew2(a, b): + * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 828, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":827 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":830 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":831 + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 831, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":830 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":833 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":834 + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 834, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":833 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + 
__Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":836 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":837 + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 837, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":836 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", 
__pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":839 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2; + __Pyx_RefNannySetupContext("PyDataType_SHAPE", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":840 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + */ + __pyx_t_1 = PyDataType_HASSUBARRAY(__pyx_v_d); + if (__pyx_t_1) { + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":841 + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape # <<<<<<<<<<<<<< + * else: + * return () + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_2 = __pyx_f_5numpy_5dtype_8subarray_subarray(__pyx_v_d)->shape; + __Pyx_INCREF(((PyObject*)__pyx_t_2)); + __pyx_r = ((PyObject*)__pyx_t_2); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":840 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + 
*/ + } + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":843 + * return d.subarray.shape + * else: + * return () # <<<<<<<<<<<<<< + * + * + */ + /*else*/ { + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_empty_tuple); + __pyx_r = __pyx_empty_tuple; + goto __pyx_L0; + } + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":839 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1027 + * int _import_umath() except -1 + * + * cdef inline void set_array_base(ndarray arr, object base) except *: # <<<<<<<<<<<<<< + * Py_INCREF(base) # important to do this before stealing the reference below! + * PyArray_SetBaseObject(arr, base) + */ + +static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) { + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1028 + * + * cdef inline void set_array_base(ndarray arr, object base) except *: + * Py_INCREF(base) # important to do this before stealing the reference below! 
# <<<<<<<<<<<<<< + * PyArray_SetBaseObject(arr, base) + * + */ + Py_INCREF(__pyx_v_base); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1029 + * cdef inline void set_array_base(ndarray arr, object base) except *: + * Py_INCREF(base) # important to do this before stealing the reference below! + * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<< + * + * cdef inline object get_array_base(ndarray arr): + */ + __pyx_t_1 = PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(1, 1029, __pyx_L1_error) + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1027 + * int _import_umath() except -1 + * + * cdef inline void set_array_base(ndarray arr, object base) except *: # <<<<<<<<<<<<<< + * Py_INCREF(base) # important to do this before stealing the reference below! 
+ * PyArray_SetBaseObject(arr, base) + */ + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("numpy.set_array_base", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_L0:; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1031 + * PyArray_SetBaseObject(arr, base) + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * base = PyArray_BASE(arr) + * if base is NULL: + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) { + PyObject *__pyx_v_base; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("get_array_base", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1032 + * + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) # <<<<<<<<<<<<<< + * if base is NULL: + * return None + */ + __pyx_v_base = PyArray_BASE(__pyx_v_arr); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1033 + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) + * if base is NULL: # <<<<<<<<<<<<<< + * return None + * return base + */ + __pyx_t_1 = (__pyx_v_base == NULL); + if (__pyx_t_1) { + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1034 + * base = PyArray_BASE(arr) + * if base is NULL: + * return None # <<<<<<<<<<<<<< + * return base + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + + /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1033 + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) + * if base is NULL: # <<<<<<<<<<<<<< + * return None + * return base + */ + } + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1035 + * if base is NULL: + * return None + * return base # <<<<<<<<<<<<<< + * + * # Versions of the import_* functions which are more suitable for + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject *)__pyx_v_base)); + __pyx_r = ((PyObject *)__pyx_v_base); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1031 + * PyArray_SetBaseObject(arr, base) + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * base = PyArray_BASE(arr) + * if base is NULL: + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1039 + * # Versions of the import_* functions which are more suitable for + * # Cython code. 
+ * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * __pyx_import_array() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_array", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1040 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1041 + * cdef inline int import_array() except -1: + * try: + * __pyx_import_array() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy._core.multiarray failed to import") + */ + __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1041, __pyx_L3_error) + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1040 + * # Cython code. 
+ * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1042 + * try: + * __pyx_import_array() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy._core.multiarray failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 1042, __pyx_L5_except_error) + __Pyx_XGOTREF(__pyx_t_5); + __Pyx_XGOTREF(__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_7); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1043 + * __pyx_import_array() + * except Exception: + * raise ImportError("numpy._core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_umath() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1043, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 1043, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1040 + * # Cython code. 
+ * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + __pyx_L5_except_error:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1039 + * # Versions of the import_* functions which are more suitable for + * # Cython code. + * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * __pyx_import_array() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1045 + * raise ImportError("numpy._core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_umath", 1); + + /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1046 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1047 + * cdef inline int import_umath() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy._core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1047, __pyx_L3_error) + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1046 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1048 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy._core.umath failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); + if 
(__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 1048, __pyx_L5_except_error) + __Pyx_XGOTREF(__pyx_t_5); + __Pyx_XGOTREF(__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_7); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1049 + * _import_umath() + * except Exception: + * raise ImportError("numpy._core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1049, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 1049, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1046 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __pyx_L5_except_error:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1045 + * raise ImportError("numpy._core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, 
__pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1051 + * raise ImportError("numpy._core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_ufunc", 1); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1052 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1053 + * cdef inline int import_ufunc() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy._core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1053, __pyx_L3_error) + + /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1052 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1054 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy._core.umath failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 1054, __pyx_L5_except_error) + __Pyx_XGOTREF(__pyx_t_5); + __Pyx_XGOTREF(__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_7); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1055 + * _import_umath() + * except Exception: + * raise ImportError("numpy._core.umath failed to import") # <<<<<<<<<<<<<< + * + * + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1055, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 1055, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + + /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1052 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __pyx_L5_except_error:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1051 + * raise ImportError("numpy._core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1058 + * + * + * cdef inline bint is_timedelta64_object(object obj) noexcept: # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.timedelta64)` + */ + +static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_obj) { + int __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1070 + * bool + * """ + * return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyObject_TypeCheck(__pyx_v_obj, 
(&PyTimedeltaArrType_Type)); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1058 + * + * + * cdef inline bint is_timedelta64_object(object obj) noexcept: # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.timedelta64)` + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1073 + * + * + * cdef inline bint is_datetime64_object(object obj) noexcept: # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.datetime64)` + */ + +static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_obj) { + int __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1085 + * bool + * """ + * return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyDatetimeArrType_Type)); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1073 + * + * + * cdef inline bint is_datetime64_object(object obj) noexcept: # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.datetime64)` + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1088 + * + * + * cdef inline npy_datetime get_datetime64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy 
datetime64 object + */ + +static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject *__pyx_v_obj) { + npy_datetime __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1095 + * also needed. That can be found using `get_datetime64_unit`. + * """ + * return (obj).obval # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = ((PyDatetimeScalarObject *)__pyx_v_obj)->obval; + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1088 + * + * + * cdef inline npy_datetime get_datetime64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy datetime64 object + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1098 + * + * + * cdef inline npy_timedelta get_timedelta64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy timedelta64 object + */ + +static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject *__pyx_v_obj) { + npy_timedelta __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1102 + * returns the int64 value underlying scalar numpy timedelta64 object + * """ + * return (obj).obval # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = ((PyTimedeltaScalarObject *)__pyx_v_obj)->obval; + goto __pyx_L0; + + /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1098 + * + * + * cdef inline npy_timedelta get_timedelta64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy timedelta64 object + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1105 + * + * + * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) noexcept nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. + */ + +static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit(PyObject *__pyx_v_obj) { + NPY_DATETIMEUNIT __pyx_r; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1109 + * returns the unit part of the dtype for a numpy datetime64 object. + * """ + * return (obj).obmeta.base # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = ((NPY_DATETIMEUNIT)((PyDatetimeScalarObject *)__pyx_v_obj)->obmeta.base); + goto __pyx_L0; + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1105 + * + * + * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) noexcept nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. 
+ */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "cpython/complex.pxd":19 + * + * @property + * cdef inline double real(self) noexcept: # <<<<<<<<<<<<<< + * return self.cval.real + * + */ + +static CYTHON_INLINE double __pyx_f_7cpython_7complex_7complex_4real_real(PyComplexObject *__pyx_v_self) { + double __pyx_r; + + /* "cpython/complex.pxd":20 + * @property + * cdef inline double real(self) noexcept: + * return self.cval.real # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = __pyx_v_self->cval.real; + goto __pyx_L0; + + /* "cpython/complex.pxd":19 + * + * @property + * cdef inline double real(self) noexcept: # <<<<<<<<<<<<<< + * return self.cval.real + * + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "cpython/complex.pxd":23 + * + * @property + * cdef inline double imag(self) noexcept: # <<<<<<<<<<<<<< + * return self.cval.imag + * + */ + +static CYTHON_INLINE double __pyx_f_7cpython_7complex_7complex_4imag_imag(PyComplexObject *__pyx_v_self) { + double __pyx_r; + + /* "cpython/complex.pxd":24 + * @property + * cdef inline double imag(self) noexcept: + * return self.cval.imag # <<<<<<<<<<<<<< + * + * # PyTypeObject PyComplex_Type + */ + __pyx_r = __pyx_v_self->cval.imag; + goto __pyx_L0; + + /* "cpython/complex.pxd":23 + * + * @property + * cdef inline double imag(self) noexcept: # <<<<<<<<<<<<<< + * return self.cval.imag + * + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "cpython/contextvars.pxd":112 + * + * + * cdef inline object get_value(var, default_value=None): # <<<<<<<<<<<<<< + * """Return a new reference to the value of the context variable, + * or the default value of the context variable, + */ + +static CYTHON_INLINE PyObject *__pyx_f_7cpython_11contextvars_get_value(PyObject *__pyx_v_var, struct __pyx_opt_args_7cpython_11contextvars_get_value *__pyx_optional_args) { + PyObject *__pyx_v_default_value = ((PyObject *)Py_None); + PyObject *__pyx_v_value; + PyObject 
*__pyx_v_pyvalue = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + int __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("get_value", 1); + if (__pyx_optional_args) { + if (__pyx_optional_args->__pyx_n > 0) { + __pyx_v_default_value = __pyx_optional_args->default_value; + } + } + + /* "cpython/contextvars.pxd":117 + * or None if no such value or default was found. + * """ + * cdef PyObject *value = NULL # <<<<<<<<<<<<<< + * PyContextVar_Get(var, NULL, &value) + * if value is NULL: + */ + __pyx_v_value = NULL; + + /* "cpython/contextvars.pxd":118 + * """ + * cdef PyObject *value = NULL + * PyContextVar_Get(var, NULL, &value) # <<<<<<<<<<<<<< + * if value is NULL: + * # context variable does not have a default + */ + __pyx_t_1 = PyContextVar_Get(__pyx_v_var, NULL, (&__pyx_v_value)); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(2, 118, __pyx_L1_error) + + /* "cpython/contextvars.pxd":119 + * cdef PyObject *value = NULL + * PyContextVar_Get(var, NULL, &value) + * if value is NULL: # <<<<<<<<<<<<<< + * # context variable does not have a default + * pyvalue = default_value + */ + __pyx_t_2 = (__pyx_v_value == NULL); + if (__pyx_t_2) { + + /* "cpython/contextvars.pxd":121 + * if value is NULL: + * # context variable does not have a default + * pyvalue = default_value # <<<<<<<<<<<<<< + * else: + * # value or default value of context variable + */ + __Pyx_INCREF(__pyx_v_default_value); + __pyx_v_pyvalue = __pyx_v_default_value; + + /* "cpython/contextvars.pxd":119 + * cdef PyObject *value = NULL + * PyContextVar_Get(var, NULL, &value) + * if value is NULL: # <<<<<<<<<<<<<< + * # context variable does not have a default + * pyvalue = default_value + */ + goto __pyx_L3; + } + + /* "cpython/contextvars.pxd":124 + * else: + * # value or default value of context variable + * pyvalue = value # <<<<<<<<<<<<<< + * Py_XDECREF(value) # 
PyContextVar_Get() returned an owned reference as 'PyObject*' + * return pyvalue + */ + /*else*/ { + __pyx_t_3 = ((PyObject *)__pyx_v_value); + __Pyx_INCREF(__pyx_t_3); + __pyx_v_pyvalue = __pyx_t_3; + __pyx_t_3 = 0; + + /* "cpython/contextvars.pxd":125 + * # value or default value of context variable + * pyvalue = value + * Py_XDECREF(value) # PyContextVar_Get() returned an owned reference as 'PyObject*' # <<<<<<<<<<<<<< + * return pyvalue + * + */ + Py_XDECREF(__pyx_v_value); + } + __pyx_L3:; + + /* "cpython/contextvars.pxd":126 + * pyvalue = value + * Py_XDECREF(value) # PyContextVar_Get() returned an owned reference as 'PyObject*' + * return pyvalue # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_pyvalue); + __pyx_r = __pyx_v_pyvalue; + goto __pyx_L0; + + /* "cpython/contextvars.pxd":112 + * + * + * cdef inline object get_value(var, default_value=None): # <<<<<<<<<<<<<< + * """Return a new reference to the value of the context variable, + * or the default value of the context variable, + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_3); + __Pyx_AddTraceback("cpython.contextvars.get_value", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_pyvalue); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "cpython/contextvars.pxd":129 + * + * + * cdef inline object get_value_no_default(var, default_value=None): # <<<<<<<<<<<<<< + * """Return a new reference to the value of the context variable, + * or the provided default value if no such value was found. 
+ */ + +static CYTHON_INLINE PyObject *__pyx_f_7cpython_11contextvars_get_value_no_default(PyObject *__pyx_v_var, struct __pyx_opt_args_7cpython_11contextvars_get_value_no_default *__pyx_optional_args) { + PyObject *__pyx_v_default_value = ((PyObject *)Py_None); + PyObject *__pyx_v_value; + PyObject *__pyx_v_pyvalue = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("get_value_no_default", 1); + if (__pyx_optional_args) { + if (__pyx_optional_args->__pyx_n > 0) { + __pyx_v_default_value = __pyx_optional_args->default_value; + } + } + + /* "cpython/contextvars.pxd":135 + * Ignores the default value of the context variable, if any. + * """ + * cdef PyObject *value = NULL # <<<<<<<<<<<<<< + * PyContextVar_Get(var, default_value, &value) + * # value of context variable or 'default_value' + */ + __pyx_v_value = NULL; + + /* "cpython/contextvars.pxd":136 + * """ + * cdef PyObject *value = NULL + * PyContextVar_Get(var, default_value, &value) # <<<<<<<<<<<<<< + * # value of context variable or 'default_value' + * pyvalue = value + */ + __pyx_t_1 = PyContextVar_Get(__pyx_v_var, ((PyObject *)__pyx_v_default_value), (&__pyx_v_value)); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(2, 136, __pyx_L1_error) + + /* "cpython/contextvars.pxd":138 + * PyContextVar_Get(var, default_value, &value) + * # value of context variable or 'default_value' + * pyvalue = value # <<<<<<<<<<<<<< + * Py_XDECREF(value) # PyContextVar_Get() returned an owned reference as 'PyObject*' + * return pyvalue + */ + __pyx_t_2 = ((PyObject *)__pyx_v_value); + __Pyx_INCREF(__pyx_t_2); + __pyx_v_pyvalue = __pyx_t_2; + __pyx_t_2 = 0; + + /* "cpython/contextvars.pxd":139 + * # value of context variable or 'default_value' + * pyvalue = value + * Py_XDECREF(value) # PyContextVar_Get() returned an owned reference as 'PyObject*' # 
<<<<<<<<<<<<<< + * return pyvalue + */ + Py_XDECREF(__pyx_v_value); + + /* "cpython/contextvars.pxd":140 + * pyvalue = value + * Py_XDECREF(value) # PyContextVar_Get() returned an owned reference as 'PyObject*' + * return pyvalue # <<<<<<<<<<<<<< + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_pyvalue); + __pyx_r = __pyx_v_pyvalue; + goto __pyx_L0; + + /* "cpython/contextvars.pxd":129 + * + * + * cdef inline object get_value_no_default(var, default_value=None): # <<<<<<<<<<<<<< + * """Return a new reference to the value of the context variable, + * or the provided default value if no such value was found. + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("cpython.contextvars.get_value_no_default", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_pyvalue); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "array.pxd":104 + * __data_union data + * + * def __getbuffer__(self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< + * # This implementation of getbuffer is geared towards Cython + * # requirements, and does not yet fulfill the PEP. 
+ */ + +/* Python wrapper */ +CYTHON_UNUSED static int __pyx_pw_7cpython_5array_5array_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/ +CYTHON_UNUSED static int __pyx_pw_7cpython_5array_5array_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_7cpython_5array_5array___getbuffer__(((arrayobject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_7cpython_5array_5array___getbuffer__(arrayobject *__pyx_v_self, Py_buffer *__pyx_v_info, CYTHON_UNUSED int __pyx_v_flags) { + PyObject *__pyx_v_item_count = NULL; + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + char *__pyx_t_2; + int __pyx_t_3; + PyObject *__pyx_t_4 = NULL; + Py_ssize_t __pyx_t_5; + int __pyx_t_6; + char __pyx_t_7; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + if (unlikely(__pyx_v_info == NULL)) { + PyErr_SetString(PyExc_BufferError, "PyObject_GetBuffer: view==NULL argument is obsolete"); + return -1; + } + __Pyx_RefNannySetupContext("__getbuffer__", 0); + __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None); + __Pyx_GIVEREF(__pyx_v_info->obj); + + /* "array.pxd":109 + * # In particular strided access is always provided regardless + * # of flags + * item_count = Py_SIZE(self) # <<<<<<<<<<<<<< + * + * info.suboffsets = NULL + */ + __pyx_t_1 = PyInt_FromSsize_t(Py_SIZE(((PyObject *)__pyx_v_self))); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 109, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_item_count = __pyx_t_1; + __pyx_t_1 = 0; + + /* "array.pxd":111 + * item_count = Py_SIZE(self) + * + * 
info.suboffsets = NULL # <<<<<<<<<<<<<< + * info.buf = self.data.as_chars + * info.readonly = 0 + */ + __pyx_v_info->suboffsets = NULL; + + /* "array.pxd":112 + * + * info.suboffsets = NULL + * info.buf = self.data.as_chars # <<<<<<<<<<<<<< + * info.readonly = 0 + * info.ndim = 1 + */ + __pyx_t_2 = __pyx_v_self->data.as_chars; + __pyx_v_info->buf = __pyx_t_2; + + /* "array.pxd":113 + * info.suboffsets = NULL + * info.buf = self.data.as_chars + * info.readonly = 0 # <<<<<<<<<<<<<< + * info.ndim = 1 + * info.itemsize = self.ob_descr.itemsize # e.g. sizeof(float) + */ + __pyx_v_info->readonly = 0; + + /* "array.pxd":114 + * info.buf = self.data.as_chars + * info.readonly = 0 + * info.ndim = 1 # <<<<<<<<<<<<<< + * info.itemsize = self.ob_descr.itemsize # e.g. sizeof(float) + * info.len = info.itemsize * item_count + */ + __pyx_v_info->ndim = 1; + + /* "array.pxd":115 + * info.readonly = 0 + * info.ndim = 1 + * info.itemsize = self.ob_descr.itemsize # e.g. sizeof(float) # <<<<<<<<<<<<<< + * info.len = info.itemsize * item_count + * + */ + __pyx_t_3 = __pyx_v_self->ob_descr->itemsize; + __pyx_v_info->itemsize = __pyx_t_3; + + /* "array.pxd":116 + * info.ndim = 1 + * info.itemsize = self.ob_descr.itemsize # e.g. 
sizeof(float) + * info.len = info.itemsize * item_count # <<<<<<<<<<<<<< + * + * info.shape = PyObject_Malloc(sizeof(Py_ssize_t) + 2) + */ + __pyx_t_1 = PyInt_FromSsize_t(__pyx_v_info->itemsize); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 116, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_4 = PyNumber_Multiply(__pyx_t_1, __pyx_v_item_count); if (unlikely(!__pyx_t_4)) __PYX_ERR(3, 116, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_5 = __Pyx_PyIndex_AsSsize_t(__pyx_t_4); if (unlikely((__pyx_t_5 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(3, 116, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_v_info->len = __pyx_t_5; + + /* "array.pxd":118 + * info.len = info.itemsize * item_count + * + * info.shape = PyObject_Malloc(sizeof(Py_ssize_t) + 2) # <<<<<<<<<<<<<< + * if not info.shape: + * raise MemoryError() + */ + __pyx_v_info->shape = ((Py_ssize_t *)PyObject_Malloc(((sizeof(Py_ssize_t)) + 2))); + + /* "array.pxd":119 + * + * info.shape = PyObject_Malloc(sizeof(Py_ssize_t) + 2) + * if not info.shape: # <<<<<<<<<<<<<< + * raise MemoryError() + * info.shape[0] = item_count # constant regardless of resizing + */ + __pyx_t_6 = (!(__pyx_v_info->shape != 0)); + if (unlikely(__pyx_t_6)) { + + /* "array.pxd":120 + * info.shape = PyObject_Malloc(sizeof(Py_ssize_t) + 2) + * if not info.shape: + * raise MemoryError() # <<<<<<<<<<<<<< + * info.shape[0] = item_count # constant regardless of resizing + * info.strides = &info.itemsize + */ + PyErr_NoMemory(); __PYX_ERR(3, 120, __pyx_L1_error) + + /* "array.pxd":119 + * + * info.shape = PyObject_Malloc(sizeof(Py_ssize_t) + 2) + * if not info.shape: # <<<<<<<<<<<<<< + * raise MemoryError() + * info.shape[0] = item_count # constant regardless of resizing + */ + } + + /* "array.pxd":121 + * if not info.shape: + * raise MemoryError() + * info.shape[0] = item_count # constant regardless of resizing # <<<<<<<<<<<<<< + * info.strides = &info.itemsize + * + */ + 
__pyx_t_5 = __Pyx_PyIndex_AsSsize_t(__pyx_v_item_count); if (unlikely((__pyx_t_5 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(3, 121, __pyx_L1_error) + (__pyx_v_info->shape[0]) = __pyx_t_5; + + /* "array.pxd":122 + * raise MemoryError() + * info.shape[0] = item_count # constant regardless of resizing + * info.strides = &info.itemsize # <<<<<<<<<<<<<< + * + * info.format = (info.shape + 1) + */ + __pyx_v_info->strides = (&__pyx_v_info->itemsize); + + /* "array.pxd":124 + * info.strides = &info.itemsize + * + * info.format = (info.shape + 1) # <<<<<<<<<<<<<< + * info.format[0] = self.ob_descr.typecode + * info.format[1] = 0 + */ + __pyx_v_info->format = ((char *)(__pyx_v_info->shape + 1)); + + /* "array.pxd":125 + * + * info.format = (info.shape + 1) + * info.format[0] = self.ob_descr.typecode # <<<<<<<<<<<<<< + * info.format[1] = 0 + * info.obj = self + */ + __pyx_t_7 = __pyx_v_self->ob_descr->typecode; + (__pyx_v_info->format[0]) = __pyx_t_7; + + /* "array.pxd":126 + * info.format = (info.shape + 1) + * info.format[0] = self.ob_descr.typecode + * info.format[1] = 0 # <<<<<<<<<<<<<< + * info.obj = self + * + */ + (__pyx_v_info->format[1]) = 0; + + /* "array.pxd":127 + * info.format[0] = self.ob_descr.typecode + * info.format[1] = 0 + * info.obj = self # <<<<<<<<<<<<<< + * + * def __releasebuffer__(self, Py_buffer* info): + */ + __Pyx_INCREF((PyObject *)__pyx_v_self); + __Pyx_GIVEREF((PyObject *)__pyx_v_self); + __Pyx_GOTREF(__pyx_v_info->obj); + __Pyx_DECREF(__pyx_v_info->obj); + __pyx_v_info->obj = ((PyObject *)__pyx_v_self); + + /* "array.pxd":104 + * __data_union data + * + * def __getbuffer__(self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< + * # This implementation of getbuffer is geared towards Cython + * # requirements, and does not yet fulfill the PEP. 
+ */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_AddTraceback("cpython.array.array.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + if (__pyx_v_info->obj != NULL) { + __Pyx_GOTREF(__pyx_v_info->obj); + __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0; + } + goto __pyx_L2; + __pyx_L0:; + if (__pyx_v_info->obj == Py_None) { + __Pyx_GOTREF(__pyx_v_info->obj); + __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0; + } + __pyx_L2:; + __Pyx_XDECREF(__pyx_v_item_count); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "array.pxd":129 + * info.obj = self + * + * def __releasebuffer__(self, Py_buffer* info): # <<<<<<<<<<<<<< + * PyObject_Free(info.shape) + * + */ + +/* Python wrapper */ +CYTHON_UNUSED static void __pyx_pw_7cpython_5array_5array_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info); /*proto*/ +CYTHON_UNUSED static void __pyx_pw_7cpython_5array_5array_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__releasebuffer__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_pf_7cpython_5array_5array_2__releasebuffer__(((arrayobject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); +} + +static void __pyx_pf_7cpython_5array_5array_2__releasebuffer__(CYTHON_UNUSED arrayobject *__pyx_v_self, Py_buffer *__pyx_v_info) { + + /* "array.pxd":130 + * + * def __releasebuffer__(self, Py_buffer* info): + * PyObject_Free(info.shape) # <<<<<<<<<<<<<< + * + * array newarrayobject(PyTypeObject* type, Py_ssize_t size, arraydescr *descr) + */ + PyObject_Free(__pyx_v_info->shape); + + /* "array.pxd":129 + * info.obj = self + * + * def __releasebuffer__(self, Py_buffer* info): # <<<<<<<<<<<<<< + * 
PyObject_Free(info.shape) + * + */ + + /* function exit code */ +} + +/* "array.pxd":141 + * + * + * cdef inline array clone(array template, Py_ssize_t length, bint zero): # <<<<<<<<<<<<<< + * """ fast creation of a new array, given a template array. + * type will be same as template. + */ + +static CYTHON_INLINE arrayobject *__pyx_f_7cpython_5array_clone(arrayobject *__pyx_v_template, Py_ssize_t __pyx_v_length, int __pyx_v_zero) { + arrayobject *__pyx_v_op = 0; + arrayobject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_t_2; + int __pyx_t_3; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("clone", 1); + + /* "array.pxd":145 + * type will be same as template. + * if zero is true, new array will be initialized with zeroes.""" + * cdef array op = newarrayobject(Py_TYPE(template), length, template.ob_descr) # <<<<<<<<<<<<<< + * if zero and op is not None: + * memset(op.data.as_chars, 0, length * op.ob_descr.itemsize) + */ + __pyx_t_1 = ((PyObject *)newarrayobject(Py_TYPE(((PyObject *)__pyx_v_template)), __pyx_v_length, __pyx_v_template->ob_descr)); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_op = ((arrayobject *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "array.pxd":146 + * if zero is true, new array will be initialized with zeroes.""" + * cdef array op = newarrayobject(Py_TYPE(template), length, template.ob_descr) + * if zero and op is not None: # <<<<<<<<<<<<<< + * memset(op.data.as_chars, 0, length * op.ob_descr.itemsize) + * return op + */ + if (__pyx_v_zero) { + } else { + __pyx_t_2 = __pyx_v_zero; + goto __pyx_L4_bool_binop_done; + } + __pyx_t_3 = (((PyObject *)__pyx_v_op) != Py_None); + __pyx_t_2 = __pyx_t_3; + __pyx_L4_bool_binop_done:; + if (__pyx_t_2) { + + /* "array.pxd":147 + * cdef array op = newarrayobject(Py_TYPE(template), length, template.ob_descr) + * if zero and op is not None: + * 
memset(op.data.as_chars, 0, length * op.ob_descr.itemsize) # <<<<<<<<<<<<<< + * return op + * + */ + (void)(memset(__pyx_v_op->data.as_chars, 0, (__pyx_v_length * __pyx_v_op->ob_descr->itemsize))); + + /* "array.pxd":146 + * if zero is true, new array will be initialized with zeroes.""" + * cdef array op = newarrayobject(Py_TYPE(template), length, template.ob_descr) + * if zero and op is not None: # <<<<<<<<<<<<<< + * memset(op.data.as_chars, 0, length * op.ob_descr.itemsize) + * return op + */ + } + + /* "array.pxd":148 + * if zero and op is not None: + * memset(op.data.as_chars, 0, length * op.ob_descr.itemsize) + * return op # <<<<<<<<<<<<<< + * + * cdef inline array copy(array self): + */ + __Pyx_XDECREF((PyObject *)__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_op); + __pyx_r = __pyx_v_op; + goto __pyx_L0; + + /* "array.pxd":141 + * + * + * cdef inline array clone(array template, Py_ssize_t length, bint zero): # <<<<<<<<<<<<<< + * """ fast creation of a new array, given a template array. + * type will be same as template. + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("cpython.array.clone", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF((PyObject *)__pyx_v_op); + __Pyx_XGIVEREF((PyObject *)__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "array.pxd":150 + * return op + * + * cdef inline array copy(array self): # <<<<<<<<<<<<<< + * """ make a copy of an array. 
""" + * cdef array op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr) + */ + +static CYTHON_INLINE arrayobject *__pyx_f_7cpython_5array_copy(arrayobject *__pyx_v_self) { + arrayobject *__pyx_v_op = 0; + arrayobject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("copy", 1); + + /* "array.pxd":152 + * cdef inline array copy(array self): + * """ make a copy of an array. """ + * cdef array op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr) # <<<<<<<<<<<<<< + * memcpy(op.data.as_chars, self.data.as_chars, Py_SIZE(op) * op.ob_descr.itemsize) + * return op + */ + __pyx_t_1 = ((PyObject *)newarrayobject(Py_TYPE(((PyObject *)__pyx_v_self)), Py_SIZE(((PyObject *)__pyx_v_self)), __pyx_v_self->ob_descr)); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 152, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_op = ((arrayobject *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "array.pxd":153 + * """ make a copy of an array. """ + * cdef array op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr) + * memcpy(op.data.as_chars, self.data.as_chars, Py_SIZE(op) * op.ob_descr.itemsize) # <<<<<<<<<<<<<< + * return op + * + */ + (void)(memcpy(__pyx_v_op->data.as_chars, __pyx_v_self->data.as_chars, (Py_SIZE(((PyObject *)__pyx_v_op)) * __pyx_v_op->ob_descr->itemsize))); + + /* "array.pxd":154 + * cdef array op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr) + * memcpy(op.data.as_chars, self.data.as_chars, Py_SIZE(op) * op.ob_descr.itemsize) + * return op # <<<<<<<<<<<<<< + * + * cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n) except -1: + */ + __Pyx_XDECREF((PyObject *)__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_op); + __pyx_r = __pyx_v_op; + goto __pyx_L0; + + /* "array.pxd":150 + * return op + * + * cdef inline array copy(array self): # <<<<<<<<<<<<<< + * """ make a copy of an array. 
""" + * cdef array op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr) + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("cpython.array.copy", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF((PyObject *)__pyx_v_op); + __Pyx_XGIVEREF((PyObject *)__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "array.pxd":156 + * return op + * + * cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n) except -1: # <<<<<<<<<<<<<< + * """ efficient appending of new stuff of same type + * (e.g. of same array type) + */ + +static CYTHON_INLINE int __pyx_f_7cpython_5array_extend_buffer(arrayobject *__pyx_v_self, char *__pyx_v_stuff, Py_ssize_t __pyx_v_n) { + Py_ssize_t __pyx_v_itemsize; + Py_ssize_t __pyx_v_origsize; + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + + /* "array.pxd":160 + * (e.g. of same array type) + * n: number of elements (not number of bytes!) """ + * cdef Py_ssize_t itemsize = self.ob_descr.itemsize # <<<<<<<<<<<<<< + * cdef Py_ssize_t origsize = Py_SIZE(self) + * resize_smart(self, origsize + n) + */ + __pyx_t_1 = __pyx_v_self->ob_descr->itemsize; + __pyx_v_itemsize = __pyx_t_1; + + /* "array.pxd":161 + * n: number of elements (not number of bytes!) 
""" + * cdef Py_ssize_t itemsize = self.ob_descr.itemsize + * cdef Py_ssize_t origsize = Py_SIZE(self) # <<<<<<<<<<<<<< + * resize_smart(self, origsize + n) + * memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize) + */ + __pyx_v_origsize = Py_SIZE(((PyObject *)__pyx_v_self)); + + /* "array.pxd":162 + * cdef Py_ssize_t itemsize = self.ob_descr.itemsize + * cdef Py_ssize_t origsize = Py_SIZE(self) + * resize_smart(self, origsize + n) # <<<<<<<<<<<<<< + * memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize) + * return 0 + */ + __pyx_t_1 = resize_smart(__pyx_v_self, (__pyx_v_origsize + __pyx_v_n)); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(3, 162, __pyx_L1_error) + + /* "array.pxd":163 + * cdef Py_ssize_t origsize = Py_SIZE(self) + * resize_smart(self, origsize + n) + * memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize) # <<<<<<<<<<<<<< + * return 0 + * + */ + (void)(memcpy((__pyx_v_self->data.as_chars + (__pyx_v_origsize * __pyx_v_itemsize)), __pyx_v_stuff, (__pyx_v_n * __pyx_v_itemsize))); + + /* "array.pxd":164 + * resize_smart(self, origsize + n) + * memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize) + * return 0 # <<<<<<<<<<<<<< + * + * cdef inline int extend(array self, array other) except -1: + */ + __pyx_r = 0; + goto __pyx_L0; + + /* "array.pxd":156 + * return op + * + * cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n) except -1: # <<<<<<<<<<<<<< + * """ efficient appending of new stuff of same type + * (e.g. of same array type) + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("cpython.array.extend_buffer", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "array.pxd":166 + * return 0 + * + * cdef inline int extend(array self, array other) except -1: # <<<<<<<<<<<<<< + * """ extend array with data from another array; types must match. 
""" + * if self.ob_descr.typecode != other.ob_descr.typecode: + */ + +static CYTHON_INLINE int __pyx_f_7cpython_5array_extend(arrayobject *__pyx_v_self, arrayobject *__pyx_v_other) { + int __pyx_r; + int __pyx_t_1; + int __pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + + /* "array.pxd":168 + * cdef inline int extend(array self, array other) except -1: + * """ extend array with data from another array; types must match. """ + * if self.ob_descr.typecode != other.ob_descr.typecode: # <<<<<<<<<<<<<< + * PyErr_BadArgument() + * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) + */ + __pyx_t_1 = (__pyx_v_self->ob_descr->typecode != __pyx_v_other->ob_descr->typecode); + if (__pyx_t_1) { + + /* "array.pxd":169 + * """ extend array with data from another array; types must match. """ + * if self.ob_descr.typecode != other.ob_descr.typecode: + * PyErr_BadArgument() # <<<<<<<<<<<<<< + * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) + * + */ + __pyx_t_2 = PyErr_BadArgument(); if (unlikely(__pyx_t_2 == ((int)0))) __PYX_ERR(3, 169, __pyx_L1_error) + + /* "array.pxd":168 + * cdef inline int extend(array self, array other) except -1: + * """ extend array with data from another array; types must match. 
""" + * if self.ob_descr.typecode != other.ob_descr.typecode: # <<<<<<<<<<<<<< + * PyErr_BadArgument() + * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) + */ + } + + /* "array.pxd":170 + * if self.ob_descr.typecode != other.ob_descr.typecode: + * PyErr_BadArgument() + * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) # <<<<<<<<<<<<<< + * + * cdef inline void zero(array self) noexcept: + */ + __pyx_t_2 = __pyx_f_7cpython_5array_extend_buffer(__pyx_v_self, __pyx_v_other->data.as_chars, Py_SIZE(((PyObject *)__pyx_v_other))); if (unlikely(__pyx_t_2 == ((int)-1))) __PYX_ERR(3, 170, __pyx_L1_error) + __pyx_r = __pyx_t_2; + goto __pyx_L0; + + /* "array.pxd":166 + * return 0 + * + * cdef inline int extend(array self, array other) except -1: # <<<<<<<<<<<<<< + * """ extend array with data from another array; types must match. """ + * if self.ob_descr.typecode != other.ob_descr.typecode: + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("cpython.array.extend", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "array.pxd":172 + * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) + * + * cdef inline void zero(array self) noexcept: # <<<<<<<<<<<<<< + * """ set all elements of array to zero. """ + * memset(self.data.as_chars, 0, Py_SIZE(self) * self.ob_descr.itemsize) + */ + +static CYTHON_INLINE void __pyx_f_7cpython_5array_zero(arrayobject *__pyx_v_self) { + + /* "array.pxd":174 + * cdef inline void zero(array self) noexcept: + * """ set all elements of array to zero. 
""" + * memset(self.data.as_chars, 0, Py_SIZE(self) * self.ob_descr.itemsize) # <<<<<<<<<<<<<< + */ + (void)(memset(__pyx_v_self->data.as_chars, 0, (Py_SIZE(((PyObject *)__pyx_v_self)) * __pyx_v_self->ob_descr->itemsize))); + + /* "array.pxd":172 + * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) + * + * cdef inline void zero(array self) noexcept: # <<<<<<<<<<<<<< + * """ set all elements of array to zero. """ + * memset(self.data.as_chars, 0, Py_SIZE(self) * self.ob_descr.itemsize) + */ + + /* function exit code */ +} + +/* "jcvi/assembly/chic.pyx":34 + * + * + * def score_evaluate_M(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=2] tour_M=None): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_8assembly_4chic_1score_evaluate_M(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_4jcvi_8assembly_4chic_1score_evaluate_M = {"score_evaluate_M", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_8assembly_4chic_1score_evaluate_M, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_4jcvi_8assembly_4chic_1score_evaluate_M(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + arrayobject *__pyx_v_tour = 0; + PyArrayObject *__pyx_v_tour_sizes = 0; + PyArrayObject *__pyx_v_tour_M = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[3] = {0,0,0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("score_evaluate_M (wrapper)", 0); + 
#if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_tour,&__pyx_n_s_tour_sizes,&__pyx_n_s_tour_M,0}; + + /* "jcvi/assembly/chic.pyx":35 + * + * def score_evaluate_M(array.array[int] tour, + * np.ndarray[INT, ndim=1] tour_sizes=None, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=2] tour_M=None): + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] + */ + values[1] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); + + /* "jcvi/assembly/chic.pyx":36 + * def score_evaluate_M(array.array[int] tour, + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=2] tour_M=None): # <<<<<<<<<<<<<< + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] + * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - sizes_oo // 2 + */ + values[2] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 34, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (kw_args > 0) { + PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, 
__pyx_n_s_tour_sizes); + if (value) { values[1] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 34, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (kw_args > 0) { + PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour_M); + if (value) { values[2] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 34, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "score_evaluate_M") < 0)) __PYX_ERR(0, 34, __pyx_L3_error) + } + } else { + switch (__pyx_nargs) { + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } + } + __pyx_v_tour = ((arrayobject *)values[0]); + __pyx_v_tour_sizes = ((PyArrayObject *)values[1]); + __pyx_v_tour_M = ((PyArrayObject *)values[2]); + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("score_evaluate_M", 0, 1, 3, __pyx_nargs); __PYX_ERR(0, 34, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_M", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour), __pyx_ptype_7cpython_5array_array, 1, "tour", 0))) __PYX_ERR(0, 34, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject 
*)__pyx_v_tour_sizes), __pyx_ptype_5numpy_ndarray, 1, "tour_sizes", 0))) __PYX_ERR(0, 35, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour_M), __pyx_ptype_5numpy_ndarray, 1, "tour_M", 0))) __PYX_ERR(0, 36, __pyx_L1_error) + __pyx_r = __pyx_pf_4jcvi_8assembly_4chic_score_evaluate_M(__pyx_self, __pyx_v_tour, __pyx_v_tour_sizes, __pyx_v_tour_M); + + /* "jcvi/assembly/chic.pyx":34 + * + * + * def score_evaluate_M(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=2] tour_M=None): + */ + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_8assembly_4chic_score_evaluate_M(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_M) { + PyArrayObject *__pyx_v_sizes_oo = 0; + PyArrayObject *__pyx_v_sizes_cum = 0; + double __pyx_v_s; + int __pyx_v_size; + int __pyx_v_a; + int __pyx_v_b; + int __pyx_v_ia; + int __pyx_v_ib; + int __pyx_v_links; + double __pyx_v_dist; + __Pyx_LocalBuf_ND __pyx_pybuffernd_sizes_cum; + __Pyx_Buffer __pyx_pybuffer_sizes_cum; + __Pyx_LocalBuf_ND __pyx_pybuffernd_sizes_oo; + __Pyx_Buffer __pyx_pybuffer_sizes_oo; + __Pyx_LocalBuf_ND __pyx_pybuffernd_tour; + __Pyx_Buffer __pyx_pybuffer_tour; + __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_M; + __Pyx_Buffer __pyx_pybuffer_tour_M; + __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_sizes; + __Pyx_Buffer __pyx_pybuffer_tour_sizes; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyArrayObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + unsigned int __pyx_t_5; + PyArrayObject 
*__pyx_t_6 = NULL; + Py_ssize_t __pyx_t_7; + int __pyx_t_8; + int __pyx_t_9; + int __pyx_t_10; + Py_ssize_t __pyx_t_11; + int __pyx_t_12; + int __pyx_t_13; + int __pyx_t_14; + Py_ssize_t __pyx_t_15; + int __pyx_t_16; + int __pyx_t_17; + double __pyx_t_18; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("score_evaluate_M", 1); + __pyx_pybuffer_sizes_oo.pybuffer.buf = NULL; + __pyx_pybuffer_sizes_oo.refcount = 0; + __pyx_pybuffernd_sizes_oo.data = NULL; + __pyx_pybuffernd_sizes_oo.rcbuffer = &__pyx_pybuffer_sizes_oo; + __pyx_pybuffer_sizes_cum.pybuffer.buf = NULL; + __pyx_pybuffer_sizes_cum.refcount = 0; + __pyx_pybuffernd_sizes_cum.data = NULL; + __pyx_pybuffernd_sizes_cum.rcbuffer = &__pyx_pybuffer_sizes_cum; + __pyx_pybuffer_tour.pybuffer.buf = NULL; + __pyx_pybuffer_tour.refcount = 0; + __pyx_pybuffernd_tour.data = NULL; + __pyx_pybuffernd_tour.rcbuffer = &__pyx_pybuffer_tour; + __pyx_pybuffer_tour_sizes.pybuffer.buf = NULL; + __pyx_pybuffer_tour_sizes.refcount = 0; + __pyx_pybuffernd_tour_sizes.data = NULL; + __pyx_pybuffernd_tour_sizes.rcbuffer = &__pyx_pybuffer_tour_sizes; + __pyx_pybuffer_tour_M.pybuffer.buf = NULL; + __pyx_pybuffer_tour_M.refcount = 0; + __pyx_pybuffernd_tour_M.data = NULL; + __pyx_pybuffernd_tour_M.rcbuffer = &__pyx_pybuffer_tour_M; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour, &__Pyx_TypeInfo_int, PyBUF_FORMAT| PyBUF_INDIRECT, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 34, __pyx_L1_error) + } + __pyx_pybuffernd_tour.diminfo[0].strides = __pyx_pybuffernd_tour.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour.diminfo[0].shape = __pyx_pybuffernd_tour.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour.diminfo[0].suboffsets = __pyx_pybuffernd_tour.rcbuffer->pybuffer.suboffsets[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if 
(unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_sizes, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 34, __pyx_L1_error) + } + __pyx_pybuffernd_tour_sizes.diminfo[0].strides = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_sizes.diminfo[0].shape = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.shape[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_M.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_M, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 34, __pyx_L1_error) + } + __pyx_pybuffernd_tour_M.diminfo[0].strides = __pyx_pybuffernd_tour_M.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_M.diminfo[0].shape = __pyx_pybuffernd_tour_M.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour_M.diminfo[1].strides = __pyx_pybuffernd_tour_M.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_tour_M.diminfo[1].shape = __pyx_pybuffernd_tour_M.rcbuffer->pybuffer.shape[1]; + + /* "jcvi/assembly/chic.pyx":37 + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=2] tour_M=None): + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] # <<<<<<<<<<<<<< + * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - sizes_oo // 2 + * + */ + __pyx_t_1 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_tour_sizes), ((PyObject *)__pyx_v_tour)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 37, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 37, __pyx_L1_error) + __pyx_t_2 = ((PyArrayObject *)__pyx_t_1); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer, (PyObject*)__pyx_t_2, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, 
__pyx_stack) == -1)) { + __pyx_v_sizes_oo = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 37, __pyx_L1_error) + } else {__pyx_pybuffernd_sizes_oo.diminfo[0].strides = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_oo.diminfo[0].shape = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_2 = 0; + __pyx_v_sizes_oo = ((PyArrayObject *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/assembly/chic.pyx":38 + * np.ndarray[INT, ndim=2] tour_M=None): + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] + * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - sizes_oo // 2 # <<<<<<<<<<<<<< + * + * cdef double s = 0.0 + */ + __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 38, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cumsum); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 38, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = NULL; + __pyx_t_5 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_4))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_4, function); + __pyx_t_5 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_3, ((PyObject *)__pyx_v_sizes_oo)}; + __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5); + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 38, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + } + __pyx_t_4 = PyNumber_FloorDivide(((PyObject *)__pyx_v_sizes_oo), __pyx_int_2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 38, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = 
PyNumber_Subtract(__pyx_t_1, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 38, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 38, __pyx_L1_error) + __pyx_t_6 = ((PyArrayObject *)__pyx_t_3); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + __pyx_v_sizes_cum = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 38, __pyx_L1_error) + } else {__pyx_pybuffernd_sizes_cum.diminfo[0].strides = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_cum.diminfo[0].shape = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_6 = 0; + __pyx_v_sizes_cum = ((PyArrayObject *)__pyx_t_3); + __pyx_t_3 = 0; + + /* "jcvi/assembly/chic.pyx":40 + * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - sizes_oo // 2 + * + * cdef double s = 0.0 # <<<<<<<<<<<<<< + * cdef int size = len(tour) + * cdef int a, b, ia, ib + */ + __pyx_v_s = 0.0; + + /* "jcvi/assembly/chic.pyx":41 + * + * cdef double s = 0.0 + * cdef int size = len(tour) # <<<<<<<<<<<<<< + * cdef int a, b, ia, ib + * cdef int links + */ + if (unlikely(((PyObject *)__pyx_v_tour) == Py_None)) { + PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); + __PYX_ERR(0, 41, __pyx_L1_error) + } + __pyx_t_7 = Py_SIZE(((PyObject *)__pyx_v_tour)); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 41, __pyx_L1_error) + __pyx_v_size = __pyx_t_7; + + /* "jcvi/assembly/chic.pyx":45 + * cdef int links + * cdef double dist + * for ia in range(size): # <<<<<<<<<<<<<< + * a = tour[ia] + * for ib in 
range(ia + 1, size): + */ + __pyx_t_8 = __pyx_v_size; + __pyx_t_9 = __pyx_t_8; + for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) { + __pyx_v_ia = __pyx_t_10; + + /* "jcvi/assembly/chic.pyx":46 + * cdef double dist + * for ia in range(size): + * a = tour[ia] # <<<<<<<<<<<<<< + * for ib in range(ia + 1, size): + * b = tour[ib] + */ + __pyx_t_11 = __pyx_v_ia; + __pyx_v_a = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); + + /* "jcvi/assembly/chic.pyx":47 + * for ia in range(size): + * a = tour[ia] + * for ib in range(ia + 1, size): # <<<<<<<<<<<<<< + * b = tour[ib] + * links = tour_M[a, b] + */ + __pyx_t_12 = __pyx_v_size; + __pyx_t_13 = __pyx_t_12; + for (__pyx_t_14 = (__pyx_v_ia + 1); __pyx_t_14 < __pyx_t_13; __pyx_t_14+=1) { + __pyx_v_ib = __pyx_t_14; + + /* "jcvi/assembly/chic.pyx":48 + * a = tour[ia] + * for ib in range(ia + 1, size): + * b = tour[ib] # <<<<<<<<<<<<<< + * links = tour_M[a, b] + * if links == 0: + */ + __pyx_t_11 = __pyx_v_ib; + __pyx_v_b = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); + + /* "jcvi/assembly/chic.pyx":49 + * for ib in range(ia + 1, size): + * b = tour[ib] + * links = tour_M[a, b] # <<<<<<<<<<<<<< + * if links == 0: + * continue + */ + __pyx_t_11 = __pyx_v_a; + __pyx_t_15 = __pyx_v_b; + __pyx_t_3 = (PyObject *) *__Pyx_BufPtrStrided2d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_tour_M.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour_M.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_tour_M.diminfo[1].strides); + if (unlikely(__pyx_t_3 == NULL)) __pyx_t_3 = Py_None; + __Pyx_INCREF((PyObject*)__pyx_t_3); + __pyx_t_16 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_16 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 49, __pyx_L1_error) + 
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_links = __pyx_t_16; + + /* "jcvi/assembly/chic.pyx":50 + * b = tour[ib] + * links = tour_M[a, b] + * if links == 0: # <<<<<<<<<<<<<< + * continue + * dist = sizes_cum[ib] - sizes_cum[ia] + */ + __pyx_t_17 = (__pyx_v_links == 0); + if (__pyx_t_17) { + + /* "jcvi/assembly/chic.pyx":51 + * links = tour_M[a, b] + * if links == 0: + * continue # <<<<<<<<<<<<<< + * dist = sizes_cum[ib] - sizes_cum[ia] + * if dist > LIMIT: + */ + goto __pyx_L5_continue; + + /* "jcvi/assembly/chic.pyx":50 + * b = tour[ib] + * links = tour_M[a, b] + * if links == 0: # <<<<<<<<<<<<<< + * continue + * dist = sizes_cum[ib] - sizes_cum[ia] + */ + } + + /* "jcvi/assembly/chic.pyx":52 + * if links == 0: + * continue + * dist = sizes_cum[ib] - sizes_cum[ia] # <<<<<<<<<<<<<< + * if dist > LIMIT: + * break + */ + __pyx_t_15 = __pyx_v_ib; + __pyx_t_3 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_15, __pyx_pybuffernd_sizes_cum.diminfo[0].strides); + if (unlikely(__pyx_t_3 == NULL)) __pyx_t_3 = Py_None; + __Pyx_INCREF((PyObject*)__pyx_t_3); + __pyx_t_15 = __pyx_v_ia; + __pyx_t_4 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_15, __pyx_pybuffernd_sizes_cum.diminfo[0].strides); + if (unlikely(__pyx_t_4 == NULL)) __pyx_t_4 = Py_None; + __Pyx_INCREF((PyObject*)__pyx_t_4); + __pyx_t_1 = PyNumber_Subtract(__pyx_t_3, __pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 52, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_18 = __pyx_PyFloat_AsDouble(__pyx_t_1); if (unlikely((__pyx_t_18 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 52, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_dist = __pyx_t_18; + + /* "jcvi/assembly/chic.pyx":53 + * continue + * dist = sizes_cum[ib] - 
sizes_cum[ia] + * if dist > LIMIT: # <<<<<<<<<<<<<< + * break + * s += links / dist + */ + __pyx_t_17 = (__pyx_v_dist > 10000000.0); + if (__pyx_t_17) { + + /* "jcvi/assembly/chic.pyx":54 + * dist = sizes_cum[ib] - sizes_cum[ia] + * if dist > LIMIT: + * break # <<<<<<<<<<<<<< + * s += links / dist + * return s, + */ + goto __pyx_L6_break; + + /* "jcvi/assembly/chic.pyx":53 + * continue + * dist = sizes_cum[ib] - sizes_cum[ia] + * if dist > LIMIT: # <<<<<<<<<<<<<< + * break + * s += links / dist + */ + } + + /* "jcvi/assembly/chic.pyx":55 + * if dist > LIMIT: + * break + * s += links / dist # <<<<<<<<<<<<<< + * return s, + * + */ + __pyx_v_s = (__pyx_v_s + (((double)__pyx_v_links) / __pyx_v_dist)); + __pyx_L5_continue:; + } + __pyx_L6_break:; + } + + /* "jcvi/assembly/chic.pyx":56 + * break + * s += links / dist + * return s, # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyFloat_FromDouble(__pyx_v_s); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 56, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 56, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1)) __PYX_ERR(0, 56, __pyx_L1_error); + __pyx_t_1 = 0; + __pyx_r = __pyx_t_4; + __pyx_t_4 = 0; + goto __pyx_L0; + + /* "jcvi/assembly/chic.pyx":34 + * + * + * def score_evaluate_M(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=2] tour_M=None): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); + 
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_M.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_M", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_M.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_sizes_oo); + __Pyx_XDECREF((PyObject *)__pyx_v_sizes_cum); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/assembly/chic.pyx":59 + * + * + * def score_evaluate_P(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_P=None): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_8assembly_4chic_3score_evaluate_P(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_4jcvi_8assembly_4chic_3score_evaluate_P = {"score_evaluate_P", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_8assembly_4chic_3score_evaluate_P, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_4jcvi_8assembly_4chic_3score_evaluate_P(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + arrayobject *__pyx_v_tour = 0; + 
PyArrayObject *__pyx_v_tour_sizes = 0; + PyArrayObject *__pyx_v_tour_P = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[3] = {0,0,0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("score_evaluate_P (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_tour,&__pyx_n_s_tour_sizes,&__pyx_n_s_tour_P,0}; + + /* "jcvi/assembly/chic.pyx":60 + * + * def score_evaluate_P(array.array[int] tour, + * np.ndarray[INT, ndim=1] tour_sizes=None, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=3] tour_P=None): + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] + */ + values[1] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); + + /* "jcvi/assembly/chic.pyx":61 + * def score_evaluate_P(array.array[int] tour, + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_P=None): # <<<<<<<<<<<<<< + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] + * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) + */ + values[2] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) 
{ + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 59, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (kw_args > 0) { + PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour_sizes); + if (value) { values[1] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 59, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (kw_args > 0) { + PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour_P); + if (value) { values[2] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 59, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "score_evaluate_P") < 0)) __PYX_ERR(0, 59, __pyx_L3_error) + } + } else { + switch (__pyx_nargs) { + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } + } + __pyx_v_tour = ((arrayobject *)values[0]); + __pyx_v_tour_sizes = ((PyArrayObject *)values[1]); + __pyx_v_tour_P = ((PyArrayObject *)values[2]); + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("score_evaluate_P", 0, 1, 3, __pyx_nargs); __PYX_ERR(0, 59, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + 
__Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_P", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour), __pyx_ptype_7cpython_5array_array, 1, "tour", 0))) __PYX_ERR(0, 59, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour_sizes), __pyx_ptype_5numpy_ndarray, 1, "tour_sizes", 0))) __PYX_ERR(0, 60, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour_P), __pyx_ptype_5numpy_ndarray, 1, "tour_P", 0))) __PYX_ERR(0, 61, __pyx_L1_error) + __pyx_r = __pyx_pf_4jcvi_8assembly_4chic_2score_evaluate_P(__pyx_self, __pyx_v_tour, __pyx_v_tour_sizes, __pyx_v_tour_P); + + /* "jcvi/assembly/chic.pyx":59 + * + * + * def score_evaluate_P(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_P=None): + */ + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_8assembly_4chic_2score_evaluate_P(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_P) { + PyArrayObject *__pyx_v_sizes_oo = 0; + PyArrayObject *__pyx_v_sizes_cum = 0; + double __pyx_v_s; + int __pyx_v_size; + int __pyx_v_a; + int __pyx_v_b; + int __pyx_v_c; + int __pyx_v_ia; + int __pyx_v_ib; + double __pyx_v_dist; + __Pyx_LocalBuf_ND __pyx_pybuffernd_sizes_cum; + __Pyx_Buffer __pyx_pybuffer_sizes_cum; + __Pyx_LocalBuf_ND __pyx_pybuffernd_sizes_oo; + __Pyx_Buffer __pyx_pybuffer_sizes_oo; + __Pyx_LocalBuf_ND __pyx_pybuffernd_tour; + 
__Pyx_Buffer __pyx_pybuffer_tour; + __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_P; + __Pyx_Buffer __pyx_pybuffer_tour_P; + __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_sizes; + __Pyx_Buffer __pyx_pybuffer_tour_sizes; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyArrayObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + unsigned int __pyx_t_5; + PyArrayObject *__pyx_t_6 = NULL; + Py_ssize_t __pyx_t_7; + int __pyx_t_8; + int __pyx_t_9; + int __pyx_t_10; + Py_ssize_t __pyx_t_11; + int __pyx_t_12; + int __pyx_t_13; + int __pyx_t_14; + double __pyx_t_15; + int __pyx_t_16; + Py_ssize_t __pyx_t_17; + Py_ssize_t __pyx_t_18; + int __pyx_t_19; + PyObject *__pyx_t_20 = NULL; + PyObject *__pyx_t_21 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("score_evaluate_P", 1); + __pyx_pybuffer_sizes_oo.pybuffer.buf = NULL; + __pyx_pybuffer_sizes_oo.refcount = 0; + __pyx_pybuffernd_sizes_oo.data = NULL; + __pyx_pybuffernd_sizes_oo.rcbuffer = &__pyx_pybuffer_sizes_oo; + __pyx_pybuffer_sizes_cum.pybuffer.buf = NULL; + __pyx_pybuffer_sizes_cum.refcount = 0; + __pyx_pybuffernd_sizes_cum.data = NULL; + __pyx_pybuffernd_sizes_cum.rcbuffer = &__pyx_pybuffer_sizes_cum; + __pyx_pybuffer_tour.pybuffer.buf = NULL; + __pyx_pybuffer_tour.refcount = 0; + __pyx_pybuffernd_tour.data = NULL; + __pyx_pybuffernd_tour.rcbuffer = &__pyx_pybuffer_tour; + __pyx_pybuffer_tour_sizes.pybuffer.buf = NULL; + __pyx_pybuffer_tour_sizes.refcount = 0; + __pyx_pybuffernd_tour_sizes.data = NULL; + __pyx_pybuffernd_tour_sizes.rcbuffer = &__pyx_pybuffer_tour_sizes; + __pyx_pybuffer_tour_P.pybuffer.buf = NULL; + __pyx_pybuffer_tour_P.refcount = 0; + __pyx_pybuffernd_tour_P.data = NULL; + __pyx_pybuffernd_tour_P.rcbuffer = &__pyx_pybuffer_tour_P; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour.rcbuffer->pybuffer, 
(PyObject*)__pyx_v_tour, &__Pyx_TypeInfo_int, PyBUF_FORMAT| PyBUF_INDIRECT, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 59, __pyx_L1_error) + } + __pyx_pybuffernd_tour.diminfo[0].strides = __pyx_pybuffernd_tour.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour.diminfo[0].shape = __pyx_pybuffernd_tour.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour.diminfo[0].suboffsets = __pyx_pybuffernd_tour.rcbuffer->pybuffer.suboffsets[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_sizes, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 59, __pyx_L1_error) + } + __pyx_pybuffernd_tour_sizes.diminfo[0].strides = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_sizes.diminfo[0].shape = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.shape[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_P.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_P, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 3, 0, __pyx_stack) == -1)) __PYX_ERR(0, 59, __pyx_L1_error) + } + __pyx_pybuffernd_tour_P.diminfo[0].strides = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_P.diminfo[0].shape = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour_P.diminfo[1].strides = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_tour_P.diminfo[1].shape = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.shape[1]; __pyx_pybuffernd_tour_P.diminfo[2].strides = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.strides[2]; __pyx_pybuffernd_tour_P.diminfo[2].shape = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.shape[2]; + + /* "jcvi/assembly/chic.pyx":62 + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_P=None): + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] # <<<<<<<<<<<<<< + * 
cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_tour_sizes), ((PyObject *)__pyx_v_tour)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 62, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 62, __pyx_L1_error) + __pyx_t_2 = ((PyArrayObject *)__pyx_t_1); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer, (PyObject*)__pyx_t_2, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + __pyx_v_sizes_oo = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 62, __pyx_L1_error) + } else {__pyx_pybuffernd_sizes_oo.diminfo[0].strides = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_oo.diminfo[0].shape = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_2 = 0; + __pyx_v_sizes_oo = ((PyArrayObject *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/assembly/chic.pyx":63 + * np.ndarray[INT, ndim=3] tour_P=None): + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] + * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) # <<<<<<<<<<<<<< + * + * cdef double s = 0.0 + */ + __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 63, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cumsum); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 63, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = NULL; + __pyx_t_5 = 0; + #if CYTHON_UNPACK_METHODS + if (unlikely(PyMethod_Check(__pyx_t_4))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); + 
__Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_4, function); + __pyx_t_5 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_3, ((PyObject *)__pyx_v_sizes_oo)}; + __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5); + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 63, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + } + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 63, __pyx_L1_error) + __pyx_t_6 = ((PyArrayObject *)__pyx_t_1); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + __pyx_v_sizes_cum = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 63, __pyx_L1_error) + } else {__pyx_pybuffernd_sizes_cum.diminfo[0].strides = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_cum.diminfo[0].shape = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_6 = 0; + __pyx_v_sizes_cum = ((PyArrayObject *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/assembly/chic.pyx":65 + * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) + * + * cdef double s = 0.0 # <<<<<<<<<<<<<< + * cdef int size = len(tour) + * cdef int a, b, c, ia, ib + */ + __pyx_v_s = 0.0; + + /* "jcvi/assembly/chic.pyx":66 + * + * cdef double s = 0.0 + * cdef int size = len(tour) # <<<<<<<<<<<<<< + * cdef int a, b, c, ia, ib + * cdef double dist + */ + if (unlikely(((PyObject *)__pyx_v_tour) == Py_None)) { + PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); + __PYX_ERR(0, 66, __pyx_L1_error) + } + __pyx_t_7 = Py_SIZE(((PyObject *)__pyx_v_tour)); if 
(unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 66, __pyx_L1_error) + __pyx_v_size = __pyx_t_7; + + /* "jcvi/assembly/chic.pyx":69 + * cdef int a, b, c, ia, ib + * cdef double dist + * for ia in range(size): # <<<<<<<<<<<<<< + * a = tour[ia] + * for ib in range(ia + 1, size): + */ + __pyx_t_8 = __pyx_v_size; + __pyx_t_9 = __pyx_t_8; + for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) { + __pyx_v_ia = __pyx_t_10; + + /* "jcvi/assembly/chic.pyx":70 + * cdef double dist + * for ia in range(size): + * a = tour[ia] # <<<<<<<<<<<<<< + * for ib in range(ia + 1, size): + * b = tour[ib] + */ + __pyx_t_11 = __pyx_v_ia; + __pyx_v_a = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); + + /* "jcvi/assembly/chic.pyx":71 + * for ia in range(size): + * a = tour[ia] + * for ib in range(ia + 1, size): # <<<<<<<<<<<<<< + * b = tour[ib] + * dist = sizes_cum[ib - 1] - sizes_cum[ia] + */ + __pyx_t_12 = __pyx_v_size; + __pyx_t_13 = __pyx_t_12; + for (__pyx_t_14 = (__pyx_v_ia + 1); __pyx_t_14 < __pyx_t_13; __pyx_t_14+=1) { + __pyx_v_ib = __pyx_t_14; + + /* "jcvi/assembly/chic.pyx":72 + * a = tour[ia] + * for ib in range(ia + 1, size): + * b = tour[ib] # <<<<<<<<<<<<<< + * dist = sizes_cum[ib - 1] - sizes_cum[ia] + * if dist > LIMIT: + */ + __pyx_t_11 = __pyx_v_ib; + __pyx_v_b = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); + + /* "jcvi/assembly/chic.pyx":73 + * for ib in range(ia + 1, size): + * b = tour[ib] + * dist = sizes_cum[ib - 1] - sizes_cum[ia] # <<<<<<<<<<<<<< + * if dist > LIMIT: + * break + */ + __pyx_t_11 = (__pyx_v_ib - 1); + __pyx_t_1 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_11, 
__pyx_pybuffernd_sizes_cum.diminfo[0].strides); + if (unlikely(__pyx_t_1 == NULL)) __pyx_t_1 = Py_None; + __Pyx_INCREF((PyObject*)__pyx_t_1); + __pyx_t_11 = __pyx_v_ia; + __pyx_t_4 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_sizes_cum.diminfo[0].strides); + if (unlikely(__pyx_t_4 == NULL)) __pyx_t_4 = Py_None; + __Pyx_INCREF((PyObject*)__pyx_t_4); + __pyx_t_3 = PyNumber_Subtract(__pyx_t_1, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 73, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_15 = __pyx_PyFloat_AsDouble(__pyx_t_3); if (unlikely((__pyx_t_15 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 73, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_dist = __pyx_t_15; + + /* "jcvi/assembly/chic.pyx":74 + * b = tour[ib] + * dist = sizes_cum[ib - 1] - sizes_cum[ia] + * if dist > LIMIT: # <<<<<<<<<<<<<< + * break + * c = tour_P[a, b, 0] + */ + __pyx_t_16 = (__pyx_v_dist > 10000000.0); + if (__pyx_t_16) { + + /* "jcvi/assembly/chic.pyx":75 + * dist = sizes_cum[ib - 1] - sizes_cum[ia] + * if dist > LIMIT: + * break # <<<<<<<<<<<<<< + * c = tour_P[a, b, 0] + * if c == 0: + */ + goto __pyx_L6_break; + + /* "jcvi/assembly/chic.pyx":74 + * b = tour[ib] + * dist = sizes_cum[ib - 1] - sizes_cum[ia] + * if dist > LIMIT: # <<<<<<<<<<<<<< + * break + * c = tour_P[a, b, 0] + */ + } + + /* "jcvi/assembly/chic.pyx":76 + * if dist > LIMIT: + * break + * c = tour_P[a, b, 0] # <<<<<<<<<<<<<< + * if c == 0: + * continue + */ + __pyx_t_11 = __pyx_v_a; + __pyx_t_17 = __pyx_v_b; + __pyx_t_18 = 0; + __pyx_t_3 = (PyObject *) *__Pyx_BufPtrStrided3d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour_P.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_tour_P.diminfo[1].strides, __pyx_t_18, 
__pyx_pybuffernd_tour_P.diminfo[2].strides); + if (unlikely(__pyx_t_3 == NULL)) __pyx_t_3 = Py_None; + __Pyx_INCREF((PyObject*)__pyx_t_3); + __pyx_t_19 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 76, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_c = __pyx_t_19; + + /* "jcvi/assembly/chic.pyx":77 + * break + * c = tour_P[a, b, 0] + * if c == 0: # <<<<<<<<<<<<<< + * continue + * s += c / (tour_P[a, b, 1] + dist) + */ + __pyx_t_16 = (__pyx_v_c == 0); + if (__pyx_t_16) { + + /* "jcvi/assembly/chic.pyx":78 + * c = tour_P[a, b, 0] + * if c == 0: + * continue # <<<<<<<<<<<<<< + * s += c / (tour_P[a, b, 1] + dist) + * return s, + */ + goto __pyx_L5_continue; + + /* "jcvi/assembly/chic.pyx":77 + * break + * c = tour_P[a, b, 0] + * if c == 0: # <<<<<<<<<<<<<< + * continue + * s += c / (tour_P[a, b, 1] + dist) + */ + } + + /* "jcvi/assembly/chic.pyx":79 + * if c == 0: + * continue + * s += c / (tour_P[a, b, 1] + dist) # <<<<<<<<<<<<<< + * return s, + * + */ + __pyx_t_3 = PyFloat_FromDouble(__pyx_v_s); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 79, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_c); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 79, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_18 = __pyx_v_a; + __pyx_t_17 = __pyx_v_b; + __pyx_t_11 = 1; + __pyx_t_1 = (PyObject *) *__Pyx_BufPtrStrided3d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.buf, __pyx_t_18, __pyx_pybuffernd_tour_P.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_tour_P.diminfo[1].strides, __pyx_t_11, __pyx_pybuffernd_tour_P.diminfo[2].strides); + if (unlikely(__pyx_t_1 == NULL)) __pyx_t_1 = Py_None; + __Pyx_INCREF((PyObject*)__pyx_t_1); + __pyx_t_20 = PyFloat_FromDouble(__pyx_v_dist); if (unlikely(!__pyx_t_20)) __PYX_ERR(0, 79, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_20); + __pyx_t_21 = PyNumber_Add(__pyx_t_1, __pyx_t_20); if (unlikely(!__pyx_t_21)) 
__PYX_ERR(0, 79, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_21); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_20); __pyx_t_20 = 0; + __pyx_t_20 = __Pyx_PyNumber_Divide(__pyx_t_4, __pyx_t_21); if (unlikely(!__pyx_t_20)) __PYX_ERR(0, 79, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_20); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_21); __pyx_t_21 = 0; + __pyx_t_21 = PyNumber_InPlaceAdd(__pyx_t_3, __pyx_t_20); if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 79, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_21); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_20); __pyx_t_20 = 0; + __pyx_t_15 = __pyx_PyFloat_AsDouble(__pyx_t_21); if (unlikely((__pyx_t_15 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 79, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_21); __pyx_t_21 = 0; + __pyx_v_s = __pyx_t_15; + __pyx_L5_continue:; + } + __pyx_L6_break:; + } + + /* "jcvi/assembly/chic.pyx":80 + * continue + * s += c / (tour_P[a, b, 1] + dist) + * return s, # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_21 = PyFloat_FromDouble(__pyx_v_s); if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 80, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_21); + __pyx_t_20 = PyTuple_New(1); if (unlikely(!__pyx_t_20)) __PYX_ERR(0, 80, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_20); + __Pyx_GIVEREF(__pyx_t_21); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_20, 0, __pyx_t_21)) __PYX_ERR(0, 80, __pyx_L1_error); + __pyx_t_21 = 0; + __pyx_r = __pyx_t_20; + __pyx_t_20 = 0; + goto __pyx_L0; + + /* "jcvi/assembly/chic.pyx":59 + * + * + * def score_evaluate_P(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_P=None): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_20); + __Pyx_XDECREF(__pyx_t_21); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + 
__Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_P.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_P", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_P.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_sizes_oo); + __Pyx_XDECREF((PyObject *)__pyx_v_sizes_cum); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/assembly/chic.pyx":83 + * + * + * def score_evaluate_Q(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_Q=None): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_8assembly_4chic_5score_evaluate_Q(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_4jcvi_8assembly_4chic_5score_evaluate_Q = {"score_evaluate_Q", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_8assembly_4chic_5score_evaluate_Q, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_4jcvi_8assembly_4chic_5score_evaluate_Q(PyObject 
*__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + arrayobject *__pyx_v_tour = 0; + PyArrayObject *__pyx_v_tour_sizes = 0; + PyArrayObject *__pyx_v_tour_Q = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[3] = {0,0,0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("score_evaluate_Q (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_tour,&__pyx_n_s_tour_sizes,&__pyx_n_s_tour_Q,0}; + + /* "jcvi/assembly/chic.pyx":84 + * + * def score_evaluate_Q(array.array[int] tour, + * np.ndarray[INT, ndim=1] tour_sizes=None, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=3] tour_Q=None): + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] + */ + values[1] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); + + /* "jcvi/assembly/chic.pyx":85 + * def score_evaluate_Q(array.array[int] tour, + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_Q=None): # <<<<<<<<<<<<<< + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] + * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) + */ + values[2] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; 
+ case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 83, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (kw_args > 0) { + PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour_sizes); + if (value) { values[1] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 83, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (kw_args > 0) { + PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour_Q); + if (value) { values[2] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 83, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "score_evaluate_Q") < 0)) __PYX_ERR(0, 83, __pyx_L3_error) + } + } else { + switch (__pyx_nargs) { + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } + } + __pyx_v_tour = ((arrayobject *)values[0]); + __pyx_v_tour_sizes = ((PyArrayObject *)values[1]); + __pyx_v_tour_Q = ((PyArrayObject *)values[2]); + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("score_evaluate_Q", 0, 1, 3, __pyx_nargs); __PYX_ERR(0, 83, __pyx_L3_error) + __pyx_L6_skip:; + 
goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_Q", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour), __pyx_ptype_7cpython_5array_array, 1, "tour", 0))) __PYX_ERR(0, 83, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour_sizes), __pyx_ptype_5numpy_ndarray, 1, "tour_sizes", 0))) __PYX_ERR(0, 84, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour_Q), __pyx_ptype_5numpy_ndarray, 1, "tour_Q", 0))) __PYX_ERR(0, 85, __pyx_L1_error) + __pyx_r = __pyx_pf_4jcvi_8assembly_4chic_4score_evaluate_Q(__pyx_self, __pyx_v_tour, __pyx_v_tour_sizes, __pyx_v_tour_Q); + + /* "jcvi/assembly/chic.pyx":83 + * + * + * def score_evaluate_Q(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_Q=None): + */ + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_8assembly_4chic_4score_evaluate_Q(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_Q) { + PyArrayObject *__pyx_v_sizes_oo = 0; + PyArrayObject *__pyx_v_sizes_cum = 0; + double __pyx_v_s; + int __pyx_v_size; + int __pyx_v_a; + int __pyx_v_b; + int __pyx_v_c; + int __pyx_v_ia; + int __pyx_v_ib; + int __pyx_v_ic; + double __pyx_v_dist; + __Pyx_LocalBuf_ND 
__pyx_pybuffernd_sizes_cum; + __Pyx_Buffer __pyx_pybuffer_sizes_cum; + __Pyx_LocalBuf_ND __pyx_pybuffernd_sizes_oo; + __Pyx_Buffer __pyx_pybuffer_sizes_oo; + __Pyx_LocalBuf_ND __pyx_pybuffernd_tour; + __Pyx_Buffer __pyx_pybuffer_tour; + __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_Q; + __Pyx_Buffer __pyx_pybuffer_tour_Q; + __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_sizes; + __Pyx_Buffer __pyx_pybuffer_tour_sizes; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyArrayObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + unsigned int __pyx_t_5; + PyArrayObject *__pyx_t_6 = NULL; + Py_ssize_t __pyx_t_7; + int __pyx_t_8; + int __pyx_t_9; + int __pyx_t_10; + Py_ssize_t __pyx_t_11; + int __pyx_t_12; + int __pyx_t_13; + int __pyx_t_14; + Py_ssize_t __pyx_t_15; + Py_ssize_t __pyx_t_16; + int __pyx_t_17; + double __pyx_t_18; + int __pyx_t_19; + int __pyx_t_20; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("score_evaluate_Q", 1); + __pyx_pybuffer_sizes_oo.pybuffer.buf = NULL; + __pyx_pybuffer_sizes_oo.refcount = 0; + __pyx_pybuffernd_sizes_oo.data = NULL; + __pyx_pybuffernd_sizes_oo.rcbuffer = &__pyx_pybuffer_sizes_oo; + __pyx_pybuffer_sizes_cum.pybuffer.buf = NULL; + __pyx_pybuffer_sizes_cum.refcount = 0; + __pyx_pybuffernd_sizes_cum.data = NULL; + __pyx_pybuffernd_sizes_cum.rcbuffer = &__pyx_pybuffer_sizes_cum; + __pyx_pybuffer_tour.pybuffer.buf = NULL; + __pyx_pybuffer_tour.refcount = 0; + __pyx_pybuffernd_tour.data = NULL; + __pyx_pybuffernd_tour.rcbuffer = &__pyx_pybuffer_tour; + __pyx_pybuffer_tour_sizes.pybuffer.buf = NULL; + __pyx_pybuffer_tour_sizes.refcount = 0; + __pyx_pybuffernd_tour_sizes.data = NULL; + __pyx_pybuffernd_tour_sizes.rcbuffer = &__pyx_pybuffer_tour_sizes; + __pyx_pybuffer_tour_Q.pybuffer.buf = NULL; + __pyx_pybuffer_tour_Q.refcount = 0; + __pyx_pybuffernd_tour_Q.data = NULL; + __pyx_pybuffernd_tour_Q.rcbuffer = 
&__pyx_pybuffer_tour_Q; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour, &__Pyx_TypeInfo_int, PyBUF_FORMAT| PyBUF_INDIRECT, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 83, __pyx_L1_error) + } + __pyx_pybuffernd_tour.diminfo[0].strides = __pyx_pybuffernd_tour.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour.diminfo[0].shape = __pyx_pybuffernd_tour.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour.diminfo[0].suboffsets = __pyx_pybuffernd_tour.rcbuffer->pybuffer.suboffsets[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_sizes, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 83, __pyx_L1_error) + } + __pyx_pybuffernd_tour_sizes.diminfo[0].strides = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_sizes.diminfo[0].shape = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.shape[0]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_Q.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_Q, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 3, 0, __pyx_stack) == -1)) __PYX_ERR(0, 83, __pyx_L1_error) + } + __pyx_pybuffernd_tour_Q.diminfo[0].strides = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_Q.diminfo[0].shape = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour_Q.diminfo[1].strides = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_tour_Q.diminfo[1].shape = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.shape[1]; __pyx_pybuffernd_tour_Q.diminfo[2].strides = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.strides[2]; __pyx_pybuffernd_tour_Q.diminfo[2].shape = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.shape[2]; + + /* "jcvi/assembly/chic.pyx":86 + * 
np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_Q=None): + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] # <<<<<<<<<<<<<< + * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_tour_sizes), ((PyObject *)__pyx_v_tour)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 86, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 86, __pyx_L1_error) + __pyx_t_2 = ((PyArrayObject *)__pyx_t_1); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer, (PyObject*)__pyx_t_2, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + __pyx_v_sizes_oo = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 86, __pyx_L1_error) + } else {__pyx_pybuffernd_sizes_oo.diminfo[0].strides = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_oo.diminfo[0].shape = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_2 = 0; + __pyx_v_sizes_oo = ((PyArrayObject *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/assembly/chic.pyx":87 + * np.ndarray[INT, ndim=3] tour_Q=None): + * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] + * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) # <<<<<<<<<<<<<< + * + * cdef double s = 0.0 + */ + __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cumsum); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = NULL; + __pyx_t_5 = 0; + #if CYTHON_UNPACK_METHODS + if 
(unlikely(PyMethod_Check(__pyx_t_4))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_4, function); + __pyx_t_5 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[2] = {__pyx_t_3, ((PyObject *)__pyx_v_sizes_oo)}; + __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5); + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + } + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_t_6 = ((PyArrayObject *)__pyx_t_1); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + __pyx_v_sizes_cum = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 87, __pyx_L1_error) + } else {__pyx_pybuffernd_sizes_cum.diminfo[0].strides = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_cum.diminfo[0].shape = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_6 = 0; + __pyx_v_sizes_cum = ((PyArrayObject *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/assembly/chic.pyx":89 + * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) + * + * cdef double s = 0.0 # <<<<<<<<<<<<<< + * cdef int size = len(tour) + * cdef int a, b, c, ia, ib, ic + */ + __pyx_v_s = 0.0; + + /* "jcvi/assembly/chic.pyx":90 + * + * cdef double s = 0.0 + * cdef int size = len(tour) # <<<<<<<<<<<<<< + * cdef int a, b, c, ia, ib, ic + * cdef double dist + */ + if (unlikely(((PyObject *)__pyx_v_tour) == 
Py_None)) { + PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); + __PYX_ERR(0, 90, __pyx_L1_error) + } + __pyx_t_7 = Py_SIZE(((PyObject *)__pyx_v_tour)); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 90, __pyx_L1_error) + __pyx_v_size = __pyx_t_7; + + /* "jcvi/assembly/chic.pyx":93 + * cdef int a, b, c, ia, ib, ic + * cdef double dist + * for ia in range(size): # <<<<<<<<<<<<<< + * a = tour[ia] + * for ib in range(ia + 1, size): + */ + __pyx_t_8 = __pyx_v_size; + __pyx_t_9 = __pyx_t_8; + for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) { + __pyx_v_ia = __pyx_t_10; + + /* "jcvi/assembly/chic.pyx":94 + * cdef double dist + * for ia in range(size): + * a = tour[ia] # <<<<<<<<<<<<<< + * for ib in range(ia + 1, size): + * b = tour[ib] + */ + __pyx_t_11 = __pyx_v_ia; + __pyx_v_a = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); + + /* "jcvi/assembly/chic.pyx":95 + * for ia in range(size): + * a = tour[ia] + * for ib in range(ia + 1, size): # <<<<<<<<<<<<<< + * b = tour[ib] + * if tour_Q[a, b, 0] == -1: + */ + __pyx_t_12 = __pyx_v_size; + __pyx_t_13 = __pyx_t_12; + for (__pyx_t_14 = (__pyx_v_ia + 1); __pyx_t_14 < __pyx_t_13; __pyx_t_14+=1) { + __pyx_v_ib = __pyx_t_14; + + /* "jcvi/assembly/chic.pyx":96 + * a = tour[ia] + * for ib in range(ia + 1, size): + * b = tour[ib] # <<<<<<<<<<<<<< + * if tour_Q[a, b, 0] == -1: + * continue + */ + __pyx_t_11 = __pyx_v_ib; + __pyx_v_b = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); + + /* "jcvi/assembly/chic.pyx":97 + * for ib in range(ia + 1, size): + * b = tour[ib] + * if tour_Q[a, b, 0] == -1: # <<<<<<<<<<<<<< + * continue + * dist = sizes_cum[ib - 1] - sizes_cum[ia] + */ + __pyx_t_11 = __pyx_v_a; + __pyx_t_15 = __pyx_v_b; + 
__pyx_t_16 = 0; + __pyx_t_1 = (PyObject *) *__Pyx_BufPtrStrided3d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour_Q.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_tour_Q.diminfo[1].strides, __pyx_t_16, __pyx_pybuffernd_tour_Q.diminfo[2].strides); + if (unlikely(__pyx_t_1 == NULL)) __pyx_t_1 = Py_None; + __Pyx_INCREF((PyObject*)__pyx_t_1); + __pyx_t_4 = PyObject_RichCompare(__pyx_t_1, __pyx_int_neg_1, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 97, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_17 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely((__pyx_t_17 < 0))) __PYX_ERR(0, 97, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_17) { + + /* "jcvi/assembly/chic.pyx":98 + * b = tour[ib] + * if tour_Q[a, b, 0] == -1: + * continue # <<<<<<<<<<<<<< + * dist = sizes_cum[ib - 1] - sizes_cum[ia] + * if dist > LIMIT: + */ + goto __pyx_L5_continue; + + /* "jcvi/assembly/chic.pyx":97 + * for ib in range(ia + 1, size): + * b = tour[ib] + * if tour_Q[a, b, 0] == -1: # <<<<<<<<<<<<<< + * continue + * dist = sizes_cum[ib - 1] - sizes_cum[ia] + */ + } + + /* "jcvi/assembly/chic.pyx":99 + * if tour_Q[a, b, 0] == -1: + * continue + * dist = sizes_cum[ib - 1] - sizes_cum[ia] # <<<<<<<<<<<<<< + * if dist > LIMIT: + * break + */ + __pyx_t_16 = (__pyx_v_ib - 1); + __pyx_t_4 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_sizes_cum.diminfo[0].strides); + if (unlikely(__pyx_t_4 == NULL)) __pyx_t_4 = Py_None; + __Pyx_INCREF((PyObject*)__pyx_t_4); + __pyx_t_16 = __pyx_v_ia; + __pyx_t_1 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_sizes_cum.diminfo[0].strides); + if (unlikely(__pyx_t_1 == NULL)) __pyx_t_1 = Py_None; + 
__Pyx_INCREF((PyObject*)__pyx_t_1); + __pyx_t_3 = PyNumber_Subtract(__pyx_t_4, __pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 99, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_18 = __pyx_PyFloat_AsDouble(__pyx_t_3); if (unlikely((__pyx_t_18 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 99, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_dist = __pyx_t_18; + + /* "jcvi/assembly/chic.pyx":100 + * continue + * dist = sizes_cum[ib - 1] - sizes_cum[ia] + * if dist > LIMIT: # <<<<<<<<<<<<<< + * break + * for ic in range(BB): + */ + __pyx_t_17 = (__pyx_v_dist > 10000000.0); + if (__pyx_t_17) { + + /* "jcvi/assembly/chic.pyx":101 + * dist = sizes_cum[ib - 1] - sizes_cum[ia] + * if dist > LIMIT: + * break # <<<<<<<<<<<<<< + * for ic in range(BB): + * c = tour_Q[a, b, ic] + */ + goto __pyx_L6_break; + + /* "jcvi/assembly/chic.pyx":100 + * continue + * dist = sizes_cum[ib - 1] - sizes_cum[ia] + * if dist > LIMIT: # <<<<<<<<<<<<<< + * break + * for ic in range(BB): + */ + } + + /* "jcvi/assembly/chic.pyx":102 + * if dist > LIMIT: + * break + * for ic in range(BB): # <<<<<<<<<<<<<< + * c = tour_Q[a, b, ic] + * s += c / (GR[ic] + dist) + */ + for (__pyx_t_19 = 0; __pyx_t_19 < 12; __pyx_t_19+=1) { + __pyx_v_ic = __pyx_t_19; + + /* "jcvi/assembly/chic.pyx":103 + * break + * for ic in range(BB): + * c = tour_Q[a, b, ic] # <<<<<<<<<<<<<< + * s += c / (GR[ic] + dist) + * return s, + */ + __pyx_t_16 = __pyx_v_a; + __pyx_t_15 = __pyx_v_b; + __pyx_t_11 = __pyx_v_ic; + __pyx_t_3 = (PyObject *) *__Pyx_BufPtrStrided3d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_tour_Q.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_tour_Q.diminfo[1].strides, __pyx_t_11, __pyx_pybuffernd_tour_Q.diminfo[2].strides); + if (unlikely(__pyx_t_3 == NULL)) __pyx_t_3 = Py_None; + __Pyx_INCREF((PyObject*)__pyx_t_3); + __pyx_t_20 
= __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_20 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 103, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_c = __pyx_t_20; + + /* "jcvi/assembly/chic.pyx":104 + * for ic in range(BB): + * c = tour_Q[a, b, ic] + * s += c / (GR[ic] + dist) # <<<<<<<<<<<<<< + * return s, + */ + __pyx_v_s = (__pyx_v_s + (((double)__pyx_v_c) / ((__pyx_v_4jcvi_8assembly_4chic_GR[__pyx_v_ic]) + __pyx_v_dist))); + } + __pyx_L5_continue:; + } + __pyx_L6_break:; + } + + /* "jcvi/assembly/chic.pyx":105 + * c = tour_Q[a, b, ic] + * s += c / (GR[ic] + dist) + * return s, # <<<<<<<<<<<<<< + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_3 = PyFloat_FromDouble(__pyx_v_s); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 105, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 105, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_GIVEREF(__pyx_t_3); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_3)) __PYX_ERR(0, 105, __pyx_L1_error); + __pyx_t_3 = 0; + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "jcvi/assembly/chic.pyx":83 + * + * + * def score_evaluate_Q(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_Q=None): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_Q.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); + 
__Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_Q", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_Q.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_sizes_oo); + __Pyx_XDECREF((PyObject *)__pyx_v_sizes_cum); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyMethodDef __pyx_methods[] = { + {0, 0, 0, 0} +}; +#ifndef CYTHON_SMALL_CODE +#if defined(__clang__) + #define CYTHON_SMALL_CODE +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define CYTHON_SMALL_CODE __attribute__((cold)) +#else + #define CYTHON_SMALL_CODE +#endif +#endif +/* #### Code section: pystring_table ### */ + +static int __Pyx_CreateStringTabAndInitStrings(void) { + __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_n_s_ImportError, __pyx_k_ImportError, sizeof(__pyx_k_ImportError), 0, 0, 1, 1}, + {&__pyx_n_s_MemoryError, __pyx_k_MemoryError, sizeof(__pyx_k_MemoryError), 0, 0, 1, 1}, + {&__pyx_n_s__11, __pyx_k__11, sizeof(__pyx_k__11), 0, 0, 1, 1}, + {&__pyx_n_s__3, __pyx_k__3, sizeof(__pyx_k__3), 0, 0, 1, 1}, + {&__pyx_n_s_a, __pyx_k_a, sizeof(__pyx_k_a), 0, 0, 1, 1}, + {&__pyx_n_s_array, __pyx_k_array, sizeof(__pyx_k_array), 0, 0, 1, 1}, + {&__pyx_n_s_asyncio_coroutines, __pyx_k_asyncio_coroutines, sizeof(__pyx_k_asyncio_coroutines), 0, 0, 1, 1}, + {&__pyx_n_s_b, __pyx_k_b, sizeof(__pyx_k_b), 0, 0, 1, 1}, + {&__pyx_n_s_c, __pyx_k_c, sizeof(__pyx_k_c), 0, 0, 1, 1}, + {&__pyx_n_s_class_getitem, __pyx_k_class_getitem, sizeof(__pyx_k_class_getitem), 0, 0, 
1, 1}, + {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, + {&__pyx_n_s_cumsum, __pyx_k_cumsum, sizeof(__pyx_k_cumsum), 0, 0, 1, 1}, + {&__pyx_n_s_dist, __pyx_k_dist, sizeof(__pyx_k_dist), 0, 0, 1, 1}, + {&__pyx_n_s_ia, __pyx_k_ia, sizeof(__pyx_k_ia), 0, 0, 1, 1}, + {&__pyx_n_s_ib, __pyx_k_ib, sizeof(__pyx_k_ib), 0, 0, 1, 1}, + {&__pyx_n_s_ic, __pyx_k_ic, sizeof(__pyx_k_ic), 0, 0, 1, 1}, + {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, + {&__pyx_n_s_initializing, __pyx_k_initializing, sizeof(__pyx_k_initializing), 0, 0, 1, 1}, + {&__pyx_n_s_is_coroutine, __pyx_k_is_coroutine, sizeof(__pyx_k_is_coroutine), 0, 0, 1, 1}, + {&__pyx_n_s_jcvi_assembly_chic, __pyx_k_jcvi_assembly_chic, sizeof(__pyx_k_jcvi_assembly_chic), 0, 0, 1, 1}, + {&__pyx_n_s_links, __pyx_k_links, sizeof(__pyx_k_links), 0, 0, 1, 1}, + {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, + {&__pyx_n_s_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 0, 1, 1}, + {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1}, + {&__pyx_kp_s_numpy__core_multiarray_failed_to, __pyx_k_numpy__core_multiarray_failed_to, sizeof(__pyx_k_numpy__core_multiarray_failed_to), 0, 0, 1, 0}, + {&__pyx_kp_s_numpy__core_umath_failed_to_impo, __pyx_k_numpy__core_umath_failed_to_impo, sizeof(__pyx_k_numpy__core_umath_failed_to_impo), 0, 0, 1, 0}, + {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, + {&__pyx_n_s_s, __pyx_k_s, sizeof(__pyx_k_s), 0, 0, 1, 1}, + {&__pyx_n_s_score_evaluate_M, __pyx_k_score_evaluate_M, sizeof(__pyx_k_score_evaluate_M), 0, 0, 1, 1}, + {&__pyx_n_s_score_evaluate_P, __pyx_k_score_evaluate_P, sizeof(__pyx_k_score_evaluate_P), 0, 0, 1, 1}, + {&__pyx_n_s_score_evaluate_Q, __pyx_k_score_evaluate_Q, sizeof(__pyx_k_score_evaluate_Q), 0, 0, 1, 1}, + {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1}, + 
{&__pyx_n_s_sizes_cum, __pyx_k_sizes_cum, sizeof(__pyx_k_sizes_cum), 0, 0, 1, 1}, + {&__pyx_n_s_sizes_oo, __pyx_k_sizes_oo, sizeof(__pyx_k_sizes_oo), 0, 0, 1, 1}, + {&__pyx_n_s_spec, __pyx_k_spec, sizeof(__pyx_k_spec), 0, 0, 1, 1}, + {&__pyx_kp_s_src_jcvi_assembly_chic_pyx, __pyx_k_src_jcvi_assembly_chic_pyx, sizeof(__pyx_k_src_jcvi_assembly_chic_pyx), 0, 0, 1, 0}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_tour, __pyx_k_tour, sizeof(__pyx_k_tour), 0, 0, 1, 1}, + {&__pyx_n_s_tour_M, __pyx_k_tour_M, sizeof(__pyx_k_tour_M), 0, 0, 1, 1}, + {&__pyx_n_s_tour_P, __pyx_k_tour_P, sizeof(__pyx_k_tour_P), 0, 0, 1, 1}, + {&__pyx_n_s_tour_Q, __pyx_k_tour_Q, sizeof(__pyx_k_tour_Q), 0, 0, 1, 1}, + {&__pyx_n_s_tour_sizes, __pyx_k_tour_sizes, sizeof(__pyx_k_tour_sizes), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} + }; + return __Pyx_InitStrings(__pyx_string_tab); +} +/* #### Code section: cached_builtins ### */ +static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 45, __pyx_L1_error) + __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(1, 1043, __pyx_L1_error) + __pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) __PYX_ERR(3, 120, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: cached_constants ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1043 + * __pyx_import_array() + * except Exception: + * raise ImportError("numpy._core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int 
import_umath() except -1: + */ + __pyx_tuple_ = PyTuple_Pack(1, __pyx_kp_s_numpy__core_multiarray_failed_to); if (unlikely(!__pyx_tuple_)) __PYX_ERR(1, 1043, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple_); + __Pyx_GIVEREF(__pyx_tuple_); + + /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1049 + * _import_umath() + * except Exception: + * raise ImportError("numpy._core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_tuple__2 = PyTuple_Pack(1, __pyx_kp_s_numpy__core_umath_failed_to_impo); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(1, 1049, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__2); + __Pyx_GIVEREF(__pyx_tuple__2); + + /* "jcvi/assembly/chic.pyx":34 + * + * + * def score_evaluate_M(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=2] tour_M=None): + */ + __pyx_tuple__4 = PyTuple_Pack(13, __pyx_n_s_tour, __pyx_n_s_tour_sizes, __pyx_n_s_tour_M, __pyx_n_s_sizes_oo, __pyx_n_s_sizes_cum, __pyx_n_s_s, __pyx_n_s_size, __pyx_n_s_a, __pyx_n_s_b, __pyx_n_s_ia, __pyx_n_s_ib, __pyx_n_s_links, __pyx_n_s_dist); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 34, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__4); + __Pyx_GIVEREF(__pyx_tuple__4); + __pyx_codeobj__5 = (PyObject*)__Pyx_PyCode_New(3, 0, 0, 13, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__4, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_jcvi_assembly_chic_pyx, __pyx_n_s_score_evaluate_M, 34, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__5)) __PYX_ERR(0, 34, __pyx_L1_error) + __pyx_tuple__6 = PyTuple_Pack(2, Py_None, Py_None); if (unlikely(!__pyx_tuple__6)) __PYX_ERR(0, 34, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__6); + __Pyx_GIVEREF(__pyx_tuple__6); + + /* "jcvi/assembly/chic.pyx":59 + * + * + * def 
score_evaluate_P(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_P=None): + */ + __pyx_tuple__7 = PyTuple_Pack(13, __pyx_n_s_tour, __pyx_n_s_tour_sizes, __pyx_n_s_tour_P, __pyx_n_s_sizes_oo, __pyx_n_s_sizes_cum, __pyx_n_s_s, __pyx_n_s_size, __pyx_n_s_a, __pyx_n_s_b, __pyx_n_s_c, __pyx_n_s_ia, __pyx_n_s_ib, __pyx_n_s_dist); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(0, 59, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__7); + __Pyx_GIVEREF(__pyx_tuple__7); + __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(3, 0, 0, 13, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_jcvi_assembly_chic_pyx, __pyx_n_s_score_evaluate_P, 59, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(0, 59, __pyx_L1_error) + + /* "jcvi/assembly/chic.pyx":83 + * + * + * def score_evaluate_Q(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_Q=None): + */ + __pyx_tuple__9 = PyTuple_Pack(14, __pyx_n_s_tour, __pyx_n_s_tour_sizes, __pyx_n_s_tour_Q, __pyx_n_s_sizes_oo, __pyx_n_s_sizes_cum, __pyx_n_s_s, __pyx_n_s_size, __pyx_n_s_a, __pyx_n_s_b, __pyx_n_s_c, __pyx_n_s_ia, __pyx_n_s_ib, __pyx_n_s_ic, __pyx_n_s_dist); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(0, 83, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__9); + __Pyx_GIVEREF(__pyx_tuple__9); + __pyx_codeobj__10 = (PyObject*)__Pyx_PyCode_New(3, 0, 0, 14, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__9, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_jcvi_assembly_chic_pyx, __pyx_n_s_score_evaluate_Q, 83, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__10)) __PYX_ERR(0, 83, __pyx_L1_error) + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} +/* #### Code section: init_constants ### */ + 
+static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) { + if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error); + __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_neg_1 = PyInt_FromLong(-1); if (unlikely(!__pyx_int_neg_1)) __PYX_ERR(0, 1, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: init_globals ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { + /* NumpyImportArray.init */ + /* + * Cython has automatically inserted a call to _import_array since + * you didn't include one when you cimported numpy. To disable this + * add the line + * numpy._import_array + */ +#ifdef NPY_FEATURE_VERSION +#ifndef NO_IMPORT_ARRAY +if (unlikely(_import_array() == -1)) { + PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import " + "(auto-generated because you didn't call 'numpy.import_array()' after cimporting numpy; " + "use 'numpy._import_array' to disable if you are certain you don't need it)."); +} +#endif +#endif + +if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error) + + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: init_module ### */ + +static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ + +static int __Pyx_modinit_global_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); + /*--- Global init code ---*/ + 
__Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_variable_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); + /*--- Variable export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); + /*--- Function export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); + /*--- Type init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_import_code(void) { + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); + /*--- Type import code ---*/ + __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(4, 9, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_0_11(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", + #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 + sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyTypeObject), + #elif CYTHON_COMPILING_IN_LIMITED_API + sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyTypeObject), + #else + sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyHeapTypeObject), + #endif + __Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_7cpython_4type_type) __PYX_ERR(4, 9, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(5, 8, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_7cpython_4bool_bool = 
__Pyx_ImportType_3_0_11(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool", sizeof(PyBoolObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyBoolObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_7cpython_4bool_bool) __PYX_ERR(5, 8, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(6, 15, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_7cpython_7complex_complex = __Pyx_ImportType_3_0_11(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "complex", sizeof(PyComplexObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyComplexObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_7cpython_7complex_complex) __PYX_ERR(6, 15, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyImport_ImportModule("numpy"); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 272, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_5numpy_dtype = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "dtype", sizeof(PyArray_Descr), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArray_Descr),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_dtype) __PYX_ERR(1, 272, __pyx_L1_error) + __pyx_ptype_5numpy_flatiter = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "flatiter", sizeof(PyArrayIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayIterObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_flatiter) __PYX_ERR(1, 317, __pyx_L1_error) + __pyx_ptype_5numpy_broadcast = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "broadcast", sizeof(PyArrayMultiIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayMultiIterObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_broadcast) __PYX_ERR(1, 321, __pyx_L1_error) + __pyx_ptype_5numpy_ndarray = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "ndarray", sizeof(PyArrayObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_ndarray) 
__PYX_ERR(1, 360, __pyx_L1_error) + __pyx_ptype_5numpy_generic = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "generic", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_generic) __PYX_ERR(1, 865, __pyx_L1_error) + __pyx_ptype_5numpy_number = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "number", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_number) __PYX_ERR(1, 867, __pyx_L1_error) + __pyx_ptype_5numpy_integer = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "integer", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_integer) __PYX_ERR(1, 869, __pyx_L1_error) + __pyx_ptype_5numpy_signedinteger = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "signedinteger", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_signedinteger) __PYX_ERR(1, 871, __pyx_L1_error) + __pyx_ptype_5numpy_unsignedinteger = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "unsignedinteger", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_unsignedinteger) __PYX_ERR(1, 873, __pyx_L1_error) + __pyx_ptype_5numpy_inexact = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "inexact", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_inexact) __PYX_ERR(1, 875, __pyx_L1_error) + __pyx_ptype_5numpy_floating = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "floating", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_floating) __PYX_ERR(1, 877, __pyx_L1_error) + __pyx_ptype_5numpy_complexfloating = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "complexfloating", sizeof(PyObject), 
__PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_complexfloating) __PYX_ERR(1, 879, __pyx_L1_error) + __pyx_ptype_5numpy_flexible = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "flexible", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_flexible) __PYX_ERR(1, 881, __pyx_L1_error) + __pyx_ptype_5numpy_character = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "character", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_character) __PYX_ERR(1, 883, __pyx_L1_error) + __pyx_ptype_5numpy_ufunc = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "ufunc", sizeof(PyUFuncObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyUFuncObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_ufunc) __PYX_ERR(1, 947, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyImport_ImportModule("array"); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 69, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_7cpython_5array_array = __Pyx_ImportType_3_0_11(__pyx_t_1, "array", "array", sizeof(arrayobject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(arrayobject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_7cpython_5array_array) __PYX_ERR(3, 69, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_modinit_variable_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); + /*--- Variable import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); + /*--- Function import code 
---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + + +#if PY_MAJOR_VERSION >= 3 +#if CYTHON_PEP489_MULTI_PHASE_INIT +static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ +static int __pyx_pymod_exec_chic(PyObject* module); /*proto*/ +static PyModuleDef_Slot __pyx_moduledef_slots[] = { + {Py_mod_create, (void*)__pyx_pymod_create}, + {Py_mod_exec, (void*)__pyx_pymod_exec_chic}, + {0, NULL} +}; +#endif + +#ifdef __cplusplus +namespace { + struct PyModuleDef __pyx_moduledef = + #else + static struct PyModuleDef __pyx_moduledef = + #endif + { + PyModuleDef_HEAD_INIT, + "chic", + __pyx_k_Cythonized_version_of_score_eva, /* m_doc */ + #if CYTHON_PEP489_MULTI_PHASE_INIT + 0, /* m_size */ + #elif CYTHON_USE_MODULE_STATE + sizeof(__pyx_mstate), /* m_size */ + #else + -1, /* m_size */ + #endif + __pyx_methods /* m_methods */, + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_moduledef_slots, /* m_slots */ + #else + NULL, /* m_reload */ + #endif + #if CYTHON_USE_MODULE_STATE + __pyx_m_traverse, /* m_traverse */ + __pyx_m_clear, /* m_clear */ + NULL /* m_free */ + #else + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ + #endif + }; + #ifdef __cplusplus +} /* anonymous namespace */ +#endif +#endif + +#ifndef CYTHON_NO_PYINIT_EXPORT +#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC +#elif PY_MAJOR_VERSION < 3 +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" void +#else +#define __Pyx_PyMODINIT_FUNC void +#endif +#else +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * +#else +#define __Pyx_PyMODINIT_FUNC PyObject * +#endif +#endif + + +#if PY_MAJOR_VERSION < 3 +__Pyx_PyMODINIT_FUNC initchic(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC initchic(void) +#else +__Pyx_PyMODINIT_FUNC PyInit_chic(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC PyInit_chic(void) +#if CYTHON_PEP489_MULTI_PHASE_INIT +{ + return PyModuleDef_Init(&__pyx_moduledef); +} +static CYTHON_SMALL_CODE int 
__Pyx_check_single_interpreter(void) { + #if PY_VERSION_HEX >= 0x030700A1 + static PY_INT64_T main_interpreter_id = -1; + PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); + if (main_interpreter_id == -1) { + main_interpreter_id = current_id; + return (unlikely(current_id == -1)) ? -1 : 0; + } else if (unlikely(main_interpreter_id != current_id)) + #else + static PyInterpreterState *main_interpreter = NULL; + PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; + if (!main_interpreter) { + main_interpreter = current_interpreter; + } else if (unlikely(main_interpreter != current_interpreter)) + #endif + { + PyErr_SetString( + PyExc_ImportError, + "Interpreter change detected - this module can only be loaded into one interpreter per process."); + return -1; + } + return 0; +} +#if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *module, const char* from_name, const char* to_name, int allow_none) +#else +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) +#endif +{ + PyObject *value = PyObject_GetAttrString(spec, from_name); + int result = 0; + if (likely(value)) { + if (allow_none || value != Py_None) { +#if CYTHON_COMPILING_IN_LIMITED_API + result = PyModule_AddObject(module, to_name, value); +#else + result = PyDict_SetItemString(moddict, to_name, value); +#endif + } + Py_DECREF(value); + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + result = -1; + } + return result; +} +static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def) { + PyObject *module = NULL, *moddict, *modname; + CYTHON_UNUSED_VAR(def); + if (__Pyx_check_single_interpreter()) + return NULL; + if (__pyx_m) + return __Pyx_NewRef(__pyx_m); + modname = PyObject_GetAttrString(spec, "name"); + if (unlikely(!modname)) goto bad; 
+ module = PyModule_NewObject(modname); + Py_DECREF(modname); + if (unlikely(!module)) goto bad; +#if CYTHON_COMPILING_IN_LIMITED_API + moddict = module; +#else + moddict = PyModule_GetDict(module); + if (unlikely(!moddict)) goto bad; +#endif + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; + return module; +bad: + Py_XDECREF(module); + return NULL; +} + + +static CYTHON_SMALL_CODE int __pyx_pymod_exec_chic(PyObject *__pyx_pyinit_module) +#endif +#endif +{ + int stringtab_initialized = 0; + #if CYTHON_USE_MODULE_STATE + int pystate_addmodule_run = 0; + #endif + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + static int __pyx_t_3[12]; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + #if CYTHON_PEP489_MULTI_PHASE_INIT + if (__pyx_m) { + if (__pyx_m == __pyx_pyinit_module) return 0; + PyErr_SetString(PyExc_RuntimeError, "Module 'chic' has already been imported. 
Re-initialisation is not supported."); + return -1; + } + #elif PY_MAJOR_VERSION >= 3 + if (__pyx_m) return __Pyx_NewRef(__pyx_m); + #endif + /*--- Module creation code ---*/ + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_m = __pyx_pyinit_module; + Py_INCREF(__pyx_m); + #else + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("chic", __pyx_methods, __pyx_k_Cythonized_version_of_score_eva, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #elif CYTHON_USE_MODULE_STATE + __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) + { + int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef); + __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to "chic" pseudovariable */ + if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + pystate_addmodule_run = 1; + } + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #endif + CYTHON_UNUSED_VAR(__pyx_t_1); + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_d); + __pyx_b = __Pyx_PyImport_AddModuleRef(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_cython_runtime = __Pyx_PyImport_AddModuleRef((const char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if CYTHON_REFNANNY +__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); +if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); +} +#endif + __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_chic(void)", 0); + if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, 
__Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pxy_PyFrame_Initialize_Offsets + __Pxy_PyFrame_Initialize_Offsets(); + #endif + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Coroutine_USED + if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + PyEval_InitThreads(); + #endif + /*--- Initialize various global constants etc. 
---*/ + if (__Pyx_InitConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + stringtab_initialized = 1; + if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + if (__pyx_module_is_main_jcvi__assembly__chic) { + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "jcvi.assembly.chic")) { + if (unlikely((PyDict_SetItemString(modules, "jcvi.assembly.chic", __pyx_m) < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Global type/function init code ---*/ + (void)__Pyx_modinit_global_init_code(); + (void)__Pyx_modinit_variable_export_code(); + (void)__Pyx_modinit_function_export_code(); + (void)__Pyx_modinit_type_init_code(); + if (unlikely((__Pyx_modinit_type_import_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + (void)__Pyx_modinit_variable_import_code(); + (void)__Pyx_modinit_function_import_code(); + /*--- Execution code ---*/ + #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + + /* "jcvi/assembly/chic.pyx":18 + * + * from __future__ import division + * import numpy as np # <<<<<<<<<<<<<< + * cimport numpy as np + * cimport cython + */ + __pyx_t_2 = __Pyx_ImportDottedModuleRelFirst(__pyx_n_s_numpy, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, 
__pyx_t_2) < 0) __PYX_ERR(0, 18, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "jcvi/assembly/chic.pyx":22 + * cimport cython + * from cpython cimport array + * import array # <<<<<<<<<<<<<< + * + * + */ + __pyx_t_2 = __Pyx_ImportDottedModuleRelFirst(__pyx_n_s_array, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_array, __pyx_t_2) < 0) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "jcvi/assembly/chic.pyx":29 + * DEF BB = 12 + * cdef int *GR = \ + * [ 5778, 9349, 15127, 24476, # <<<<<<<<<<<<<< + * 39603, 64079, 103682, 167761, + * 271443, 439204, 710647, 1149851] + */ + __pyx_t_3[0] = 0x1692; + __pyx_t_3[1] = 0x2485; + __pyx_t_3[2] = 0x3B17; + __pyx_t_3[3] = 0x5F9C; + __pyx_t_3[4] = 0x9AB3; + __pyx_t_3[5] = 0xFA4F; + __pyx_t_3[6] = 0x19502; + __pyx_t_3[7] = 0x28F51; + __pyx_t_3[8] = 0x42453; + __pyx_t_3[9] = 0x6B3A4; + __pyx_t_3[10] = 0xAD7F7; + __pyx_t_3[11] = 0x118B9B; + __pyx_v_4jcvi_8assembly_4chic_GR = __pyx_t_3; + + /* "jcvi/assembly/chic.pyx":34 + * + * + * def score_evaluate_M(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=2] tour_M=None): + */ + __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_8assembly_4chic_1score_evaluate_M, 0, __pyx_n_s_score_evaluate_M, NULL, __pyx_n_s_jcvi_assembly_chic, __pyx_d, ((PyObject *)__pyx_codeobj__5)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 34, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_tuple__6); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_score_evaluate_M, __pyx_t_2) < 0) __PYX_ERR(0, 34, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "jcvi/assembly/chic.pyx":59 + * + * + * def score_evaluate_P(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_P=None): + */ + __pyx_t_2 = 
__Pyx_CyFunction_New(&__pyx_mdef_4jcvi_8assembly_4chic_3score_evaluate_P, 0, __pyx_n_s_score_evaluate_P, NULL, __pyx_n_s_jcvi_assembly_chic, __pyx_d, ((PyObject *)__pyx_codeobj__8)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 59, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_tuple__6); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_score_evaluate_P, __pyx_t_2) < 0) __PYX_ERR(0, 59, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "jcvi/assembly/chic.pyx":83 + * + * + * def score_evaluate_Q(array.array[int] tour, # <<<<<<<<<<<<<< + * np.ndarray[INT, ndim=1] tour_sizes=None, + * np.ndarray[INT, ndim=3] tour_Q=None): + */ + __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_8assembly_4chic_5score_evaluate_Q, 0, __pyx_n_s_score_evaluate_Q, NULL, __pyx_n_s_jcvi_assembly_chic, __pyx_d, ((PyObject *)__pyx_codeobj__10)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 83, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_tuple__6); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_score_evaluate_Q, __pyx_t_2) < 0) __PYX_ERR(0, 83, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "jcvi/assembly/chic.pyx":1 + * #cython: language_level=2, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True # <<<<<<<<<<<<<< + * + * """ + */ + __pyx_t_2 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_2) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /*--- Wrapped vars code ---*/ + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + if (__pyx_m) { + if (__pyx_d && stringtab_initialized) { + __Pyx_AddTraceback("init jcvi.assembly.chic", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + #if !CYTHON_USE_MODULE_STATE + Py_CLEAR(__pyx_m); + #else + Py_DECREF(__pyx_m); + if (pystate_addmodule_run) { + PyObject 
*tp, *value, *tb; + PyErr_Fetch(&tp, &value, &tb); + PyState_RemoveModule(&__pyx_moduledef); + PyErr_Restore(tp, value, tb); + } + #endif + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init jcvi.assembly.chic"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if CYTHON_PEP489_MULTI_PHASE_INIT + return (__pyx_m != NULL) ? 0 : -1; + #elif PY_MAJOR_VERSION >= 3 + return __pyx_m; + #else + return; + #endif +} +/* #### Code section: cleanup_globals ### */ +/* #### Code section: cleanup_module ### */ +/* #### Code section: main_method ### */ +/* #### Code section: utility_code_pragmas ### */ +#ifdef _MSC_VER +#pragma warning( push ) +/* Warning 4127: conditional expression is constant + * Cython uses constant conditional expressions to allow in inline functions to be optimized at + * compile-time, so this warning is not useful + */ +#pragma warning( disable : 4127 ) +#endif + + + +/* #### Code section: utility_code_def ### */ + +/* --- Runtime support code --- */ +/* Refnanny */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule(modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, "RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif + +/* PyErrExceptionMatches */ +#if CYTHON_FAST_THREAD_STATE +static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030C00A6 + PyObject *current_exception = tstate->current_exception; + if (unlikely(!current_exception)) return 0; + exc_type = (PyObject*) Py_TYPE(current_exception); + if (exc_type == err) return 1; +#else + exc_type = tstate->curexc_type; + if (exc_type == err) return 1; + if (unlikely(!exc_type)) return 0; +#endif + #if 
CYTHON_AVOID_BORROWED_REFS + Py_INCREF(exc_type); + #endif + if (unlikely(PyTuple_Check(err))) { + result = __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); + } else { + result = __Pyx_PyErr_GivenExceptionMatches(exc_type, err); + } + #if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(exc_type); + #endif + return result; +} +#endif + +/* PyErrFetchRestore */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject *tmp_value; + assert(type == NULL || (value != NULL && type == (PyObject*) Py_TYPE(value))); + if (value) { + #if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(((PyBaseExceptionObject*) value)->traceback != tb)) + #endif + PyException_SetTraceback(value, tb); + } + tmp_value = tstate->current_exception; + tstate->current_exception = value; + Py_XDECREF(tmp_value); + Py_XDECREF(type); + Py_XDECREF(tb); +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#endif +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject* exc_value; + exc_value = tstate->current_exception; + tstate->current_exception = 0; + *value = exc_value; + *type = NULL; + *tb = NULL; + if (exc_value) { + *type = (PyObject*) Py_TYPE(exc_value); + Py_INCREF(*type); + #if CYTHON_COMPILING_IN_CPYTHON + *tb = ((PyBaseExceptionObject*) exc_value)->traceback; + Py_XINCREF(*tb); + #else + *tb = PyException_GetTraceback(exc_value); + #endif + } +#else + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + 
tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +#endif +} +#endif + +/* PyObjectGetAttrStr */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#endif + +/* PyObjectGetAttrStrNoError */ +#if __PYX_LIMITED_VERSION_HEX < 0x030d00A1 +static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError))) + __Pyx_PyErr_Clear(); +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) { + PyObject *result; +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + (void) PyObject_GetOptionalAttr(obj, attr_name, &result); + return result; +#else +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1 + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) { + return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1); + } +#endif + result = __Pyx_PyObject_GetAttrStr(obj, attr_name); + if (unlikely(!result)) { + __Pyx_PyObject_GetAttrStr_ClearAttributeError(); + } + return result; +#endif +} + +/* GetBuiltinName */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStrNoError(__pyx_b, name); + if (unlikely(!result) && !PyErr_Occurred()) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +/* GetTopmostException */ +#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE +static 
_PyErr_StackItem * +__Pyx_PyErr_GetTopmostException(PyThreadState *tstate) +{ + _PyErr_StackItem *exc_info = tstate->exc_info; + while ((exc_info->exc_value == NULL || exc_info->exc_value == Py_None) && + exc_info->previous_item != NULL) + { + exc_info = exc_info->previous_item; + } + return exc_info; +} +#endif + +/* SaveResetException */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + PyObject *exc_value = exc_info->exc_value; + if (exc_value == NULL || exc_value == Py_None) { + *value = NULL; + *type = NULL; + *tb = NULL; + } else { + *value = exc_value; + Py_INCREF(*value); + *type = (PyObject*) Py_TYPE(exc_value); + Py_INCREF(*type); + *tb = PyException_GetTraceback(exc_value); + } + #elif CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + *type = exc_info->exc_type; + *value = exc_info->exc_value; + *tb = exc_info->exc_traceback; + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); + #else + *type = tstate->exc_type; + *value = tstate->exc_value; + *tb = tstate->exc_traceback; + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); + #endif +} +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = tstate->exc_info; + PyObject *tmp_value = exc_info->exc_value; + exc_info->exc_value = value; + Py_XDECREF(tmp_value); + Py_XDECREF(type); + Py_XDECREF(tb); + #else + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + 
exc_info->exc_type = type; + exc_info->exc_value = value; + exc_info->exc_traceback = tb; + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = type; + tstate->exc_value = value; + tstate->exc_traceback = tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); + #endif +} +#endif + +/* GetException */ +#if CYTHON_FAST_THREAD_STATE +static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) +#endif +{ + PyObject *local_type = NULL, *local_value, *local_tb = NULL; +#if CYTHON_FAST_THREAD_STATE + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if PY_VERSION_HEX >= 0x030C00A6 + local_value = tstate->current_exception; + tstate->current_exception = 0; + if (likely(local_value)) { + local_type = (PyObject*) Py_TYPE(local_value); + Py_INCREF(local_type); + local_tb = PyException_GetTraceback(local_value); + } + #else + local_type = tstate->curexc_type; + local_value = tstate->curexc_value; + local_tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; + #endif +#else + PyErr_Fetch(&local_type, &local_value, &local_tb); +#endif + PyErr_NormalizeException(&local_type, &local_value, &local_tb); +#if CYTHON_FAST_THREAD_STATE && PY_VERSION_HEX >= 0x030C00A6 + if (unlikely(tstate->current_exception)) +#elif CYTHON_FAST_THREAD_STATE + if (unlikely(tstate->curexc_type)) +#else + if (unlikely(PyErr_Occurred())) +#endif + goto bad; + #if PY_MAJOR_VERSION >= 3 + if (local_tb) { + if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0)) + goto bad; + } + #endif + Py_XINCREF(local_tb); + Py_XINCREF(local_type); + Py_XINCREF(local_value); + *type = local_type; + *value = local_value; + *tb = local_tb; +#if CYTHON_FAST_THREAD_STATE + #if CYTHON_USE_EXC_INFO_STACK + { + _PyErr_StackItem 
*exc_info = tstate->exc_info; + #if PY_VERSION_HEX >= 0x030B00a4 + tmp_value = exc_info->exc_value; + exc_info->exc_value = local_value; + tmp_type = NULL; + tmp_tb = NULL; + Py_XDECREF(local_type); + Py_XDECREF(local_tb); + #else + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = local_type; + exc_info->exc_value = local_value; + exc_info->exc_traceback = local_tb; + #endif + } + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = local_type; + tstate->exc_value = local_value; + tstate->exc_traceback = local_tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#else + PyErr_SetExcInfo(local_type, local_value, local_tb); +#endif + return 0; +bad: + *type = 0; + *value = 0; + *tb = 0; + Py_XDECREF(local_type); + Py_XDECREF(local_value); + Py_XDECREF(local_tb); + return -1; +} + +/* PyObjectCall */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = Py_TYPE(func)->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = (*call)(func, arg, kw); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* RaiseException */ +#if PY_MAJOR_VERSION < 3 +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + __Pyx_PyThreadState_declare + CYTHON_UNUSED_VAR(cause); + Py_XINCREF(type); + if (!value || value == Py_None) + value = NULL; + else + 
Py_INCREF(value); + if (!tb || tb == Py_None) + tb = NULL; + else { + Py_INCREF(tb); + if (!PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto raise_error; + } + } + if (PyType_Check(type)) { +#if CYTHON_COMPILING_IN_PYPY + if (!value) { + Py_INCREF(Py_None); + value = Py_None; + } +#endif + PyErr_NormalizeException(&type, &value, &tb); + } else { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto raise_error; + } + value = type; + type = (PyObject*) Py_TYPE(type); + Py_INCREF(type); + if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto raise_error; + } + } + __Pyx_PyThreadState_assign + __Pyx_ErrRestore(type, value, tb); + return; +raise_error: + Py_XDECREF(value); + Py_XDECREF(type); + Py_XDECREF(tb); + return; +} +#else +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + PyObject* owned_instance = NULL; + if (tb == Py_None) { + tb = 0; + } else if (tb && !PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto bad; + } + if (value == Py_None) + value = 0; + if (PyExceptionInstance_Check(type)) { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto bad; + } + value = type; + type = (PyObject*) Py_TYPE(value); + } else if (PyExceptionClass_Check(type)) { + PyObject *instance_class = NULL; + if (value && PyExceptionInstance_Check(value)) { + instance_class = (PyObject*) Py_TYPE(value); + if (instance_class != type) { + int is_subclass = PyObject_IsSubclass(instance_class, type); + if (!is_subclass) { + instance_class = NULL; + } else if (unlikely(is_subclass == -1)) { + goto bad; + } else { + type = instance_class; + } + } + } + if (!instance_class) { 
+ PyObject *args; + if (!value) + args = PyTuple_New(0); + else if (PyTuple_Check(value)) { + Py_INCREF(value); + args = value; + } else + args = PyTuple_Pack(1, value); + if (!args) + goto bad; + owned_instance = PyObject_Call(type, args, NULL); + Py_DECREF(args); + if (!owned_instance) + goto bad; + value = owned_instance; + if (!PyExceptionInstance_Check(value)) { + PyErr_Format(PyExc_TypeError, + "calling %R should have returned an instance of " + "BaseException, not %R", + type, Py_TYPE(value)); + goto bad; + } + } + } else { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto bad; + } + if (cause) { + PyObject *fixed_cause; + if (cause == Py_None) { + fixed_cause = NULL; + } else if (PyExceptionClass_Check(cause)) { + fixed_cause = PyObject_CallObject(cause, NULL); + if (fixed_cause == NULL) + goto bad; + } else if (PyExceptionInstance_Check(cause)) { + fixed_cause = cause; + Py_INCREF(fixed_cause); + } else { + PyErr_SetString(PyExc_TypeError, + "exception causes must derive from " + "BaseException"); + goto bad; + } + PyException_SetCause(value, fixed_cause); + } + PyErr_SetObject(type, value); + if (tb) { + #if PY_VERSION_HEX >= 0x030C00A6 + PyException_SetTraceback(value, tb); + #elif CYTHON_FAST_THREAD_STATE + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject* tmp_tb = tstate->curexc_traceback; + if (tb != tmp_tb) { + Py_INCREF(tb); + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_tb); + } +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#endif + } +bad: + Py_XDECREF(owned_instance); + return; +} +#endif + +/* TupleAndListFromArray */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE void __Pyx_copy_object_array(PyObject *const *CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) { + PyObject *v; + Py_ssize_t i; + for (i = 0; i < 
length; i++) { + v = dest[i] = src[i]; + Py_INCREF(v); + } +} +static CYTHON_INLINE PyObject * +__Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + Py_INCREF(__pyx_empty_tuple); + return __pyx_empty_tuple; + } + res = PyTuple_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyTupleObject*)res)->ob_item, n); + return res; +} +static CYTHON_INLINE PyObject * +__Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + return PyList_New(0); + } + res = PyList_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyListObject*)res)->ob_item, n); + return res; +} +#endif + +/* BytesEquals */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else + if (s1 == s2) { + return (equals == Py_EQ); + } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { + const char *ps1, *ps2; + Py_ssize_t length = PyBytes_GET_SIZE(s1); + if (length != PyBytes_GET_SIZE(s2)) + return (equals == Py_NE); + ps1 = PyBytes_AS_STRING(s1); + ps2 = PyBytes_AS_STRING(s2); + if (ps1[0] != ps2[0]) { + return (equals == Py_NE); + } else if (length == 1) { + return (equals == Py_EQ); + } else { + int result; +#if CYTHON_USE_UNICODE_INTERNALS && (PY_VERSION_HEX < 0x030B0000) + Py_hash_t hash1, hash2; + hash1 = ((PyBytesObject*)s1)->ob_shash; + hash2 = ((PyBytesObject*)s2)->ob_shash; + if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { + return (equals == Py_NE); + } +#endif + result = memcmp(ps1, ps2, (size_t)length); + return (equals == Py_EQ) ? 
(result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { + return (equals == Py_NE); + } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { + return (equals == Py_NE); + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +#endif +} + +/* UnicodeEquals */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else +#if PY_MAJOR_VERSION < 3 + PyObject* owned_ref = NULL; +#endif + int s1_is_unicode, s2_is_unicode; + if (s1 == s2) { + goto return_eq; + } + s1_is_unicode = PyUnicode_CheckExact(s1); + s2_is_unicode = PyUnicode_CheckExact(s2); +#if PY_MAJOR_VERSION < 3 + if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { + owned_ref = PyUnicode_FromObject(s2); + if (unlikely(!owned_ref)) + return -1; + s2 = owned_ref; + s2_is_unicode = 1; + } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) { + owned_ref = PyUnicode_FromObject(s1); + if (unlikely(!owned_ref)) + return -1; + s1 = owned_ref; + s1_is_unicode = 1; + } else if (((!s2_is_unicode) & (!s1_is_unicode))) { + return __Pyx_PyBytes_Equals(s1, s2, equals); + } +#endif + if (s1_is_unicode & s2_is_unicode) { + Py_ssize_t length; + int kind; + void *data1, *data2; + if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0)) + return -1; + length = __Pyx_PyUnicode_GET_LENGTH(s1); + if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { + goto return_ne; + } +#if CYTHON_USE_UNICODE_INTERNALS + { + Py_hash_t hash1, hash2; + #if CYTHON_PEP393_ENABLED + hash1 = ((PyASCIIObject*)s1)->hash; + hash2 = ((PyASCIIObject*)s2)->hash; + #else + hash1 = ((PyUnicodeObject*)s1)->hash; + hash2 = ((PyUnicodeObject*)s2)->hash; + #endif + if 
(hash1 != hash2 && hash1 != -1 && hash2 != -1) { + goto return_ne; + } + } +#endif + kind = __Pyx_PyUnicode_KIND(s1); + if (kind != __Pyx_PyUnicode_KIND(s2)) { + goto return_ne; + } + data1 = __Pyx_PyUnicode_DATA(s1); + data2 = __Pyx_PyUnicode_DATA(s2); + if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { + goto return_ne; + } else if (length == 1) { + goto return_eq; + } else { + int result = memcmp(data1, data2, (size_t)(length * kind)); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ) ? (result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & s2_is_unicode) { + goto return_ne; + } else if ((s2 == Py_None) & s1_is_unicode) { + goto return_ne; + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +return_eq: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ); +return_ne: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_NE); +#endif +} + +/* fastcall */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s) +{ + Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames); + for (i = 0; i < n; i++) + { + if (s == PyTuple_GET_ITEM(kwnames, i)) return kwvalues[i]; + } + for (i = 0; i < n; i++) + { + int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ); + if (unlikely(eq != 0)) { + if (unlikely(eq < 0)) return NULL; + return kwvalues[i]; + } + } + return NULL; +} +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 +CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues) { + Py_ssize_t i, nkwargs = PyTuple_GET_SIZE(kwnames); + PyObject *dict; + dict = 
PyDict_New(); + if (unlikely(!dict)) + return NULL; + for (i=0; i= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + int kwds_is_tuple = CYTHON_METH_FASTCALL && likely(PyTuple_Check(kwds)); + while (1) { + Py_XDECREF(key); key = NULL; + Py_XDECREF(value); value = NULL; + if (kwds_is_tuple) { + Py_ssize_t size; +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(kwds); +#else + size = PyTuple_Size(kwds); + if (size < 0) goto bad; +#endif + if (pos >= size) break; +#if CYTHON_AVOID_BORROWED_REFS + key = __Pyx_PySequence_ITEM(kwds, pos); + if (!key) goto bad; +#elif CYTHON_ASSUME_SAFE_MACROS + key = PyTuple_GET_ITEM(kwds, pos); +#else + key = PyTuple_GetItem(kwds, pos); + if (!key) goto bad; +#endif + value = kwvalues[pos]; + pos++; + } + else + { + if (!PyDict_Next(kwds, &pos, &key, &value)) break; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + } + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(value); + Py_DECREF(key); +#endif + key = NULL; + value = NULL; + continue; + } +#if !CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + Py_INCREF(value); + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + 
value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = ( + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : + #endif + PyUnicode_Compare(**name, key) + ); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 
1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + Py_XDECREF(key); + Py_XDECREF(value); + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + #if PY_MAJOR_VERSION < 3 + PyErr_Format(PyExc_TypeError, + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + PyErr_Format(PyExc_TypeError, + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + Py_XDECREF(key); + Py_XDECREF(value); + return -1; +} + +/* RaiseArgTupleInvalid */ +static void __Pyx_RaiseArgtupleInvalid( + const char* func_name, + int exact, + Py_ssize_t num_min, + Py_ssize_t num_max, + Py_ssize_t num_found) +{ + Py_ssize_t num_expected; + const char *more_or_less; + if (num_found < num_min) { + num_expected = num_min; + more_or_less = "at least"; + } else { + num_expected = num_max; + more_or_less = "at most"; + } + if (exact) { + more_or_less = "exactly"; + } + PyErr_Format(PyExc_TypeError, + "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", + func_name, more_or_less, num_expected, + (num_expected == 1) ? 
"" : "s", num_found); +} + +/* ArgTypeTest */ +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) +{ + __Pyx_TypeName type_name; + __Pyx_TypeName obj_type_name; + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + else if (exact) { + #if PY_MAJOR_VERSION == 2 + if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(__Pyx_TypeCheck(obj, type))) return 1; + } + type_name = __Pyx_PyType_GetName(type); + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected " __Pyx_FMT_TYPENAME + ", got " __Pyx_FMT_TYPENAME ")", name, type_name, obj_type_name); + __Pyx_DECREF_TypeName(type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return 0; +} + +/* IsLittleEndian */ +static CYTHON_INLINE int __Pyx_Is_Little_Endian(void) +{ + union { + uint32_t u32; + uint8_t u8[4]; + } S; + S.u32 = 0x01020304; + return S.u8[0] == 4; +} + +/* BufferFormatCheck */ +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type) { + stack[0].field = &ctx->root; + stack[0].parent_offset = 0; + ctx->root.type = type; + ctx->root.name = "buffer dtype"; + ctx->root.offset = 0; + ctx->head = stack; + ctx->head->field = &ctx->root; + ctx->fmt_offset = 0; + ctx->head->parent_offset = 0; + ctx->new_packmode = '@'; + ctx->enc_packmode = '@'; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->is_complex = 0; + ctx->is_valid_array = 0; + ctx->struct_alignment = 0; + while (type->typegroup == 'S') { + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = 0; + type = type->fields->type; + } +} +static int __Pyx_BufFmt_ParseNumber(const char** ts) { + int count; + const char* t = *ts; + if (*t < '0' || *t > '9') { + return -1; + } else { + count = *t++ - '0'; + while (*t >= '0' 
&& *t <= '9') { + count *= 10; + count += *t++ - '0'; + } + } + *ts = t; + return count; +} +static int __Pyx_BufFmt_ExpectNumber(const char **ts) { + int number = __Pyx_BufFmt_ParseNumber(ts); + if (number == -1) + PyErr_Format(PyExc_ValueError,\ + "Does not understand character buffer dtype format string ('%c')", **ts); + return number; +} +static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) { + PyErr_Format(PyExc_ValueError, + "Unexpected format string character: '%c'", ch); +} +static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) { + switch (ch) { + case '?': return "'bool'"; + case 'c': return "'char'"; + case 'b': return "'signed char'"; + case 'B': return "'unsigned char'"; + case 'h': return "'short'"; + case 'H': return "'unsigned short'"; + case 'i': return "'int'"; + case 'I': return "'unsigned int'"; + case 'l': return "'long'"; + case 'L': return "'unsigned long'"; + case 'q': return "'long long'"; + case 'Q': return "'unsigned long long'"; + case 'f': return (is_complex ? "'complex float'" : "'float'"); + case 'd': return (is_complex ? "'complex double'" : "'double'"); + case 'g': return (is_complex ? "'complex long double'" : "'long double'"); + case 'T': return "a struct"; + case 'O': return "Python object"; + case 'P': return "a pointer"; + case 's': case 'p': return "a string"; + case 0: return "end"; + default: return "unparsable format string"; + } +} +static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return 2; + case 'i': case 'I': case 'l': case 'L': return 4; + case 'q': case 'Q': return 8; + case 'f': return (is_complex ? 8 : 4); + case 'd': return (is_complex ? 
16 : 8); + case 'g': { + PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g').."); + return 0; + } + case 'O': case 'P': return sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(short); + case 'i': case 'I': return sizeof(int); + case 'l': case 'L': return sizeof(long); + #ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(PY_LONG_LONG); + #endif + case 'f': return sizeof(float) * (is_complex ? 2 : 1); + case 'd': return sizeof(double) * (is_complex ? 2 : 1); + case 'g': return sizeof(long double) * (is_complex ? 2 : 1); + case 'O': case 'P': return sizeof(void*); + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +typedef struct { char c; short x; } __Pyx_st_short; +typedef struct { char c; int x; } __Pyx_st_int; +typedef struct { char c; long x; } __Pyx_st_long; +typedef struct { char c; float x; } __Pyx_st_float; +typedef struct { char c; double x; } __Pyx_st_double; +typedef struct { char c; long double x; } __Pyx_st_longdouble; +typedef struct { char c; void *x; } __Pyx_st_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, int is_complex) { + CYTHON_UNUSED_VAR(is_complex); + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_st_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_st_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_st_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_st_float) - sizeof(float); + case 'd': 
return sizeof(__Pyx_st_double) - sizeof(double); + case 'g': return sizeof(__Pyx_st_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_st_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +/* These are for computing the padding at the end of the struct to align + on the first member of the struct. This will probably the same as above, + but we don't have any guarantees. + */ +typedef struct { short x; char c; } __Pyx_pad_short; +typedef struct { int x; char c; } __Pyx_pad_int; +typedef struct { long x; char c; } __Pyx_pad_long; +typedef struct { float x; char c; } __Pyx_pad_float; +typedef struct { double x; char c; } __Pyx_pad_double; +typedef struct { long double x; char c; } __Pyx_pad_longdouble; +typedef struct { void *x; char c; } __Pyx_pad_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, int is_complex) { + CYTHON_UNUSED_VAR(is_complex); + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_pad_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_pad_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_pad_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_pad_float) - sizeof(float); + case 'd': return sizeof(__Pyx_pad_double) - sizeof(double); + case 'g': return sizeof(__Pyx_pad_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_pad_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) { + switch (ch) { + case 'c': + return 'H'; + case 'b': case 'h': case 'i': + case 'l': case 'q': case 's': case 'p': + return 'I'; + case '?': case 'B': case 
'H': case 'I': case 'L': case 'Q': + return 'U'; + case 'f': case 'd': case 'g': + return (is_complex ? 'C' : 'R'); + case 'O': + return 'O'; + case 'P': + return 'P'; + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) { + if (ctx->head == NULL || ctx->head->field == &ctx->root) { + const char* expected; + const char* quote; + if (ctx->head == NULL) { + expected = "end"; + quote = ""; + } else { + expected = ctx->head->field->type->name; + quote = "'"; + } + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected %s%s%s but got %s", + quote, expected, quote, + __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex)); + } else { + __Pyx_StructField* field = ctx->head->field; + __Pyx_StructField* parent = (ctx->head - 1)->field; + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'", + field->type->name, __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex), + parent->type->name, field->name); + } +} +static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) { + char group; + size_t size, offset, arraysize = 1; + if (ctx->enc_type == 0) return 0; + if (ctx->head->field->type->arraysize[0]) { + int i, ndim = 0; + if (ctx->enc_type == 's' || ctx->enc_type == 'p') { + ctx->is_valid_array = ctx->head->field->type->ndim == 1; + ndim = 1; + if (ctx->enc_count != ctx->head->field->type->arraysize[0]) { + PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %zu", + ctx->head->field->type->arraysize[0], ctx->enc_count); + return -1; + } + } + if (!ctx->is_valid_array) { + PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d", + ctx->head->field->type->ndim, ndim); + return -1; + } + for (i = 0; i < ctx->head->field->type->ndim; i++) { + arraysize *= ctx->head->field->type->arraysize[i]; + } + ctx->is_valid_array = 0; + ctx->enc_count = 1; + } + group = 
__Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex); + do { + __Pyx_StructField* field = ctx->head->field; + __Pyx_TypeInfo* type = field->type; + if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') { + size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex); + } else { + size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex); + } + if (ctx->enc_packmode == '@') { + size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex); + size_t align_mod_offset; + if (align_at == 0) return -1; + align_mod_offset = ctx->fmt_offset % align_at; + if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset; + if (ctx->struct_alignment == 0) + ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type, + ctx->is_complex); + } + if (type->size != size || type->typegroup != group) { + if (type->typegroup == 'C' && type->fields != NULL) { + size_t parent_offset = ctx->head->parent_offset + field->offset; + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = parent_offset; + continue; + } + if ((type->typegroup == 'H' || group == 'H') && type->size == size) { + } else { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + } + offset = ctx->head->parent_offset + field->offset; + if (ctx->fmt_offset != offset) { + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch; next field is at offset %" CYTHON_FORMAT_SSIZE_T "d but %" CYTHON_FORMAT_SSIZE_T "d expected", + (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset); + return -1; + } + ctx->fmt_offset += size; + if (arraysize) + ctx->fmt_offset += (arraysize - 1) * size; + --ctx->enc_count; + while (1) { + if (field == &ctx->root) { + ctx->head = NULL; + if (ctx->enc_count != 0) { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + break; + } + ctx->head->field = ++field; + if (field->type == NULL) { + --ctx->head; + field = ctx->head->field; + continue; + } else if (field->type->typegroup == 'S') { + size_t 
parent_offset = ctx->head->parent_offset + field->offset; + if (field->type->fields->type == NULL) continue; + field = field->type->fields; + ++ctx->head; + ctx->head->field = field; + ctx->head->parent_offset = parent_offset; + break; + } else { + break; + } + } + } while (ctx->enc_count); + ctx->enc_type = 0; + ctx->is_complex = 0; + return 0; +} +static int +__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp) +{ + const char *ts = *tsp; + int i = 0, number, ndim; + ++ts; + if (ctx->new_count != 1) { + PyErr_SetString(PyExc_ValueError, + "Cannot handle repeated arrays in format string"); + return -1; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return -1; + ndim = ctx->head->field->type->ndim; + while (*ts && *ts != ')') { + switch (*ts) { + case ' ': case '\f': case '\r': case '\n': case '\t': case '\v': continue; + default: break; + } + number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return -1; + if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i]) { + PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %d", + ctx->head->field->type->arraysize[i], number); + return -1; + } + if (*ts != ',' && *ts != ')') { + PyErr_Format(PyExc_ValueError, + "Expected a comma in format string, got '%c'", *ts); + return -1; + } + if (*ts == ',') ts++; + i++; + } + if (i != ndim) { + PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d", + ctx->head->field->type->ndim, i); + return -1; + } + if (!*ts) { + PyErr_SetString(PyExc_ValueError, + "Unexpected end of format string, expected ')'"); + return -1; + } + ctx->is_valid_array = 1; + ctx->new_count = 1; + *tsp = ++ts; + return 0; +} +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) { + int got_Z = 0; + while (1) { + switch(*ts) { + case 0: + if (ctx->enc_type != 0 && ctx->head == NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + 
if (ctx->head != NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + return ts; + case ' ': + case '\r': + case '\n': + ++ts; + break; + case '<': + if (!__Pyx_Is_Little_Endian()) { + PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '>': + case '!': + if (__Pyx_Is_Little_Endian()) { + PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '=': + case '@': + case '^': + ctx->new_packmode = *ts++; + break; + case 'T': + { + const char* ts_after_sub; + size_t i, struct_count = ctx->new_count; + size_t struct_alignment = ctx->struct_alignment; + ctx->new_count = 1; + ++ts; + if (*ts != '{') { + PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'"); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + ctx->enc_count = 0; + ctx->struct_alignment = 0; + ++ts; + ts_after_sub = ts; + for (i = 0; i != struct_count; ++i) { + ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts); + if (!ts_after_sub) return NULL; + } + ts = ts_after_sub; + if (struct_alignment) ctx->struct_alignment = struct_alignment; + } + break; + case '}': + { + size_t alignment = ctx->struct_alignment; + ++ts; + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + if (alignment && ctx->fmt_offset % alignment) { + ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment); + } + } + return ts; + case 'x': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->fmt_offset += ctx->new_count; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->enc_packmode = ctx->new_packmode; + ++ts; + break; + case 'Z': + got_Z = 1; + ++ts; + if (*ts != 'f' && *ts != 'd' && *ts != 'g') { + __Pyx_BufFmt_RaiseUnexpectedChar('Z'); + return NULL; + } + 
CYTHON_FALLTHROUGH; + case '?': case 'c': case 'b': case 'B': case 'h': case 'H': case 'i': case 'I': + case 'l': case 'L': case 'q': case 'Q': + case 'f': case 'd': case 'g': + case 'O': case 'p': + if ((ctx->enc_type == *ts) && (got_Z == ctx->is_complex) && + (ctx->enc_packmode == ctx->new_packmode) && (!ctx->is_valid_array)) { + ctx->enc_count += ctx->new_count; + ctx->new_count = 1; + got_Z = 0; + ++ts; + break; + } + CYTHON_FALLTHROUGH; + case 's': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_count = ctx->new_count; + ctx->enc_packmode = ctx->new_packmode; + ctx->enc_type = *ts; + ctx->is_complex = got_Z; + ++ts; + ctx->new_count = 1; + got_Z = 0; + break; + case ':': + ++ts; + while(*ts != ':') ++ts; + ++ts; + break; + case '(': + if (__pyx_buffmt_parse_array(ctx, &ts) < 0) return NULL; + break; + default: + { + int number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return NULL; + ctx->new_count = (size_t)number; + } + } + } +} + +/* BufferGetAndValidate */ + static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info) { + if (unlikely(info->buf == NULL)) return; + if (info->suboffsets == __Pyx_minusones) info->suboffsets = NULL; + __Pyx_ReleaseBuffer(info); +} +static void __Pyx_ZeroBuffer(Py_buffer* buf) { + buf->buf = NULL; + buf->obj = NULL; + buf->strides = __Pyx_zeros; + buf->shape = __Pyx_zeros; + buf->suboffsets = __Pyx_minusones; +} +static int __Pyx__GetBufferAndValidate( + Py_buffer* buf, PyObject* obj, __Pyx_TypeInfo* dtype, int flags, + int nd, int cast, __Pyx_BufFmt_StackElem* stack) +{ + buf->buf = NULL; + if (unlikely(__Pyx_GetBuffer(obj, buf, flags) == -1)) { + __Pyx_ZeroBuffer(buf); + return -1; + } + if (unlikely(buf->ndim != nd)) { + PyErr_Format(PyExc_ValueError, + "Buffer has wrong number of dimensions (expected %d, got %d)", + nd, buf->ndim); + goto fail; + } + if (!cast) { + __Pyx_BufFmt_Context ctx; + __Pyx_BufFmt_Init(&ctx, stack, dtype); + if (!__Pyx_BufFmt_CheckString(&ctx, buf->format)) 
goto fail; + } + if (unlikely((size_t)buf->itemsize != dtype->size)) { + PyErr_Format(PyExc_ValueError, + "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "d byte%s) does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "d byte%s)", + buf->itemsize, (buf->itemsize > 1) ? "s" : "", + dtype->name, (Py_ssize_t)dtype->size, (dtype->size > 1) ? "s" : ""); + goto fail; + } + if (buf->suboffsets == NULL) buf->suboffsets = __Pyx_minusones; + return 0; +fail:; + __Pyx_SafeReleaseBuffer(buf); + return -1; +} + +/* GetItemInt */ + static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { + PyObject *r; + if (unlikely(!j)) return NULL; + r = PyObject_GetItem(o, j); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyList_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyList_GET_SIZE(o)))) { + PyObject *r = PyList_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyTuple_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t 
i, int is_list, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); + if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) { + PyObject *r = PyList_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } + else if (PyTuple_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); + if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } else { + PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; + PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; + if (mm && mm->mp_subscript) { + PyObject *r, *key = PyInt_FromSsize_t(i); + if (unlikely(!key)) return NULL; + r = mm->mp_subscript(o, key); + Py_DECREF(key); + return r; + } + if (likely(sm && sm->sq_item)) { + if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { + Py_ssize_t l = sm->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + return NULL; + PyErr_Clear(); + } + } + return sm->sq_item(o, i); + } + } +#else + if (is_list || !PyMapping_Check(o)) { + return PySequence_GetItem(o, i); + } +#endif + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +} + +/* PyFunctionFastCall */ + #if CYTHON_FAST_PYCALL && !CYTHON_VECTORCALL +static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, + PyObject *globals) { + PyFrameObject *f; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject **fastlocals; + Py_ssize_t i; + PyObject *result; + assert(globals != NULL); + /* XXX Perhaps we should create a specialized + PyFrame_New() that doesn't take locals, but does + take builtins without sanity checking them. 
+ */ + assert(tstate != NULL); + f = PyFrame_New(tstate, co, globals, NULL); + if (f == NULL) { + return NULL; + } + fastlocals = __Pyx_PyFrame_GetLocalsplus(f); + for (i = 0; i < na; i++) { + Py_INCREF(*args); + fastlocals[i] = *args++; + } + result = PyEval_EvalFrameEx(f,0); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return result; +} +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + PyObject *closure; +#if PY_MAJOR_VERSION >= 3 + PyObject *kwdefs; +#endif + PyObject *kwtuple, **k; + PyObject **d; + Py_ssize_t nd; + Py_ssize_t nk; + PyObject *result; + assert(kwargs == NULL || PyDict_Check(kwargs)); + nk = kwargs ? PyDict_Size(kwargs) : 0; + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) { + return NULL; + } + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) { + return NULL; + } + #endif + if ( +#if PY_MAJOR_VERSION >= 3 + co->co_kwonlyargcount == 0 && +#endif + likely(kwargs == NULL || nk == 0) && + co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { + if (argdefs == NULL && co->co_argcount == nargs) { + result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); + goto done; + } + else if (nargs == 0 && argdefs != NULL + && co->co_argcount == Py_SIZE(argdefs)) { + /* function called with no arguments, but all parameters have + a default value: use default values as arguments .*/ + args = &PyTuple_GET_ITEM(argdefs, 0); + result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); + goto done; + } + } + if (kwargs != NULL) { + Py_ssize_t pos, i; + kwtuple = PyTuple_New(2 * nk); + if (kwtuple == NULL) { + result = NULL; + goto done; + } + k = &PyTuple_GET_ITEM(kwtuple, 0); + pos 
= i = 0; + while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { + Py_INCREF(k[i]); + Py_INCREF(k[i+1]); + i += 2; + } + nk = i / 2; + } + else { + kwtuple = NULL; + k = NULL; + } + closure = PyFunction_GET_CLOSURE(func); +#if PY_MAJOR_VERSION >= 3 + kwdefs = PyFunction_GET_KW_DEFAULTS(func); +#endif + if (argdefs != NULL) { + d = &PyTuple_GET_ITEM(argdefs, 0); + nd = Py_SIZE(argdefs); + } + else { + d = NULL; + nd = 0; + } +#if PY_MAJOR_VERSION >= 3 + result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, kwdefs, closure); +#else + result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, closure); +#endif + Py_XDECREF(kwtuple); +done: + Py_LeaveRecursiveCall(); + return result; +} +#endif + +/* PyObjectCallMethO */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = __Pyx_CyOrPyCFunction_GET_FUNCTION(func); + self = __Pyx_CyOrPyCFunction_GET_SELF(func); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectFastCall */ + #if PY_VERSION_HEX < 0x03090000 || CYTHON_COMPILING_IN_LIMITED_API +static PyObject* __Pyx_PyObject_FastCall_fallback(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs) { + PyObject *argstuple; + PyObject *result = 0; + size_t i; + argstuple = PyTuple_New((Py_ssize_t)nargs); + if (unlikely(!argstuple)) return NULL; + for (i = 0; i < nargs; i++) { + Py_INCREF(args[i]); + if 
(__Pyx_PyTuple_SET_ITEM(argstuple, (Py_ssize_t)i, args[i]) < 0) goto bad; + } + result = __Pyx_PyObject_Call(func, argstuple, kwargs); + bad: + Py_DECREF(argstuple); + return result; +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t _nargs, PyObject *kwargs) { + Py_ssize_t nargs = __Pyx_PyVectorcall_NARGS(_nargs); +#if CYTHON_COMPILING_IN_CPYTHON + if (nargs == 0 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_NOARGS)) + return __Pyx_PyObject_CallMethO(func, NULL); + } + else if (nargs == 1 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_O)) + return __Pyx_PyObject_CallMethO(func, args[0]); + } +#endif + #if PY_VERSION_HEX < 0x030800B1 + #if CYTHON_FAST_PYCCALL + if (PyCFunction_Check(func)) { + if (kwargs) { + return _PyCFunction_FastCallDict(func, args, nargs, kwargs); + } else { + return _PyCFunction_FastCallKeywords(func, args, nargs, NULL); + } + } + #if PY_VERSION_HEX >= 0x030700A1 + if (!kwargs && __Pyx_IS_TYPE(func, &PyMethodDescr_Type)) { + return _PyMethodDescr_FastCallKeywords(func, args, nargs, NULL); + } + #endif + #endif + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs); + } + #endif + #endif + if (kwargs == NULL) { + #if CYTHON_VECTORCALL + #if PY_VERSION_HEX < 0x03090000 + vectorcallfunc f = _PyVectorcall_Function(func); + #else + vectorcallfunc f = PyVectorcall_Function(func); + #endif + if (f) { + return f(func, args, (size_t)nargs, NULL); + } + #elif defined(__Pyx_CyFunction_USED) && CYTHON_BACKPORT_VECTORCALL + if (__Pyx_CyFunction_CheckExact(func)) { + __pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func); + if (f) return f(func, args, (size_t)nargs, NULL); + } + #endif + } + if (nargs == 0) { + return __Pyx_PyObject_Call(func, __pyx_empty_tuple, kwargs); + } + #if 
PY_VERSION_HEX >= 0x03090000 && !CYTHON_COMPILING_IN_LIMITED_API + return PyObject_VectorcallDict(func, args, (size_t)nargs, kwargs); + #else + return __Pyx_PyObject_FastCall_fallback(func, args, (size_t)nargs, kwargs); + #endif +} + +/* PyObjectCallOneArg */ + static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *args[2] = {NULL, arg}; + return __Pyx_PyObject_FastCall(func, args+1, 1 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); +} + +/* ObjectGetItem */ + #if CYTHON_USE_TYPE_SLOTS +static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject *index) { + PyObject *runerr = NULL; + Py_ssize_t key_value; + key_value = __Pyx_PyIndex_AsSsize_t(index); + if (likely(key_value != -1 || !(runerr = PyErr_Occurred()))) { + return __Pyx_GetItemInt_Fast(obj, key_value, 0, 1, 1); + } + if (PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) { + __Pyx_TypeName index_type_name = __Pyx_PyType_GetName(Py_TYPE(index)); + PyErr_Clear(); + PyErr_Format(PyExc_IndexError, + "cannot fit '" __Pyx_FMT_TYPENAME "' into an index-sized integer", index_type_name); + __Pyx_DECREF_TypeName(index_type_name); + } + return NULL; +} +static PyObject *__Pyx_PyObject_GetItem_Slow(PyObject *obj, PyObject *key) { + __Pyx_TypeName obj_type_name; + if (likely(PyType_Check(obj))) { + PyObject *meth = __Pyx_PyObject_GetAttrStrNoError(obj, __pyx_n_s_class_getitem); + if (!meth) { + PyErr_Clear(); + } else { + PyObject *result = __Pyx_PyObject_CallOneArg(meth, key); + Py_DECREF(meth); + return result; + } + } + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' object is not subscriptable", obj_type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return NULL; +} +static PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject *key) { + PyTypeObject *tp = Py_TYPE(obj); + PyMappingMethods *mm = tp->tp_as_mapping; + PySequenceMethods *sm = tp->tp_as_sequence; + if (likely(mm && mm->mp_subscript)) { 
+ return mm->mp_subscript(obj, key); + } + if (likely(sm && sm->sq_item)) { + return __Pyx_PyObject_GetIndex(obj, key); + } + return __Pyx_PyObject_GetItem_Slow(obj, key); +} +#endif + +/* ExtTypeTest */ + static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) { + __Pyx_TypeName obj_type_name; + __Pyx_TypeName type_name; + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + if (likely(__Pyx_TypeCheck(obj, type))) + return 1; + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + type_name = __Pyx_PyType_GetName(type); + PyErr_Format(PyExc_TypeError, + "Cannot convert " __Pyx_FMT_TYPENAME " to " __Pyx_FMT_TYPENAME, + obj_type_name, type_name); + __Pyx_DECREF_TypeName(obj_type_name); + __Pyx_DECREF_TypeName(type_name); + return 0; +} + +/* PyDictVersioning */ + #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0; +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { + PyObject **dictptr = NULL; + Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; + if (offset) { +#if CYTHON_COMPILING_IN_CPYTHON + dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); +#else + dictptr = _PyObject_GetDictPtr(obj); +#endif + } + return (dictptr && *dictptr) ? 
__PYX_GET_DICT_VERSION(*dictptr) : 0; +} +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) + return 0; + return obj_dict_version == __Pyx_get_object_dict_version(obj); +} +#endif + +/* GetModuleGlobalName */ + #if CYTHON_USE_DICT_VERSIONS +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value) +#else +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name) +#endif +{ + PyObject *result; +#if !CYTHON_AVOID_BORROWED_REFS +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && PY_VERSION_HEX < 0x030d0000 + result = _PyDict_GetItem_KnownHash(__pyx_d, name, ((PyASCIIObject *) name)->hash); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } else if (unlikely(PyErr_Occurred())) { + return NULL; + } +#elif CYTHON_COMPILING_IN_LIMITED_API + if (unlikely(!__pyx_m)) { + return NULL; + } + result = PyObject_GetAttr(__pyx_m, name); + if (likely(result)) { + return result; + } +#else + result = PyDict_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } +#endif +#else + result = PyObject_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } + PyErr_Clear(); +#endif + return __Pyx_GetBuiltinName(name); +} + + +static CYTHON_INLINE void* __Pyx_BufPtrFull1d_imp(void* buf, Py_ssize_t i0, Py_ssize_t s0, Py_ssize_t o0) { + char* ptr = (char*)buf; +ptr += s0 * i0; +if (o0 >= 0) ptr = *((char**)ptr) + o0; + +return ptr; +} + /* TypeImport */ + #ifndef __PYX_HAVE_RT_ImportType_3_0_11 +#define 
__PYX_HAVE_RT_ImportType_3_0_11 +static PyTypeObject *__Pyx_ImportType_3_0_11(PyObject *module, const char *module_name, const char *class_name, + size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_0_11 check_size) +{ + PyObject *result = 0; + char warning[200]; + Py_ssize_t basicsize; + Py_ssize_t itemsize; +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *py_basicsize; + PyObject *py_itemsize; +#endif + result = PyObject_GetAttrString(module, class_name); + if (!result) + goto bad; + if (!PyType_Check(result)) { + PyErr_Format(PyExc_TypeError, + "%.200s.%.200s is not a type object", + module_name, class_name); + goto bad; + } +#if !CYTHON_COMPILING_IN_LIMITED_API + basicsize = ((PyTypeObject *)result)->tp_basicsize; + itemsize = ((PyTypeObject *)result)->tp_itemsize; +#else + py_basicsize = PyObject_GetAttrString(result, "__basicsize__"); + if (!py_basicsize) + goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred()) + goto bad; + py_itemsize = PyObject_GetAttrString(result, "__itemsize__"); + if (!py_itemsize) + goto bad; + itemsize = PyLong_AsSsize_t(py_itemsize); + Py_DECREF(py_itemsize); + py_itemsize = 0; + if (itemsize == (Py_ssize_t)-1 && PyErr_Occurred()) + goto bad; +#endif + if (itemsize) { + if (size % alignment) { + alignment = size % alignment; + } + if (itemsize < (Py_ssize_t)alignment) + itemsize = (Py_ssize_t)alignment; + } + if ((size_t)(basicsize + itemsize) < size) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s size changed, may indicate binary incompatibility. " + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize+itemsize); + goto bad; + } + if (check_size == __Pyx_ImportType_CheckSize_Error_3_0_11 && + ((size_t)basicsize > size || (size_t)(basicsize + itemsize) < size)) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s size changed, may indicate binary incompatibility. 
" + "Expected %zd from C header, got %zd-%zd from PyObject", + module_name, class_name, size, basicsize, basicsize+itemsize); + goto bad; + } + else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_0_11 && (size_t)basicsize > size) { + PyOS_snprintf(warning, sizeof(warning), + "%s.%s size changed, may indicate binary incompatibility. " + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize); + if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad; + } + return (PyTypeObject *)result; +bad: + Py_XDECREF(result); + return NULL; +} +#endif + +/* Import */ + static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *module = 0; + PyObject *empty_dict = 0; + PyObject *empty_list = 0; + #if PY_MAJOR_VERSION < 3 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (unlikely(!py_import)) + goto bad; + if (!from_list) { + empty_list = PyList_New(0); + if (unlikely(!empty_list)) + goto bad; + from_list = empty_list; + } + #endif + empty_dict = PyDict_New(); + if (unlikely(!empty_dict)) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if (strchr(__Pyx_MODULE_NAME, '.') != NULL) { + module = PyImport_ImportModuleLevelObject( + name, __pyx_d, empty_dict, from_list, 1); + if (unlikely(!module)) { + if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_MAJOR_VERSION < 3 + PyObject *py_level = PyInt_FromLong(level); + if (unlikely(!py_level)) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, __pyx_d, empty_dict, from_list, py_level, (PyObject *)NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, __pyx_d, empty_dict, from_list, level); + #endif + } + } +bad: + Py_XDECREF(empty_dict); + Py_XDECREF(empty_list); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_import); + #endif + return module; +} + +/* 
ImportDottedModule */ + #if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Error(PyObject *name, PyObject *parts_tuple, Py_ssize_t count) { + PyObject *partial_name = NULL, *slice = NULL, *sep = NULL; + if (unlikely(PyErr_Occurred())) { + PyErr_Clear(); + } + if (likely(PyTuple_GET_SIZE(parts_tuple) == count)) { + partial_name = name; + } else { + slice = PySequence_GetSlice(parts_tuple, 0, count); + if (unlikely(!slice)) + goto bad; + sep = PyUnicode_FromStringAndSize(".", 1); + if (unlikely(!sep)) + goto bad; + partial_name = PyUnicode_Join(sep, slice); + } + PyErr_Format( +#if PY_MAJOR_VERSION < 3 + PyExc_ImportError, + "No module named '%s'", PyString_AS_STRING(partial_name)); +#else +#if PY_VERSION_HEX >= 0x030600B1 + PyExc_ModuleNotFoundError, +#else + PyExc_ImportError, +#endif + "No module named '%U'", partial_name); +#endif +bad: + Py_XDECREF(sep); + Py_XDECREF(slice); + Py_XDECREF(partial_name); + return NULL; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Lookup(PyObject *name) { + PyObject *imported_module; +#if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) + PyObject *modules = PyImport_GetModuleDict(); + if (unlikely(!modules)) + return NULL; + imported_module = __Pyx_PyDict_GetItemStr(modules, name); + Py_XINCREF(imported_module); +#else + imported_module = PyImport_GetModule(name); +#endif + return imported_module; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple) { + Py_ssize_t i, nparts; + nparts = PyTuple_GET_SIZE(parts_tuple); + for (i=1; i < nparts && module; i++) { + PyObject *part, *submodule; +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + part = PyTuple_GET_ITEM(parts_tuple, i); +#else + part = PySequence_ITEM(parts_tuple, i); +#endif + submodule = __Pyx_PyObject_GetAttrStrNoError(module, part); +#if !(CYTHON_ASSUME_SAFE_MACROS && 
!CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(part); +#endif + Py_DECREF(module); + module = submodule; + } + if (unlikely(!module)) { + return __Pyx__ImportDottedModule_Error(name, parts_tuple, i); + } + return module; +} +#endif +static PyObject *__Pyx__ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if PY_MAJOR_VERSION < 3 + PyObject *module, *from_list, *star = __pyx_n_s__3; + CYTHON_UNUSED_VAR(parts_tuple); + from_list = PyList_New(1); + if (unlikely(!from_list)) + return NULL; + Py_INCREF(star); + PyList_SET_ITEM(from_list, 0, star); + module = __Pyx_Import(name, from_list, 0); + Py_DECREF(from_list); + return module; +#else + PyObject *imported_module; + PyObject *module = __Pyx_Import(name, NULL, 0); + if (!parts_tuple || unlikely(!module)) + return module; + imported_module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(imported_module)) { + Py_DECREF(module); + return imported_module; + } + PyErr_Clear(); + return __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); +#endif +} +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030400B1 + PyObject *module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(module)) { + PyObject *spec = __Pyx_PyObject_GetAttrStrNoError(module, __pyx_n_s_spec); + if (likely(spec)) { + PyObject *unsafe = __Pyx_PyObject_GetAttrStrNoError(spec, __pyx_n_s_initializing); + if (likely(!unsafe || !__Pyx_PyObject_IsTrue(unsafe))) { + Py_DECREF(spec); + spec = NULL; + } + Py_XDECREF(unsafe); + } + if (likely(!spec)) { + PyErr_Clear(); + return module; + } + Py_DECREF(spec); + Py_DECREF(module); + } else if (PyErr_Occurred()) { + PyErr_Clear(); + } +#endif + return __Pyx__ImportDottedModule(name, parts_tuple); +} + +/* ImportDottedModuleRelFirst */ + static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject *name, PyObject *parts_tuple) { + PyObject *module; + PyObject *from_list = NULL; +#if PY_MAJOR_VERSION < 3 + 
PyObject *star = __pyx_n_s__3; + from_list = PyList_New(1); + if (unlikely(!from_list)) + return NULL; + Py_INCREF(star); + PyList_SET_ITEM(from_list, 0, star); +#endif + module = __Pyx_Import(name, from_list, -1); + Py_XDECREF(from_list); + if (module) { + #if PY_MAJOR_VERSION >= 3 + if (parts_tuple) { + module = __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); + } + #endif + return module; + } + if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) + return NULL; + PyErr_Clear(); + return __Pyx_ImportDottedModule(name, parts_tuple); +} + +/* FixUpExtensionType */ + #if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type) { +#if PY_VERSION_HEX > 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + CYTHON_UNUSED_VAR(spec); + CYTHON_UNUSED_VAR(type); +#else + const PyType_Slot *slot = spec->slots; + while (slot && slot->slot && slot->slot != Py_tp_members) + slot++; + if (slot && slot->slot == Py_tp_members) { + int changed = 0; +#if !(PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON) + const +#endif + PyMemberDef *memb = (PyMemberDef*) slot->pfunc; + while (memb && memb->name) { + if (memb->name[0] == '_' && memb->name[1] == '_') { +#if PY_VERSION_HEX < 0x030900b1 + if (strcmp(memb->name, "__weaklistoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_weaklistoffset = memb->offset; + changed = 1; + } + else if (strcmp(memb->name, "__dictoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_dictoffset = memb->offset; + changed = 1; + } +#if CYTHON_METH_FASTCALL + else if (strcmp(memb->name, "__vectorcalloffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); +#if PY_VERSION_HEX >= 0x030800b4 + type->tp_vectorcall_offset = memb->offset; +#else + type->tp_print = (printfunc) memb->offset; +#endif + changed = 1; + } +#endif +#else + if ((0)); +#endif +#if 
PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON + else if (strcmp(memb->name, "__module__") == 0) { + PyObject *descr; + assert(memb->type == T_OBJECT); + assert(memb->flags == 0 || memb->flags == READONLY); + descr = PyDescr_NewMember(type, memb); + if (unlikely(!descr)) + return -1; + if (unlikely(PyDict_SetItem(type->tp_dict, PyDescr_NAME(descr), descr) < 0)) { + Py_DECREF(descr); + return -1; + } + Py_DECREF(descr); + changed = 1; + } +#endif + } + memb++; + } + if (changed) + PyType_Modified(type); + } +#endif + return 0; +} +#endif + +/* FetchSharedCythonModule */ + static PyObject *__Pyx_FetchSharedCythonABIModule(void) { + return __Pyx_PyImport_AddModuleRef((char*) __PYX_ABI_MODULE_NAME); +} + +/* FetchCommonType */ + static int __Pyx_VerifyCachedType(PyObject *cached_type, + const char *name, + Py_ssize_t basicsize, + Py_ssize_t expected_basicsize) { + if (!PyType_Check(cached_type)) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s is not a type object", name); + return -1; + } + if (basicsize != expected_basicsize) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s has the wrong size, try recompiling", + name); + return -1; + } + return 0; +} +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) { + PyObject* abi_module; + const char* object_name; + PyTypeObject *cached_type = NULL; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + object_name = strrchr(type->tp_name, '.'); + object_name = object_name ? 
object_name+1 : type->tp_name; + cached_type = (PyTypeObject*) PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + if (__Pyx_VerifyCachedType( + (PyObject *)cached_type, + object_name, + cached_type->tp_basicsize, + type->tp_basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + if (PyType_Ready(type) < 0) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, (PyObject *)type) < 0) + goto bad; + Py_INCREF(type); + cached_type = type; +done: + Py_DECREF(abi_module); + return cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#else +static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) { + PyObject *abi_module, *cached_type = NULL; + const char* object_name = strrchr(spec->name, '.'); + object_name = object_name ? object_name+1 : spec->name; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + cached_type = PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + Py_ssize_t basicsize; +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *py_basicsize; + py_basicsize = PyObject_GetAttrString(cached_type, "__basicsize__"); + if (unlikely(!py_basicsize)) goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (unlikely(basicsize == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; +#else + basicsize = likely(PyType_Check(cached_type)) ? 
((PyTypeObject*) cached_type)->tp_basicsize : -1; +#endif + if (__Pyx_VerifyCachedType( + cached_type, + object_name, + basicsize, + spec->basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + CYTHON_UNUSED_VAR(module); + cached_type = __Pyx_PyType_FromModuleAndSpec(abi_module, spec, bases); + if (unlikely(!cached_type)) goto bad; + if (unlikely(__Pyx_fix_up_extension_type_from_spec(spec, (PyTypeObject *) cached_type) < 0)) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, cached_type) < 0) goto bad; +done: + Py_DECREF(abi_module); + assert(cached_type == NULL || PyType_Check(cached_type)); + return (PyTypeObject *) cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#endif + +/* PyVectorcallFastCallDict */ + #if CYTHON_METH_FASTCALL +static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + PyObject *res = NULL; + PyObject *kwnames; + PyObject **newargs; + PyObject **kwvalues; + Py_ssize_t i, pos; + size_t j; + PyObject *key, *value; + unsigned long keys_are_strings; + Py_ssize_t nkw = PyDict_GET_SIZE(kw); + newargs = (PyObject **)PyMem_Malloc((nargs + (size_t)nkw) * sizeof(args[0])); + if (unlikely(newargs == NULL)) { + PyErr_NoMemory(); + return NULL; + } + for (j = 0; j < nargs; j++) newargs[j] = args[j]; + kwnames = PyTuple_New(nkw); + if (unlikely(kwnames == NULL)) { + PyMem_Free(newargs); + return NULL; + } + kwvalues = newargs + nargs; + pos = i = 0; + keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; + while (PyDict_Next(kw, &pos, &key, &value)) { + keys_are_strings &= Py_TYPE(key)->tp_flags; + Py_INCREF(key); + Py_INCREF(value); + PyTuple_SET_ITEM(kwnames, i, key); + kwvalues[i] = value; + i++; + } + if (unlikely(!keys_are_strings)) { + PyErr_SetString(PyExc_TypeError, "keywords must be strings"); + goto cleanup; + } + res = vc(func, newargs, 
nargs, kwnames); +cleanup: + Py_DECREF(kwnames); + for (i = 0; i < nkw; i++) + Py_DECREF(kwvalues[i]); + PyMem_Free(newargs); + return res; +} +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + if (likely(kw == NULL) || PyDict_GET_SIZE(kw) == 0) { + return vc(func, args, nargs, NULL); + } + return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw); +} +#endif + +/* CythonFunctionShared */ + #if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + if (__Pyx_CyFunction_Check(func)) { + return PyCFunction_GetFunction(((__pyx_CyFunctionObject*)func)->func) == (PyCFunction) cfunc; + } else if (PyCFunction_Check(func)) { + return PyCFunction_GetFunction(func) == (PyCFunction) cfunc; + } + return 0; +} +#else +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + return __Pyx_CyOrPyCFunction_Check(func) && __Pyx_CyOrPyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +} +#endif +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj) { +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + __Pyx_Py_XDECREF_SET( + __Pyx_CyFunction_GetClassObj(f), + ((classobj) ? __Pyx_NewRef(classobj) : NULL)); +#else + __Pyx_Py_XDECREF_SET( + ((PyCMethodObject *) (f))->mm_class, + (PyTypeObject*)((classobj) ? 
__Pyx_NewRef(classobj) : NULL)); +#endif +} +static PyObject * +__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, void *closure) +{ + CYTHON_UNUSED_VAR(closure); + if (unlikely(op->func_doc == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_doc = PyObject_GetAttrString(op->func, "__doc__"); + if (unlikely(!op->func_doc)) return NULL; +#else + if (((PyCFunctionObject*)op)->m_ml->ml_doc) { +#if PY_MAJOR_VERSION >= 3 + op->func_doc = PyUnicode_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#else + op->func_doc = PyString_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#endif + if (unlikely(op->func_doc == NULL)) + return NULL; + } else { + Py_INCREF(Py_None); + return Py_None; + } +#endif + } + Py_INCREF(op->func_doc); + return op->func_doc; +} +static int +__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (value == NULL) { + value = Py_None; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_doc, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_name == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_name = PyObject_GetAttrString(op->func, "__name__"); +#elif PY_MAJOR_VERSION >= 3 + op->func_name = PyUnicode_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#else + op->func_name = PyString_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#endif + if (unlikely(op->func_name == NULL)) + return NULL; + } + Py_INCREF(op->func_name); + return op->func_name; +} +static int +__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__name__ must be set to a string object"); 
+ return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_name, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_qualname); + return op->func_qualname; +} +static int +__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__qualname__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_qualname, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_dict == NULL)) { + op->func_dict = PyDict_New(); + if (unlikely(op->func_dict == NULL)) + return NULL; + } + Py_INCREF(op->func_dict); + return op->func_dict; +} +static int +__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(value == NULL)) { + PyErr_SetString(PyExc_TypeError, + "function's dictionary may not be deleted"); + return -1; + } + if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "setting function's dictionary to a non-dict"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_dict, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_globals); + return op->func_globals; +} +static PyObject * +__Pyx_CyFunction_get_closure(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(op); + CYTHON_UNUSED_VAR(context); + Py_INCREF(Py_None); + return Py_None; +} +static PyObject * 
+__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, void *context) +{ + PyObject* result = (op->func_code) ? op->func_code : Py_None; + CYTHON_UNUSED_VAR(context); + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) { + int result = 0; + PyObject *res = op->defaults_getter((PyObject *) op); + if (unlikely(!res)) + return -1; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + op->defaults_tuple = PyTuple_GET_ITEM(res, 0); + Py_INCREF(op->defaults_tuple); + op->defaults_kwdict = PyTuple_GET_ITEM(res, 1); + Py_INCREF(op->defaults_kwdict); + #else + op->defaults_tuple = __Pyx_PySequence_ITEM(res, 0); + if (unlikely(!op->defaults_tuple)) result = -1; + else { + op->defaults_kwdict = __Pyx_PySequence_ITEM(res, 1); + if (unlikely(!op->defaults_kwdict)) result = -1; + } + #endif + Py_DECREF(res); + return result; +} +static int +__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyTuple_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__defaults__ must be set to a tuple object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__defaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_tuple, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_tuple; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_tuple; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + 
CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__kwdefaults__ must be set to a dict object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__kwdefaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_kwdict, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_kwdict; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_kwdict; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value || value == Py_None) { + value = NULL; + } else if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__annotations__ must be set to a dict object"); + return -1; + } + Py_XINCREF(value); + __Pyx_Py_XDECREF_SET(op->func_annotations, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->func_annotations; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + result = PyDict_New(); + if (unlikely(!result)) return NULL; + op->func_annotations = result; + } + Py_INCREF(result); + return result; +} +static PyObject * +__Pyx_CyFunction_get_is_coroutine(__pyx_CyFunctionObject *op, void *context) { + int is_coroutine; + CYTHON_UNUSED_VAR(context); + if (op->func_is_coroutine) { + return __Pyx_NewRef(op->func_is_coroutine); + } + is_coroutine = op->flags & __Pyx_CYFUNCTION_COROUTINE; +#if PY_VERSION_HEX >= 0x03050000 + if (is_coroutine) { + PyObject 
*module, *fromlist, *marker = __pyx_n_s_is_coroutine; + fromlist = PyList_New(1); + if (unlikely(!fromlist)) return NULL; + Py_INCREF(marker); +#if CYTHON_ASSUME_SAFE_MACROS + PyList_SET_ITEM(fromlist, 0, marker); +#else + if (unlikely(PyList_SetItem(fromlist, 0, marker) < 0)) { + Py_DECREF(marker); + Py_DECREF(fromlist); + return NULL; + } +#endif + module = PyImport_ImportModuleLevelObject(__pyx_n_s_asyncio_coroutines, NULL, NULL, fromlist, 0); + Py_DECREF(fromlist); + if (unlikely(!module)) goto ignore; + op->func_is_coroutine = __Pyx_PyObject_GetAttrStr(module, marker); + Py_DECREF(module); + if (likely(op->func_is_coroutine)) { + return __Pyx_NewRef(op->func_is_coroutine); + } +ignore: + PyErr_Clear(); + } +#endif + op->func_is_coroutine = __Pyx_PyBool_FromLong(is_coroutine); + return __Pyx_NewRef(op->func_is_coroutine); +} +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject * +__Pyx_CyFunction_get_module(__pyx_CyFunctionObject *op, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_GetAttrString(op->func, "__module__"); +} +static int +__Pyx_CyFunction_set_module(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_SetAttrString(op->func, "__module__", value); +} +#endif +static PyGetSetDef __pyx_CyFunction_getsets[] = { + {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0}, + {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, 
(setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0}, + {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0}, + {(char *) "_is_coroutine", (getter)__Pyx_CyFunction_get_is_coroutine, 0, 0, 0}, +#if CYTHON_COMPILING_IN_LIMITED_API + {"__module__", (getter)__Pyx_CyFunction_get_module, (setter)__Pyx_CyFunction_set_module, 0, 0}, +#endif + {0, 0, 0, 0, 0} +}; +static PyMemberDef __pyx_CyFunction_members[] = { +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0}, +#endif +#if CYTHON_USE_TYPE_SPECS + {(char *) "__dictoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_dict), READONLY, 0}, +#if CYTHON_METH_FASTCALL +#if CYTHON_BACKPORT_VECTORCALL + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_vectorcall), READONLY, 0}, +#else +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0}, +#endif +#endif +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, 
func_weakreflist), READONLY, 0}, +#else + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0}, +#endif +#endif + {0, 0, 0, 0, 0} +}; +static PyObject * +__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, PyObject *args) +{ + CYTHON_UNUSED_VAR(args); +#if PY_MAJOR_VERSION >= 3 + Py_INCREF(m->func_qualname); + return m->func_qualname; +#else + return PyString_FromString(((PyCFunctionObject*)m)->m_ml->ml_name); +#endif +} +static PyMethodDef __pyx_CyFunction_methods[] = { + {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0}, + {0, 0, 0, 0} +}; +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist) +#else +#define __Pyx_CyFunction_weakreflist(cyfunc) (((PyCFunctionObject*)cyfunc)->m_weakreflist) +#endif +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { +#if !CYTHON_COMPILING_IN_LIMITED_API + PyCFunctionObject *cf = (PyCFunctionObject*) op; +#endif + if (unlikely(op == NULL)) + return NULL; +#if CYTHON_COMPILING_IN_LIMITED_API + op->func = PyCFunction_NewEx(ml, (PyObject*)op, module); + if (unlikely(!op->func)) return NULL; +#endif + op->flags = flags; + __Pyx_CyFunction_weakreflist(op) = NULL; +#if !CYTHON_COMPILING_IN_LIMITED_API + cf->m_ml = ml; + cf->m_self = (PyObject *) op; +#endif + Py_XINCREF(closure); + op->func_closure = closure; +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_XINCREF(module); + cf->m_module = module; +#endif + op->func_dict = NULL; + op->func_name = NULL; + Py_INCREF(qualname); + op->func_qualname = qualname; + op->func_doc = NULL; +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + op->func_classobj = NULL; +#else + ((PyCMethodObject*)op)->mm_class = NULL; +#endif + op->func_globals = globals; + Py_INCREF(op->func_globals); + 
Py_XINCREF(code); + op->func_code = code; + op->defaults_pyobjects = 0; + op->defaults_size = 0; + op->defaults = NULL; + op->defaults_tuple = NULL; + op->defaults_kwdict = NULL; + op->defaults_getter = NULL; + op->func_annotations = NULL; + op->func_is_coroutine = NULL; +#if CYTHON_METH_FASTCALL + switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { + case METH_NOARGS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS; + break; + case METH_O: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O; + break; + case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD; + break; + case METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS; + break; + case METH_VARARGS | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = NULL; + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + Py_DECREF(op); + return NULL; + } +#endif + return (PyObject *) op; +} +static int +__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m) +{ + Py_CLEAR(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_CLEAR(m->func); +#else + Py_CLEAR(((PyCFunctionObject*)m)->m_module); +#endif + Py_CLEAR(m->func_dict); + Py_CLEAR(m->func_name); + Py_CLEAR(m->func_qualname); + Py_CLEAR(m->func_doc); + Py_CLEAR(m->func_globals); + Py_CLEAR(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API +#if PY_VERSION_HEX < 0x030900B1 + Py_CLEAR(__Pyx_CyFunction_GetClassObj(m)); +#else + { + PyObject *cls = (PyObject*) ((PyCMethodObject *) (m))->mm_class; + ((PyCMethodObject *) (m))->mm_class = NULL; + Py_XDECREF(cls); + } +#endif +#endif + Py_CLEAR(m->defaults_tuple); + Py_CLEAR(m->defaults_kwdict); + Py_CLEAR(m->func_annotations); + Py_CLEAR(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = 
__Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_XDECREF(pydefaults[i]); + PyObject_Free(m->defaults); + m->defaults = NULL; + } + return 0; +} +static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + if (__Pyx_CyFunction_weakreflist(m) != NULL) + PyObject_ClearWeakRefs((PyObject *) m); + __Pyx_CyFunction_clear(m); + __Pyx_PyHeapTypeObject_GC_Del(m); +} +static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + PyObject_GC_UnTrack(m); + __Pyx__CyFunction_dealloc(m); +} +static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg) +{ + Py_VISIT(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(m->func); +#else + Py_VISIT(((PyCFunctionObject*)m)->m_module); +#endif + Py_VISIT(m->func_dict); + Py_VISIT(m->func_name); + Py_VISIT(m->func_qualname); + Py_VISIT(m->func_doc); + Py_VISIT(m->func_globals); + Py_VISIT(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(__Pyx_CyFunction_GetClassObj(m)); +#endif + Py_VISIT(m->defaults_tuple); + Py_VISIT(m->defaults_kwdict); + Py_VISIT(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_VISIT(pydefaults[i]); + } + return 0; +} +static PyObject* +__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op) +{ +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_FromFormat("", + op->func_qualname, (void *)op); +#else + return PyString_FromFormat("", + PyString_AsString(op->func_qualname), (void *)op); +#endif +} +static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *f = ((__pyx_CyFunctionObject*)func)->func; + PyObject *py_name = NULL; + PyCFunction meth; + int flags; + meth = PyCFunction_GetFunction(f); + if (unlikely(!meth)) return NULL; + flags = PyCFunction_GetFlags(f); + if (unlikely(flags 
< 0)) return NULL; +#else + PyCFunctionObject* f = (PyCFunctionObject*)func; + PyCFunction meth = f->m_ml->ml_meth; + int flags = f->m_ml->ml_flags; +#endif + Py_ssize_t size; + switch (flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) { + case METH_VARARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) + return (*meth)(self, arg); + break; + case METH_VARARGS | METH_KEYWORDS: + return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw); + case METH_NOARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 0)) + return (*meth)(self, NULL); +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + case METH_O: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 1)) { + PyObject *result, *arg0; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + arg0 = PyTuple_GET_ITEM(arg, 0); + #else + arg0 = __Pyx_PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL; + #endif + result = (*meth)(self, arg0); + #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(arg0); + #endif + return result; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes exactly one 
argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + return NULL; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, "%.200S() takes no keyword arguments", + py_name); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", + f->m_ml->ml_name); +#endif + return NULL; +} +static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *self, *result; +#if CYTHON_COMPILING_IN_LIMITED_API + self = PyCFunction_GetSelf(((__pyx_CyFunctionObject*)func)->func); + if (unlikely(!self) && PyErr_Occurred()) return NULL; +#else + self = ((PyCFunctionObject*)func)->m_self; +#endif + result = __Pyx_CyFunction_CallMethod(func, self, arg, kw); + return result; +} +static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) { + PyObject *result; + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func; +#if CYTHON_METH_FASTCALL + __pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc); + if (vc) { +#if CYTHON_ASSUME_SAFE_MACROS + return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), (size_t)PyTuple_GET_SIZE(args), kw); +#else + (void) &__Pyx_PyVectorcall_FastCallDict; + return PyVectorcall_Call(func, args, kw); +#endif + } +#endif + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + Py_ssize_t argc; + PyObject *new_args; + PyObject *self; +#if CYTHON_ASSUME_SAFE_MACROS + argc = PyTuple_GET_SIZE(args); +#else + argc = PyTuple_Size(args); + if 
(unlikely(!argc) < 0) return NULL; +#endif + new_args = PyTuple_GetSlice(args, 1, argc); + if (unlikely(!new_args)) + return NULL; + self = PyTuple_GetItem(args, 0); + if (unlikely(!self)) { + Py_DECREF(new_args); +#if PY_MAJOR_VERSION > 2 + PyErr_Format(PyExc_TypeError, + "unbound method %.200S() needs an argument", + cyfunc->func_qualname); +#else + PyErr_SetString(PyExc_TypeError, + "unbound method needs an argument"); +#endif + return NULL; + } + result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw); + Py_DECREF(new_args); + } else { + result = __Pyx_CyFunction_Call(func, args, kw); + } + return result; +} +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames) +{ + int ret = 0; + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + if (unlikely(nargs < 1)) { + PyErr_Format(PyExc_TypeError, "%.200s() needs an argument", + ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + ret = 1; + } + if (unlikely(kwnames) && unlikely(PyTuple_GET_SIZE(kwnames))) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes no keyword arguments", ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + return ret; +} +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 0)) { + 
PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, nargs); + return NULL; + } + return def->ml_meth(self, NULL); +} +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 1)) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, nargs); + return NULL; + } + return def->ml_meth(self, args[0]); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = 
(__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; + PyTypeObject *cls = (PyTypeObject *) __Pyx_CyFunction_GetClassObj(cyfunc); +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCMethod)(void(*)(void))def->ml_meth)(self, cls, args, (size_t)nargs, kwnames); +} +#endif +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_CyFunctionType_slots[] = { + {Py_tp_dealloc, (void *)__Pyx_CyFunction_dealloc}, + {Py_tp_repr, (void *)__Pyx_CyFunction_repr}, + {Py_tp_call, (void *)__Pyx_CyFunction_CallAsMethod}, + {Py_tp_traverse, (void *)__Pyx_CyFunction_traverse}, + {Py_tp_clear, (void *)__Pyx_CyFunction_clear}, + {Py_tp_methods, (void *)__pyx_CyFunction_methods}, + {Py_tp_members, (void *)__pyx_CyFunction_members}, + {Py_tp_getset, (void *)__pyx_CyFunction_getsets}, + {Py_tp_descr_get, (void *)__Pyx_PyMethod_New}, + {0, 0}, +}; +static PyType_Spec __pyx_CyFunctionType_spec = { + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if (defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL) + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + __pyx_CyFunctionType_slots +}; +#else +static PyTypeObject __pyx_CyFunctionType_type = { + PyVarObject_HEAD_INIT(0, 0) + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, + (destructor) __Pyx_CyFunction_dealloc, +#if !CYTHON_METH_FASTCALL + 0, +#elif CYTHON_BACKPORT_VECTORCALL + (printfunc)offsetof(__pyx_CyFunctionObject, 
func_vectorcall), +#else + offsetof(PyCFunctionObject, vectorcall), +#endif + 0, + 0, +#if PY_MAJOR_VERSION < 3 + 0, +#else + 0, +#endif + (reprfunc) __Pyx_CyFunction_repr, + 0, + 0, + 0, + 0, + __Pyx_CyFunction_CallAsMethod, + 0, + 0, + 0, + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + 0, + (traverseproc) __Pyx_CyFunction_traverse, + (inquiry) __Pyx_CyFunction_clear, + 0, +#if PY_VERSION_HEX < 0x030500A0 + offsetof(__pyx_CyFunctionObject, func_weakreflist), +#else + offsetof(PyCFunctionObject, m_weakreflist), +#endif + 0, + 0, + __pyx_CyFunction_methods, + __pyx_CyFunction_members, + __pyx_CyFunction_getsets, + 0, + 0, + __Pyx_PyMethod_New, + 0, + offsetof(__pyx_CyFunctionObject, func_dict), + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +#if PY_VERSION_HEX >= 0x030400a1 + 0, +#endif +#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, +#endif +#if __PYX_NEED_TP_PRINT_SLOT + 0, +#endif +#if PY_VERSION_HEX >= 0x030C0000 + 0, +#endif +#if PY_VERSION_HEX >= 0x030d00A4 + 0, +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, +#endif +}; +#endif +static int __pyx_CyFunction_init(PyObject *module) { +#if CYTHON_USE_TYPE_SPECS + __pyx_CyFunctionType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_CyFunctionType_spec, NULL); +#else + CYTHON_UNUSED_VAR(module); + __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type); +#endif + if (unlikely(__pyx_CyFunctionType == NULL)) { + return -1; + } + return 0; +} +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults = PyObject_Malloc(size); + if (unlikely(!m->defaults)) 
+ return PyErr_NoMemory(); + memset(m->defaults, 0, size); + m->defaults_pyobjects = pyobjects; + m->defaults_size = size; + return m->defaults; +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_tuple = tuple; + Py_INCREF(tuple); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_kwdict = dict; + Py_INCREF(dict); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->func_annotations = dict; + Py_INCREF(dict); +} + +/* CythonFunction */ + static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { + PyObject *op = __Pyx_CyFunction_Init( + PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType), + ml, flags, qualname, closure, module, globals, code + ); + if (likely(op)) { + PyObject_GC_Track(op); + } + return op; +} + +/* CLineInTraceback */ + #ifndef CYTHON_CLINE_IN_TRACEBACK +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) { + PyObject *use_cline; + PyObject *ptype, *pvalue, *ptraceback; +#if CYTHON_COMPILING_IN_CPYTHON + PyObject **cython_runtime_dict; +#endif + CYTHON_MAYBE_UNUSED_VAR(tstate); + if (unlikely(!__pyx_cython_runtime)) { + return c_line; + } + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); +#if CYTHON_COMPILING_IN_CPYTHON + cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); + if (likely(cython_runtime_dict)) { + __PYX_PY_DICT_LOOKUP_IF_MODIFIED( + use_cline, *cython_runtime_dict, + __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) + } else +#endif + { + PyObject *use_cline_obj = 
__Pyx_PyObject_GetAttrStrNoError(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); + if (use_cline_obj) { + use_cline = PyObject_Not(use_cline_obj) ? Py_False : Py_True; + Py_DECREF(use_cline_obj); + } else { + PyErr_Clear(); + use_cline = NULL; + } + } + if (!use_cline) { + c_line = 0; + (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); + } + else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { + c_line = 0; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + return c_line; +} +#endif + +/* CodeObjectCache */ + #if !CYTHON_COMPILING_IN_LIMITED_API +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = 
(__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + __pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} +#endif + +/* AddTraceback */ + #include "compile.h" +#include "frameobject.h" +#include "traceback.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyCode_Replace_For_AddTraceback(PyObject *code, PyObject *scratch_dict, + PyObject *firstlineno, PyObject *name) { + PyObject *replace = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_firstlineno", firstlineno))) return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_name", name))) return NULL; + replace = PyObject_GetAttrString(code, "replace"); + if (likely(replace)) { + PyObject 
*result; + result = PyObject_Call(replace, __pyx_empty_tuple, scratch_dict); + Py_DECREF(replace); + return result; + } + PyErr_Clear(); + #if __PYX_LIMITED_VERSION_HEX < 0x030780000 + { + PyObject *compiled = NULL, *result = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "code", code))) return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "type", (PyObject*)(&PyType_Type)))) return NULL; + compiled = Py_CompileString( + "out = type(code)(\n" + " code.co_argcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize,\n" + " code.co_flags, code.co_code, code.co_consts, code.co_names,\n" + " code.co_varnames, code.co_filename, co_name, co_firstlineno,\n" + " code.co_lnotab)\n", "", Py_file_input); + if (!compiled) return NULL; + result = PyEval_EvalCode(compiled, scratch_dict, scratch_dict); + Py_DECREF(compiled); + if (!result) PyErr_Print(); + Py_DECREF(result); + result = PyDict_GetItemString(scratch_dict, "out"); + if (result) Py_INCREF(result); + return result; + } + #else + return NULL; + #endif +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyObject *code_object = NULL, *py_py_line = NULL, *py_funcname = NULL, *dict = NULL; + PyObject *replace = NULL, *getframe = NULL, *frame = NULL; + PyObject *exc_type, *exc_value, *exc_traceback; + int success = 0; + if (c_line) { + (void) __pyx_cfilenm; + (void) __Pyx_CLineForTraceback(__Pyx_PyThreadState_Current, c_line); + } + PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); + code_object = Py_CompileString("_getframe()", filename, Py_eval_input); + if (unlikely(!code_object)) goto bad; + py_py_line = PyLong_FromLong(py_line); + if (unlikely(!py_py_line)) goto bad; + py_funcname = PyUnicode_FromString(funcname); + if (unlikely(!py_funcname)) goto bad; + dict = PyDict_New(); + if (unlikely(!dict)) goto bad; + { + PyObject *old_code_object = code_object; + code_object = __Pyx_PyCode_Replace_For_AddTraceback(code_object, dict, 
py_py_line, py_funcname); + Py_DECREF(old_code_object); + } + if (unlikely(!code_object)) goto bad; + getframe = PySys_GetObject("_getframe"); + if (unlikely(!getframe)) goto bad; + if (unlikely(PyDict_SetItemString(dict, "_getframe", getframe))) goto bad; + frame = PyEval_EvalCode(code_object, dict, dict); + if (unlikely(!frame) || frame == Py_None) goto bad; + success = 1; + bad: + PyErr_Restore(exc_type, exc_value, exc_traceback); + Py_XDECREF(code_object); + Py_XDECREF(py_py_line); + Py_XDECREF(py_funcname); + Py_XDECREF(dict); + Py_XDECREF(replace); + if (success) { + PyTraceBack_Here( + (struct _frame*)frame); + } + Py_XDECREF(frame); +} +#else +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = NULL; + PyObject *py_funcname = NULL; + #if PY_MAJOR_VERSION < 3 + PyObject *py_srcfile = NULL; + py_srcfile = PyString_FromString(filename); + if (!py_srcfile) goto bad; + #endif + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + funcname = PyUnicode_AsUTF8(py_funcname); + if (!funcname) goto bad; + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + if (!py_funcname) goto bad; + #endif + } + #if PY_MAJOR_VERSION < 3 + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + 
#else + py_code = PyCode_NewEmpty(filename, funcname, py_line); + #endif + Py_XDECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_funcname); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_srcfile); + #endif + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject *ptype, *pvalue, *ptraceback; + if (c_line) { + c_line = __Pyx_CLineForTraceback(tstate, c_line); + } + py_code = __pyx_find_code_object(c_line ? -c_line : py_line); + if (!py_code) { + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) { + /* If the code object creation fails, then we should clear the + fetched exception references and propagate the new exception */ + Py_XDECREF(ptype); + Py_XDECREF(pvalue); + Py_XDECREF(ptraceback); + goto bad; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + __pyx_insert_code_object(c_line ? 
-c_line : py_line, py_code); + } + py_frame = PyFrame_New( + tstate, /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + __Pyx_PyFrame_SetLineNumber(py_frame, py_line); + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} +#endif + +#if PY_MAJOR_VERSION < 3 +static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) { + __Pyx_TypeName obj_type_name; + if (PyObject_CheckBuffer(obj)) return PyObject_GetBuffer(obj, view, flags); + if (__Pyx_TypeCheck(obj, __pyx_ptype_7cpython_5array_array)) return __pyx_pw_7cpython_5array_5array_1__getbuffer__(obj, view, flags); + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' does not have the buffer interface", + obj_type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return -1; +} +static void __Pyx_ReleaseBuffer(Py_buffer *view) { + PyObject *obj = view->obj; + if (!obj) return; + if (PyObject_CheckBuffer(obj)) { + PyBuffer_Release(view); + return; + } + if ((0)) {} + else if (__Pyx_TypeCheck(obj, __pyx_ptype_7cpython_5array_array)) __pyx_pw_7cpython_5array_5array_3__releasebuffer__(obj, view); + view->obj = NULL; + Py_DECREF(obj); +} +#endif + + + /* CIntFromPyVerify */ + #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < 
zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + +/* Declarations */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return ::std::complex< float >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return x + y*(__pyx_t_float_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + __pyx_t_float_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); + } else if (fabsf(b.real) >= fabsf(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.imag); + } 
else { + float r = b.imag / b.real; + float s = (float)(1.0) / (b.real + b.imag * r); + return __pyx_t_float_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + float r = b.real / b.imag; + float s = (float)(1.0) / (b.imag + b.real * r); + return __pyx_t_float_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + float denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_float_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrtf(z.real*z.real + z.imag*z.imag); + #else + return hypotf(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + float r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + float denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + 
case 2: + return __Pyx_c_prod_float(a, a); + case 3: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, a); + case 4: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } else if ((b.imag == 0) && (a.real >= 0)) { + z.real = powf(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2f(0.0, -1.0); + } + } else { + r = __Pyx_c_abs_float(a); + theta = atan2f(a.imag, a.real); + } + lnr = logf(r); + z_r = expf(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cosf(z_theta); + z.imag = z_r * sinf(z_theta); + return z; + } + #endif +#endif + +/* Declarations */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return ::std::complex< double >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return x + y*(__pyx_t_double_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + __pyx_t_double_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real - b.real; + z.imag 
= a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else if (fabs(b.real) >= fabs(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + double r = b.imag / b.real; + double s = (double)(1.0) / (b.real + b.imag * r); + return __pyx_t_double_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + double r = b.real / b.imag; + double s = (double)(1.0) / (b.imag + b.real * r); + return __pyx_t_double_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + double denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_double_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } 
+ #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrt(z.real*z.real + z.imag*z.imag); + #else + return hypot(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + double r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + double denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + return __Pyx_c_prod_double(a, a); + case 3: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, a); + case 4: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } else if ((b.imag == 0) && (a.real >= 0)) { + z.real = pow(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2(0.0, -1.0); + } + } else { + r = __Pyx_c_abs_double(a); + theta = atan2(a.imag, a.real); + } + lnr = log(r); + z_r = exp(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cos(z_theta); + z.imag = z_r * sin(z_theta); + return z; + } + #endif +#endif + +/* Declarations */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_long_double_complex __pyx_t_long_double_complex_from_parts(long double x, long double y) { + return ::std::complex< long double >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_long_double_complex __pyx_t_long_double_complex_from_parts(long double x, long double y) { + return x + y*(__pyx_t_long_double_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_long_double_complex 
__pyx_t_long_double_complex_from_parts(long double x, long double y) { + __pyx_t_long_double_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) +#else + static CYTHON_INLINE int __Pyx_c_eq_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_sum_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { + __pyx_t_long_double_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_diff_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { + __pyx_t_long_double_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_prod_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { + __pyx_t_long_double_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_quot_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { + if (b.imag == 0) { + return __pyx_t_long_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else if (fabsl(b.real) >= fabsl(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_long_double_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + long double r = b.imag / b.real; + long double s = (long double)(1.0) / (b.real + b.imag * r); + return __pyx_t_long_double_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + long double r = b.real / b.imag; + long double s = (long double)(1.0) / (b.imag + b.real * r); + return __pyx_t_long_double_complex_from_parts( + (a.real * r + a.imag) * 
s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_quot_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { + if (b.imag == 0) { + return __pyx_t_long_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + long double denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_long_double_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_neg_long__double(__pyx_t_long_double_complex a) { + __pyx_t_long_double_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_long__double(__pyx_t_long_double_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_conj_long__double(__pyx_t_long_double_complex a) { + __pyx_t_long_double_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE long double __Pyx_c_abs_long__double(__pyx_t_long_double_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrtl(z.real*z.real + z.imag*z.imag); + #else + return hypotl(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_pow_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { + __pyx_t_long_double_complex z; + long double r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + long double denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + return __Pyx_c_prod_long__double(a, a); + case 3: + z = __Pyx_c_prod_long__double(a, a); + return __Pyx_c_prod_long__double(z, a); + case 4: + z = 
__Pyx_c_prod_long__double(a, a); + return __Pyx_c_prod_long__double(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } else if ((b.imag == 0) && (a.real >= 0)) { + z.real = powl(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2l(0.0, -1.0); + } + } else { + r = __Pyx_c_abs_long__double(a); + theta = atan2l(a.imag, a.real); + } + lnr = logl(r); + z_r = expl(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cosl(z_theta); + z.imag = z_r * sinl(z_theta); + return z; + } + #endif +#endif + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(int) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(int) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little 
= (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(int), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(int)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? "little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* CIntFromPy */ + static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(int) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if 
(unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 2 * PyLong_SHIFT)) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 3 * PyLong_SHIFT)) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 4 * PyLong_SHIFT)) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (int) -1; + if 
(unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(int) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(int) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { 
+ if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(int) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + int val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), 
Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (int) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (int) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (int) -1; + } else { + stepval = v; + } + v = NULL; + val = (int) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(int) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((int) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + 
Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(int) * 8) - bits - (is_unsigned ? 0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((int) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((int) 1) << (sizeof(int) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (int) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return 
PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(long)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? "little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* FormatTypeName */ + #if CYTHON_COMPILING_IN_LIMITED_API +static __Pyx_TypeName +__Pyx_PyType_GetName(PyTypeObject* tp) +{ + PyObject *name = __Pyx_PyObject_GetAttrStr((PyObject *)tp, + __pyx_n_s_name); + if (unlikely(name == NULL) || unlikely(!PyUnicode_Check(name))) { + PyErr_Clear(); + Py_XDECREF(name); + name = __Pyx_NewRef(__pyx_n_s__11); + } + return name; +} +#endif + +/* CIntFromPy */ + static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC 
diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(long) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + long val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 2 * PyLong_SHIFT)) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 3 * PyLong_SHIFT)) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned 
long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 4 * PyLong_SHIFT)) { + return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(long) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(long) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * 
sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned 
long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(long) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + long val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 
30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (long) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (long) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (long) -1; + } else { + stepval = v; + } + v = NULL; + val = (long) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(long) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((long) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(long) * 8) - bits - (is_unsigned ? 
0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((long) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((long) 1) << (sizeof(long) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (long) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +/* FastTypeChecks */ + #if CYTHON_COMPILING_IN_CPYTHON +static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { + while (a) { + a = __Pyx_PyType_GetSlot(a, tp_base, PyTypeObject*); + if (a == b) + return 1; + } + return b == &PyBaseObject_Type; +} +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (a == b) return 1; + mro = a->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(a, b); +} +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (cls == a || cls == b) return 1; + mro = cls->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + PyObject *base = PyTuple_GET_ITEM(mro, i); + if (base == (PyObject *)a || base == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(cls, a) || __Pyx_InBases(cls, b); +} +#if PY_MAJOR_VERSION == 2 +static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { + PyObject *exception, *value, *tb; + int res; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&exception, 
&value, &tb); + res = exc_type1 ? PyObject_IsSubclass(err, exc_type1) : 0; + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + if (!res) { + res = PyObject_IsSubclass(err, exc_type2); + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + } + __Pyx_ErrRestore(exception, value, tb); + return res; +} +#else +static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { + if (exc_type1) { + return __Pyx_IsAnySubtype2((PyTypeObject*)err, (PyTypeObject*)exc_type1, (PyTypeObject*)exc_type2); + } else { + return __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); + } +} +#endif +static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + assert(PyExceptionClass_Check(exc_type)); + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030B00A4 + return Py_Version & ~0xFFUL; +#else + const char* rt_version = Py_GetVersion(); + unsigned long version = 0; + unsigned long factor = 0x01000000UL; + unsigned int digit = 0; + int i = 0; + while (factor) { + while ('0' <= rt_version[i] && rt_version[i] <= '9') { + digit = digit * 10 + (unsigned int) (rt_version[i] - '0'); + ++i; + } + version += factor * digit; + if (rt_version[i] != '.') + break; + digit = 0; + factor >>= 8; + ++i; + } + return version; +#endif +} +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer) { + const unsigned long MAJOR_MINOR = 0xFFFF0000UL; + if ((rt_version & MAJOR_MINOR) == (ct_version & MAJOR_MINOR)) + return 0; + if (likely(allow_newer && (rt_version & MAJOR_MINOR) > (ct_version & MAJOR_MINOR))) + return 1; + { + char message[200]; + PyOS_snprintf(message, sizeof(message), + "compile time Python version %d.%d " + "of module '%.100s' " + "%s " + "runtime version %d.%d", + (int) (ct_version >> 24), (int) ((ct_version >> 16) & 0xFF), + __Pyx_MODULE_NAME, + 
(allow_newer) ? "was newer than" : "does not match", + (int) (rt_version >> 24), (int) ((rt_version >> 16) & 0xFF) + ); + return PyErr_WarnEx(NULL, message, 1); + } +} + +/* InitStrings */ + #if PY_MAJOR_VERSION >= 3 +static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) { + if (t.is_unicode | t.is_str) { + if (t.intern) { + *str = PyUnicode_InternFromString(t.s); + } else if (t.encoding) { + *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL); + } else { + *str = PyUnicode_FromStringAndSize(t.s, t.n - 1); + } + } else { + *str = PyBytes_FromStringAndSize(t.s, t.n - 1); + } + if (!*str) + return -1; + if (PyObject_Hash(*str) == -1) + return -1; + return 0; +} +#endif +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION >= 3 + __Pyx_InitString(*t, t->p); + #else + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + if (!*t->p) + return -1; + if (PyObject_Hash(*t->p) == -1) + return -1; + #endif + ++t; + } + return 0; +} + +#include +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) { + size_t len = strlen(s); + if (unlikely(len > (size_t) PY_SSIZE_T_MAX)) { + PyErr_SetString(PyExc_OverflowError, "byte string is too long"); + return -1; + } + return (Py_ssize_t) len; +} +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return __Pyx_PyUnicode_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return PyByteArray_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return 
__Pyx_PyObject_AsStringAndSize(o, &ignore); +} +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#if !CYTHON_PEP393_ENABLED +static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +} +#else +static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (likely(PyUnicode_IS_ASCII(o))) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +} +#endif +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { + return __Pyx_PyUnicode_AsStringAndSize(o, length); + } else +#endif +#if (!CYTHON_COMPILING_IN_PYPY && !CYTHON_COMPILING_IN_LIMITED_API) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r = PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static 
CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { + int retval; + if (unlikely(!x)) return -1; + retval = __Pyx_PyObject_IsTrue(x); + Py_DECREF(x); + return retval; +} +static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { + __Pyx_TypeName result_type_name = __Pyx_PyType_GetName(Py_TYPE(result)); +#if PY_MAJOR_VERSION >= 3 + if (PyLong_Check(result)) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "__int__ returned non-int (type " __Pyx_FMT_TYPENAME "). " + "The ability to return an instance of a strict subclass of int is deprecated, " + "and may be removed in a future version of Python.", + result_type_name)) { + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; + } + __Pyx_DECREF_TypeName(result_type_name); + return result; + } +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type " __Pyx_FMT_TYPENAME ")", + type_name, type_name, result_type_name); + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { +#if CYTHON_USE_TYPE_SLOTS + PyNumberMethods *m; +#endif + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x) || PyLong_Check(x))) +#else + if (likely(PyLong_Check(x))) +#endif + return __Pyx_NewRef(x); +#if CYTHON_USE_TYPE_SLOTS + m = Py_TYPE(x)->tp_as_number; + #if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = m->nb_int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = m->nb_long(x); + } + #else + if (likely(m && m->nb_int)) { + name = "int"; + res = m->nb_int(x); + } + #endif +#else + if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { + res = PyNumber_Int(x); + } 
+#endif + if (likely(res)) { +#if PY_MAJOR_VERSION < 3 + if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { +#else + if (unlikely(!PyLong_CheckExact(res))) { +#endif + return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) { + if (sizeof(Py_ssize_t) >= sizeof(long)) + return PyInt_AS_LONG(b); + else + return PyInt_AsSsize_t(b); + } +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_USE_PYLONG_INTERNALS + if (likely(__Pyx_PyLong_IsCompact(b))) { + return __Pyx_PyLong_CompactValue(b); + } else { + const digit* digits = __Pyx_PyLong_Digits(b); + const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(b); + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) 
(((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { + if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { + return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o); +#if PY_MAJOR_VERSION < 3 + } else if (likely(PyInt_CheckExact(o))) { + return PyInt_AS_LONG(o); +#endif + } else { + Py_ssize_t ival; + PyObject *x; + x = PyNumber_Index(o); + if (!x) return -1; + ival = PyInt_AsLong(x); + Py_DECREF(x); + return ival; + } +} +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { + return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +/* #### Code section: utility_code_pragmas_end ### */ +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + + + +/* #### Code section: end ### */ +#endif /* Py_PYTHON_H */ diff --git a/jcvi/assembly/chic.pyx b/jcvi/assembly/chic.pyx new file mode 100644 index 00000000..cd526243 --- /dev/null +++ b/jcvi/assembly/chic.pyx @@ -0,0 +1,105 @@ +#cython: language_level=2, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True + +""" +Cythonized version of score_evaluate() in hic.py. + +Support three versions with different objective functions: +- score_evaluate_M: distance is defined as the distance between mid-points + between contigs. Maximize Sum(n_links / distance). +- score_evaluate_P: distance is defined as the sizes of interleaving contigs + plus the harmonic mean of all link distances. Maximize Sum(n_links / distance). +- score_evaluate_Q: distance is defined as the sizes of interleaving contigs + plus the actual link distances. Maximize Sum(1 / distance) for all links. 
def score_evaluate_M(array.array[int] tour,
                     np.ndarray[INT, ndim=1] tour_sizes=None,
                     np.ndarray[INT, ndim=2] tour_M=None):
    # Score a tour as Sum(links / distance) over ordered contig pairs, where
    # distance is measured between contig mid-points along the tour.
    # Returns a 1-tuple `(score,)`.
    # NOTE(review): the `None` defaults are only placeholders; both arrays are
    # indexed unconditionally below, so callers presumably always pass them.

    # Contig sizes re-ordered to follow the tour.
    cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour]
    # Cumulative end coordinate minus half of the contig's own size, i.e. the
    # mid-point coordinate of each contig in tour order.
    cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - sizes_oo // 2

    cdef double s = 0.0
    cdef int size = len(tour)
    cdef int a, b, ia, ib
    cdef int links
    cdef double dist
    for ia in range(size):
        a = tour[ia]
        for ib in range(ia + 1, size):
            b = tour[ib]
            links = tour_M[a, b]
            if links == 0:
                continue
            dist = sizes_cum[ib] - sizes_cum[ia]
            # Stop scanning partners of `ia` once the mid-point distance exceeds
            # LIMIT; presumably later contigs are only farther away (assumes
            # non-negative sizes -- TODO confirm).
            if dist > LIMIT:
                break
            s += links / dist
    return s,
class Coverage(BaseFile):
    """
    Coverage file built from a BED file with `genomeCoverageBed -bg`.

    Each line is parsed as four whitespace-separated columns:
    contigID start end coverage
    """

    def __init__(self, bedfile, sizesfile):
        """
        Sort `bedfile`, (re)generate `<sorted bed>.coverage` when it is out of
        date, and load the contig sizes from `sizesfile`.
        """
        # NOTE(review): `sort` is used here as returning the path of the sorted
        # BED file -- confirm against formats.bed.sort.
        bedfile = sort([bedfile])
        coveragefile = bedfile + ".coverage"
        if need_update(bedfile, coveragefile):
            cmd = "genomeCoverageBed"
            cmd += " -bg -i {0} -g {1}".format(bedfile, sizesfile)
            sh(cmd, outfile=coveragefile)

        self.sizes = Sizes(sizesfile).mapping

        filename = coveragefile
        assert filename.endswith(".coverage")
        super().__init__(filename)

    def get_plot_data(self, ctg, bins=None):
        """
        Return `(bases, data)` arrays with the per-base coverage of `ctg`.

        Args:
            ctg: Contig name; must be a key of `self.sizes`.
            bins: Optional number of bins. When given, positions are thinned to
                every `size // bins`-th base and the coverage is passed through
                `chunk_average` with the same window.

        Raises:
            KeyError: If `ctg` is not present in the sizes file.
        """
        import numpy as np
        from jcvi.algorithms.matrix import chunk_average

        size = self.sizes[ctg]

        # `np.int` was only an alias of the builtin `int` and has been removed
        # from NumPy; use the builtin directly.
        data = np.zeros((size,), dtype=int)
        with open(self.filename) as fp:
            for row in fp:
                seqid, start, end, cov = row.split()
                if seqid != ctg:
                    continue

                start, end = int(start), int(end)
                data[start:end] = int(cov)

        bases = np.arange(1, size + 1)
        if bins:
            # The window is used as a slice step, so it must be a positive
            # integer: floor-divide and never let it drop to zero.
            window = max(1, size // bins)
            bases = bases[::window]
            data = chunk_average(data, window)

        return bases, data
"\t".join(str(x) for x in (aseqid, start - 1, end, clonename)), + file=fwpairs, + ) + nspan += 1 + + fw.close() + logger.debug("A total of {0} bedpe written to `{1}`.".format(nbedpe, bedpefile)) + if pairsbedfile: + fwpairs.close() + logger.debug( + "A total of {0} spans written to `{1}`.".format(nspan, pairsbedfile) + ) + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/gaps.py b/jcvi/assembly/gaps.py new file mode 100644 index 00000000..57824dbc --- /dev/null +++ b/jcvi/assembly/gaps.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Calculates gap statistics and manipulate gaps in assembly. +""" +import os.path as op +import sys + +from itertools import groupby + +from ..apps.base import ActionDispatcher, OptionParser, logger, need_update +from ..formats.bed import Bed, fastaFromBed +from ..formats.blast import BlastSlow +from ..formats.sizes import Sizes + + +def main(): + + actions = ( + ("flanks", "create sequences flanking the gaps"), + ("sizes", "compile gap sizes"), + ("estimate", "estimate gap sizes based on mates"), + ("annotate", "annotate AGP v2 file with linkage info"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def annotate(args): + """ + %prog annotate agpfile gaps.linkage.bed assembly.fasta + + Annotate AGP file with linkage info of `paired-end` or `map`. + File `gaps.linkage.bed` is generated by assembly.gaps.estimate(). 
def blast_to_twobeds(blastfile, rclip=1):
    """
    Pair up left/right flanker hits and write the region between them.

    Hits in `blastfile` are grouped by query name with the last `rclip`
    characters removed (the trailing "L"/"R" tag when `rclip` is 1). For every
    group a label is written to `after.labels`; when both flankers hit the
    same subject in the same orientation the enclosed region is also written
    to `after.bed`. Both files are created in the current directory.

    Args:
        blastfile: BLAST tabular file, read through `BlastSlow`.
        rclip: Number of trailing characters to strip from the query name to
            obtain the pair name. `0` groups on the full query name.

    Returns:
        Name of the labels file (`after.labels`).
    """

    def pair_key(hit):
        # The original returned the key *function* instead of calling it when
        # rclip was falsy, which lumped every hit into a single group.
        return hit.query[:-rclip] if rclip else hit.query

    data = BlastSlow(blastfile)
    OK = "OK"

    with open("after.bed", "w") as fw, open("after.labels", "w") as fwlabels:
        # groupby only merges adjacent rows, so the two flankers of a gap are
        # expected to be consecutive in the BLAST file.
        for pe, lines in groupby(data, key=pair_key):
            label = OK
            lines = list(lines)
            assert len(lines) in (1, 2)

            if len(lines) != 2:
                label = "Singleton"

            else:
                a, b = lines

                aquery, bquery = a.query, b.query
                asubject, bsubject = a.subject, b.subject
                if asubject != bsubject:
                    label = "Different chr {0}|{1}".format(asubject, bsubject)

                else:
                    astrand, bstrand = a.orientation, b.orientation
                    assert aquery[-1] == "L" and bquery[-1] == "R", str(
                        (aquery, bquery)
                    )

                    if astrand == "+" and bstrand == "+":
                        sstart, sstop = a.sstop + 1, b.sstart - 1

                    elif astrand == "-" and bstrand == "-":
                        sstart, sstop = b.sstop + 1, a.sstart - 1

                    else:
                        label = "Strand {0}|{1}".format(astrand, bstrand)

            if label == OK:
                strand = "+"
                label = sstop - sstart + 1

                # R flanker maps before L flanker: report the swapped interval
                # on the minus strand with a negated size.
                if sstart > sstop:
                    sstart, sstop = sstop, sstart
                    strand = "-"
                    label = -(sstop - sstart + 1)

                print(
                    "\t".join(
                        str(x) for x in (asubject, sstart - 1, sstop, pe, strand)
                    ),
                    file=fw,
                )

            print("\t".join(str(x) for x in (pe, label)), file=fwlabels)

    return fwlabels.name
def flanks(args):
    """
    %prog flanks gaps.bed fastafile

    Create sequences flanking the gaps.
    """
    # NOTE: the docstring above doubles as the command-line usage text, so it
    # is kept verbatim.
    p = OptionParser(flanks.__doc__)
    p.add_argument(
        "--extend",
        default=2000,
        type=int,
        help="Extend seq flanking the gaps",
    )
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    gapsbed, fastafile = args
    Ext = opts.extend
    # Named `seq_sizes` so the module-level `sizes()` action is not shadowed.
    seq_sizes = Sizes(fastafile).mapping

    bed = Bed(gapsbed)
    pf = gapsbed.rsplit(".", 1)[0]
    extbed = pf + ".ext.bed"
    # Context manager guarantees the BED file is flushed and closed even if a
    # gap refers to a sequence missing from the FASTA (KeyError below).
    with open(extbed, "w") as fw:
        for i, b in enumerate(bed):
            seqid = b.seqid
            gapname = b.accn
            size = seq_sizes[seqid]

            # Neighbouring gaps only limit the flank when on the same sequence.
            prev_b = bed[i - 1] if i > 0 else None
            next_b = bed[i + 1] if i + 1 < len(bed) else None
            if prev_b and prev_b.seqid != seqid:
                prev_b = None
            if next_b and next_b.seqid != seqid:
                next_b = None

            # Left flank: up to Ext bases before the gap, not reaching into the
            # previous gap.
            start = prev_b.end + 1 if prev_b else 1
            start, end = max(start, b.start - Ext), b.start - 1
            print(
                "\t".join(str(x) for x in (b.seqid, start - 1, end, gapname + "L")),
                file=fw,
            )

            # Right flank: up to Ext bases after the gap, not reaching into the
            # next gap or past the end of the sequence.
            end = next_b.start - 1 if next_b else size
            start, end = b.end + 1, min(end, b.end + Ext)
            print(
                "\t".join(str(x) for x in (b.seqid, start - 1, end, gapname + "R")),
                file=fw,
            )

    extfasta = fastaFromBed(extbed, fastafile, name=True)
    return extbed, extfasta
+""" +import os.path as op +import sys + +from itertools import combinations, groupby +from random import sample +from typing import Tuple + +import numpy as np +import seaborn as sns + +from ..apps.base import ActionDispatcher, OptionParser, logger, need_update +from ..algorithms.formula import calc_ldscore +from ..algorithms.matrix import symmetrize +from ..formats.base import BaseFile, LineFile, must_open, read_block +from ..formats.bed import Bed, fastaFromBed +from ..graphics.base import ( + Rectangle, + draw_cmap, + normalize_axes, + plt, + plot_heatmap, + savefig, +) + + +MSTheader = """population_type {0} +population_name LG +distance_function kosambi +cut_off_p_value 0.000001 +no_map_dist 10.0 +no_map_size 0 +missing_threshold {1} +estimation_before_clustering no +detect_bad_data yes +objective_function ML +number_of_loci {2} +number_of_individual {3} +""" + + +class BinMap(BaseFile, dict): + def __init__(self, filename): + super().__init__(filename) + + fp = open(filename) + for header, seq in read_block(fp, "group "): + lg = header.split()[-1] + self[lg] = [] + for s in seq: + if s.strip() == "" or s[0] == ";": + continue + marker, pos = s.split() + pos = int(float(pos) * 1000) + self[lg].append((marker, pos)) + + def print_to_bed(self, filename="stdout", switch=False, sep="."): + """Print the genetic map in the BED format. + + Args: + filename (str, optional): Output filename. Defaults to "stdout". + switch (bool, optional): Use linkage group as seqid. Defaults to False. + sep (str, optional): Separator that delimits scaffold name and position. Defaults to ".". + """ + fw = must_open(filename, "w") + for lg, markers in sorted(self.items()): + for marker, pos in markers: + if not switch: + line = (lg, pos, pos + 1, marker) + else: + seqid_spos = marker.rsplit(sep, 1) + if len(seqid_spos) != 2: + logger.error( + "Error: `%s` must be in the form e.g. 
class MSTMapLine(object):
    """
    One marker row of an MSTmap file.

    The first whitespace-separated field is the marker id in the form
    `<seqid>.<position>`; fields from `startidx` onwards are concatenated into
    the genotype string.
    """

    def __init__(self, row, startidx=3):
        """
        Args:
            row: Raw text line of the map file.
            startidx: Index of the first genotype column.

        Raises:
            ValueError: If the marker id contains no "." or the part after the
                last "." is not an integer.
        """
        args = row.split()
        self.id = args[0]
        # Split on the LAST dot only, so sequence names that themselves contain
        # dots (e.g. "Chr1.v2.250") are parsed instead of raising. This matches
        # the rsplit(sep, 1) used by BinMap.print_to_bed.
        self.seqid, pos = self.id.rsplit(".", 1)
        self.pos = int(pos)
        self.genotype = "".join(args[startidx:])

    def __len__(self):
        # Number of genotype calls (one character per individual).
        return len(self.genotype)

    def __str__(self):
        return "{0}: {1}".format(self.id, self.genotype)

    @property
    def bedline(self):
        """Tab-separated BED line (0-based start) for this marker."""
        return "\t".join(str(x) for x in (self.seqid, self.pos - 1, self.pos, self.id))
def blat(args):
    """
    %prog blat map1.txt ref.fasta

    Make ALLMAPS input csv based on sequences. The tab-delimited txt file
    include: name, LG, position, sequence.
    """
    # NOTE: the docstring above doubles as the command-line usage text, so it
    # is kept verbatim.
    from jcvi.formats.base import is_number
    from jcvi.formats.blast import best as blast_best, bed as blast_bed
    from jcvi.apps.align import blat as blat_align

    p = OptionParser(blat.__doc__)
    _, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    maptxt, ref = args
    pf = maptxt.rsplit(".", 1)[0]
    fastafile = pf + ".fasta"
    # Pass 1: dump the marker sequences to FASTA. Rows whose position is not
    # numeric (e.g. a header row) are skipped.
    with open(maptxt) as fp, open(fastafile, "w") as fw:
        for row in fp:
            name, _lg, pos, seq = row.split()
            if not is_number(pos):
                continue
            print(">{0}\n{1}\n".format(name, seq), file=fw)

    # Align the markers to the reference and keep the best hit per marker.
    blatfile = blat_align([ref, fastafile])
    bestfile = blast_best([blatfile])
    bedfile = blast_bed([bestfile])
    b = Bed(bedfile).order

    pf = ".".join((op.basename(maptxt).split(".")[0], op.basename(ref).split(".")[0]))
    csvfile = pf + ".csv"
    # Pass 2: emit one csv row per marker that was placed on the reference.
    with open(maptxt) as fp, open(csvfile, "w") as fw:
        for row in fp:
            name, lg, pos, seq = row.split()
            if name not in b:
                continue
            _, bb = b[name]
            scaffold, scaffold_pos = bb.seqid, bb.start
            print(",".join(str(x) for x in (scaffold, scaffold_pos, lg, pos)), file=fw)
ref.fasta + + Make dotplot between chromosomes and linkage maps. + The input map is csv formatted, for example: + + ScaffoldID,ScaffoldPosition,LinkageGroup,GeneticPosition + scaffold_2707,11508,1,0 + scaffold_2707,11525,1,1.2 + """ + from natsort import natsorted + from jcvi.assembly.allmaps import CSVMapLine + from jcvi.formats.sizes import Sizes + from jcvi.graphics.base import shorten + from jcvi.graphics.dotplot import ( + plt, + savefig, + markup, + normalize_axes, + downsample, + plot_breaks_and_labels, + thousands, + ) + + p = OptionParser(dotplot.__doc__) + p.set_outfile(outfile=None) + opts, args, iopts = p.set_image_options( + args, figsize="8x8", style="dark", dpi=90, cmap="copper" + ) + + if len(args) != 2: + sys.exit(not p.print_help()) + + csvfile, fastafile = args + sizes = natsorted(Sizes(fastafile).mapping.items()) + seen = set() + raw_data = [] + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) # the whole canvas + ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) # the dot plot + + fp = must_open(csvfile) + for row in fp: + m = CSVMapLine(row) + seen.add(m.seqid) + raw_data.append(m) + + # X-axis is the genome assembly + ctgs, ctg_sizes = zip(*sizes) + xsize = sum(ctg_sizes) + qb = list(np.cumsum(ctg_sizes)) + qbreaks = list(zip(ctgs, [0] + qb, qb)) + qstarts = dict(zip(ctgs, [0] + qb)) + + # Y-axis is the map + key = lambda x: x.lg + raw_data.sort(key=key) + ssizes = {} + for lg, d in groupby(raw_data, key=key): + ssizes[lg] = max([x.cm for x in d]) + ssizes = natsorted(ssizes.items()) + lgs, lg_sizes = zip(*ssizes) + ysize = sum(lg_sizes) + sb = list(np.cumsum(lg_sizes)) + sbreaks = list(zip([("LG" + x) for x in lgs], [0] + sb, sb)) + sstarts = dict(zip(lgs, [0] + sb)) + + # Re-code all the scatter dots + data = [ + (qstarts[x.seqid] + x.pos, sstarts[x.lg] + x.cm, "g") + for x in raw_data + if (x.seqid in qstarts) + ] + npairs = len(data) + data = downsample(data) + + x, y, c = zip(*data) + ax.scatter(x, y, c=c, 
edgecolors="none", s=2, lw=0) + + # Flip X-Y label + gy, gx = op.basename(csvfile).split(".")[:2] + gx, gy = shorten(gx, maxchar=30), shorten(gy, maxchar=30) + xlim, ylim = plot_breaks_and_labels( + fig, + root, + ax, + gx, + gy, + xsize, + ysize, + qbreaks, + sbreaks, + usetex=iopts.usetex, + ) + ax.set_xlim(xlim) + ax.set_ylim(ylim) + + title = "Alignment: {} vs {}".format(gx, gy) + title += " ({} markers)".format(thousands(npairs)) + root.set_title(markup(title), x=0.5, y=0.96, color="k") + logger.debug(title) + normalize_axes(root) + + image_name = opts.outfile or (csvfile.rsplit(".", 1)[0] + "." + iopts.format) + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + fig.clear() + + +def read_subsampled_matrix(mstmap: str, subsample: int) -> Tuple[np.ndarray, str, int]: + """ + Read the subsampled matrix from file if it exists, otherwise calculate it. + """ + data = MSTMap(mstmap) + + # Take random subsample while keeping marker order + if subsample < data.nmarkers: + data = [data[x] for x in sorted(sample(range(len(data)), subsample))] + else: + logger.debug("Use all markers, --subsample ignored") + + nmarkers = len(data) + markerbedfile = mstmap + ".subsample.bed" + ldmatrix = mstmap + ".subsample.matrix" + if need_update(mstmap, (ldmatrix, markerbedfile)): + with open(markerbedfile, "w", encoding="utf-8") as fw: + print("\n".join(x.bedline for x in data), file=fw) + logger.debug( + "Write marker set of size %d to file `%s`.", nmarkers, markerbedfile + ) + + M = np.zeros((nmarkers, nmarkers), dtype=float) + for i, j in combinations(range(nmarkers), 2): + a = data[i] + b = data[j] + M[i, j] = calc_ldscore(a.genotype, b.genotype) + + M = symmetrize(M) + + logger.debug("Write LD matrix to file `%s`.", ldmatrix) + M.tofile(ldmatrix) + else: + nmarkers = len(Bed(markerbedfile)) + M = np.fromfile(ldmatrix, dtype=float).reshape(nmarkers, nmarkers) + logger.debug("LD matrix `%s` exists (%dx%d).", ldmatrix, nmarkers, nmarkers) + + return M, markerbedfile, nmarkers + + 
+def draw_geneticmap_heatmap(root, ax, mstmap: str, subsample: int): + """ + Draw the heatmap of the genetic map. + """ + M, markerbedfile, nmarkers = read_subsampled_matrix(mstmap, subsample) + + # Plot chromosomes breaks + b = Bed(markerbedfile) + xsize = len(b) + extent = (0, nmarkers) + chr_labels = [] + ignore_size = 20 + + breaks = [] + for seqid, beg, end in b.get_breaks(): + ignore = abs(end - beg) < ignore_size + pos = (beg + end) / 2 + chr_labels.append((seqid, pos, ignore)) + if ignore: + continue + breaks.append(end) + + cmap = sns.color_palette("rocket", as_cmap=True) + plot_heatmap(ax, M, breaks, cmap=cmap, plot_breaks=True) + + # Plot chromosome labels + for label, pos, ignore in chr_labels: + if not ignore: + xpos = 0.1 + pos * 0.8 / xsize + root.text( + xpos, 0.91, label, ha="center", va="bottom", rotation=45, color="grey" + ) + ypos = 0.9 - pos * 0.8 / xsize + root.text(0.09, ypos, label, ha="right", va="center", color="grey") + + ax.set_xlim(extent) + ax.set_ylim((nmarkers, 0)) # Invert y-axis + ax.set_axis_off() + + draw_cmap(root, r"Pairwise LD ($r^2$)", 0, 1, cmap=cmap) + + root.add_patch(Rectangle((0.1, 0.1), 0.8, 0.8, fill=False, ec="k", lw=2)) + m = mstmap.split(".")[0] + root.text(0.5, 0.06, f"Linkage Disequilibrium between {m} markers", ha="center") + + normalize_axes(root) + + +def heatmap(args): + """ + %prog heatmap map + + Calculate pairwise linkage disequilibrium given MSTmap. 
+ """ + p = OptionParser(heatmap.__doc__) + p.add_argument( + "--subsample", + default=1000, + type=int, + help="Subsample markers to speed up", + ) + opts, args, iopts = p.set_image_options(args, figsize="8x8") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (mstmap,) = args + + plt.rcParams["axes.linewidth"] = 0 + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + ax = fig.add_axes((0.1, 0.1, 0.8, 0.8)) # the heatmap + + draw_geneticmap_heatmap(root, ax, mstmap, opts.subsample) + + pf = mstmap.split(".")[0] + image_name = pf + ".subsample" + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def header(args): + """ + %prog header map conversion_table + + Rename lines in the map header. The mapping of old names to new names are + stored in two-column `conversion_table`. + """ + from jcvi.formats.base import DictFile + + p = OptionParser(header.__doc__) + p.add_argument("--prefix", default="", help="Prepend text to line number") + p.add_argument("--ids", help="Write ids to file") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + mstmap, conversion_table = args + data = MSTMap(mstmap) + hd = data.header + conversion = DictFile(conversion_table) + newhd = [opts.prefix + conversion.get(x, x) for x in hd] + + print("\t".join(hd)) + print("--->") + print("\t".join(newhd)) + + ids = opts.ids + if ids: + fw = open(ids, "w") + print("\n".join(newhd), file=fw) + fw.close() + + +def rename(args): + """ + %prog rename map markers.bed > renamed.map + + Rename markers according to the new mapping locations. 
+ """ + p = OptionParser(rename.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + mstmap, bedfile = args + markersbed = Bed(bedfile) + markers = markersbed.order + + data = MSTMap(mstmap) + header = data.header + header = [header[0]] + ["seqid", "start"] + header[1:] + renamed = [] + for b in data: + m, geno = b.id, b.genotype + om = m + if m not in markers: + m = m.rsplit(".", 1)[0] + if m not in markers: + continue + + i, mb = markers[m] + renamed.append([om, mb.seqid, mb.start, "\t".join(list(geno))]) + + renamed.sort(key=lambda x: (x[1], x[2])) + fw = must_open(opts.outfile, "w") + print("\t".join(header), file=fw) + for d in renamed: + print("\t".join(str(x) for x in d), file=fw) + + +def anchor(args): + """ + %prog anchor map.bed markers.blast > anchored.bed + + Anchor scaffolds based on map. + """ + from jcvi.formats.blast import bed + + p = OptionParser(anchor.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + mapbed, blastfile = args + bedfile = bed([blastfile]) + markersbed = Bed(bedfile) + markers = markersbed.order + + mapbed = Bed(mapbed, sorted=False) + for b in mapbed: + m = b.accn + if m not in markers: + continue + + i, mb = markers[m] + new_accn = "{0}:{1}-{2}".format(mb.seqid, mb.start, mb.end) + b.accn = new_accn + print(b) + + +def bed(args): + """ + %prog fasta map.out + + Convert MSTMAP output into bed format. 
+ """ + p = OptionParser(bed.__doc__) + p.add_argument( + "--switch", + default=False, + action="store_true", + help="Switch reference and aligned map elements", + ) + p.add_argument( + "--sep", + default=".", + help="Separator that is used to delimit scaffold and position in the marker name", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (mapout,) = args + pf = mapout.split(".")[0] + mapbed = pf + ".bed" + bm = BinMap(mapout) + bm.print_to_bed(mapbed, switch=opts.switch, sep=opts.sep) + + return mapbed + + +def fasta(args): + """ + %prog fasta map.out scaffolds.fasta + + Extract marker sequences based on map. + """ + from jcvi.formats.sizes import Sizes + + p = OptionParser(fasta.__doc__) + p.add_argument( + "--extend", + default=1000, + type=int, + help="Extend seq flanking the gaps", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + mapout, sfasta = args + Flank = opts.extend + pf = mapout.split(".")[0] + mapbed = pf + ".bed" + bm = BinMap(mapout) + bm.print_to_bed(mapbed) + + bed = Bed(mapbed, sorted=False) + markersbed = pf + ".markers.bed" + fw = open(markersbed, "w") + sizes = Sizes(sfasta).mapping + for b in bed: + accn = b.accn + scf, pos = accn.split(".") + pos = int(pos) + start = max(0, pos - Flank) + end = min(pos + Flank, sizes[scf]) + print("\t".join(str(x) for x in (scf, start, end, accn)), file=fw) + + fw.close() + + fastaFromBed(markersbed, sfasta, name=True) + + +def hamming_distance(a, b, ignore=None): + dist = 0 + for x, y in zip(a, b): + if ignore and ignore in (x, y): + continue + if x != y: + dist += 1 + return dist + + +OK, BREAK, END = range(3) + + +def check_markers(a, b, maxdiff): + if a.seqid != b.seqid: + return END, None + diff = hamming_distance(a.genotype, b.genotype, ignore="-") + max_allowed = len(a) * maxdiff + if diff <= max_allowed: + return OK, None + + return BREAK, (a.seqid, a.pos, b.pos) + + +def breakpoint(args): + """ + %prog 
breakpoint mstmap.input > breakpoints.bed + + Find scaffold breakpoints using genetic map. Use variation.vcf.mstmap() to + generate the input for this routine. + """ + from more_itertools import pairwise + + p = OptionParser(breakpoint.__doc__) + p.add_argument( + "--diff", + default=0.1, + type=float, + help="Maximum ratio of differences allowed", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (mstmap,) = args + diff = opts.diff + data = MSTMap(mstmap) + + # Remove singleton markers (avoid double cross-over) + good = [] + nsingletons = 0 + for i in range(1, len(data) - 1): + a = data[i] + left_label, left_rr = check_markers(data[i - 1], a, diff) + right_label, right_rr = check_markers(a, data[i + 1], diff) + + if left_label == BREAK and right_label == BREAK: + nsingletons += 1 + continue + + good.append(a) + + logger.debug("A total of %d singleton markers removed.", nsingletons) + + for a, b in pairwise(good): + label, rr = check_markers(a, b, diff) + if label == BREAK: + print("\t".join(str(x) for x in rr)) + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/goldenpath.py b/jcvi/assembly/goldenpath.py new file mode 100644 index 00000000..5108a5c6 --- /dev/null +++ b/jcvi/assembly/goldenpath.py @@ -0,0 +1,1192 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Procedures to validate and update golden path of a genome assembly. This relies +heavily on formats.agp, and further includes several algorithms, e.g. overlap +detection. 
+""" +import os +import os.path as op +import shutil +import sys + +from copy import deepcopy +from functools import lru_cache +from itertools import groupby + +from ..apps.base import ( + ActionDispatcher, + OptionParser, + cleanup, + logger, + mkdir, + need_update, + popen, + sh, +) +from ..apps.fetch import entrez +from ..apps.grid import WriteJobs +from ..formats.agp import AGP, TPF, build, get_phase, reindex, tidy +from ..formats.base import BaseFile, must_open +from ..formats.blast import BlastLine, BlastSlow +from ..formats.coords import Overlap_types +from ..formats.fasta import Fasta, SeqIO + + +GoodPct = 98 +GoodOverlap = 200 +GoodOverhang = 2000 + + +class Cutoff(object): + def __init__(self, pctid=GoodPct, overlap=GoodOverlap, hang=GoodOverhang): + self.pctid = pctid + self.overlap = overlap + self.hang = hang + + def __str__(self): + return "Configuration: PCTID={} OVERLAP={} HANG={}".format( + self.pctid, self.overlap, self.hang + ) + + +class CLR(object): + def __init__(self, id, size, orientation="+"): + self.id = id + self.start = 1 + self.end = size + if orientation == "?": + orientation = "+" + assert orientation in ("+", "-") + self.orientation = orientation + + def __str__(self): + return "{}: {}-{}({})".format(self.id, self.start, self.end, self.orientation) + + @property + def is_valid(self): + return self.start < self.end + + @classmethod + def from_agpline(cls, a): + c = CLR(a.component_id, 0, a.orientation) + c.start = a.component_beg + c.end = a.component_end + return c + + +class Overlap(object): + def __init__(self, blastline, asize, bsize, cutoff, qreverse=False): + + b = blastline + aid = b.query + bid = b.subject + + self.aid = aid.split("|")[3] if aid.count("|") >= 3 else aid + self.bid = bid.split("|")[3] if bid.count("|") >= 3 else bid + self.asize = asize + self.bsize = bsize + + self.qstart = b.qstart + self.qstop = b.qstop + self.sstart = b.sstart + self.sstop = b.sstop + + self.pctid = b.pctid + self.hitlen = b.hitlen + 
self.orientation = b.orientation + + self.cutoff = cutoff + self.qreverse = qreverse + self.blastline = b + + def __str__(self): + ov = Overlap_types[self.otype] + s = "{0} - {1}: {2} ".format(self.aid, self.bid, ov) + s += "Overlap: {0} Identity: {1}% Orientation: {2}".format( + self.hitlen, self.pctid, self.orientation + ) + return s + + @property + def swapped(self): + blastline = self.blastline.swapped + asize = self.asize + bsize = self.bsize + _, bo = self.get_ao_bo() + qreverse = bo == "-" + return Overlap(blastline, bsize, asize, self.cutoff, qreverse=qreverse) + + @property + def certificateline(self): + terminal_tag = "Terminal" if self.isTerminal else "Non-terminal" + return "\t".join( + str(x) + for x in ( + self.bid, + self.asize, + self.qstart, + self.qstop, + self.orientation, + terminal_tag, + ) + ) + + @property + def isTerminal(self): + return self.isGoodQuality and self.otype in (1, 2) + + @property + def isGoodQuality(self): + cutoff = self.cutoff + return self.hitlen >= cutoff.overlap and self.pctid >= cutoff.pctid + + def get_hangs(self): + r""" + Determine the type of overlap given query, ref alignment coordinates + Consider the following alignment between sequence a and b: + + aLhang \ / aRhang + \------------/ + /------------\ + bLhang / \ bRhang + + Terminal overlap: a before b, b before a + Contain overlap: a in b, b in a + """ + aLhang, aRhang = self.qstart - 1, self.asize - self.qstop + bLhang, bRhang = self.sstart - 1, self.bsize - self.sstop + if self.orientation == "-": + bLhang, bRhang = bRhang, bLhang + if self.qreverse: + aLhang, aRhang = aRhang, aLhang + bLhang, bRhang = bRhang, bLhang + + return aLhang, aRhang, bLhang, bRhang + + def update_clr(self, aclr, bclr): + """ + Zip the two sequences together, using "left-greedy" rule + + ============= seqA + |||| + ====(===============) seqB + """ + print(aclr, bclr, file=sys.stderr) + otype = self.otype + + if otype == 1: + if aclr.orientation == "+": + aclr.end = self.qstop + else: + 
aclr.start = self.qstart + if bclr.orientation == "+": + bclr.start = self.sstop + 1 + else: + bclr.end = self.sstart - 1 + + elif otype == 3: + aclr.start = aclr.end + + elif otype == 4: + bclr.start = bclr.end + + print(aclr, bclr, file=sys.stderr) + + def get_ao_bo(self): + ao = "-" if self.qreverse else "+" + bo = ao if self.orientation == "+" else {"+": "-", "-": "+"}[ao] + return ao, bo + + def anneal(self, aclr, bclr): + ao, bo = self.get_ao_bo() + + # Requirement: end-to-end join in correct order and orientation + can_anneal = self.otype in (1, 3, 4) and (ao, bo) == ( + aclr.orientation, + bclr.orientation, + ) + if not can_anneal: + print( + "* Cannot anneal! (otype={0}|{1}{2}|{3}{4})".format( + self.otype, ao, bo, aclr.orientation, bclr.orientation + ), + file=sys.stderr, + ) + return False + + self.update_clr(aclr, bclr) + return True + + def print_graphic(self): + """ + >>>>>>>>>>>>>>>>>>> seqA (alen) + |||||||| + <<<<<<<<<<<<<<<<<<<<< seqB (blen) + """ + aLhang, aRhang, bLhang, bRhang = self.get_hangs() + + achar = ">" + bchar = "<" if self.orientation == "-" else ">" + if self.qreverse: + achar = "<" + bchar = {">": "<", "<": ">"}[bchar] + + print(aLhang, aRhang, bLhang, bRhang, file=sys.stderr) + width = 50 # Canvas + hitlen = self.hitlen + lmax = max(aLhang, bLhang) + rmax = max(aRhang, bRhang) + bpwidth = lmax + hitlen + rmax + ratio = width * 1.0 / bpwidth + + _ = lambda x: int(round(x * ratio, 0)) + a1, a2 = _(aLhang), _(aRhang) + b1, b2 = _(bLhang), _(bRhang) + hit = max(_(hitlen), 1) + + msg = " " * max(b1 - a1, 0) + msg += achar * (a1 + hit + a2) + msg += " " * (width - len(msg) + 2) + msg += "{0} ({1})".format(self.aid, self.asize) + print(msg, file=sys.stderr) + + msg = " " * max(a1, b1) + msg += "|" * hit + print(msg, file=sys.stderr) + + msg = " " * max(a1 - b1, 0) + msg += bchar * (b1 + hit + b2) + msg += " " * (width - len(msg) + 2) + msg += "{0} ({1})".format(self.bid, self.bsize) + print(msg, file=sys.stderr) + print(self, 
file=sys.stderr) + + @property + def otype(self): + if not self.isGoodQuality: + return 0 + + aLhang, aRhang, bLhang, bRhang = self.get_hangs() + + s1 = aRhang + bLhang + s2 = aLhang + bRhang + s3 = aLhang + aRhang + s4 = bLhang + bRhang + ms = min(s1, s2, s3, s4) + if ms > self.cutoff.hang: + type = 0 + elif ms == s1: + type = 1 # a ~ b + elif ms == s2: + type = 2 # b ~ a + elif ms == s3: + type = 3 # a in b + elif ms == s4: + type = 4 # b in a + else: + assert 0 + + return type + + +class CertificateLine(object): + """ + North chr1 2 0 AC229737.8 telomere 58443 + South chr1 2 1 AC229737.8 AC202463.29 58443 37835 58443 + Non-terminal + """ + + def __init__(self, line): + args = line.split() + self.tag = args[0] + self.chr = args[1] + self.aphase = int(args[2]) + self.bphase = int(args[3]) + self.aid = args[4] + self.bid = args[5] + self.asize = int(args[6]) + self.is_no_overlap = False + + if len(args) == 7: + self.is_gap = True + return + + self.is_gap = False + + if len(args) == 8: + assert args[7] == "None" + self.is_no_overlap = True + self.terminal = "Non-terminal" + return + + self.astart = int(args[7]) + self.astop = int(args[8]) + self.orientation = args[9] + self.terminal = args[10] + + @property + def isTerminal(self): + return self.terminal == "Terminal" + + def __str__(self): + ar = [ + self.tag, + self.chr, + self.aphase, + self.bphase, + self.aid, + self.bid, + self.asize, + ] + + if self.is_no_overlap: + ar += ["None"] + elif not self.is_gap: + ar += [self.astart, self.astop, self.orientation, self.terminal] + + return "\t".join(str(x) for x in ar) + + +class Certificate(BaseFile): + + gapsize = 100000 + gaps = dict( + telomere=gapsize, centromere=gapsize, contig=gapsize, clone=50000, fragment=5000 + ) + + def __init__(self, filename): + + super().__init__(filename) + + fp = open(filename) + self.lines = [CertificateLine(x) for x in fp.readlines()] + + def write(self, filename): + fw = must_open(filename, "w") + for b in self.lines: + print(b, 
file=fw) + + def get_agp_gap(self, gap_type="contig"): + gap_length = Certificate.gaps[gap_type] + linkage = "yes" if gap_type in ("fragment", "clone") else "no" + + return ["N", gap_length, gap_type, linkage, ""] + + def write_AGP(self, filename, orientationguide={}): + """ + For each component, we have two overlaps: North and South. + + ======= + |||| South + ====(=================) Current BAC + North |||| + =============== + + For the case that says "Non-terminal", the overlap will not be + considered. North-South would suggest a '+' orientation, South-North + would suggest a '-' orientation. In most cases, unless the overlap + involves phase1 BAC, the selected range will be shown as the brackets + above - exclude North overlap, and include South overlap (aka the + "left-greedy" rule). + """ + fw = must_open(filename, "w") + for aid, bb in groupby(self.lines, key=lambda x: x.aid): + bb = list(bb) + north, south = bb + aid = north.aid + assert aid == south.aid + + aphase = north.aphase + chr = north.chr + size = north.asize + ar = [chr, 0, 0, 0] + + northline = southline = None + northrange = southrange = None + + # Warn if adjacent components do not have valid overlaps + if south.is_no_overlap: + print(south, file=sys.stderr) + + # Most gaps, except telomeres occur twice, so only do the "North" + if north.is_gap: + bar = ar + self.get_agp_gap(north.bid) + northline = "\t".join(str(x) for x in bar) + else: + if north.isTerminal: + northrange = north.astart, north.astop + + if south.is_gap: + if south.bid == "telomere": + bar = ar + self.get_agp_gap(south.bid) + southline = "\t".join(str(x) for x in bar) + else: + if south.isTerminal: + southrange = south.astart, south.astop + else: + bar = ar + self.get_agp_gap("fragment") + southline = "\t".join(str(x) for x in bar) + + # Determine the orientation and clear range for the current BAC + clr = [1, size] + orientation = sorientation = None + if northrange: + start, stop = northrange + Lhang = start - 1 + Rhang = 
size - stop + + orientation = "+" if Lhang < Rhang else "-" + if north.bphase == 1 and north.bphase < aphase: + if Lhang < Rhang: # North overlap at 5` + clr[0] = start + else: + clr[1] = stop + # Override left-greedy (also see below) + else: + if Lhang < Rhang: + clr[0] = stop + 1 + else: + clr[1] = start - 1 + + if southrange: + start, stop = southrange + Lhang = start - 1 + Rhang = size - stop + + sorientation = "+" if Lhang > Rhang else "-" + # Override left-greedy (also see above) + if aphase == 1 and aphase < south.bphase: + if Lhang < Rhang: # South overlap at 5` + clr[0] = stop + 1 + else: + clr[1] = start - 1 + else: + if Lhang < Rhang: + clr[0] = start + else: + clr[1] = stop + + if orientation: + if sorientation: + try: + assert ( + orientation == sorientation + ), "Orientation conflicts:\n{0}\n{1}".format(north, south) + except AssertionError as e: + logger.debug(e) + else: + if sorientation: + orientation = sorientation + else: # Both overlaps fail to define orientation + orientation = orientationguide.get(aid, "+") + + component_type = "D" if aphase in (1, 2) else "F" + bar = ar + [component_type, aid, clr[0], clr[1], orientation] + cline = "\t".join(str(x) for x in bar) + + if northline: + print(northline, file=fw) + print(cline, file=fw) + if southline: + print(southline, file=fw) + + fw.close() + + reindex([filename, "--inplace"]) + + +def main(): + + actions = ( + ("bes", "confirm the BES mapping"), + ("flip", "flip the FASTA sequences according to a set of references"), + ("overlap", "check terminal overlaps between two records"), + ("batchoverlap", "check terminal overlaps for many pairs"), + ("neighbor", "check neighbors of a component in agpfile"), + ("blast", "blast a component to componentpool"), + ("certificate", "make certificates for all overlaps in agpfile"), + ("agp", "make agpfile based on certificates"), + ("anneal", "merge adjacent contigs and make new agpfile"), + ("dedup", "remove redundant contigs with cdhit"), + ) + p = 
ActionDispatcher(actions) + p.dispatch(globals()) + + +def dedup(args): + """ + %prog dedup scaffolds.fasta + + Remove redundant contigs with CD-HIT. This is run prior to + assembly.sspace.embed(). + """ + from jcvi.formats.fasta import gaps + from jcvi.apps.cdhit import deduplicate, ids + + p = OptionParser(dedup.__doc__) + p.set_align(pctid=GoodPct) + p.set_mingap(default=10) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (scaffolds,) = args + mingap = opts.mingap + splitfile, oagpfile, cagpfile = gaps( + [scaffolds, "--split", "--mingap={0}".format(mingap)] + ) + + dd = splitfile + ".cdhit" + clstrfile = dd + ".clstr" + idsfile = dd + ".ids" + if need_update(splitfile, clstrfile): + deduplicate([splitfile, "--pctid={0}".format(opts.pctid)]) + if need_update(clstrfile, idsfile): + ids([clstrfile]) + + agp = AGP(cagpfile) + reps = set(x.split()[-1] for x in open(idsfile)) + pf = scaffolds.rsplit(".", 1)[0] + dedupagp = pf + ".dedup.agp" + fw = open(dedupagp, "w") + + ndropped = ndroppedbases = 0 + for a in agp: + if not a.is_gap and a.component_id not in reps: + span = a.component_span + logger.debug("Drop component {0} ({1})".format(a.component_id, span)) + ndropped += 1 + ndroppedbases += span + continue + print(a, file=fw) + fw.close() + + logger.debug( + "Dropped components: {0}, Dropped bases: {1}".format(ndropped, ndroppedbases) + ) + logger.debug("Deduplicated file written to `{0}`.".format(dedupagp)) + + tidyagp = tidy([dedupagp, splitfile]) + dedupfasta = pf + ".dedup.fasta" + build([tidyagp, dd, dedupfasta]) + + return dedupfasta + + +def get_shred_id(id): + """ + >>> get_shred_id("ca-bacs.5638.frag11.22000-23608") + ("ca-bacs.5638", 11) + """ + try: + parts = id.split(".") + aid = ".".join(parts[:2]) + fid = int(parts[2].replace("frag", "")) + except: + aid, fid = None, None + return aid, fid + + +def is_adjacent_shreds(a, b): + aid, bid = a.component_id, b.component_id + ao, bo = a.orientation, b.orientation 
+ if ao != bo: + return False + + ai, af = get_shred_id(aid) + bi, bf = get_shred_id(bid) + if ai is None or bi is None: + return False + + # Same sequence, with fragment id offset by one + return ai == bi and abs(af - bf) == 1 + + +def overlap_blastline_writer(oopts): + o = overlap(oopts) + if not o: + return "" + + return str(o.blastline) + + +def get_overlap_opts(aid, bid, qreverse, outdir, opts): + oopts = [ + aid, + bid, + "--suffix", + "fa", + "--dir", + outdir, + "--pctid={0}".format(opts.pctid), + "--hitlen={0}".format(opts.hitlen), + ] + if qreverse: + oopts += ["--qreverse"] + return oopts + + +def populate_blastfile(blastfile, agp, outdir, opts): + assert not op.exists(blastfile) + all_oopts = [] + for a, b, qreverse in agp.iter_paired_components(): + aid = a.component_id + bid = b.component_id + oopts = get_overlap_opts(aid, bid, qreverse, outdir, opts) + all_oopts.append(oopts) + + pool = WriteJobs(overlap_blastline_writer, all_oopts, blastfile, cpus=opts.cpus) + pool.run() + + +def anneal(args): + """ + %prog anneal agpfile contigs.fasta + + Merge adjacent overlapping contigs and make new AGP file. + + By default it will also anneal lines like these together (unless --nozipshreds): + scaffold4 1 1608 1 W ca-bacs.5638.frag11.22000-23608 1 1608 - + scaffold4 1609 1771 2 N 163 scaffold yes paired-ends + scaffold4 1772 3771 3 W ca-bacs.5638.frag10.20000-22000 1 2000 - + + These are most likely shreds, which we look for based on names. 
+ """ + p = OptionParser(anneal.__doc__) + p.set_align(pctid=GoodPct, hitlen=GoodOverlap) + p.add_argument( + "--hang", default=GoodOverhang, type=int, help="Maximum overhang length" + ) + p.set_outdir(outdir="outdir") + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + agpfile, contigs = args + outdir = opts.outdir + if not op.exists(outdir): + mkdir(outdir) + cmd = "faSplit byname {0} {1}/".format(contigs, outdir) + sh(cmd) + + cutoff = Cutoff(opts.pctid, opts.hitlen, opts.hang) + logger.debug(str(cutoff)) + + agp = AGP(agpfile) + blastfile = agpfile.replace(".agp", ".blast") + if not op.exists(blastfile): + populate_blastfile(blastfile, agp, outdir, opts) + + assert op.exists(blastfile) + logger.debug("File `{0}` found. Start loading.".format(blastfile)) + blast = BlastSlow(blastfile).to_dict() + + annealedagp = "annealed.agp" + annealedfasta = "annealed.fasta" + + newagp = deepcopy(agp) + clrstore = {} + for a, b, qreverse in agp.iter_paired_components(): + aid = a.component_id + bid = b.component_id + + pair = (aid, bid) + if pair in blast: + bl = blast[pair] + else: + oopts = get_overlap_opts(aid, bid, qreverse, outdir, opts) + o = overlap(oopts) + if not o: + continue + bl = o.blastline + + o = Overlap(bl, a.component_span, b.component_span, cutoff, qreverse=qreverse) + + if aid not in clrstore: + clrstore[aid] = CLR.from_agpline(a) + if bid not in clrstore: + clrstore[bid] = CLR.from_agpline(b) + + aclr, bclr = clrstore[aid], clrstore[bid] + + o.print_graphic() + if o.anneal(aclr, bclr): + newagp.delete_between(aid, bid, verbose=True) + + if o.otype == 2: # b ~ a + o = o.swapped + o.print_graphic() + if o.anneal(bclr, aclr): + newagp.switch_between(bid, aid, verbose=True) + newagp.delete_between(bid, aid, verbose=True) + + logger.debug("A total of {0} components with modified CLR.".format(len(clrstore))) + + for cid, c in clrstore.items(): + if c.is_valid: + continue + print("Remove {0}".format(c), 
file=sys.stderr) + newagp.convert_to_gap(cid, verbose=True) + + # Update all ranges that has modified clr + for a in newagp: + if a.is_gap: + continue + aid = a.component_id + if aid in clrstore: + c = clrstore[aid] + a.component_beg = c.start + a.component_end = c.end + + newagp.print_to_file(annealedagp) + tidyagp = tidy([annealedagp, contigs]) + + build([tidyagp, contigs, annealedfasta]) + return annealedfasta + + +def blast(args): + """ + %prog blast allfasta clonename + + Insert a component into agpfile by aligning to the best hit in pool and see + if they have good overlaps. + """ + from jcvi.apps.align import run_megablast + + p = OptionParser(blast.__doc__) + p.add_argument("-n", type=int, default=2, help="Take best N hits") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + allfasta, clonename = args + fastadir = "fasta" + infile = op.join(fastadir, clonename + ".fasta") + if not op.exists(infile): + entrez([clonename, "--skipcheck", "--outdir=" + fastadir]) + + outfile = "{0}.{1}.blast".format(clonename, allfasta.split(".")[0]) + run_megablast( + infile=infile, outfile=outfile, db=allfasta, pctid=GoodPct, hitlen=GoodOverlap + ) + + blasts = [BlastLine(x) for x in open(outfile)] + besthits = [] + for b in blasts: + if b.query.count("|") >= 3: + b.query = b.query.split("|")[3] + + if b.subject.count("|") >= 3: + b.subject = b.subject.split("|")[3] + + b.query = b.query.rsplit(".", 1)[0] + b.subject = b.subject.rsplit(".", 1)[0] + + if b.query == b.subject: + continue + + if b.subject not in besthits: + besthits.append(b.subject) + if len(besthits) == opts.n: + break + + for b in besthits: + overlap([clonename, b, "--dir=" + fastadir]) + + +def bes(args): + """ + %prog bes bacfasta clonename + + Use the clone name to download BES gss sequences from Genbank, map and then + visualize. 
+ """ + from jcvi.apps.align import run_blat + + p = OptionParser(bes.__doc__) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bacfasta, clonename = args + + entrez([clonename, "--database=nucgss", "--skipcheck"]) + besfasta = clonename + ".fasta" + blatfile = clonename + ".bes.blat" + run_blat( + infile=besfasta, + outfile=blatfile, + db=bacfasta, + pctid=95, + hitlen=100, + cpus=opts.cpus, + ) + + aid, asize = next(Fasta(bacfasta).itersizes()) + + width = 50 + msg = "=" * width + msg += " " + aid + print(msg, file=sys.stderr) + + ratio = width * 1.0 / asize + _ = lambda x: int(round(x * ratio, 0)) + blasts = [BlastLine(x) for x in open(blatfile)] + for b in blasts: + if b.orientation == "+": + msg = " " * _(b.sstart) + "->" + else: + msg = " " * (_(b.sstop) - 2) + "<-" + msg += " " * (width - len(msg) + 2) + msg += b.query + if b.orientation == "+": + msg += " (hang={0})".format(b.sstart - 1) + else: + msg += " (hang={0})".format(asize - b.sstop) + + print(msg, file=sys.stderr) + + +def flip(args): + """ + %prog flip fastafile + + Go through each FASTA record, check against Genbank file and determines + whether or not to flip the sequence. This is useful before updates of the + sequences to make sure the same orientation is used. 
+ """ + p = OptionParser(flip.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + outfastafile = fastafile.rsplit(".", 1)[0] + ".flipped.fasta" + fo = open(outfastafile, "w") + f = Fasta(fastafile, lazy=True) + for name, rec in f.iteritems_ordered(): + tmpfasta = "a.fasta" + fw = open(tmpfasta, "w") + SeqIO.write([rec], fw, "fasta") + fw.close() + + o = overlap([tmpfasta, name]) + if o.orientation == "-": + rec.seq = rec.seq.reverse_complement() + + SeqIO.write([rec], fo, "fasta") + cleanup(tmpfasta) + + +def batchoverlap(args): + """ + %prog batchoverlap pairs.txt outdir + + Check overlaps between pairs of sequences. + """ + p = OptionParser(batchoverlap.__doc__) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + pairsfile, outdir = args + fp = open(pairsfile) + cmds = [] + mkdir("overlaps") + for row in fp: + a, b = row.split()[:2] + oa = op.join(outdir, a + ".fa") + ob = op.join(outdir, b + ".fa") + cmd = "python -m jcvi.assembly.goldenpath overlap {0} {1}".format(oa, ob) + cmd += " -o overlaps/{0}_{1}.ov".format(a, b) + cmds.append(cmd) + + print("\n".join(cmds)) + + +def overlap(args): + """ + %prog overlap + + Check overlaps between two fasta records. The arguments can be genBank IDs + instead of FASTA files. In case of IDs, the sequences will be downloaded + first. 
+ """ + from jcvi.formats.blast import chain_HSPs + + p = OptionParser(overlap.__doc__) + p.add_argument( + "--dir", + default=os.getcwd(), + help="Download sequences to dir", + ) + p.add_argument( + "--suffix", + default="fasta", + help="Suffix of the sequence file in dir", + ) + p.add_argument( + "--qreverse", + default=False, + action="store_true", + help="Reverse seq a", + ) + p.add_argument( + "--nochain", + default=False, + action="store_true", + help="Do not chain adjacent HSPs", + ) + p.set_align(pctid=GoodPct, hitlen=GoodOverlap, evalue=0.01) + p.set_outfile(outfile=None) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + afasta, bfasta = args + dir = opts.dir + chain = not opts.nochain + suffix = opts.suffix + evalue = opts.evalue + pctid = opts.pctid + hitlen = opts.hitlen + cutoff = Cutoff(pctid, hitlen) + + # Check first whether it is file or accession name + if not op.exists(afasta): + af = op.join(dir, ".".join((afasta, suffix))) + if not op.exists(af): # Check to avoid redownload + entrez([afasta, "--skipcheck", "--outdir=" + dir]) + afasta = af + + if not op.exists(bfasta): + bf = op.join(dir, ".".join((bfasta, suffix))) + if not op.exists(bf): + entrez([bfasta, "--skipcheck", "--outdir=" + dir]) + bfasta = bf + + assert op.exists(afasta) and op.exists(bfasta) + + cmd = "blastn -dust no" + cmd += " -query {0} -subject {1}".format(afasta, bfasta) + cmd += " -evalue {0} -outfmt 6 -perc_identity {1}".format(evalue, pctid) + + fp = popen(cmd) + hsps = fp.readlines() + + hsps = [BlastLine(x) for x in hsps] + hsps = [x for x in hsps if x.hitlen >= hitlen] + if chain: + logger.debug("Chain HSPs in the Blast output.") + dist = 2 * hitlen # Distance to chain the HSPs + hsps = chain_HSPs(hsps, xdist=dist, ydist=dist) + + if len(hsps) == 0: + print("No match found.", file=sys.stderr) + return None + + besthsp = hsps[0] + + aid, asize = next(Fasta(afasta).itersizes()) + bid, bsize = next(Fasta(bfasta).itersizes()) + o = 
def certificate(args):
    """
    %prog certificate tpffile certificatefile

    Generate certificate file for all overlaps in tpffile. tpffile can be
    generated by jcvi.formats.agp.tpf().

    North  chr1  2  0  AC229737.8  telomere     58443
    South  chr1  2  1  AC229737.8  AC202463.29  58443  37835  58443  + Non-terminal

    Each line describes a relationship between the current BAC and the
    north/south BAC. First, "North/South" tag, then the chromosome, phases of
    the two BACs, ids of the two BACs, the size and the overlap start-stop of
    the CURRENT BAC, and orientation. Each BAC will have two lines in the
    certificate file.
    """
    p = OptionParser(certificate.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    tpffile, certificatefile = args
    fastadir = "fasta"

    tpf = TPF(tpffile)

    # Rows from a previous (possibly interrupted) run are reused verbatim, so
    # overlaps that were already computed are not recomputed.
    data = check_certificate(certificatefile)
    fw = must_open(certificatefile, "w")
    try:
        for i, a in enumerate(tpf):
            if a.is_gap:
                continue

            aid = a.component_id

            af = op.join(fastadir, aid + ".fasta")
            if not op.exists(af):  # Check to avoid redownload
                entrez([aid, "--skipcheck", "--outdir=" + fastadir])

            north, south = tpf.getNorthSouthClone(i)
            aphase, asize = phase(aid)

            for tag, nb in (("North", north), ("South", south)):
                if not nb:  # end of the chromosome
                    # There is no neighboring BAC, so report phase 0 exactly
                    # as for a gap. Previously `bphase` was left unset here,
                    # which raised NameError on the first row or silently
                    # reused the previous neighbor's phase.
                    bphase = "0"
                    ov = "telomere\t{0}".format(asize)
                elif nb.isCloneGap:
                    bphase = "0"
                    ov = "{0}\t{1}".format(nb.gap_type, asize)
                else:
                    bid = nb.component_id
                    bphase, bsize = phase(bid)
                    key = (tag, aid, bid)
                    if key in data:
                        print(data[key], file=fw)
                        continue

                    ar = [aid, bid, "--dir=" + fastadir]
                    o = overlap(ar)
                    ov = (
                        o.certificateline
                        if o
                        else "{0}\t{1}\tNone".format(bid, asize)
                    )

                print(
                    "\t".join(
                        str(x) for x in (tag, a.object, aphase, bphase, aid, ov)
                    ),
                    file=fw,
                )
                # Flush per row so an interrupted run leaves a usable file
                fw.flush()
    finally:
        fw.close()
def agp(args):
    """
    %prog agp tpffile certificatefile agpfile

    Build agpfile from overlap certificates.

    Tiling Path File (tpf) is a file that lists the component and the gaps.
    It is a three-column file similar to below, also see jcvi.formats.agp.tpf():

    telomere        chr1    na
    AC229737.8      chr1    +
    AC202463.29     chr1    +

    Note: the orientation of the component is only used as a guide. If the
    orientation is derivable from a terminal overlap, it will use it regardless
    of what the tpf says.

    See jcvi.assembly.goldenpath.certificate() which generates a list of
    certificates based on agpfile. At first, it seems counter-productive to
    convert first agp to certificates then certificates back to agp.

    The certificates provide a way to edit the overlap information, so that the
    agpfile can be corrected (without changing agpfile directly).
    """
    from jcvi.formats.base import DictFile

    parser = OptionParser(agp.__doc__)
    _opts, positional = parser.parse_args(args)

    if len(positional) != 3:
        sys.exit(not parser.print_help())

    tpffile, certificatefile, agpfile = positional
    # The tpf is read with valuepos=2, i.e. its third column is the value
    guide = DictFile(tpffile, valuepos=2)
    certificates = Certificate(certificatefile)
    certificates.write_AGP(agpfile, orientationguide=guide)
class ContigOrderingLine(object):
    """One whitespace-separated record of a ContigOrdering file.

    The first five fields are read, in order: contig id, contig name (only the
    part before `sep` is kept), reverse-complement flag ("0" or "1"),
    orientation score and gap size after the contig. All values are kept as
    strings.
    """

    def __init__(self, line, sep="|"):
        fields = line.split()
        contig_id, raw_name, rc_flag = fields[0], fields[1], fields[2]
        assert rc_flag in ("0", "1")
        score, gap_size = fields[3], fields[4]

        self.contig_id = contig_id
        self.contig_name = raw_name.split(sep)[0]
        # Flag "0" is the forward strand, "1" the reverse strand
        self.strand = {"0": "+", "1": "-"}[rc_flag]
        self.orientation_score = score
        self.gap_size_after_contig = gap_size
'recover', + keyword, if available in the third column, is less confident. + + tig00015093 46912 + tig00035238 46779 recover + tig00030900 119291 + """ + idsfile = self.idsfile + logger.debug("Parse idsfile `%s`", idsfile) + fp = open(idsfile) + tigs = [] + for row in fp: + if row[0] == "#": # Header + continue + atoms = row.split() + tig, _, size = atoms + size = int(size) + if skiprecover and len(atoms) == 3 and atoms[2] == "recover": + continue + tigs.append((tig, size)) + + # Arrange contig names and sizes + _tigs, _sizes = zip(*tigs) + self.contigs = set(_tigs) + self.sizes = np.array(_sizes) + self.tig_to_size = dict(tigs) + + # Initially all contigs are considered active + self.active = set(_tigs) + + def parse_clm(self): + clmfile = self.clmfile + logger.debug("Parse clmfile `%s`", clmfile) + fp = open(clmfile) + contacts = {} + contacts_oriented = defaultdict(dict) + orientations = defaultdict(list) + for row in fp: + atoms = row.strip().split("\t") + assert len(atoms) == 3, "Malformed line `{}`".format(atoms) + abtig, links, dists = atoms + atig, btig = abtig.split() + at, ao = atig[:-1], atig[-1] + bt, bo = btig[:-1], btig[-1] + if at not in self.tig_to_size: + continue + if bt not in self.tig_to_size: + continue + dists = [int(x) for x in dists.split()] + contacts[(at, bt)] = len(dists) + gdists = golden_array(dists) + contacts_oriented[(at, bt)][(FF[ao], FF[bo])] = gdists + contacts_oriented[(bt, at)][(RR[bo], RR[ao])] = gdists + strandedness = 1 if ao == bo else -1 + orientations[(at, bt)].append((strandedness, dists)) + + self.contacts = contacts + self.contacts_oriented = contacts_oriented + # Preprocess the orientations dict + for (at, bt), dists in orientations.items(): + dists = [(s, d, hmean_int(d)) for (s, d) in dists] + strandedness, md, mh = min(dists, key=lambda x: x[-1]) + orientations[(at, bt)] = (strandedness, len(md), mh) + self.orientations = orientations + + def calculate_densities(self): + """ + Calculate the density of inter-contig 
    def activate(self, tourfile=None, minsize=10000, backuptour=True):
        """
        Select contigs in the current partition. This is the setup phase of the
        algorithm, and supports two modes:

        - "de novo": This is useful at the start of a new run where no tours
          available. We select the strong contigs that have significant number
          of links to other contigs in the partition. We build a histogram of
          link density (# links per bp) and remove the contigs that appear as
          outliers. The orientations are derived from the matrix decomposition
          of the pairwise strandedness matrix O.

        - "hotstart": This is useful when there was a past run, with a given
          tourfile. In this case, the active contig list and orientations are
          derived from the last tour in the file.

        Args:
            tourfile: Path to an existing tour file; a missing file falls back
                to "de novo" mode.
            minsize: Contigs shorter than this are dropped in "de novo" mode.
            backuptour: Whether to call backup() on the tourfile after import.

        Returns:
            The tour as an ``array.array("i")`` of contig indices; it is also
            stored in `self.tour`.
        """
        if tourfile and (not op.exists(tourfile)):
            logger.debug("Tourfile `%s` not found", tourfile)
            tourfile = None

        if tourfile:
            logger.debug("Importing tourfile `%s`", tourfile)
            tour, tour_o = iter_last_tour(tourfile, self)
            # self.active must be set before tig_to_idx is read: the index
            # mapping is derived from the current active set
            self.active = set(tour)
            tig_to_idx = self.tig_to_idx
            tour = [tig_to_idx[x] for x in tour]
            # Sort (index, sign) pairs so that signs end up ordered by index
            signs = sorted([(x, FF[o]) for (x, o) in zip(tour, tour_o)])
            _, signs = zip(*signs)
            self.signs = np.array(signs, dtype=int)
            if backuptour:
                backup(tourfile)
            tour = array.array("i", tour)
        else:
            self.report_active()
            # Repeat until no contig is removed: each removal changes the
            # densities of the remaining contigs
            while True:
                logdensities = self.calculate_densities()
                lb, ub = outlier_cutoff(list(logdensities.values()))
                logger.debug("Log10(link_densities) ~ [%d, %d]", lb, ub)
                # Only low-density contigs below 10 x minsize are removed
                remove = set(
                    x
                    for x, d in logdensities.items()
                    if (d < lb and self.tig_to_size[x] < minsize * 10)
                )
                if remove:
                    self.active -= remove
                    self.report_active()
                else:
                    break

            logger.debug("Remove contigs with size < %d", minsize)
            self.active = set(x for x in self.active if self.tig_to_size[x] >= minsize)
            tour = range(self.N)  # Use starting (random) order otherwise
            tour = array.array("i", tour)

            # Determine orientations
            self.flip_all(tour)

        self.report_active()
        self.tour = tour

        return tour
+ """ + from .chic import score_evaluate_Q + + return score_evaluate_Q(tour, self.active_sizes, self.Q) + + def flip_log(self, method, score, score_flipped, tag): + logger.debug("%s: %d => %d %s", method, score, score_flipped, tag) + + def flip_all(self, tour): + """Initialize the orientations based on pairwise O matrix.""" + if self.signs is None: # First run + score = 0 + else: + old_signs = self.signs[: self.N] + (score,) = self.evaluate_tour_Q(tour) + + # Remember we cannot have ambiguous orientation code (0 or '?') here + self.signs = get_signs(self.O, validate=False, ambiguous=False) + (score_flipped,) = self.evaluate_tour_Q(tour) + if score_flipped >= score: + tag = ACCEPT + else: + self.signs = old_signs[:] + tag = REJECT + self.flip_log("FLIPALL", score, score_flipped, tag) + return tag + + def flip_whole(self, tour): + """Test flipping all contigs at the same time to see if score improves.""" + (score,) = self.evaluate_tour_Q(tour) + self.signs = -self.signs + (score_flipped,) = self.evaluate_tour_Q(tour) + if score_flipped > score: + tag = ACCEPT + else: + self.signs = -self.signs + tag = REJECT + self.flip_log("FLIPWHOLE", score, score_flipped, tag) + return tag + + def flip_one(self, tour): + """Test flipping every single contig sequentially to see if score + improves. 
+ """ + n_accepts = n_rejects = 0 + any_tag_ACCEPT = False + for i, t in enumerate(tour): + if i == 0: + (score,) = self.evaluate_tour_Q(tour) + self.signs[t] = -self.signs[t] + (score_flipped,) = self.evaluate_tour_Q(tour) + if score_flipped > score: + n_accepts += 1 + tag = ACCEPT + else: + self.signs[t] = -self.signs[t] + n_rejects += 1 + tag = REJECT + self.flip_log( + "FLIPONE ({}/{})".format(i + 1, len(self.signs)), + score, + score_flipped, + tag, + ) + if tag == ACCEPT: + any_tag_ACCEPT = True + score = score_flipped + logger.debug("FLIPONE: N_accepts=%d N_rejects=%d", n_accepts, n_rejects) + return ACCEPT if any_tag_ACCEPT else REJECT + + def prune_tour(self, tour, cpus): + """Test deleting each contig and check the delta_score; tour here must + be an array of ints. + """ + while True: + (tour_score,) = self.evaluate_tour_M(tour) + logger.debug("Starting score: %d", tour_score) + active_sizes = self.active_sizes + M = self.M + args = [] + for i, t in enumerate(tour): + stour = tour[:i] + tour[i + 1 :] + args.append((t, stour, tour_score, active_sizes, M)) + + # Parallel run + p = Pool(processes=cpus) + results = list(p.imap(prune_tour_worker, args)) + assert len(tour) == len( + results + ), "Array size mismatch, tour({}) != results({})".format( + len(tour), len(results) + ) + + # Identify outliers + active_contigs = self.active_contigs + idx, log10deltas = zip(*results) + lb, ub = outlier_cutoff(log10deltas) + logger.debug("Log10(delta_score) ~ [%d, %d]", lb, ub) + + remove = set(active_contigs[x] for (x, d) in results if d < lb) + self.active -= remove + self.report_active() + + tig_to_idx = self.tig_to_idx + tour = [active_contigs[x] for x in tour] + tour = array.array("i", [tig_to_idx[x] for x in tour if x not in remove]) + if not remove: + break + + self.tour = tour + self.flip_all(tour) + + return tour + + @property + def active_contigs(self): + return list(self.active) + + @property + def active_sizes(self): + return np.array([self.tig_to_size[x] for 
x in self.active]) + + @property + def N(self): + return len(self.active) + + @property + def oo(self): + return range(self.N) + + @property + def tig_to_idx(self): + return dict((x, i) for (i, x) in enumerate(self.active)) + + @property + def M(self): + """ + Contact frequency matrix. Each cell contains how many inter-contig + links between i-th and j-th contigs. + """ + N = self.N + tig_to_idx = self.tig_to_idx + M = np.zeros((N, N), dtype=int) + for (at, bt), links in self.contacts.items(): + if not (at in tig_to_idx and bt in tig_to_idx): + continue + ai = tig_to_idx[at] + bi = tig_to_idx[bt] + M[ai, bi] = M[bi, ai] = links + return M + + @property + def O(self): + """ + Pairwise strandedness matrix. Each cell contains whether i-th and j-th + contig are the same orientation +1, or opposite orientation -1. + """ + N = self.N + tig_to_idx = self.tig_to_idx + O = np.zeros((N, N), dtype=int) + for (at, bt), (strandedness, md, mh) in self.orientations.items(): + if not (at in tig_to_idx and bt in tig_to_idx): + continue + ai = tig_to_idx[at] + bi = tig_to_idx[bt] + score = strandedness * md + O[ai, bi] = O[bi, ai] = score + return O + + @property + def P(self): + """ + Contact frequency matrix with better precision on distance between + contigs. In the matrix M, the distance is assumed to be the distance + between mid-points of two contigs. In matrix Q, however, we compute + harmonic mean of the links for the orientation configuration that is + shortest. This offers better precision for the distance between big + contigs. + """ + N = self.N + tig_to_idx = self.tig_to_idx + P = np.zeros((N, N, 2), dtype=int) + for (at, bt), (strandedness, md, mh) in self.orientations.items(): + if not (at in tig_to_idx and bt in tig_to_idx): + continue + ai = tig_to_idx[at] + bi = tig_to_idx[bt] + P[ai, bi, 0] = P[bi, ai, 0] = md + P[ai, bi, 1] = P[bi, ai, 1] = mh + return P + + @property + def Q(self): + """ + Contact frequency matrix when contigs are already oriented. 
def hmean_int(a, a_min=5778, a_max=1149851):
    """Return the harmonic mean of `a`, rounded to the nearest int.

    Values are clipped to the range [a_min, a_max] before averaging.
    """
    from scipy.stats import hmean

    clipped = np.clip(a, a_min, a_max)
    mean_value = hmean(clipped)
    return int(round(mean_value))
def fit_power_law(xs, ys):
    """Fit power law distribution.

    See reference:
    http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html
    Assumes the form Y = A * X^B and fits A and B by least squares on the
    log-transformed data. Points where x or y is not strictly positive have no
    logarithm and are left out of the fit.

    Args:
        xs ([int]): X vector
        ys ([float64]): Y vector

    Returns:
        (A, B, label): the two coefficients and a LaTeX-formatted label of
        the fitted equation.

    Raises:
        ZeroDivisionError: if fewer than two distinct usable x values remain.
    """
    import math

    sum_logXlogY, sum_logXlogX, sum_logX, sum_logY = 0, 0, 0, 0
    # Count only the points that enter the sums. Using len(xs) here biased
    # both coefficients whenever some points were skipped.
    n = 0
    for x, y in zip(xs, ys):
        if not (x > 0 and y > 0):
            continue
        logX, logY = math.log(x), math.log(y)
        sum_logXlogY += logX * logY
        sum_logXlogX += logX * logX
        sum_logX += logX
        sum_logY += logY
        n += 1

    B = (n * sum_logXlogY - sum_logX * sum_logY) / (
        n * sum_logXlogX - sum_logX * sum_logX
    )
    A = math.exp((sum_logY - B * sum_logX) / n)
    logger.debug("Power law Y = %.1f * X ^ %.4f", A, B)
    label = "$Y={:.1f} \\times X^{{ {:.4f} }}$".format(A, B)
    return A, B, label
def generate_groups(groupsfile):
    """Parse 'groups' file. The 'groups' file has the following format,
    for example:

    seq1,seq2 b
    seq1 g
    seq2 g

    Blank lines are ignored.

    Args:
        groupsfile (str): Path to the groups file

    Yields:
        (seqids, color): the comma-joined sequence ids and the color, both as
        they appear in the file.
    """
    with open(groupsfile) as fp:
        for row in fp:
            # A blank (e.g. trailing) line has no fields to unpack
            if not row.strip():
                continue
            seqids, color = row.split()
            yield seqids, color
def draw_hic_heatmap(
    root,
    ax,
    npyfile: str,
    jsonfile: str,
    contig: Optional[str],
    groups_file: Optional[str],
    title: Optional[str],
    vmin: int,
    vmax: int,
    plot_breaks: bool,
):
    """
    Draw heatmap based on .npy file. The .npy file stores a square matrix with
    bins of genome, and cells inside the matrix represent number of links
    between bin i and bin j. The `genome.json` contains the offsets of each
    contig/chr so that we know where to draw boundary lines, or extract per
    contig/chromosome heatmap.

    Args:
        root: Axes covering the whole canvas, used for the title text.
        ax: Axes the heatmap itself is drawn on.
        npyfile: Path to the link-count matrix.
        jsonfile: Path to the JSON header; it must contain `resolution`.
        contig: If given, plot only this contig/chromosome.
        groups_file: Optional path to a groups file, see generate_groups().
        title: Optional title; when `contig` is given its name is appended.
        vmin: Lower threshold passed on to read_matrix().
        vmax: Upper threshold passed on to read_matrix().
        plot_breaks: Whether contig boundaries are drawn.
    """
    groups = list(generate_groups(groups_file)) if groups_file else []

    # Load contig/chromosome starts and sizes
    with open(jsonfile, encoding="utf-8") as fp:
        header = json.load(fp)
    resolution = header.get("resolution")
    assert resolution is not None, "`resolution` not found in `{}`".format(jsonfile)
    logger.debug("Resolution set to %d", resolution)

    B, new_groups, breaks = read_matrix(
        npyfile, header, contig, groups, vmin, vmax, plot_breaks
    )
    plot_heatmap(ax, B, breaks, groups=new_groups, binsize=resolution)

    # Title. The command-line default is None, which used to raise TypeError
    # on `title += ...`, so normalize it first.
    title = title or ""
    if contig:
        title = f"{title}-{contig}" if title else contig
    if title:
        root.text(
            0.5,
            0.96,
            markup(title),
            color="darkslategray",
            ha="center",
            va="center",
        )

    normalize_axes(root)
The `genome.json` contains the offsets of each + contig/chr so that we know where to draw boundary lines, or extract per + contig/chromosome heatmap. + + If a 'groups' file is given (with --groups), we will draw squares on the + heatmap. The 'groups' file has the following format, for example: + + seq1,seq2 b + seq1 g + seq2 g + + This will first draw a square around seq1+seq2 with blue color, then seq1 + and seq2 individually with green color. + """ + p = OptionParser(heatmap.__doc__) + p.add_argument("--title", help="Title of the heatmap") + p.add_argument("--groups", help="Groups file, see doc") + p.add_argument("--vmin", default=1, type=int, help="Minimum value in the heatmap") + p.add_argument("--vmax", default=6, type=int, help="Maximum value in the heatmap") + p.add_argument("--chr", help="Plot this contig/chr only") + p.add_argument( + "--nobreaks", + default=False, + action="store_true", + help="Do not plot breaks (esp. if contigs are small)", + ) + opts, args, iopts = p.set_image_options( + args, figsize="11x11", style="white", cmap="coolwarm", dpi=120 + ) + + if len(args) != 2: + sys.exit(not p.print_help()) + + npyfile, jsonfile = args + # Canvas + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) # whole canvas + ax = fig.add_axes((0.05, 0.05, 0.9, 0.9)) # just the heatmap + + draw_hic_heatmap( + root, + ax, + npyfile, + jsonfile, + contig=opts.chr, + groups_file=opts.groups, + title=opts.title, + vmin=opts.vmin, + vmax=opts.vmax, + plot_breaks=not opts.nobreaks, + ) + + pf = npyfile.rsplit(".", 1)[0] + image_name = pf + "." + iopts.format + # macOS sometimes has way too verbose output + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def mergemat(args): + """ + %prog mergemat *.npy + + Combine counts from multiple .npy data files. 
def get_distbins(start=100, bins=2000, ratio=1.01):
    """Get exponentially sized bins for link length.

    Args:
        start: Start of the first bin. This used to be ignored in favor of a
            hard-coded 100, which is still the default.
        bins: Number of bin starts to generate.
        ratio: Growth factor between consecutive bin starts.

    Returns:
        (bins, binsizes): the bin starts rounded to int, and the differences
        between consecutive starts (one element fewer than `bins`).
    """
    b = np.full(bins, ratio, dtype="float64")
    b[0] = start
    # The running product gives b[i] = b[i - 1] * ratio
    b = np.cumprod(b)
    bins = np.around(b).astype(dtype=int)
    binsizes = np.diff(bins)
    return bins, binsizes
+ """ + import pysam + from jcvi.utils.cbook import percentage + + p = OptionParser(bam2mat.__doc__) + p.add_argument( + "--resolution", + default=500000, + type=int, + help="Resolution when counting the links", + ) + p.add_argument( + "--seqids", + help="Use a given seqids file, a single line with seqids joined by comma", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bamfilename,) = args + pf = bamfilename.rsplit(".", 1)[0] + N = opts.resolution + pf += f".resolution_{N}" + bins = 1500 # Distance distribution bins + minsize = 100 # Record distance if it is at least minsize + seqids = opts.seqids + seqids = ( + open(seqids).readline().strip().split(",") + if seqids and op.exists(seqids) + else None + ) + + seqstarts, seqsize, total_bins = get_seqstarts(bamfilename, N, seqids=seqids) + distbinstarts, distbinsizes = get_distbins(start=minsize, bins=bins) + + # Store the starts and sizes into a JSON file + jsonfile = pf + ".json" + fwjson = open(jsonfile, "w") + header = { + "starts": seqstarts, + "sizes": seqsize, + "total_bins": total_bins, + "distbinstarts": list(distbinstarts), + "distbinsizes": list(distbinsizes), + "resolution": N, + } + + # int64 will not be able to deserialize with Python 3 + # Here is a workaround: + # https://stackoverflow.com/questions/11942364/typeerror-integer-is-not-json-serializable-when-serializing-json-in-python + def default(o): + if isinstance(o, np.int64): + return int(o) + raise TypeError + + json.dump(header, fwjson, sort_keys=True, indent=4, default=default) + fwjson.close() + logger.debug("Contig bin starts written to `%s`", jsonfile) + + print(sorted(seqstarts.items(), key=lambda x: x[-1])) + logger.debug("Initialize matrix of size %dx%d", total_bins, total_bins) + A = np.zeros((total_bins, total_bins), dtype=int) + B = np.zeros(bins, dtype=int) + + # Find the bin ID of each read + def bin_number(chr, pos): + return seqstarts[chr] + pos // N + + def distbin_number(dist, 
start=minsize, ratio=1.01): + return int(round(math.log(dist * 1.0 / start, ratio))) + + bamfile = pysam.AlignmentFile(bamfilename, "rb") + # Check all reads, rules borrowed from LACHESIS + # https://github.com/shendurelab/LACHESIS/blob/master/src/GenomeLinkMatrix.cc#L1476 + j = k = 0 + for c in bamfile: + j += 1 + if j % 100000 == 0: + print("{} reads counted".format(j), file=sys.stderr) + + if c.is_qcfail and c.is_duplicate: + continue + if c.is_secondary and c.is_supplementary: + continue + if c.mapping_quality == 0: + continue + if not c.is_paired: + continue + if c.is_read2: # Take only one read + continue + + # pysam v0.8.3 does not support keyword reference_name + achr = bamfile.getrname(c.reference_id) + apos = c.reference_start + bchr = bamfile.getrname(c.next_reference_id) + bpos = c.next_reference_start + if achr not in seqstarts or bchr not in seqstarts: + continue + if achr == bchr: + dist = abs(apos - bpos) + if dist < minsize: + continue + db = distbin_number(dist) + B[db] += 1 + + abin, bbin = bin_number(achr, apos), bin_number(bchr, bpos) + A[abin, bbin] += 1 + if abin != bbin: + A[bbin, abin] += 1 + + k += 1 + + logger.debug("Total reads counted: %s", percentage(2 * k, j)) + bamfile.close() + np.save(pf, A) + logger.debug("Link counts written to `%s.npy`", pf) + np.save(pf + ".dist", B) + logger.debug("Link dists written to `%s.dist.npy`", pf) + + +def simulate(args): + """ + %prog simulate test + + Simulate CLM and IDS files with given names. 
+ + The simulator assumes several distributions: + - Links are distributed uniformly across genome + - Log10(link_size) are distributed normally + - Genes are distributed uniformly + """ + p = OptionParser(simulate.__doc__) + p.add_argument("--genomesize", default=10000000, type=int, help="Genome size") + p.add_argument("--genes", default=1000, type=int, help="Number of genes") + p.add_argument("--contigs", default=100, type=int, help="Number of contigs") + p.add_argument("--coverage", default=10, type=int, help="Link coverage") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (pf,) = args + GenomeSize = opts.genomesize + Genes = opts.genes + Contigs = opts.contigs + Coverage = opts.coverage + PE = 500 + Links = int(GenomeSize * Coverage / PE) + + # Simulate the contig sizes that sum to GenomeSize + # See also: + # + (ContigSizes,) = np.random.dirichlet([1] * Contigs, 1) * GenomeSize + ContigSizes = np.array(np.round_(ContigSizes, decimals=0), dtype=int) + ContigStarts = np.zeros(Contigs, dtype=int) + ContigStarts[1:] = np.cumsum(ContigSizes)[:-1] + + # Write IDS file + idsfile = pf + ".ids" + fw = open(idsfile, "w") + print("#Contig\tRECounts\tLength", file=fw) + for i, s in enumerate(ContigSizes): + print("tig{:04d}\t{}\t{}".format(i, s // (4**4), s), file=fw) + fw.close() + + # Simulate the gene positions + GenePositions = np.sort(np.random.randint(0, GenomeSize, size=Genes)) + write_last_and_beds(pf, GenePositions, ContigStarts) + + # Simulate links, uniform start, with link distances following 1/x, where x + # is the distance between the links. 
As an approximation, we have links + # between [1e3, 1e7], so we map from uniform [1e-7, 1e-3] + LinkStarts = np.sort(np.random.randint(1, GenomeSize, size=Links)) + a, b = 1e-7, 1e-3 + LinkSizes = np.array( + np.round_(1 / ((b - a) * np.random.rand(Links) + a), decimals=0), dtype=int + ) + LinkEnds = LinkStarts + LinkSizes + + # Find link to contig membership + LinkStartContigs = np.searchsorted(ContigStarts, LinkStarts) - 1 + LinkEndContigs = np.searchsorted(ContigStarts, LinkEnds) - 1 + + # Extract inter-contig links + InterContigLinks = (LinkStartContigs != LinkEndContigs) & ( + LinkEndContigs != Contigs + ) + ICLinkStartContigs = LinkStartContigs[InterContigLinks] + ICLinkEndContigs = LinkEndContigs[InterContigLinks] + ICLinkStarts = LinkStarts[InterContigLinks] + ICLinkEnds = LinkEnds[InterContigLinks] + + # Write CLM file + write_clm( + pf, + ICLinkStartContigs, + ICLinkEndContigs, + ICLinkStarts, + ICLinkEnds, + ContigStarts, + ContigSizes, + ) + + +def write_last_and_beds(pf, GenePositions, ContigStarts): + """ + Write LAST file, query and subject BED files. 
+ """ + qbedfile = pf + "tigs.bed" + sbedfile = pf + "chr.bed" + lastfile = "{}tigs.{}chr.last".format(pf, pf) + qbedfw = open(qbedfile, "w") + sbedfw = open(sbedfile, "w") + lastfw = open(lastfile, "w") + + GeneContigs = np.searchsorted(ContigStarts, GenePositions) - 1 + for i, (c, gstart) in enumerate(zip(GeneContigs, GenePositions)): + gene = "gene{:05d}".format(i) + tig = "tig{:04d}".format(c) + start = ContigStarts[c] + cstart = gstart - start + print("\t".join(str(x) for x in (tig, cstart, cstart + 1, gene)), file=qbedfw) + print( + "\t".join(str(x) for x in ("chr1", gstart, gstart + 1, gene)), file=sbedfw + ) + lastatoms = [gene, gene, 100] + [0] * 8 + [100] + print("\t".join(str(x) for x in lastatoms), file=lastfw) + + qbedfw.close() + sbedfw.close() + lastfw.close() + + +def write_clm( + pf, + ICLinkStartContigs, + ICLinkEndContigs, + ICLinkStarts, + ICLinkEnds, + ContigStarts, + ContigSizes, +): + """ + Write CLM file from simulated data. + """ + clm = defaultdict(list) + for start, end, linkstart, linkend in zip( + ICLinkStartContigs, ICLinkEndContigs, ICLinkStarts, ICLinkEnds + ): + start_a = ContigStarts[start] + start_b = start_a + ContigSizes[start] + end_a = ContigStarts[end] + end_b = end_a + ContigSizes[end] + if linkend >= end_b: + continue + clm[(start, end)].append( + (linkstart - start_a, start_b - linkstart, linkend - end_a, end_b - linkend) + ) + + clmfile = pf + ".clm" + fw = open(clmfile, "w") + + def format_array(a): + return [str(x) for x in sorted(a) if x > 0] + + for (start, end), links in sorted(clm.items()): + start = "tig{:04d}".format(start) + end = "tig{:04d}".format(end) + nlinks = len(links) + if not nlinks: + continue + ff = format_array([(b + c) for a, b, c, d in links]) + fr = format_array([(b + d) for a, b, c, d in links]) + rf = format_array([(a + c) for a, b, c, d in links]) + rr = format_array([(a + d) for a, b, c, d in links]) + print("{}+ {}+\t{}\t{}".format(start, end, nlinks, " ".join(ff)), file=fw) + print("{}+ 
{}-\t{}\t{}".format(start, end, nlinks, " ".join(fr)), file=fw) + print("{}- {}+\t{}\t{}".format(start, end, nlinks, " ".join(rf)), file=fw) + print("{}- {}-\t{}\t{}".format(start, end, nlinks, " ".join(rr)), file=fw) + fw.close() + + +def density(args): + """ + %prog density test.clm + + Estimate link density of contigs. + """ + p = OptionParser(density.__doc__) + p.add_argument( + "--save", + default=False, + action="store_true", + help="Write log densitites of contigs to file", + ) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (clmfile,) = args + clm = CLMFile(clmfile) + pf = clmfile.rsplit(".", 1)[0] + + if opts.save: + logdensities = clm.calculate_densities() + densityfile = pf + ".density" + fw = open(densityfile, "w") + for name, logd in logdensities.items(): + s = clm.tig_to_size[name] + print("\t".join(str(x) for x in (name, s, logd)), file=fw) + fw.close() + logger.debug("Density written to `%s`", densityfile) + + tourfile = clmfile.rsplit(".", 1)[0] + ".tour" + tour = clm.activate(tourfile=tourfile, backuptour=False) + clm.flip_all(tour) + clm.flip_whole(tour) + clm.flip_one(tour) + + +def optimize(args): + """ + %prog optimize test.clm + + Optimize the contig order and orientation, based on CLM file. 
+ """ + p = OptionParser(optimize.__doc__) + p.add_argument( + "--skiprecover", + default=False, + action="store_true", + help="Do not import 'recover' contigs", + ) + p.add_argument( + "--startover", + default=False, + action="store_true", + help="Do not resume from existing tour file", + ) + p.add_argument("--skipGA", default=False, action="store_true", help="Skip GA step") + p.set_outfile(outfile=None) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (clmfile,) = args + startover = opts.startover + runGA = not opts.skipGA + cpus = opts.cpus + + # Load contact map + clm = CLMFile(clmfile, skiprecover=opts.skiprecover) + + tourfile = opts.outfile or clmfile.rsplit(".", 1)[0] + ".tour" + if startover: + tourfile = None + tour = clm.activate(tourfile=tourfile) + + fwtour = open(tourfile, "w") + # Store INIT tour + print_tour(fwtour, clm.tour, "INIT", clm.active_contigs, clm.oo, signs=clm.signs) + + if runGA: + for phase in range(1, 3): + tour = optimize_ordering(fwtour, clm, phase, cpus) + tour = clm.prune_tour(tour, cpus) + + # Flip orientations + phase = 1 + while True: + tag1, tag2 = optimize_orientations(fwtour, clm, phase, cpus) + if tag1 == REJECT and tag2 == REJECT: + logger.debug("Terminating ... no more %s", ACCEPT) + break + phase += 1 + + fwtour.close() + + +def optimize_ordering(fwtour, clm, phase, cpus): + """ + Optimize the ordering of contigs by Genetic Algorithm (GA). 
+ """ + from .chic import score_evaluate_M + + # Prepare input files + tour_contigs = clm.active_contigs + tour_sizes = clm.active_sizes + tour_M = clm.M + tour = clm.tour + signs = clm.signs + oo = clm.oo + + def callback(tour, gen, phase, oo): + fitness = tour.fitness if hasattr(tour, "fitness") else None + label = "GA{}-{}".format(phase, gen) + if fitness: + fitness = "{0}".format(fitness).split(",")[0].replace("(", "") + label += "-" + fitness + if gen % 20 == 0: + print_tour(fwtour, tour, label, tour_contigs, oo, signs=signs) + return tour + + callbacki = partial(callback, phase=phase, oo=oo) + toolbox = GA_setup(tour) + toolbox.register("evaluate", score_evaluate_M, tour_sizes=tour_sizes, tour_M=tour_M) + tour, tour_fitness = GA_run( + toolbox, ngen=1000, npop=100, cpus=cpus, callback=callbacki + ) + clm.tour = tour + + return tour + + +def optimize_orientations(fwtour, clm, phase, cpus): + """ + Optimize the orientations of contigs by using heuristic flipping. + """ + # Prepare input files + tour_contigs = clm.active_contigs + tour = clm.tour + oo = clm.oo + + print_tour( + fwtour, tour, "FLIPALL{}".format(phase), tour_contigs, oo, signs=clm.signs + ) + tag1 = clm.flip_whole(tour) + print_tour( + fwtour, tour, "FLIPWHOLE{}".format(phase), tour_contigs, oo, signs=clm.signs + ) + tag2 = clm.flip_one(tour) + print_tour( + fwtour, tour, "FLIPONE{}".format(phase), tour_contigs, oo, signs=clm.signs + ) + + return tag1, tag2 + + +def prepare_synteny(tourfile, lastfile, odir, p, opts): + """ + Prepare synteny plots for movie(). 
+ """ + qbedfile, sbedfile = get_bed_filenames(lastfile, p, opts) + qbedfile = op.abspath(qbedfile) + sbedfile = op.abspath(sbedfile) + + qbed = Bed(qbedfile, sorted=False) + contig_to_beds = dict(qbed.sub_beds()) + + # Create a separate directory for the subplots and movie + mkdir(odir, overwrite=True) + os.chdir(odir) + logger.debug("Change into subdir `%s`", odir) + + # Make anchorsfile + anchorsfile = ".".join(op.basename(lastfile).split(".", 2)[:2]) + ".anchors" + fw = open(anchorsfile, "w") + for b in Blast(lastfile): + print( + "\t".join((gene_name(b.query), gene_name(b.subject), str(int(b.score)))), + file=fw, + ) + fw.close() + + # Symlink sbed + symlink(sbedfile, op.basename(sbedfile)) + + return anchorsfile, qbedfile, contig_to_beds + + +def separate_tour_and_o(row): + """ + The tour line typically contains contig list like: + tig00044568+ tig00045748- tig00071055- tig00015093- tig00030900- + + This function separates the names from the orientations. + """ + tour = [] + tour_o = [] + for contig in row.split(): + if contig[-1] in ("+", "-", "?"): + tour.append(contig[:-1]) + tour_o.append(contig[-1]) + else: # Unoriented + tour.append(contig) + tour_o.append("?") + return tour, tour_o + + +def iter_last_tour(tourfile, clm): + """ + Extract last tour from tourfile. The clm instance is also passed in to see + if any contig is covered in the clm. + """ + row = open(tourfile).readlines()[-1] + _tour, _tour_o = separate_tour_and_o(row) + tour = [] + tour_o = [] + for tc, to in zip(_tour, _tour_o): + if tc not in clm.contigs: + logger.debug( + "Contig `%s` in file `%s` not found in `%s`", tc, tourfile, clm.idsfile + ) + continue + tour.append(tc) + tour_o.append(to) + return tour, tour_o + + +def iter_tours(tourfile, frames=1): + """ + Extract tours from tourfile. Tourfile contains a set of contig + configurations, generated at each iteration of the genetic algorithm. 
Each + configuration has two rows, first row contains iteration id and score, + second row contains list of contigs, separated by comma. + """ + fp = open(tourfile) + + i = 0 + for row in fp: + if row[0] == ">": + label = row[1:].strip() + if label.startswith("GA"): + pf, j, score = label.split("-", 2) + j = int(j) + else: + j = 0 + i += 1 + else: + if j % frames != 0: + continue + tour, tour_o = separate_tour_and_o(row) + yield i, label, tour, tour_o + + fp.close() + + +def movie(args): + """ + %prog movie test.tour test.clm ref.contigs.last + + Plot optimization history. + """ + p = OptionParser(movie.__doc__) + p.add_argument("--frames", default=500, type=int, help="Only plot every N frames") + p.add_argument( + "--engine", + default="ffmpeg", + choices=("ffmpeg", "gifsicle"), + help="Movie engine, output MP4 or GIF", + ) + p.set_beds() + opts, args, iopts = p.set_image_options( + args, figsize="16x8", style="white", cmap="coolwarm", format="png", dpi=300 + ) + + if len(args) != 3: + sys.exit(not p.print_help()) + + tourfile, clmfile, lastfile = args + tourfile = op.abspath(tourfile) + clmfile = op.abspath(clmfile) + lastfile = op.abspath(lastfile) + cwd = os.getcwd() + odir = op.basename(tourfile).rsplit(".", 1)[0] + "-movie" + anchorsfile, qbedfile, contig_to_beds = prepare_synteny( + tourfile, lastfile, odir, p, opts + ) + + args = [] + for i, label, tour, tour_o in iter_tours(tourfile, frames=opts.frames): + padi = "{:06d}".format(i) + # Make sure the anchorsfile and bedfile has the serial number in, + # otherwise parallelization may fail + a, b = op.basename(anchorsfile).split(".", 1) + ianchorsfile = a + "_" + padi + "." 
+ b + symlink(anchorsfile, ianchorsfile) + + # Make BED file with new order + qb = Bed() + for contig, o in zip(tour, tour_o): + if contig not in contig_to_beds: + continue + bedlines = contig_to_beds[contig][:] + if o == "-": + bedlines.reverse() + for x in bedlines: + qb.append(x) + + a, b = op.basename(qbedfile).split(".", 1) + ibedfile = a + "_" + padi + "." + b + qb.print_to_file(ibedfile) + # Plot dot plot, but do not sort contigs by name (otherwise losing + # order) + image_name = padi + "." + iopts.format + + tour = ",".join(tour) + args.append( + [[tour, clmfile, ianchorsfile, "--outfile", image_name, "--label", label]] + ) + + Jobs(movieframe, args).run() + + os.chdir(cwd) + make_movie(odir, odir, engine=opts.engine, format=iopts.format) + + +def score(args): + """ + %prog score main_results/ cached_data/ contigsfasta + + Score the current LACHESIS CLM. + """ + p = OptionParser(score.__doc__) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + mdir, cdir, contigsfasta = args + orderingfiles = natsorted(iglob(mdir, "*.ordering")) + sizes = Sizes(contigsfasta) + contig_names = list(sizes.iter_names()) + contig_ids = dict((name, i) for (i, name) in enumerate(contig_names)) + + oo = [] + # Load contact matrix + glm = op.join(cdir, "all.GLM") + N = len(contig_ids) + M = np.zeros((N, N), dtype=int) + fp = open(glm) + for row in fp: + if row[0] == "#": + continue + x, y, z = row.split() + if x == "X": + continue + M[int(x), int(y)] = int(z) + + fwtour = open("tour", "w") + + def callback(tour, gen, oo): + fitness = tour.fitness if hasattr(tour, "fitness") else None + label = "GA-{0}".format(gen) + if fitness: + fitness = "{0}".format(fitness).split(",")[0].replace("(", "") + label += "-" + fitness + print_tour(fwtour, tour, label, contig_names, oo) + return tour + + for ofile in orderingfiles: + co = ContigOrdering(ofile) + for x in co: + contig_id = contig_ids[x.contig_name] + oo.append(contig_id) + pf = 
op.basename(ofile).split(".")[0] + print(pf) + print(oo) + + tour, tour_sizes, tour_M = prepare_ec(oo, sizes, M) + # Store INIT tour + print_tour(fwtour, tour, "INIT", contig_names, oo) + + # Faster Cython version for evaluation + from .chic import score_evaluate_M + + callbacki = partial(callback, oo=oo) + toolbox = GA_setup(tour) + toolbox.register( + "evaluate", score_evaluate_M, tour_sizes=tour_sizes, tour_M=tour_M + ) + tour, tour.fitness = GA_run( + toolbox, npop=100, cpus=opts.cpus, callback=callbacki + ) + print(tour, tour.fitness) + break + + fwtour.close() + + +def print_tour(fwtour, tour, label, contig_names, oo, signs=None): + print(">" + label, file=fwtour) + if signs is not None: + contig_o = [] + for x in tour: + idx = oo[x] + sign = {1: "+", 0: "?", -1: "-"}[signs[idx]] + contig_o.append(contig_names[idx] + sign) + print(" ".join(contig_o), file=fwtour) + else: + print(" ".join(contig_names[oo[x]] for x in tour), file=fwtour) + + +def prepare_ec(oo, sizes, M): + """ + This prepares EC and converts from contig_id to an index. + """ + tour = range(len(oo)) + tour_sizes = np.array([sizes.sizes[x] for x in oo]) + tour_M = M[oo, :][:, oo] + return tour, tour_sizes, tour_M + + +def score_evaluate(tour, tour_sizes=None, tour_M=None): + """SLOW python version of the evaluation function. For benchmarking + purposes only. Do not use in production. + """ + sizes_oo = np.array([tour_sizes[x] for x in tour]) + sizes_cum = np.cumsum(sizes_oo) - sizes_oo / 2 + s = 0 + size = len(tour) + for ia in range(size): + a = tour[ia] + for ib in range(ia + 1, size): + b = tour[ib] + links = tour_M[a, b] + dist = sizes_cum[ib] - sizes_cum[ia] + if dist > 1e7: + break + s += links * 1.0 / dist + return (s,) + + +def movieframe(args): + """ + %prog movieframe tour test.clm contigs.ref.anchors + + Draw heatmap and synteny in the same plot. 
+ """ + p = OptionParser(movieframe.__doc__) + p.add_argument("--label", help="Figure title") + p.set_beds() + p.set_outfile(outfile=None) + opts, args, iopts = p.set_image_options( + args, figsize="16x8", style="white", cmap="coolwarm", format="png", dpi=120 + ) + + if len(args) != 3: + sys.exit(not p.print_help()) + + tour, clmfile, anchorsfile = args + tour = tour.split(",") + image_name = opts.outfile or ("movieframe." + iopts.format) + label = opts.label or op.basename(image_name).rsplit(".", 1)[0] + + clm = CLMFile(clmfile) + totalbins, bins, breaks = make_bins(tour, clm.tig_to_size) + M = read_clm(clm, totalbins, bins) + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) # whole canvas + ax1 = fig.add_axes((0.05, 0.1, 0.4, 0.8)) # heatmap + ax2 = fig.add_axes((0.55, 0.1, 0.4, 0.8)) # dot plot + ax2_root = fig.add_axes((0.5, 0, 0.5, 1)) # dot plot canvas + + # Left axis: heatmap + plot_heatmap(ax1, M, breaks, binsize=BINSIZE) + + # Right axis: synteny + qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts, sorted=False) + dotplot(anchorsfile, qbed, sbed, fig, ax2_root, ax2, sep=False, title="") + + root.text(0.5, 0.98, clm.name, color="g", ha="center", va="center") + root.text(0.5, 0.95, label, color="darkslategray", ha="center", va="center") + normalize_axes(root) + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def make_bins(tour, sizes): + breaks = [] + start = 0 + bins = {} + for x in tour: + size = sizes[x] + end = start + int(round(size * 1.0 / BINSIZE)) + bins[x] = (start, end) + start = end + breaks.append(start) + + totalbins = start + return totalbins, bins, breaks + + +def read_clm(clm, totalbins, bins): + M = np.zeros((totalbins, totalbins)) + for (x, y), z in clm.contacts.items(): + if x not in bins or y not in bins: + continue + xstart, xend = bins[x] + ystart, yend = bins[y] + M[xstart:xend, ystart:yend] = z + M[ystart:yend, xstart:xend] = z + + M = np.log10(M + 1) + return M + + +def agp(args): 
+ """ + %prog agp main_results/ contigs.fasta + + Generate AGP file based on LACHESIS output. + """ + p = OptionParser(agp.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + odir, contigsfasta = args + fwagp = must_open(opts.outfile, "w") + orderingfiles = natsorted(iglob(odir, "*.ordering")) + sizes = Sizes(contigsfasta).mapping + contigs = set(sizes.keys()) + anchored = set() + + for ofile in orderingfiles: + co = ContigOrdering(ofile) + anchored |= set([x.contig_name for x in co]) + obj = op.basename(ofile).split(".")[0] + co.write_agp(obj, sizes, fwagp) + + singletons = contigs - anchored + logger.debug("Anchored: %d, Singletons: %d", len(anchored), len(singletons)) + + for s in natsorted(singletons): + order_to_agp(s, [(s, "?")], sizes, fwagp) + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/kmer.py b/jcvi/assembly/kmer.py new file mode 100644 index 00000000..f50d69ca --- /dev/null +++ b/jcvi/assembly/kmer.py @@ -0,0 +1,1410 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Deals with K-mers and K-mer distribution from reads or genome +""" +import os.path as op +import sys +import math + +from collections import defaultdict +from typing import List + +import numpy as np +from more_itertools import chunked + +from ..apps.grid import MakeManager +from ..apps.base import ( + ActionDispatcher, + OptionParser, + PIPE, + Popen, + logger, + need_update, + sh, +) +from ..formats.fasta import Fasta +from ..formats.base import BaseFile, must_open, get_number +from ..graphics.base import ( + adjust_spines, + asciiplot, + markup, + normalize_axes, + panel_labels, + plt, + savefig, + set_human_axis, + set_ticklabels_helvetica, + write_messages, +) +from ..utils.cbook import thousands, percentage + +from .automaton import iter_project + + +KMERYL, KSOAP, KALLPATHS = range(3) + + +class KmerSpectrum(BaseFile): + def __init__(self, histfile): + super().__init__(histfile) + 
self.load_data(histfile) + + def load_data(self, histfile): + self.data = [] + self.totalKmers = 0 + self.hist = {} + kformat = self.guess_format(histfile) + kformats = ("Meryl", "Soap", "AllPaths") + logger.debug("Guessed format: %s", kformats[kformat]) + + fp = open(histfile) + for rowno, row in enumerate(fp): + if row[0] == "#": + continue + if kformat == KSOAP: + K = rowno + 1 + counts = int(row.strip()) + else: # meryl histogram + K, counts = row.split()[:2] + K, counts = int(K), int(counts) + + Kcounts = K * counts + self.totalKmers += Kcounts + self.hist[K] = Kcounts + self.data.append((K, counts)) + + def guess_format(self, histfile): + # Guess the format of the Kmer histogram + fp = open(histfile) + for row in fp: + if row.startswith("# 1:"): + return KALLPATHS + if len(row.split()) == 1: + return KSOAP + return KMERYL + + def get_xy(self, vmin=1, vmax=100): + self.counts = sorted((a, b) for a, b in self.hist.items() if vmin <= a <= vmax) + return zip(*self.counts) + + def analyze(self, K=23, maxiter=100, method="nbinom"): + """Analyze K-mer histogram. + + Args: + K (int, optional): K-mer size. Defaults to 23. + maxiter (int): Iterations to run. Defaults to 100. + method (str, optional): Method to use, either 'nbinom' or + 'allpaths'. Defaults to "nbinom". + + Returns: + A dictionary containing info for annotating the plot. analyze() also + sets the following properties: + - lambda_: Main peak + - repetitive: Repeats message + - snprate: SNP rate message + """ + if method == "nbinom": + return self.analyze_nbinom(K=K, maxiter=maxiter) + return self.analyze_allpaths(K=K) + + def analyze_nbinom(self, K=23, maxiter=100): + """Analyze the K-mer histogram using negative binomial distribution. + + Args: + K (int, optional): K-mer size used when generating the histogram. Defaults to 23. 
+ """ + from scipy.stats import nbinom + from scipy.optimize import minimize_scalar + from functools import lru_cache + + method, xopt = "bounded", "xatol" + MAX_1CN_SIZE = 1e10 + MAX_OPTIMIZED_SIZE = 9.9e9 + + # Generate bins for the decomposed negative binomial distributions + bins = [ + (i, i) for i in range(1, 9) + ] # The first 8 CN are critical often determines ploidy + for i in (8, 16, 32, 64, 128, 256, 512): # 14 geometricly sized bins + a, b = i + 1, int(round(i * 2**0.5)) + bins.append((a, b)) + a, b = b + 1, i * 2 + bins.append((a, b)) + + # Convert histogram to np array so we can index by CN + kf_ceil = max([cov for cov, _ in self.data]) + N = kf_ceil + 1 + hist = np.zeros(N, dtype=int) + for cov, count in self.data: + hist[cov] = count + + # min1: find first minimum + _kf_min1 = 5 + while ( + _kf_min1 - 1 >= 2 + and hist[_kf_min1 - 1] * (_kf_min1 - 1) < hist[_kf_min1] * _kf_min1 + ): + _kf_min1 -= 1 + while ( + _kf_min1 <= kf_ceil + and hist[_kf_min1 + 1] * (_kf_min1 + 1) < hist[_kf_min1] * _kf_min1 + ): + _kf_min1 += 1 + + # max2: find absolute maximum mx2 above first minimum min1 + _kf_max2 = _kf_min1 + for kf in range(_kf_min1 + 1, int(0.8 * kf_ceil)): + if hist[kf] * kf > hist[_kf_max2] * _kf_max2: + _kf_max2 = kf + + # Discard the last entry as that is usually an inflated number + hist = hist[:-1] + kf_range = np.arange(_kf_min1, len(hist), dtype=int) + P = hist[kf_range] * kf_range # Target distribution + print("==> Start nbinom method on range ({}, {})".format(_kf_min1, len(hist))) + + # Below is the optimization schemes, we optimize one variable at a time + @lru_cache(maxsize=None) + def nbinom_pmf_range(lambda_: int, rho: int, bin_id: int): + stacked = np.zeros(len(kf_range), dtype=np.float64) + lambda_ /= 100 # 2-digit precision + rho /= 100 # 2-digit precision + n = lambda_ / (rho - 1) + p = 1 / rho + start, end = bins[bin_id] + for i in range(start, end + 1): + stacked += nbinom.pmf(kf_range, n * i, p) + return stacked + + def 
generative_model(G, lambda_, rho): + stacked = np.zeros(len(kf_range), dtype=np.float64) + lambda_ = int(round(lambda_ * 100)) + rho = int(round(rho * 100)) + for bin_id, g in enumerate(G): + stacked += g * nbinom_pmf_range(lambda_, rho, bin_id) + stacked *= kf_range + return stacked + + def func(lambda_, rho, G): + stacked = generative_model(G, lambda_, rho) + return np.sum((P - stacked) ** 2) # L2 norm + + def optimize_func(lambda_, rho, G): + # Iterate over all G + for i, g in enumerate(G): + G_i = optimize_func_Gi(lambda_, rho, G, i) + if ( + not 1 < G_i < MAX_OPTIMIZED_SIZE + ): # Optimizer did not optimize this G_i + break + # Also remove the last bin since it is subject to marginal effect + G[i - 1] = 0 + lambda_ = optimize_func_lambda_(lambda_, rho, G) + rho = optimize_func_rho(lambda_, rho, G) + score = func(lambda_, rho, G) + return lambda_, rho, G, score + + def optimize_func_lambda_(lambda_, rho, G): + def f(arg): + return func(arg, rho, G) + + res = minimize_scalar( + f, bounds=(_kf_min1, 100), method=method, options={xopt: 0.01} + ) + return res.x + + def optimize_func_rho(lambda_, rho, G): + def f(arg): + return func(lambda_, arg, G) + + res = minimize_scalar( + f, bounds=(1.001, 5), method=method, options={xopt: 0.01} + ) + return res.x + + def optimize_func_Gi(lambda_, rho, G, i): + # Iterate a single G_i + def f(arg): + G[i] = arg + return func(lambda_, rho, G) + + res = minimize_scalar( + f, bounds=(0, MAX_1CN_SIZE), method=method, options={xopt: 100} + ) + return res.x + + def run_optimization(termination=0.999, maxiter=100): + ll, rr, GG = l0, r0, G0 + prev_score = np.inf + for i in range(maxiter): + print("Iteration", i + 1, file=sys.stderr) + ll, rr, GG, score = optimize_func(ll, rr, GG) + if score / prev_score > termination: + break + prev_score = score + if i % 10 == 0: + print(ll, rr, GG, score, file=sys.stderr) + print("Success!", file=sys.stderr) + # Remove bogus values that are close to the bounds + final_GG = [g for g in GG if 1 < g < 
MAX_OPTIMIZED_SIZE] + return ll, rr, final_GG + + # Optimization - very slow + G0 = np.zeros(len(bins)) + l0 = _kf_max2 + r0 = 1.5 + print(l0, r0, G0, file=sys.stderr) + ll, rr, GG = run_optimization(maxiter=maxiter) + print(ll, rr, GG, file=sys.stderr) + + # Ready for genome summary + m = f"\n==> Kmer (K={K}) Spectrum Analysis\n" + + genome_size = int(round(self.totalKmers / ll)) + inferred_genome_size = 0 + for i, g in enumerate(GG): + start, end = bins[i] + mid = (start + end) / 2 + inferred_genome_size += g * mid * (end - start + 1) + inferred_genome_size = int(round(inferred_genome_size)) + genome_size = max(genome_size, inferred_genome_size) + m += f"Genome size estimate = {thousands(genome_size)}\n" + copy_series = [] + copy_messages = [] + for i, g in enumerate(GG): + start, end = bins[i] + mid = (start + end) / 2 + copy_num = start if start == end else "{}-{}".format(start, end) + g_copies = int(round(g * mid * (end - start + 1))) + copy_series.append((mid, copy_num, g_copies, g)) + copy_message = f"CN {copy_num}: {g_copies / 1e6:.1f} Mb ({ g_copies * 100 / genome_size:.1f} %)" + copy_messages.append(copy_message) + m += copy_message + "\n" + + if genome_size > inferred_genome_size: + g_copies = genome_size - inferred_genome_size + copy_num = "{}+".format(end + 1) + copy_series.append((end + 1, copy_num, g_copies, g_copies / (end + 1))) + m += f"CN {copy_num}: {g_copies / 1e6:.1f} Mb ({ g_copies * 100 / genome_size:.1f} %)\n" + + # Determine ploidy + def determine_ploidy(copy_series, threshold=0.15): + counts_so_far = 1 + ploidy_so_far = 0 + for mid, _, g_copies, _ in copy_series: + if g_copies / counts_so_far < threshold: + break + counts_so_far += g_copies + ploidy_so_far = mid + return int(ploidy_so_far) + + ploidy = determine_ploidy(copy_series) + self.ploidy = ploidy + self.ploidy_message = f"Ploidy: {ploidy}" + m += self.ploidy_message + "\n" + self.copy_messages = copy_messages[:ploidy] + + # Repeat content + def calc_repeats(copy_series, ploidy, 
genome_size): + unique = 0 + for mid, _, g_copies, _ in copy_series: + if mid <= ploidy: + unique += g_copies + else: + break + return 1 - unique / genome_size + + repeats = calc_repeats(copy_series, ploidy, genome_size) + self.repetitive = f"Repeats: {repeats * 100:.1f} %" + m += self.repetitive + "\n" + + # SNP rate + def calc_snp_rate(copy_series, ploidy, genome_size, K): + # We can calculate the SNP rate s, assuming K-mer of length K: + # s = 1-(1-L/G)^(1/K) + # L: # of unique K-mers under 'het' peak + # G: genome size + # K: K-mer length + L = 0 + for mid, copy_num, g_copies, g in copy_series: + if mid < ploidy: + L += g + else: + break + return 1 - (1 - L / genome_size) ** (1 / K) + + snp_rate = calc_snp_rate(copy_series, ploidy, genome_size, K) + self.snprate = f"SNP rate: {snp_rate * 100:.2f} %" + m += self.snprate + "\n" + print(m, file=sys.stderr) + + self.lambda_ = ll + return { + "generative_model": generative_model, + "Gbins": GG, + "lambda": ll, + "rho": rr, + "kf_range": kf_range, + } + + def analyze_allpaths(self, ploidy=2, K=23, covmax=1000000): + """ + Analyze Kmer spectrum, calculations derived from + allpathslg/src/kmers/KmerSpectra.cc + """ + from math import sqrt + + data = self.data + kf_ceil = max(K for (K, c) in data) + if kf_ceil > covmax: + exceeds = sum(1 for (K, c) in data if K > covmax) + logger.debug( + "A total of %d distinct K-mers appear > %d times. Ignored ...", + exceeds, + covmax, + ) + kf_ceil = covmax + + nkf = kf_ceil + 1 + a = [0] * nkf + for kf, c in data: + if kf > kf_ceil: + continue + a[kf] = c + + ndk = a # number of distinct kmers + nk = [k * c for k, c in enumerate(a)] # number of kmers + cndk = [0] * nkf # cumulative number of distinct kmers + cnk = [0] * nkf # cumulative number of kmers + for kf in range(1, nkf): + cndk[kf] = cndk[kf - 1] + 0.5 * (ndk[kf - 1] + ndk[kf]) + cnk[kf] = cnk[kf - 1] + 0.5 * (nk[kf - 1] + nk[kf]) + + # Separate kmer spectrum in 5 regions based on the kf + # 1 ... 
kf_min1 : bad kmers with low frequency + # kf_min1 ... kf_min2 : good kmers CN = 1/2 (SNPs) + # kf_min2 ... kf_min3 : good kmers CN = 1 + # kf_min3 ... kf_hi : good kmers CN > 1 (repetitive) + # kf_hi ... inf : bad kmers with high frequency + + # min1: find first minimum + _kf_min1 = 10 + while _kf_min1 - 1 >= 2 and nk[_kf_min1 - 1] < nk[_kf_min1]: + _kf_min1 -= 1 + while _kf_min1 <= kf_ceil and nk[_kf_min1 + 1] < nk[_kf_min1]: + _kf_min1 += 1 + + # max2: find absolute maximum mx2 above first minimum min1 + _kf_max2 = _kf_min1 + for kf in range(_kf_min1 + 1, int(0.8 * kf_ceil)): + if nk[kf] > nk[_kf_max2]: + _kf_max2 = kf + + # max2: resetting max2 for cases of very high polymorphism + if ploidy == 2: + ndk_half = ndk[_kf_max2 // 2] + ndk_double = ndk[_kf_max2 * 2] + if ndk_double > ndk_half: + _kf_max2 *= 2 + + # max1: SNPs local maximum max1 as half global maximum max2 + _kf_max1 = _kf_max2 // 2 + + # min2: SNPs local minimum min2 between max1 and max2 + _kf_min2 = ( + _kf_max1 + * (2 * ndk[_kf_max1] + ndk[_kf_max2]) + // (ndk[_kf_max1] + ndk[_kf_max2]) + ) + + # min1: refine between min1 and max2/2 + for kf in range(_kf_min1 + 1, _kf_max1): + if nk[kf] < nk[_kf_min1]: + _kf_min1 = kf + + # min3: not a minimum, really. 
upper edge of main peak + _kf_min3 = _kf_max2 * 3 // 2 + + print("kfs:", _kf_min1, _kf_max1, _kf_min2, _kf_max2, _kf_min3, file=sys.stderr) + self.min1 = _kf_min1 + self.max1 = _kf_max1 + self.min2 = _kf_min2 + self.max2 = _kf_max2 + self.min3 = _kf_min3 + self.lambda_ = self.max2 # Main peak + + # Define maximum kf above which we neglect data + _kf_hi = ( + _kf_max2 * sqrt(4 * ndk[2 * _kf_max2] * _kf_max2) + if 2 * _kf_max2 < len(ndk) + else _kf_max2 * sqrt(4 * ndk[len(ndk) - 1] * _kf_max2) + ) + _kf_hi = int(_kf_hi) + + if _kf_hi > kf_ceil: + _kf_hi = kf_ceil + + _nk_total = cnk[len(cnk) - 1] + _nk_bad_low_kf = cnk[_kf_min1] + _nk_good_uniq = cnk[_kf_min3] - cnk[_kf_min2] + _nk_bad_high_kf = _nk_total - cnk[_kf_hi] + _ndk_good_snp = cndk[_kf_min2] - cndk[_kf_min1] + _ndk_good_uniq = cndk[_kf_min3] - cndk[_kf_min2] + + # kmer coverage C_k + _kf_ave_uniq = _nk_good_uniq * 1.0 / _ndk_good_uniq + _genome_size = (_nk_total - _nk_bad_low_kf - _nk_bad_high_kf) / _kf_ave_uniq + _genome_size_unique = _ndk_good_uniq + _ndk_good_snp / 2 + _genome_size_repetitive = _genome_size - _genome_size_unique + _coverage = _nk_total / _genome_size if _genome_size else 0 + + # SNP rate estimation, assumes uniform distribution of SNPs over the + # genome and accounts for the reduction in SNP kmer counts when + # polymorphism is very high + if ploidy == 2: + _d_SNP = ( + 1.0 / (1.0 - (1.0 - 0.5 * _ndk_good_snp / _genome_size) ** (1.0 / K)) + if _ndk_good_snp > 0 + else 1000000 + ) + + G = int(_genome_size) + G1 = int(_genome_size_unique) + GR = int(_genome_size_repetitive) + coverage = int(_coverage) + + m = f"Kmer (K={K}) Spectrum Analysis\n" + m += f"Genome size estimate = {thousands(G)}\n" + m += f"Genome size estimate CN = 1 = {thousands(G1)} ({percentage(G1, G)})\n" + m += f"Genome size estimate CN > 1 = {thousands(GR)} ({percentage(GR, G)})\n" + m += f"Coverage estimate: {coverage} x\n" + self.repetitive = f"Repeats: {GR * 100 // G} %" + + if ploidy == 2: + d_SNP = int(_d_SNP) + 
self.snprate = f"SNP rate ~= 1/{d_SNP}" + else: + self.snprate = f"SNP rate not computed (Ploidy = {ploidy})" + m += self.snprate + "\n" + + self.genomesize = int(round(self.totalKmers * 1.0 / self.max2)) + + print(m, file=sys.stderr) + return {} + + +class KMCComplex(object): + def __init__(self, indices): + self.indices = indices + + def write( + self, + outfile: str, + action: str = "union", + ci_in: int = 0, + ci_out: int = 0, + batch: int = 0, + ): + assert action in ("union", "intersect") + op = " + sum " if action == "union" else " * " + mm = MakeManager() + if batch > 1: + filename = outfile + ".{}.def" + # Divide indices into batches + batches = [] + batchsize = (len(self.indices) + batch - 1) // batch + logger.debug("Use batchsize of %d", batchsize) + for i, indices in enumerate(chunked(self.indices, batchsize)): + filename_i = filename.format(i + 1) + outfile_i = outfile + ".{}".format(i + 1) + self.write_definitions( + filename_i, indices, outfile_i, op, ci_in=ci_in, ci_out=0 + ) + cmd = "kmc_tools complex {}".format(filename_i) + outfile_suf = outfile_i + ".kmc_suf" + mm.add(indices, outfile_suf, cmd) + batches.append(outfile_suf) + else: + batches = self.indices + + # Merge batches into one + filename = outfile + ".def" + self.write_definitions( + filename, batches, outfile, op, ci_in=ci_in, ci_out=ci_out + ) + outfile_suf = outfile + ".kmc_suf" + mm.add(batches, outfile_suf, "kmc_tools complex {}".format(filename)) + + # Write makefile + mm.write() + + def write_definitions( + self, + filename: str, + indices: List[str], + outfile: str, + op: str, + ci_in: int, + ci_out: int, + ): + fw = must_open(filename, "w") + print("INPUT:", file=fw) + ss = [] + pad = len(str(len(indices))) + for i, e in enumerate(indices): + s = "s{0:0{1}d}".format(i + 1, pad) + ss.append(s) + msg = "{} = {}".format(s, e.rsplit(".", 1)[0]) + if ci_in: + msg += f" -ci{ci_in}" + print(msg, file=fw) + print("OUTPUT:", file=fw) + print("{} = {}".format(outfile, op.join(ss)), 
file=fw) + if ci_out: + print("OUTPUT_PARAMS:", file=fw) + print(f"-ci{ci_out}", file=fw) + fw.close() + + +def main(): + + actions = ( + # K-mer counting + ("jellyfish", "count kmers using `jellyfish`"), + ("meryl", "count kmers using `meryl`"), + ("kmc", "count kmers using `kmc`"), + ("kmcop", "intersect or union kmc indices"), + ("entropy", "calculate entropy for kmers from kmc dump"), + ("bed", "map kmers on FASTA"), + # K-mer histogram + ("histogram", "plot the histogram based on meryl K-mer distribution"), + ("multihistogram", "plot histogram across a set of K-mer sizes"), + # These forms a pipeline to count K-mers for given FASTA seq + ("dump", "convert FASTA sequences to list of K-mers"), + ("bin", "serialize counts to bitarrays"), + ("bincount", "count K-mers in the bin"), + ("count", "run dump - jellyfish - bin - bincount in serial"), + ("logodds", "compute log likelihood between two db"), + ("model", "model kmer distribution given error rate"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def entropy_score(kmer): + """ + Schmieder and Edwards. Quality control and preprocessing of metagenomic datasets. 
(2011) Bioinformatics + https://academic.oup.com/bioinformatics/article/27/6/863/236283/Quality-control-and-preprocessing-of-metagenomic + """ + l = len(kmer) - 2 + k = l if l < 64 else 64 + counts = defaultdict(int) + for i in range(l): + trinuc = kmer[i : i + 3] + counts[trinuc] += 1 + + logk = math.log(k) + res = 0 + for k, v in counts.items(): + f = v * 1.0 / l + res += f * math.log(f) / logk + return res * -100 + + +def entropy(args): + """ + %prog entropy kmc_dump.out + + kmc_dump.out contains two columns: + AAAAAAAAAAAGAAGAAAGAAA 34 + """ + p = OptionParser(entropy.__doc__) + p.add_argument( + "--threshold", default=0, type=int, help="Complexity needs to be above" + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (kmc_out,) = args + fp = open(kmc_out) + for row in fp: + kmer, count = row.split() + score = entropy_score(kmer) + if score >= opts.threshold: + print(" ".join((kmer, count, "{:.2f}".format(score)))) + + +def bed(args): + """ + %prog bed fastafile kmer.dump.txt + + Map kmers on FASTA. + """ + from jcvi.formats.fasta import rc, parse_fasta + + p = OptionParser(bed.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, dumpfile = args + fp = open(dumpfile) + KMERS = set() + for row in fp: + kmer = row.split()[0] + kmer_rc = rc(kmer) + KMERS.add(kmer) + KMERS.add(kmer_rc) + + K = len(kmer) + logger.debug("Imported %d %d-mers", len(KMERS), K) + + for name, seq in parse_fasta(fastafile): + name = name.split()[0] + for i in range(len(seq) - K): + if i % 5000000 == 0: + print("{}:{}".format(name, i), file=sys.stderr) + kmer = seq[i : i + K] + if kmer in KMERS: + print("\t".join(str(x) for x in (name, i, i + K, kmer))) + + +def kmcop(args): + """ + %prog kmcop *.kmc_suf + + Intersect or union kmc indices. 
+ """ + p = OptionParser(kmcop.__doc__) + p.add_argument( + "--action", + choices=("union", "intersect", "reduce"), + default="union", + help="Action", + ) + p.add_argument( + "--ci_in", + default=0, + type=int, + help="Exclude input kmers with less than ci_in counts", + ) + p.add_argument( + "--cs", + default=0, + type=int, + help="Maximal value of a counter, only used when action is reduce", + ) + p.add_argument( + "--ci_out", + default=0, + type=int, + help="Exclude output kmers with less than ci_out counts", + ) + p.add_argument( + "--batch", + default=1, + type=int, + help="Number of batch, useful to reduce memory usage", + ) + p.add_argument("--exclude", help="Exclude accessions from this list") + p.add_argument("-o", default="results", help="Output name") + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + indices = args + if opts.exclude: + before = set(indices) + exclude_ids = set(x.strip() for x in open(opts.exclude)) + indices = [x for x in indices if x.rsplit(".", 2)[0] not in exclude_ids] + after = set(indices) + if before > after: + logger.debug( + "Excluded accessions %d → %d (%s)", + len(before), + len(after), + ",".join(before - after), + ) + if opts.action == "reduce": + mm = MakeManager() + ci = opts.ci_in + cs = opts.cs + suf = "" + if ci: + suf += f"_ci{ci}" + if cs: + suf += f"_cs{cs}" + for index in indices: + idx = index.rsplit(".", 1)[0] + reduced_idx = idx + suf + cmd = f"kmc_tools transform {idx} reduce {reduced_idx}" + if ci: + cmd += f" -ci{ci}" + if cs: + cmd += f" -cs{cs}" + reduced_index = reduced_idx + ".kmc_suf" + mm.add(index, reduced_index, cmd) + mm.write() + else: + ku = KMCComplex(indices) + ku.write( + opts.o, + action=opts.action, + ci_in=opts.ci_in, + ci_out=opts.ci_out, + batch=opts.batch, + ) + + +def kmc(args): + """ + %prog kmc folder + + Run kmc3 on Illumina reads. 
+ """ + p = OptionParser(kmc.__doc__) + p.add_argument("-k", default=27, type=int, help="Kmer size") + p.add_argument( + "--ci", default=2, type=int, help="Exclude kmers with less than ci counts" + ) + p.add_argument("--cs", default=0, type=int, help="Maximal value of a counter") + p.add_argument("--cx", type=int, help="Exclude kmers with more than cx counts") + p.add_argument( + "--single", + default=False, + action="store_true", + help="Input is single-end data, only one FASTQ/FASTA", + ) + p.add_argument( + "--fasta", + default=False, + action="store_true", + help="Input is FASTA instead of FASTQ", + ) + p.add_argument( + "--mem", default=48, type=int, help="Max amount of RAM in GB (`kmc -m`)" + ) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (folder,) = args + K = opts.k + n = 1 if opts.single else 2 + pattern = ( + "*.fa,*.fa.gz,*.fasta,*.fasta.gz" + if opts.fasta + else "*.fq,*.fq.gz,*.fastq,*.fastq.gz" + ) + + mm = MakeManager() + for p, pf in iter_project(folder, pattern=pattern, n=n, commonprefix=False): + pf = pf.split("_")[0] + ".ms{}".format(K) + infiles = pf + ".infiles" + fw = open(infiles, "w") + print("\n".join(p), file=fw) + fw.close() + + cmd = "kmc -k{} -m{} -t{}".format(K, opts.mem, opts.cpus) + cmd += " -ci{}".format(opts.ci) + if opts.cs: + cmd += " -cs{}".format(opts.cs) + if opts.cx: + cmd += " -cx{}".format(opts.cx) + if opts.fasta: + cmd += " -fm" + cmd += " @{} {} .".format(infiles, pf) + outfile = pf + ".kmc_suf" + mm.add(p, outfile, cmd) + + mm.write() + + +def meryl(args): + """ + %prog meryl folder + + Run meryl on Illumina reads. 
+ """ + p = OptionParser(meryl.__doc__) + p.add_argument("-k", default=19, type=int, help="Kmer size") + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (folder,) = args + K = opts.k + cpus = opts.cpus + mm = MakeManager() + for p, pf in iter_project(folder): + cmds = [] + mss = [] + for i, ip in enumerate(p): + ms = "{}{}.ms{}".format(pf, i + 1, K) + mss.append(ms) + cmd = "meryl -B -C -m {} -threads {}".format(K, cpus) + cmd += " -s {} -o {}".format(ip, ms) + cmds.append(cmd) + ams, bms = mss + pms = "{}.ms{}".format(pf, K) + cmd = "meryl -M add -s {} -s {} -o {}".format(ams, bms, pms) + cmds.append(cmd) + cmd = "rm -f {}.mcdat {}.mcidx {}.mcdat {}.mcidx".format(ams, ams, bms, bms) + cmds.append(cmd) + mm.add(p, pms + ".mcdat", cmds) + + mm.write() + + +def model(args): + """ + %prog model erate + + Model kmer distribution given error rate. See derivation in FIONA paper: + + """ + from scipy.stats import binom, poisson + + p = OptionParser(model.__doc__) + p.add_argument("-k", default=23, type=int, help="Kmer size") + p.add_argument("--cov", default=50, type=int, help="Expected coverage") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (erate,) = args + erate = float(erate) + cov = opts.cov + k = opts.k + + xy = [] + # Range include c although it is unclear what it means to have c=0 + for c in range(0, cov * 2 + 1): + Prob_Yk = 0 + for i in range(k + 1): + # Probability of having exactly i errors + pi_i = binom.pmf(i, k, erate) + # Expected coverage of kmer with exactly i errors + mu_i = cov * (erate / 3) ** i * (1 - erate) ** (k - i) + # Probability of seeing coverage of c + Prob_Yk_i = poisson.pmf(c, mu_i) + # Sum i over 0, 1, ... up to k errors + Prob_Yk += pi_i * Prob_Yk_i + xy.append((c, Prob_Yk)) + + x, y = zip(*xy) + asciiplot(x, y, title="Model") + + +def logodds(args): + """ + %prog logodds cnt1 cnt2 + + Compute log likelihood between two db. 
+ """ + from math import log + from jcvi.formats.base import DictFile + + p = OptionParser(logodds.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + cnt1, cnt2 = args + d = DictFile(cnt2) + fp = open(cnt1) + for row in fp: + scf, c1 = row.split() + c2 = d[scf] + c1, c2 = float(c1), float(c2) + c1 += 1 + c2 += 1 + score = int(100 * (log(c1) - log(c2))) + print("{0}\t{1}".format(scf, score)) + + +def get_K(jfdb): + """ + Infer K from jellyfish db. + """ + j = jfdb.rsplit("_", 1)[0].rsplit("-", 1)[-1] + assert j[0] == "K" + return int(j[1:]) + + +def count(args): + """ + %prog count fastafile jf.db + + Run dump - jellyfish - bin - bincount in serial. + """ + from bitarray import bitarray + + p = OptionParser(count.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, jfdb = args + K = get_K(jfdb) + cmd = "jellyfish query {0} -C | cut -d' ' -f 2".format(jfdb) + t = must_open("tmp", "w") + proc = Popen(cmd, stdin=PIPE, stdout=t) + t.flush() + + f = Fasta(fastafile, lazy=True) + for name, rec in f.iteritems_ordered(): + kmers = list(make_kmers(rec.seq, K)) + print("\n".join(kmers), file=proc.stdin) + proc.stdin.close() + logger.debug(cmd) + proc.wait() + + a = bitarray() + binfile = ".".join((fastafile, jfdb, "bin")) + fw = open(binfile, "w") + t.seek(0) + for row in t: + c = row.strip() + a.append(int(c)) + a.tofile(fw) + logger.debug("Serialize %d bits to `%s`.", len(a), binfile) + fw.close() + sh("rm {0}".format(t.name)) + + logger.debug( + "Shared K-mers (K=%d) between `%s` and `%s` written to `%s`.", + K, + fastafile, + jfdb, + binfile, + ) + cntfile = ".".join((fastafile, jfdb, "cnt")) + bincount([fastafile, binfile, "-o", cntfile, "-K {0}".format(K)]) + logger.debug("Shared K-mer counts written to `%s`.", cntfile) + + +def bincount(args): + """ + %prog bincount fastafile binfile + + Count K-mers in the bin. 
+ """ + from bitarray import bitarray + from jcvi.formats.sizes import Sizes + + p = OptionParser(bincount.__doc__) + p.add_argument("-K", default=23, type=int, help="K-mer size") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, binfile = args + K = opts.K + + fp = open(binfile) + a = bitarray() + a.fromfile(fp) + f = Sizes(fastafile) + tsize = 0 + fw = must_open(opts.outfile, "w") + for name, seqlen in f.iter_sizes(): + ksize = seqlen - K + 1 + b = a[tsize : tsize + ksize] + bcount = b.count() + print("\t".join(str(x) for x in (name, bcount)), file=fw) + tsize += ksize + + +def bin(args): + """ + %prog bin filename filename.bin + + Serialize counts to bitarrays. + """ + from bitarray import bitarray + + p = OptionParser(bin.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + inp, outp = args + fp = must_open(inp) + fw = must_open(outp, "w") + a = bitarray() + for row in fp: + c = row.split()[-1] + a.append(int(c)) + a.tofile(fw) + fw.close() + + +def make_kmers(seq, K): + seq = str(seq).upper().replace("N", "A") + seqlen = len(seq) + for i in range(seqlen - K + 1): + yield seq[i : i + K] + + +def dump(args): + """ + %prog dump fastafile + + Convert FASTA sequences to list of K-mers. + """ + p = OptionParser(dump.__doc__) + p.add_argument("-K", default=23, type=int, help="K-mer size") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + K = opts.K + fw = must_open(opts.outfile, "w") + f = Fasta(fastafile, lazy=True) + for name, rec in f.iteritems_ordered(): + kmers = list(make_kmers(rec.seq, K)) + print("\n".join(kmers), file=fw) + fw.close() + + +def jellyfish(args): + """ + %prog jellyfish [*.fastq|*.fasta] + + Run jellyfish to dump histogram to be used in kmer.histogram(). 
+ """ + from jcvi.apps.base import getfilesize + from jcvi.utils.cbook import human_size + + p = OptionParser(jellyfish.__doc__) + p.add_argument("-K", default=23, type=int, help="K-mer size") + p.add_argument( + "--coverage", + default=40, + type=int, + help="Expected sequence coverage", + ) + p.add_argument("--prefix", default="jf", help="Database prefix") + p.add_argument( + "--nohist", + default=False, + action="store_true", + help="Do not print histogram", + ) + p.set_home("jellyfish") + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + fastqfiles = args + K = opts.K + coverage = opts.coverage + + totalfilesize = sum(getfilesize(x) for x in fastqfiles) + fq = fastqfiles[0] + pf = opts.prefix + gzip = fq.endswith(".gz") + + hashsize = totalfilesize / coverage + logger.debug( + "Total file size: %s, hashsize (-s): %d", + human_size(totalfilesize, a_kilobyte_is_1024_bytes=True), + hashsize, + ) + + jfpf = "{0}-K{1}".format(pf, K) + jfdb = jfpf + fastqfiles = " ".join(fastqfiles) + + jfcmd = op.join(opts.jellyfish_home, "jellyfish") + cmd = jfcmd + cmd += " count -t {0} -C -o {1}".format(opts.cpus, jfpf) + cmd += " -s {0} -m {1}".format(hashsize, K) + if gzip: + cmd = "gzip -dc {0} | ".format(fastqfiles) + cmd + " /dev/fd/0" + else: + cmd += " " + fastqfiles + + if need_update(fastqfiles, jfdb): + sh(cmd) + + if opts.nohist: + return + + jfhisto = jfpf + ".histogram" + cmd = jfcmd + " histo -t 64 {0} -o {1}".format(jfdb, jfhisto) + + if need_update(jfdb, jfhisto): + sh(cmd) + + +def multihistogram(args): + """ + %prog multihistogram *.histogram species + + Plot the histogram based on a set of K-mer hisotograms. The method is based + on Star et al.'s method (Atlantic Cod genome paper). 
+ """ + p = OptionParser(multihistogram.__doc__) + p.add_argument("--kmin", default=15, type=int, help="Minimum K-mer size, inclusive") + p.add_argument("--kmax", default=30, type=int, help="Maximum K-mer size, inclusive") + p.add_argument("--vmin", default=2, type=int, help="Minimum value, inclusive") + p.add_argument("--vmax", default=100, type=int, help="Maximum value, inclusive") + opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300) + + if len(args) < 1: + sys.exit(not p.print_help()) + + histfiles = args[:-1] + species = args[-1] + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + A = fig.add_axes((0.08, 0.12, 0.38, 0.76)) + B = fig.add_axes((0.58, 0.12, 0.38, 0.76)) + + lines = [] + legends = [] + genomesizes = [] + for histfile in histfiles: + ks = KmerSpectrum(histfile) + x, y = ks.get_xy(opts.vmin, opts.vmax) + K = get_number(op.basename(histfile).split(".")[0].split("-")[-1]) + if not opts.kmin <= K <= opts.kmax: + continue + + (line,) = A.plot(x, y, "-", lw=1) + lines.append(line) + legends.append("K = {0}".format(K)) + ks.analyze(K=K, method="allpaths") + genomesizes.append((K, ks.genomesize / 1e6)) + + leg = A.legend(lines, legends, shadow=True, fancybox=True) + leg.get_frame().set_alpha(0.5) + + title = "{0} genome K-mer histogram".format(species) + A.set_title(markup(title)) + xlabel, ylabel = "Coverage (X)", "Counts" + A.set_xlabel(xlabel) + A.set_ylabel(ylabel) + set_human_axis(A) + + title = "{0} genome size estimate".format(species) + B.set_title(markup(title)) + x, y = zip(*genomesizes) + B.plot(x, y, "ko", mfc="w") + t = np.linspace(opts.kmin - 0.5, opts.kmax + 0.5, 100) + p = np.poly1d(np.polyfit(x, y, 2)) + B.plot(t, p(t), "r:") + + xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)" + B.set_xlabel(xlabel) + B.set_ylabel(ylabel) + set_ticklabels_helvetica(B) + + labels = ((0.04, 0.96, "A"), (0.54, 0.96, "B")) + panel_labels(root, labels) + + normalize_axes(root) + imagename = species + 
".multiK.pdf" + savefig(imagename, dpi=iopts.dpi, iopts=iopts) + + +def plot_nbinom_fit(ax, ks: KmerSpectrum, ymax: float, method_info: dict): + """ + Plot the negative binomial fit. + """ + generative_model = method_info["generative_model"] + GG = method_info["Gbins"] + ll = method_info["lambda"] + rr = method_info["rho"] + kf_range = method_info["kf_range"] + stacked = generative_model(GG, ll, rr) + ax.plot( + kf_range, + stacked, + ":", + color="#6a3d9a", + lw=2, + ) + # Plot multiple CN locations, CN1, CN2, ... up to ploidy + cn_color = "#a6cee3" + for i in range(1, ks.ploidy + 1): + x = i * ks.lambda_ + ax.plot((x, x), (0, ymax), "-.", color=cn_color) + ax.text( + x, + ymax * 0.95, + f"CN{i}", + ha="right", + va="center", + color=cn_color, + rotation=90, + ) + + +def draw_ks_histogram( + ax, + histfile: str, + method: str, + coverage: int, + vmin: int, + vmax: int, + species: str, + K: int, + maxiter: int, + peaks: bool, +) -> int: + """ + Draw the K-mer histogram. + """ + ks = KmerSpectrum(histfile) + method_info = ks.analyze(K=K, maxiter=maxiter, method=method) + + Total_Kmers = int(ks.totalKmers) + Kmer_coverage = ks.lambda_ if not coverage else coverage + Genome_size = int(round(Total_Kmers * 1.0 / Kmer_coverage)) + + Total_Kmers_msg = f"Total {K}-mers: {thousands(Total_Kmers)}" + Kmer_coverage_msg = f"{K}-mer coverage: {Kmer_coverage:.1f}x" + Genome_size_msg = f"Estimated genome size: {Genome_size / 1e6:.1f} Mb" + Repetitive_msg = ks.repetitive + SNPrate_msg = ks.snprate + + messages = [ + Total_Kmers_msg, + Kmer_coverage_msg, + Genome_size_msg, + Repetitive_msg, + SNPrate_msg, + ] + for msg in messages: + print(msg, file=sys.stderr) + + x, y = ks.get_xy(vmin, vmax) + title = f"{species} {K}-mer histogram" + + ax.bar(x, y, fc="#b2df8a", lw=0) + + if peaks: # Only works for method 'allpaths' + t = (ks.min1, ks.max1, ks.min2, ks.max2, ks.min3) + tcounts = [(x, y) for x, y in ks.counts if x in t] + if tcounts: + x, y = zip(*tcounts) + tcounts = dict(tcounts) 
+ ax.plot(x, y, "ko", lw=3, mec="k", mfc="w") + ax.text(ks.max1, tcounts[ks.max1], "SNP peak") + ax.text(ks.max2, tcounts[ks.max2], "Main peak") + + _, ymax = ax.get_ylim() + ymax *= 7 / 6 + # Plot the negative binomial fit + if method == "nbinom": + plot_nbinom_fit(ax, ks, ymax, method_info) + messages += [ks.ploidy_message] + ks.copy_messages + + write_messages(ax, messages) + + ax.set_title(markup(title)) + ax.set_xlim((0, vmax)) + ax.set_ylim((0, ymax)) + adjust_spines(ax, ["left", "bottom"], outward=True) + xlabel, ylabel = "Coverage (X)", "Counts" + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + set_human_axis(ax) + + return Genome_size + + +def histogram(args): + """ + %prog histogram meryl.histogram species K + + Plot the histogram based on Jellyfish or meryl K-mer distribution, species and N are + only used to annotate the graphic. + """ + p = OptionParser(histogram.__doc__) + p.add_argument( + "--vmin", + dest="vmin", + default=2, + type=int, + help="minimum value, inclusive", + ) + p.add_argument( + "--vmax", + dest="vmax", + default=200, + type=int, + help="maximum value, inclusive", + ) + p.add_argument( + "--method", + choices=("nbinom", "allpaths"), + default="nbinom", + help="'nbinom' - slow but more accurate for het or polyploid genome; " + + "'allpaths' - fast and works for homozygous enomes", + ) + p.add_argument( + "--maxiter", + default=100, + type=int, + help="Max iterations for optimization. 
Only used with --method nbinom", + ) + p.add_argument( + "--coverage", default=0, type=int, help="Kmer coverage [default: auto]" + ) + p.add_argument( + "--nopeaks", + default=False, + action="store_true", + help="Do not annotate K-mer peaks", + ) + opts, args, iopts = p.set_image_options(args, figsize="7x7") + + if len(args) != 3: + sys.exit(not p.print_help()) + + histfile, species, N = args + method = opts.method + vmin, vmax = opts.vmin, opts.vmax + peaks = not opts.nopeaks and method == "allpaths" + N = int(N) + + fig = plt.figure(1, (iopts.w, iopts.h)) + ax = fig.add_axes((0.1, 0.1, 0.8, 0.8)) + + Genome_size = draw_ks_histogram( + ax, histfile, method, opts.coverage, vmin, vmax, species, N, opts.maxiter, peaks + ) + + imagename = histfile.split(".")[0] + "." + iopts.format + savefig(imagename, dpi=100) + + return Genome_size + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/opticalmap.py b/jcvi/assembly/opticalmap.py new file mode 100644 index 00000000..88dabb55 --- /dev/null +++ b/jcvi/assembly/opticalmap.py @@ -0,0 +1,427 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Optical map alignment parser. 
+""" +import sys + +from collections import defaultdict +from xml.etree.ElementTree import ElementTree + +import numpy as np +from more_itertools import pairwise + +from ..apps.base import ActionDispatcher, OptionParser, logger +from ..formats.base import must_open +from ..formats.bed import Bed +from ..utils.range import range_chain, range_parse, Range + + +class OpticalMap(object): + def __init__(self, xmlfile): + tree = ElementTree() + self.root = tree.parse(xmlfile) + self.maps = dict(self.iter_maps()) + self.alignments = [] + + for ref, aligned, e in self.iter_alignments(): + aligned_map = self.maps[aligned] + nfrags = aligned_map.num_frags + if e.orientation == "-": + e.alignment = [(nfrags - i - 1, l, r) for (i, l, r) in e.alignment] + self.alignments.append(e) + + def iter_maps(self): + for e in self.root.findall("restriction_map"): + e = RestrictionMap(e) + yield e.name, e + + def iter_alignments(self): + for e in self.root.findall("map_alignment"): + e = MapAlignment(e) + yield e.reference_map_name, e.aligned_map_name, e + + def write_bed( + self, bedfile="stdout", point=False, scale=None, blockonly=False, switch=False + ): + fw = must_open(bedfile, "w") + # when switching ref_map and aligned_map elements, disable `blockOnly` + if switch: + blockonly = False + for a in self.alignments: + reference_map_name = a.reference_map_name + aligned_map_name = a.aligned_map_name + + ref_map = self.maps[reference_map_name] + aligned_map = self.maps[aligned_map_name] + + ref_blocks = ref_map.cumsizes + aligned_blocks = aligned_map.cumsizes + + score = a.soma_score + score = "{0:.1f}".format(score) + orientation = a.orientation + + endpoints = [] + ref_endpoints = [] + for i, l, r in a.alignment: + start = 0 if i == 0 else (aligned_blocks[i - 1] - 1) + end = aligned_blocks[i] - 1 + endpoints.extend([start, end]) + + ref_start = ref_blocks[l - 1] - 1 + ref_end = ref_blocks[r] - 1 + ref_endpoints.extend([ref_start, ref_end]) + + if switch: + if scale: + ref_start /= 
scale + ref_end /= scale + accn = "{0}:{1}-{2}".format(reference_map_name, ref_start, ref_end) + else: + if scale: + start /= scale + end /= scale + accn = "{0}:{1}-{2}".format(aligned_map_name, start, end) + + if point: + accn = accn.rsplit("-")[0] + + if not blockonly: + bed_elems = ( + [ + reference_map_name, + ref_start, + ref_end, + accn, + score, + orientation, + ] + if not switch + else [aligned_map_name, start, end, accn, score, orientation] + ) + print("\t".join(str(x) for x in bed_elems), file=fw) + + if blockonly: + start, end = min(endpoints), max(endpoints) + accn = "{0}:{1}-{2}".format(aligned_map_name, start, end) + + start, end = min(ref_endpoints), max(ref_endpoints) + print( + "\t".join( + str(x) + for x in ( + reference_map_name, + start, + end, + accn, + score, + orientation, + ) + ), + file=fw, + ) + + +class RestrictionMap(object): + def __init__(self, node): + num_frags = node.find("num_frags").text + map_blocks = node.find("map_block").text + + num_frags = int(num_frags) + + self.name = node.find("name").text + self.num_frags = num_frags + self.map_blocks = [int(round(float(x) * 1000)) for x in map_blocks.split()] + + assert len(self.map_blocks) == self.num_frags + + @property + def cumsizes(self): + return np.cumsum(self.map_blocks) + + +class MapAlignment(object): + def __init__(self, node): + reference_map = node.find("reference_map") + reference_map_name = reference_map.find("name").text + + aligned_map = node.find("aligned_map") + aligned_map_name = aligned_map.find("name").text + aligned_map_orientation = aligned_map.find("orientation").text + + assert aligned_map_orientation in ("N", "R") + self.orientation = "-" if aligned_map_orientation == "R" else "+" + + soma_score = node.find("soma_score").text + count = node.find("count").text + + soma_score = float(soma_score) + count = int(count) + + self.reference_map_name = reference_map_name + self.aligned_map_name = aligned_map_name + self.aligned_map_orientation = aligned_map_orientation 
+ + self.soma_score = soma_score + self.alignment = [] + + for f in node.findall("f"): + i = f.find("i").text + l = f.find("l").text + r = f.find("r").text + i, l, r = [int(x) for x in (i, l, r)] + self.alignment.append((i, l, r)) + + +def main(): + + actions = ( + ("bed", "convert xml format into bed format"), + ("condense", "condense split alignments in om bed"), + ("fasta", "use the OM bed to scaffold and create pseudomolecules"), + ("chimera", "scan the bed file to break scaffolds that multi-maps"), + ("silicosoma", "convert .silico to .soma"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def silicosoma(args): + """ + %prog silicosoma in.silico > out.soma + + Convert .silico to .soma file. + + Format of .silico + A text file containing in-silico digested contigs. This file contains pairs + of lines. The first line in each pair constains an identifier, this contig + length in bp, and the number of restriction sites, separated by white space. + The second line contains a white space delimited list of the restriction + site positions. + + Format of .soma + Each line of the text file contains two decimal numbers: The size of the + fragment and the standard deviation (both in kb), separated by white space. + The standard deviation is ignored. + """ + p = OptionParser(silicosoma.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (silicofile,) = args + fp = must_open(silicofile) + fw = must_open(opts.outfile, "w") + next(fp) + positions = [int(x) for x in next(fp).split()] + for a, b in pairwise(positions): + assert a <= b + fragsize = int(round((b - a) / 1000.0)) # kb + if fragsize: + print(fragsize, 0, file=fw) + + +def condense(args): + """ + %prog condense OM.bed + + Merge split alignments in OM bed. 
+ """ + from itertools import groupby + from jcvi.assembly.patch import merge_ranges + + p = OptionParser(condense.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + bed = Bed(bedfile, sorted=False) + key = lambda x: (x.seqid, x.start, x.end) + for k, sb in groupby(bed, key=key): + sb = list(sb) + b = sb[0] + chr, start, end, strand = merge_ranges(sb) + + id = "{0}:{1}-{2}".format(chr, start, end) + b.accn = id + print(b) + + +def chimera(args): + """ + %prog chimera bedfile + + Scan the bed file to break scaffolds that multi-maps. + """ + p = OptionParser(chimera.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + bed = Bed(bedfile) + selected = select_bed(bed) + mapped = defaultdict(set) # scaffold => chr + chimerabed = "chimera.bed" + fw = open(chimerabed, "w") + for b in selected: + scf = range_parse(b.accn).seqid + chr = b.seqid + mapped[scf].add(chr) + + nchimera = 0 + for s, chrs in sorted(mapped.items()): + if len(chrs) == 1: + continue + + print("=" * 80, file=sys.stderr) + print( + "{0} mapped to multiple locations: {1}".format(s, ",".join(sorted(chrs))), + file=sys.stderr, + ) + ranges = [] + for b in selected: + rr = range_parse(b.accn) + scf = rr.seqid + if scf == s: + print(b, file=sys.stderr) + ranges.append(rr) + + # Identify breakpoints + ranges.sort(key=lambda x: (x.seqid, x.start, x.end)) + for a, b in pairwise(ranges): + seqid = a.seqid + if seqid != b.seqid: + continue + + start, end = a.end, b.start + if start > end: + start, end = end, start + + chimeraline = "\t".join(str(x) for x in (seqid, start, end)) + print(chimeraline, file=fw) + print(chimeraline, file=sys.stderr) + nchimera += 1 + + fw.close() + logger.debug("A total of %d junctions written to `%s`.", nchimera, chimerabed) + + +def select_bed(bed): + """ + Return non-overlapping set of ranges, choosing high scoring blocks over low + scoring 
alignments when there are conflicts. + """ + ranges = [ + Range(x.seqid, x.start, x.end, float(x.score), i) for i, x in enumerate(bed) + ] + selected, score = range_chain(ranges) + selected = [bed[x.id] for x in selected] + + return selected + + +def fasta(args): + """ + %prog fasta bedfile scf.fasta pseudomolecules.fasta + + Use OM bed to scaffold and create pseudomolecules. bedfile can be generated + by running jcvi.assembly.opticalmap bed --blockonly + """ + from jcvi.formats.sizes import Sizes + from jcvi.formats.agp import OO, build + + p = OptionParser(fasta.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + bedfile, scffasta, pmolfasta = args + pf = bedfile.rsplit(".", 1)[0] + bed = Bed(bedfile) + selected = select_bed(bed) + oo = OO() + seen = set() + sizes = Sizes(scffasta).mapping + agpfile = pf + ".agp" + agp = open(agpfile, "w") + for b in selected: + scf = range_parse(b.accn).seqid + chr = b.seqid + cs = (chr, scf) + if cs not in seen: + oo.add(chr, scf, sizes[scf], b.strand) + seen.add(cs) + else: + logger.debug("Seen %s, ignored.", cs) + + oo.write_AGP(agp, gaptype="contig") + agp.close() + build([agpfile, scffasta, pmolfasta]) + + +def bed(args): + """ + %prog bed xmlfile + + Print summary of optical map alignment in BED format. 
+ """ + from jcvi.formats.bed import sort + + p = OptionParser(bed.__doc__) + p.add_argument( + "--blockonly", + default=False, + action="store_true", + help="Only print out large blocks, not fragments", + ) + p.add_argument( + "--point", + default=False, + action="store_true", + help="Print accesssion as single point instead of interval", + ) + p.add_argument("--scale", type=float, help="Scale the OM distance by factor") + p.add_argument( + "--switch", + default=False, + action="store_true", + help="Switch reference and aligned map elements", + ) + p.add_argument( + "--nosort", + default=False, + action="store_true", + help="Do not sort bed", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (xmlfile,) = args + bedfile = xmlfile.rsplit(".", 1)[0] + ".bed" + + om = OpticalMap(xmlfile) + om.write_bed( + bedfile, + point=opts.point, + scale=opts.scale, + blockonly=opts.blockonly, + switch=opts.switch, + ) + + if not opts.nosort: + sort([bedfile, "--inplace"]) + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/patch.py b/jcvi/assembly/patch.py new file mode 100644 index 00000000..e3102f52 --- /dev/null +++ b/jcvi/assembly/patch.py @@ -0,0 +1,968 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Patch the sequences of one assembly using sequences from another assembly. This +is tested on merging the medicago WGS assembly with the clone-by-clone assembly. + +There are a few techniques, used in curating medicago assembly. + +1. Split chimeric scaffolds based on genetic map and then refine breakpoints +2. Create patchers by mix-and-max guided by optical map +3. Find gaps and fill N's using alternative assembly +4. Add telomeric sequences +5. Find gaps in optical map +6. 
Insert unplaced scaffolds using mates +""" +import os.path as op +import sys +import math + +from collections import defaultdict +from itertools import groupby +from more_itertools import pairwise, roundrobin + +from ..apps.base import ActionDispatcher, OptionParser, cleanup, logger, sh +from ..formats.base import FileMerger +from ..formats.bed import ( + Bed, + BedLine, + complementBed, + fastaFromBed, + mergeBed, + summary, +) +from ..formats.blast import BlastSlow +from ..formats.sizes import Sizes +from ..utils.range import ( + range_closest, + range_distance, + range_interleave, + range_merge, + range_minmax, + range_parse, +) + + +def main(): + + actions = ( + # OM guided approach + ("refine", "find gaps within or near breakpoint regions"), + ("patcher", "given om alignment, prepare the patchers"), + # Gap filling through sequence matching + ("fill", "perform gap filling using one assembly vs the other"), + ("install", "install patches into backbone"), + # Placement through mates and manual insertions and deletions + ("bambus", "find candidate scaffolds to insert based on mates"), + ("insert", "insert scaffolds into assembly"), + ("eject", "eject scaffolds from assembly"), + ("closest", "find the nearest gaps flanking suggested regions"), + # Misc + ("tips", "append telomeric sequences based on patchers and complements"), + ("gaps", "create patches around OM gaps"), + # Touch-up + ("pasteprepare", "prepare sequences for paste"), + ("paste", "paste in good sequences in the final assembly"), + ("pastegenes", "paste in zero or low coverage genes"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def pastegenes(args): + """ + %prog pastegenes coverage.list old.genes.bed new.genes.bed old.assembly + + Paste in zero or low coverage genes. For a set of neighboring genes + missing, add the whole cassette as unplaced scaffolds. For singletons the + program will try to make a patch. 
+ """ + from jcvi.formats.base import DictFile + from jcvi.utils.cbook import gene_name + + p = OptionParser(pastegenes.__doc__) + p.add_argument( + "--cutoff", + default=90, + type=int, + help="Coverage cutoff to call gene missing", + ) + p.add_argument( + "--flank", + default=2000, + type=int, + help="Get the seq of size on two ends", + ) + p.add_argument( + "--maxsize", + default=50000, + type=int, + help="Maximum size of patchers to be replaced", + ) + opts, args = p.parse_args(args) + + if len(args) != 4: + sys.exit(not p.print_help()) + + coveragefile, oldbed, newbed, oldassembly = args + cutoff = opts.cutoff + flank = opts.flank + maxsize = opts.maxsize + + coverage = DictFile(coveragefile, valuepos=2, cast=float) + + obed = Bed(oldbed) + order = obed.order + bed = [x for x in obed if x.accn in coverage] + key = lambda x: coverage[x.accn] >= cutoff + + extrabed = "extra.bed" + extendbed = "extend.bed" + pastebed = "paste.bed" + + fw = open(extrabed, "w") + fwe = open(extendbed, "w") + fwp = open(pastebed, "w") + fw_ids = open(extendbed + ".ids", "w") + + singletons, large, large_genes = 0, 0, 0 + for chr, chrbed in groupby(bed, key=lambda x: x.seqid): + chrbed = list(chrbed) + for good, beds in groupby(chrbed, key=key): + if good: + continue + + beds = list(beds) + blocksize = len(set([gene_name(x.accn) for x in beds])) + if blocksize == 1: + singletons += 1 + accn = beds[0].accn + gi, gb = order[accn] + leftb = obed[gi - 1] + rightb = obed[gi + 1] + leftr = leftb.range + rightr = rightb.range + cur = gb.range + distance_to_left, oo = range_distance(leftr, cur) + distance_to_right, oo = range_distance(cur, rightr) + span, oo = range_distance(leftr, rightr) + + label = "LEFT" if 0 < distance_to_left <= distance_to_right else "RIGHT" + + if 0 < span <= maxsize: + print( + "\t".join( + str(x) for x in (chr, leftb.start, rightb.end, gb.accn) + ), + file=fwp, + ) + + print(leftb, file=fwe) + print(gb, file=fwe) + print(rightb, file=fwe) + print( + "L:{0} R:{1} 
[{2}]".format( + distance_to_left, distance_to_right, label + ), + file=fwe, + ) + print(gb.accn, file=fw_ids) + continue + + large += 1 + large_genes += blocksize + + ranges = [(x.start, x.end) for x in beds] + rmin, rmax = range_minmax(ranges) + rmin -= flank + rmax += flank + + name = "-".join((beds[0].accn, beds[-1].accn)) + print("\t".join(str(x) for x in (chr, rmin - 1, rmax, name)), file=fw) + + fw.close() + fwe.close() + + extrabed = mergeBed(extrabed, d=flank, nms=True) + fastaFromBed(extrabed, oldassembly, name=True) + summary([extrabed]) + + logger.debug("Singleton blocks : {0}".format(singletons)) + logger.debug("Large blocks : {0} ({1} genes)".format(large, large_genes)) + + +def pasteprepare(args): + """ + %prog pasteprepare bacs.fasta + + Prepare sequences for paste. + """ + p = OptionParser(pasteprepare.__doc__) + p.add_argument( + "--flank", + default=5000, + type=int, + help="Get the seq of size on two ends", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (goodfasta,) = args + flank = opts.flank + pf = goodfasta.rsplit(".", 1)[0] + extbed = pf + ".ext.bed" + + sizes = Sizes(goodfasta) + fw = open(extbed, "w") + for bac, size in sizes.iter_sizes(): + print("\t".join(str(x) for x in (bac, 0, min(flank, size), bac + "L")), file=fw) + print( + "\t".join(str(x) for x in (bac, max(size - flank, 0), size, bac + "R")), + file=fw, + ) + fw.close() + + fastaFromBed(extbed, goodfasta, name=True) + + +def paste(args): + """ + %prog paste flanks.bed flanks_vs_assembly.blast backbone.fasta + + Paste in good sequences in the final assembly. 
+ """ + from jcvi.formats.bed import uniq + + p = OptionParser(paste.__doc__) + p.add_argument( + "--maxsize", + default=300000, + type=int, + help="Maximum size of patchers to be replaced", + ) + p.add_argument("--prefix", help="Prefix of the new object") + p.set_rclip(rclip=1) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + pbed, blastfile, bbfasta = args + maxsize = opts.maxsize # Max DNA size to replace gap + order = Bed(pbed).order + + beforebed, afterbed = blast_to_twobeds( + blastfile, order, log=True, rclip=opts.rclip, maxsize=maxsize, flipbeds=True + ) + beforebed = uniq([beforebed]) + + afbed = Bed(beforebed) + bfbed = Bed(afterbed) + + shuffle_twobeds(afbed, bfbed, bbfasta, prefix=opts.prefix) + + +def eject(args): + """ + %prog eject candidates.bed chr.fasta + + Eject scaffolds from assembly, using the range identified by closest(). + """ + p = OptionParser(eject.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + candidates, chrfasta = args + sizesfile = Sizes(chrfasta).filename + cbedfile = complementBed(candidates, sizesfile) + + cbed = Bed(cbedfile) + for b in cbed: + b.accn = b.seqid + b.score = 1000 + b.strand = "+" + + cbed.print_to_file() + + +def closest(args): + """ + %prog closest candidates.bed gaps.bed fastafile + + Identify the nearest gaps flanking suggested regions. 
+ """ + p = OptionParser(closest.__doc__) + p.add_argument( + "--om", + default=False, + action="store_true", + help="The bedfile is OM blocks", + ) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + candidates, gapsbed, fastafile = args + sizes = Sizes(fastafile).mapping + bed = Bed(candidates) + ranges = [] + for b in bed: + r = range_parse(b.accn) if opts.om else b + ranges.append([r.seqid, r.start, r.end]) + + gapsbed = Bed(gapsbed) + granges = [(x.seqid, x.start, x.end) for x in gapsbed] + + ranges = range_merge(ranges) + for r in ranges: + a = range_closest(granges, r) + b = range_closest(granges, r, left=False) + seqid = r[0] + + if a is not None and a[0] != seqid: + a = None + if b is not None and b[0] != seqid: + b = None + + mmin = 1 if a is None else a[1] + mmax = sizes[seqid] if b is None else b[2] + + print("\t".join(str(x) for x in (seqid, mmin - 1, mmax))) + + +def insert(args): + """ + %prog insert candidates.bed gaps.bed chrs.fasta unplaced.fasta + + Insert scaffolds into assembly. 
+ """ + from jcvi.formats.agp import mask, bed + from jcvi.formats.sizes import agp + + p = OptionParser(insert.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 4: + sys.exit(not p.print_help()) + + candidates, gapsbed, chrfasta, unplacedfasta = args + refinedbed = refine([candidates, gapsbed]) + sizes = Sizes(unplacedfasta).mapping + cbed = Bed(candidates) + corder = cbed.order + gbed = Bed(gapsbed) + gorder = gbed.order + + gpbed = Bed() + gappositions = {} # (chr, start, end) => gapid + + fp = open(refinedbed) + gap_to_scf = defaultdict(list) + seen = set() + for row in fp: + atoms = row.split() + if len(atoms) <= 6: + continue + unplaced = atoms[3] + strand = atoms[5] + gapid = atoms[9] + if gapid not in seen: + seen.add(gapid) + gi, gb = gorder[gapid] + gpbed.append(gb) + gappositions[(gb.seqid, gb.start, gb.end)] = gapid + gap_to_scf[gapid].append((unplaced, strand)) + + gpbedfile = "candidate.gaps.bed" + gpbed.print_to_file(gpbedfile, sorted=True) + + agpfile = agp([chrfasta]) + maskedagpfile = mask([agpfile, gpbedfile]) + maskedbedfile = maskedagpfile.rsplit(".", 1)[0] + ".bed" + bed([maskedagpfile, "--outfile={0}".format(maskedbedfile)]) + + mbed = Bed(maskedbedfile) + finalbed = Bed() + for b in mbed: + sid = b.seqid + key = (sid, b.start, b.end) + if key not in gappositions: + finalbed.add("{0}\n".format(b)) + continue + + gapid = gappositions[key] + scfs = gap_to_scf[gapid] + + # For scaffolds placed in the same gap, sort according to positions + scfs.sort(key=lambda x: corder[x[0]][1].start + corder[x[0]][1].end) + for scf, strand in scfs: + size = sizes[scf] + finalbed.add("\t".join(str(x) for x in (scf, 0, size, sid, 1000, strand))) + + finalbedfile = "final.bed" + finalbed.print_to_file(finalbedfile) + + # Clean-up + toclean = [gpbedfile, agpfile, maskedagpfile, maskedbedfile] + cleanup(toclean) + + +def gaps(args): + """ + %prog gaps OM.bed fastafile + + Create patches around OM gaps. 
+ """ + from jcvi.formats.bed import uniq + + p = OptionParser(gaps.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + ombed, fastafile = args + ombed = uniq([ombed]) + bed = Bed(ombed) + + for a, b in pairwise(bed): + om_a = (a.seqid, a.start, a.end, "+") + om_b = (b.seqid, b.start, b.end, "+") + ch_a = range_parse(a.accn) + ch_b = range_parse(b.accn) + ch_a = (ch_a.seqid, ch_a.start, ch_a.end, "+") + ch_b = (ch_b.seqid, ch_b.start, ch_b.end, "+") + + om_dist, x = range_distance(om_a, om_b, distmode="ee") + ch_dist, x = range_distance(ch_a, ch_b, distmode="ee") + + if om_dist <= 0 and ch_dist <= 0: + continue + + print(a) + print(b) + print(om_dist, ch_dist) + + +def tips(args): + """ + %prog tips patchers.bed complements.bed original.fasta backbone.fasta + + Append telomeric sequences based on patchers and complements. + """ + p = OptionParser(tips.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 4: + sys.exit(not p.print_help()) + + pbedfile, cbedfile, sizesfile, bbfasta = args + + pbed = Bed(pbedfile, sorted=False) + cbed = Bed(cbedfile, sorted=False) + + complements = dict() + for object, beds in groupby(cbed, key=lambda x: x.seqid): + beds = list(beds) + complements[object] = beds + + sizes = Sizes(sizesfile).mapping + bbsizes = Sizes(bbfasta).mapping + tbeds = [] + + for object, beds in groupby(pbed, key=lambda x: x.accn): + beds = list(beds) + startbed, endbed = beds[0], beds[-1] + start_id, end_id = startbed.seqid, endbed.seqid + if startbed.start == 1: + start_id = None + if endbed.end == sizes[end_id]: + end_id = None + print(object, start_id, end_id, file=sys.stderr) + if start_id: + b = complements[start_id][0] + b.accn = object + tbeds.append(b) + tbeds.append( + BedLine( + "\t".join( + str(x) for x in (object, 0, bbsizes[object], object, 1000, "+") + ) + ) + ) + if end_id: + b = complements[end_id][-1] + b.accn = object + tbeds.append(b) + + tbed = Bed() + tbed.extend(tbeds) + + tbedfile 
= "tips.bed" + tbed.print_to_file(tbedfile) + + +def fill(args): + """ + %prog fill gaps.bed bad.fasta + + Perform gap filling of one assembly (bad) using sequences from another. + """ + p = OptionParser(fill.__doc__) + p.add_argument( + "--extend", + default=2000, + type=int, + help="Extend seq flanking the gaps", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + gapsbed, badfasta = args + Ext = opts.extend + + gapdist = 2 * Ext + 1 # This is to prevent to replacement ranges intersect + gapsbed = mergeBed(gapsbed, d=gapdist, nms=True) + + bed = Bed(gapsbed) + sizes = Sizes(badfasta).mapping + pf = gapsbed.rsplit(".", 1)[0] + extbed = pf + ".ext.bed" + fw = open(extbed, "w") + for b in bed: + gapname = b.accn + start, end = max(0, b.start - Ext - 1), b.start - 1 + print("\t".join(str(x) for x in (b.seqid, start, end, gapname + "L")), file=fw) + start, end = b.end, min(sizes[b.seqid], b.end + Ext) + print("\t".join(str(x) for x in (b.seqid, start, end, gapname + "R")), file=fw) + fw.close() + + fastaFromBed(extbed, badfasta, name=True) + + +def blast_to_twobeds( + blastfile, order, log=False, rclip=1, maxsize=300000, flipbeds=False +): + + abed, bbed = "before.bed", "after.bed" + beforebed, afterbed = abed, bbed + if flipbeds: + beforebed, afterbed = afterbed, beforebed + + fwa = open(beforebed, "w") + fwb = open(afterbed, "w") + if log: + logfile = "problems.log" + log = open(logfile, "w") + + key1 = lambda x: x.query + key2 = lambda x: x.query[:-rclip] if rclip else key1 + data = BlastSlow(blastfile) + OK = "OK" + + seen = set() + for pe, lines in groupby(data, key=key2): + label = OK + lines = list(lines) + if len(lines) != 2: + label = "Singleton" + + else: + a, b = lines + + aquery, bquery = a.query, b.query + asubject, bsubject = a.subject, b.subject + if asubject != bsubject: + label = "Different chr {0}|{1}".format(asubject, bsubject) + + else: + astrand, bstrand = a.orientation, b.orientation + assert aquery[-1] 
== "L" and bquery[-1] == "R", str((aquery, bquery)) + + ai, ax = order[aquery] + bi, bx = order[bquery] + qstart, qstop = ax.start + a.qstart - 1, bx.start + b.qstop - 1 + + if astrand == "+" and bstrand == "+": + sstart, sstop = a.sstart, b.sstop + + elif astrand == "-" and bstrand == "-": + sstart, sstop = b.sstart, a.sstop + + else: + label = "Strand {0}|{1}".format(astrand, bstrand) + + if sstart > sstop: + label = "Start beyond stop" + + if sstop > sstart + maxsize: + label = "Stop beyond start plus {0}".format(maxsize) + + aquery = lines[0].query + bac_name = aquery[:-1] + seen.add(bac_name) + name = bac_name + "LR" + + if label != OK: + if log: + print("\t".join((name, label)), file=log) + continue + + print( + "\t".join(str(x) for x in (ax.seqid, qstart - 1, qstop, name, 1000, "+")), + file=fwa, + ) + print( + "\t".join( + str(x) for x in (asubject, sstart - 1, sstop, name, 1000, astrand) + ), + file=fwb, + ) + + # Missing + if log: + label = "Missing" + for k in order.keys(): + k = k[:-1] + if k not in seen: + seen.add(k) + k += "LR" + print("\t".join((k, label)), file=log) + log.close() + + fwa.close() + fwb.close() + + return abed, bbed + + +def shuffle_twobeds(afbed, bfbed, bbfasta, prefix=None): + # Shuffle the two bedfiles together + sz = Sizes(bbfasta) + sizes = sz.mapping + shuffled = "shuffled.bed" + border = bfbed.order + + all = [] + afbed.sort(key=afbed.nullkey) + totalids = len(sizes) + pad = int(math.log10(totalids)) + 1 + cj = 0 + seen = set() + accn = lambda x: "{0}{1:0{2}d}".format(prefix, x, pad) + + for seqid, aa in afbed.sub_beds(): + cj += 1 + abeds, bbeds, beds = [], [], [] + size = sizes[seqid] + ranges = [(x.seqid, x.start, x.end) for x in aa] + cranges = range_interleave(ranges, sizes={seqid: size}, empty=True) + for crange in cranges: + if crange: + seqid, start, end = crange + bedline = "\t".join(str(x) for x in (seqid, start - 1, end)) + abeds.append(BedLine(bedline)) + else: + abeds.append(None) + + for a in aa: + gapid = a.accn 
+ bi, b = border[gapid] + if a.strand == "-": + b.extra[1] = b.strand = "-" if b.strand == "+" else "+" + + bbeds.append(b) + + n_abeds = len(abeds) + n_bbeds = len(bbeds) + assert n_abeds - n_bbeds == 1, "abeds: {0}, bbeds: {1}".format(n_abeds, n_bbeds) + + beds = [x for x in roundrobin(abeds, bbeds) if x] + if prefix: + for b in beds: + b.accn = accn(cj) + + all.extend(beds) + seen.add(seqid) + + # Singletons + for seqid, size in sz.iter_sizes(): + if seqid in seen: + continue + + bedline = "\t".join(str(x) for x in (seqid, 0, size, accn(cj))) + b = BedLine(bedline) + + cj += 1 + if prefix: + b.accn = accn(cj) + + all.append(b) + + shuffledbed = Bed() + shuffledbed.extend(all) + shuffledbed.print_to_file(shuffled) + + return shuffledbed + + +def install(args): + """ + %prog install patchers.bed patchers.fasta backbone.fasta alt.fasta + + Install patches into backbone, using sequences from alternative assembly. + The patches sequences are generated via jcvi.assembly.patch.fill(). + + The output is a bedfile that can be converted to AGP using + jcvi.formats.agp.frombed(). 
+ """ + from jcvi.apps.align import blast + from jcvi.formats.fasta import SeqIO + + p = OptionParser(install.__doc__) + p.set_rclip(rclip=1) + p.add_argument( + "--maxsize", + default=300000, + type=int, + help="Maximum size of patchers to be replaced", + ) + p.add_argument("--prefix", help="Prefix of the new object") + p.add_argument( + "--strict", + default=False, + action="store_true", + help="Only update if replacement has no gaps", + ) + opts, args = p.parse_args(args) + + if len(args) != 4: + sys.exit(not p.print_help()) + + pbed, pfasta, bbfasta, altfasta = args + maxsize = opts.maxsize # Max DNA size to replace gap + rclip = opts.rclip + + blastfile = blast([altfasta, pfasta, "--wordsize=100", "--pctid=99"]) + order = Bed(pbed).order + beforebed, afterbed = blast_to_twobeds( + blastfile, order, rclip=rclip, maxsize=maxsize + ) + + beforefasta = fastaFromBed(beforebed, bbfasta, name=True, stranded=True) + afterfasta = fastaFromBed(afterbed, altfasta, name=True, stranded=True) + + # Exclude the replacements that contain more Ns than before + ah = SeqIO.parse(beforefasta, "fasta") + bh = SeqIO.parse(afterfasta, "fasta") + count_Ns = lambda x: x.seq.count("n") + x.seq.count("N") + exclude = set() + for arec, brec in zip(ah, bh): + an = count_Ns(arec) + bn = count_Ns(brec) + if opts.strict: + if bn == 0: + continue + + elif bn < an: + continue + + id = arec.id + exclude.add(id) + + logger.debug( + "Ignore {0} updates because of decreasing quality.".format(len(exclude)) + ) + + abed = Bed(beforebed, sorted=False) + bbed = Bed(afterbed, sorted=False) + abed = [x for x in abed if x.accn not in exclude] + bbed = [x for x in bbed if x.accn not in exclude] + + abedfile = "before.filtered.bed" + bbedfile = "after.filtered.bed" + afbed = Bed() + afbed.extend(abed) + bfbed = Bed() + bfbed.extend(bbed) + + afbed.print_to_file(abedfile) + bfbed.print_to_file(bbedfile) + + shuffle_twobeds(afbed, bfbed, bbfasta, prefix=opts.prefix) + + +def refine(args): + """ + %prog 
refine breakpoints.bed gaps.bed + + Find gaps within or near breakpoint region. + + For breakpoint regions with no gaps, there are two options: + - Break in the middle of the region + - Break at the closest gap (--closest) + """ + from pybedtools import BedTool + + p = OptionParser(refine.__doc__) + p.add_argument( + "--closest", + default=False, + action="store_true", + help="In case of no gaps, use closest", + ) + p.set_outfile("auto") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + breakpointsbed, gapsbed = args + ncols = len(next(open(breakpointsbed)).split()) + logger.debug("File %s contains %d columns.", breakpointsbed, ncols) + a = BedTool(breakpointsbed) + b = BedTool(gapsbed) + o = a.intersect(b, wao=True) + + pf = "{0}.{1}".format( + op.basename(breakpointsbed).split(".")[0], op.basename(gapsbed).split(".")[0] + ) + nogapsbed = pf + ".nogaps.bed" + largestgapsbed = pf + ".largestgaps.bed" + nogapsfw = open(nogapsbed, "w") + largestgapsfw = open(largestgapsbed, "w") + for b, gaps in groupby(o, key=lambda x: x[:ncols]): + gaps = list(gaps) + gap = gaps[0] + if len(gaps) == 1 and gap[-1] == "0": + assert gap[-3] == "." 
+ print("\t".join(b), file=nogapsfw) + continue + + gaps = [(int(x[-1]), x) for x in gaps] + maxgap = max(gaps)[1] + # Write the gap interval that's intersected (often from column 4 and on) + print("\t".join(maxgap[ncols:]), file=largestgapsfw) + + nogapsfw.close() + largestgapsfw.close() + beds = [largestgapsbed] + toclean = [nogapsbed, largestgapsbed] + + if opts.closest: + closestgapsbed = pf + ".closestgaps.bed" + cmd = "closestBed -a {0} -b {1} -d".format(nogapsbed, gapsbed) + sh(cmd, outfile=closestgapsbed) + beds += [closestgapsbed] + toclean += [closestgapsbed] + else: + pointbed = pf + ".point.bed" + pbed = Bed() + bed = Bed(nogapsbed) + for b in bed: + pos = (b.start + b.end) // 2 + b.start, b.end = pos, pos + pbed.append(b) + pbed.print_to_file(pointbed) + beds += [pointbed] + toclean += [pointbed] + + refinedbed = pf + ".refined.bed" if opts.outfile == "auto" else opts.outfile + FileMerger(beds, outfile=refinedbed).merge() + + # Clean-up + cleanup(toclean) + + return refinedbed + + +def merge_ranges(beds): + + m = [x.accn for x in beds] + + mr = [range_parse(x) for x in m] + mc = set(x.seqid for x in mr) + if len(mc) != 1: + logger.error("Multiple seqid found in pocket. Aborted.") + return + + mc = list(mc)[0] + ms = min(x.start for x in mr) + me = max(x.end for x in mr) + + neg_strands = sum(1 for x in beds if x.strand == "-") + pos_strands = len(beds) - neg_strands + strand = "-" if neg_strands > pos_strands else "+" + + return mc, ms, me, strand + + +def patcher(args): + """ + %prog patcher backbone.bed other.bed + + Given optical map alignment, prepare the patchers. Use --backbone to suggest + which assembly is the major one, and the patchers will be extracted from + another assembly. 
+ """ + from jcvi.formats.bed import uniq + + p = OptionParser(patcher.__doc__) + p.add_argument( + "--backbone", + default="OM", + help="Prefix of the backbone assembly", + ) + p.add_argument("--object", default="object", help="New object name") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + backbonebed, otherbed = args + backbonebed = uniq([backbonebed]) + otherbed = uniq([otherbed]) + + pf = backbonebed.split(".")[0] + + # Make a uniq bed keeping backbone at redundant intervals + cmd = "intersectBed -v -wa" + cmd += " -a {0} -b {1}".format(otherbed, backbonebed) + outfile = otherbed.rsplit(".", 1)[0] + ".not." + backbonebed + sh(cmd, outfile=outfile) + + uniqbed = Bed() + uniqbedfile = pf + ".merged.bed" + uniqbed.extend(Bed(backbonebed)) + uniqbed.extend(Bed(outfile)) + uniqbed.print_to_file(uniqbedfile, sorted=True) + + # Condense adjacent intervals, allow some chaining + bed = uniqbed + key = lambda x: range_parse(x.accn).seqid + + bed_fn = pf + ".patchers.bed" + bed_fw = open(bed_fn, "w") + + for k, sb in groupby(bed, key=key): + sb = list(sb) + chr, start, end, strand = merge_ranges(sb) + + print( + "\t".join(str(x) for x in (chr, start, end, opts.object, 1000, strand)), + file=bed_fw, + ) + + bed_fw.close() + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/postprocess.py b/jcvi/assembly/postprocess.py new file mode 100644 index 00000000..d6b899d2 --- /dev/null +++ b/jcvi/assembly/postprocess.py @@ -0,0 +1,537 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Finishing pipeline, starting with a phase1/2 BAC. 
The pipeline ideally should +include the following components + ++ BLAST against the Illumina contigs to fish out additional seqs ++ Use minimus2 to combine the contigs through overlaps ++ Map the mates to the contigs and perform scaffolding +""" +import os +import os.path as op +import sys + +from collections import defaultdict +from itertools import groupby + +from ..apps.align import run_megablast +from ..apps.base import ( + ActionDispatcher, + OptionParser, + cleanup, + logger, + mkdir, + need_update, + sh, +) +from ..formats.base import must_open +from ..formats.contig import ContigFile +from ..formats.fasta import ( + Fasta, + Seq, + SeqIO, + SeqRecord, + format, + gaps, + parse_fasta, + tidy, +) +from ..formats.sizes import Sizes +from ..utils.cbook import depends + +from .base import n50 + + +def main(): + + actions = ( + ("screen", "screen sequences against library"), + ("circular", "make circular genome"), + ("dedup", "remove duplicate contigs within assembly"), + ("dust", "remove low-complexity contigs within assembly"), + ("dust2bed", "extract low-complexity regions as bed file"), + ("build", "build assembly files after a set of clean-ups"), + ("overlap", "build larger contig set by fishing additional seqs"), + ("overlapbatch", "call overlap on a set of sequences"), + ("scaffold", "build scaffolds based on the ordering in the AGP file"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def dust2bed(args): + """ + %prog dust2bed fastafile + + Use dustmasker to find low-complexity regions (LCRs) in the genome. 
+ """ + from jcvi.formats.base import read_block + + p = OptionParser(dust2bed.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + interval = fastafile + ".iv" + if need_update(fastafile, interval): + cmd = "dustmasker -in {0}".format(fastafile) + sh(cmd, outfile=interval) + + fp = open(interval) + bedfile = fastafile.rsplit(".", 1)[0] + ".dust.bed" + fw = must_open(bedfile, "w") + nlines = 0 + nbases = 0 + for header, block in read_block(fp, ">"): + header = header.strip(">") + for b in block: + start, end = b.split(" - ") + start, end = int(start), int(end) + print("\t".join(str(x) for x in (header, start, end)), file=fw) + nlines += 1 + nbases += end - start + logger.debug( + "A total of {0} DUST intervals ({1} bp) exported to `{2}`".format( + nlines, nbases, bedfile + ) + ) + + +def fasta2bed(fastafile): + """ + Alternative BED generation from FASTA file. Used for sanity check. + """ + dustfasta = fastafile.rsplit(".", 1)[0] + ".dust.fasta" + for name, seq in parse_fasta(dustfasta): + for islower, ss in groupby(enumerate(seq), key=lambda x: x[-1].islower()): + if not islower: + continue + ss = list(ss) + ms, mn = min(ss) + xs, xn = max(ss) + print("\t".join(str(x) for x in (name, ms, xs))) + + +def circular(args): + """ + %prog circular fastafile startpos + + Make circular genome, startpos is the place to start the sequence. This can + be determined by mapping to a reference. Self overlaps are then resolved. + Startpos is 1-based. 
+ """ + from jcvi.assembly.goldenpath import overlap + + p = OptionParser(circular.__doc__) + p.add_argument( + "--flip", + default=False, + action="store_true", + help="Reverse complement the sequence", + ) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, startpos = args + startpos = int(startpos) + key, seq = next(parse_fasta(fastafile)) + aseq = seq[startpos:] + bseq = seq[:startpos] + aseqfile, bseqfile = "a.seq", "b.seq" + + for f, s in zip((aseqfile, bseqfile), (aseq, bseq)): + fw = must_open(f, "w") + print(">{0}\n{1}".format(f, s), file=fw) + fw.close() + + o = overlap([aseqfile, bseqfile]) + seq = aseq[: o.qstop] + bseq[o.sstop :] + seq = Seq(seq) + + if opts.flip: + seq = seq.reverse_complement() + + cleanup(aseqfile, bseqfile) + + fw = must_open(opts.outfile, "w") + rec = SeqRecord(seq, id=key, description="") + SeqIO.write([rec], fw, "fasta") + fw.close() + + +def dust(args): + """ + %prog dust assembly.fasta + + Remove low-complexity contigs within assembly. + """ + p = OptionParser(dust.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + dustfastafile = fastafile.rsplit(".", 1)[0] + ".dust.fasta" + if need_update(fastafile, dustfastafile): + cmd = "dustmasker -in {0}".format(fastafile) + cmd += " -out {0} -outfmt fasta".format(dustfastafile) + sh(cmd) + + for name, seq in parse_fasta(dustfastafile): + nlow = sum(1 for x in seq if x in "acgtnN") + pctlow = nlow * 100.0 / len(seq) + if pctlow < 98: + continue + # print "{0}\t{1:.1f}".format(name, pctlow) + print(name) + + +def dedup(args): + """ + %prog dedup assembly.assembly.blast assembly.fasta + + Remove duplicate contigs within assembly. 
+ """ + from jcvi.formats.blast import BlastLine + + p = OptionParser(dedup.__doc__) + p.set_align(pctid=0, pctcov=98) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + blastfile, fastafile = args + cov = opts.pctcov / 100.0 + sizes = Sizes(fastafile).mapping + fp = open(blastfile) + removed = set() + for row in fp: + b = BlastLine(row) + query, subject = b.query, b.subject + if query == subject: + continue + qsize, ssize = sizes[query], sizes[subject] + qspan = abs(b.qstop - b.qstart) + if qspan < qsize * cov: + continue + if (qsize, query) < (ssize, subject): + removed.add(query) + + print("\n".join(sorted(removed))) + + +def build(args): + """ + %prog build current.fasta Bacteria_Virus.fasta prefix + + Build assembly files after a set of clean-ups: + 1. Use cdhit (100%) to remove duplicate scaffolds + 2. Screen against the bacteria and virus database (remove scaffolds 95% id, 50% cov) + 3. Mask matches to UniVec_Core + 4. Sort by decreasing scaffold sizes + 5. Rename the scaffolds sequentially + 6. Build the contigs by splitting scaffolds at gaps + 7. 
Rename the contigs sequentially + """ + from jcvi.apps.cdhit import deduplicate + from jcvi.apps.vecscreen import mask + from jcvi.formats.fasta import sort + + p = OptionParser(build.__doc__) + p.add_argument( + "--nodedup", + default=False, + action="store_true", + help="Do not deduplicate [default: deduplicate]", + ) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + fastafile, bacteria, pf = args + dd = deduplicate([fastafile, "--pctid=100"]) if not opts.nodedup else fastafile + screenfasta = screen([dd, bacteria]) + tidyfasta = mask([screenfasta]) + sortedfasta = sort([tidyfasta, "--sizes"]) + scaffoldfasta = pf + ".assembly.fasta" + format([sortedfasta, scaffoldfasta, "--prefix=scaffold_", "--sequential"]) + gapsplitfasta = pf + ".gapSplit.fasta" + cmd = "gapSplit -minGap=10 {0} {1}".format(scaffoldfasta, gapsplitfasta) + sh(cmd) + contigsfasta = pf + ".contigs.fasta" + format([gapsplitfasta, contigsfasta, "--prefix=contig_", "--sequential"]) + + +def screen(args): + """ + %prog screen scaffolds.fasta library.fasta + + Screen sequences against FASTA library. Sequences that have 95% id and 50% + cov will be removed by default. 
+ """ + from jcvi.apps.align import blast + from jcvi.formats.blast import covfilter + + p = OptionParser(screen.__doc__) + p.set_align(pctid=95, pctcov=50) + p.add_argument("--best", default=1, type=int, help="Get the best N hit") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + scaffolds, library = args + pctidflag = "--pctid={0}".format(opts.pctid) + blastfile = blast([library, scaffolds, pctidflag, "--best={0}".format(opts.best)]) + + idsfile = blastfile.rsplit(".", 1)[0] + ".ids" + covfilter( + [ + blastfile, + scaffolds, + "--ids=" + idsfile, + pctidflag, + "--pctcov={0}".format(opts.pctcov), + ] + ) + + pf = scaffolds.rsplit(".", 1)[0] + nf = pf + ".screen.fasta" + cmd = "faSomeRecords {0} -exclude {1} {2}".format(scaffolds, idsfile, nf) + sh(cmd) + + logger.debug("Screened FASTA written to `{0}`.".format(nf)) + + return nf + + +def scaffold(args): + """ + %prog scaffold ctgfasta agpfile + + Build scaffolds based on ordering in the AGP file. + """ + from jcvi.formats.agp import bed, order_to_agp, build + from jcvi.formats.bed import Bed + + p = OptionParser(scaffold.__doc__) + p.add_argument( + "--prefix", + default=False, + action="store_true", + help="Keep IDs with same prefix together", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + ctgfasta, agpfile = args + sizes = Sizes(ctgfasta).mapping + + pf = ctgfasta.rsplit(".", 1)[0] + phasefile = pf + ".phases" + fwphase = open(phasefile, "w") + newagpfile = pf + ".new.agp" + fwagp = open(newagpfile, "w") + + scaffoldbuckets = defaultdict(list) + + bedfile = bed([agpfile, "--nogaps", "--outfile=tmp"]) + bb = Bed(bedfile) + for s, partialorder in bb.sub_beds(): + name = partialorder[0].accn + bname = name.rsplit("_", 1)[0] if opts.prefix else s + scaffoldbuckets[bname].append([(b.accn, b.strand) for b in partialorder]) + + # Now the buckets contain a mixture of singletons and partially resolved + # scaffolds. 
Print the scaffolds first then remaining singletons. + for bname, scaffolds in sorted(scaffoldbuckets.items()): + ctgorder = [] + singletons = set() + for scaf in sorted(scaffolds): + for node, orientation in scaf: + ctgorder.append((node, orientation)) + if len(scaf) == 1: + singletons.add(node) + nscaffolds = len(scaffolds) + nsingletons = len(singletons) + if nsingletons == 1 and nscaffolds == 0: + phase = 3 + elif nsingletons == 0 and nscaffolds == 1: + phase = 2 + else: + phase = 1 + + msg = "{0}: Scaffolds={1} Singletons={2} Phase={3}".format( + bname, nscaffolds, nsingletons, phase + ) + print(msg, file=sys.stderr) + print("\t".join((bname, str(phase))), file=fwphase) + + order_to_agp(bname, ctgorder, sizes, fwagp) + + fwagp.close() + cleanup(bedfile) + + fastafile = "final.fasta" + build([newagpfile, ctgfasta, fastafile]) + tidy([fastafile]) + + +@depends +def run_gapsplit(infile=None, outfile=None): + gaps([infile, "--split"]) + return outfile + + +def overlapbatch(args): + """ + %prog overlapbatch ctgfasta poolfasta + + Fish out the sequences in `poolfasta` that overlap with `ctgfasta`. + Mix and combine using `minimus2`. + """ + p = OptionParser(overlap.__doc__) + opts, args = p.parse_args(args) + if len(args) != 2: + sys.exit(not p.print_help()) + + ctgfasta, poolfasta = args + f = Fasta(ctgfasta) + for k, rec in f.iteritems_ordered(): + fastafile = k + ".fasta" + fw = open(fastafile, "w") + SeqIO.write([rec], fw, "fasta") + fw.close() + + overlap([fastafile, poolfasta]) + + +def overlap(args): + """ + %prog overlap ctgfasta poolfasta + + Fish out the sequences in `poolfasta` that overlap with `ctgfasta`. + Mix and combine using `minimus2`. 
+ """ + p = OptionParser(overlap.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + ctgfasta, poolfasta = args + prefix = ctgfasta.split(".")[0] + rid = list(Fasta(ctgfasta).iterkeys()) + assert len(rid) == 1, "Use overlapbatch() to improve multi-FASTA file" + + rid = rid[0] + splitctgfasta = ctgfasta.rsplit(".", 1)[0] + ".split.fasta" + ctgfasta = run_gapsplit(infile=ctgfasta, outfile=splitctgfasta) + + # Run BLAST + blastfile = ctgfasta + ".blast" + run_megablast(infile=ctgfasta, outfile=blastfile, db=poolfasta) + + # Extract contigs and merge using minimus2 + closuredir = prefix + ".closure" + closure = False + if need_update(blastfile, closuredir): + mkdir(closuredir, overwrite=True) + closure = True + + if closure: + idsfile = op.join(closuredir, prefix + ".ids") + cmd = "cut -f2 {0} | sort -u".format(blastfile) + sh(cmd, outfile=idsfile) + + idsfastafile = op.join(closuredir, prefix + ".ids.fasta") + cmd = "faSomeRecords {0} {1} {2}".format(poolfasta, idsfile, idsfastafile) + sh(cmd) + + # This step is a hack to weight the bases from original sequences more + # than the pulled sequences, by literally adding another copy to be used + # in consensus calls. 
+ redundantfastafile = op.join(closuredir, prefix + ".redundant.fasta") + format([ctgfasta, redundantfastafile, "--prefix=RED."]) + + mergedfastafile = op.join(closuredir, prefix + ".merged.fasta") + cmd = "cat {0} {1} {2}".format(ctgfasta, redundantfastafile, idsfastafile) + sh(cmd, outfile=mergedfastafile) + + afgfile = op.join(closuredir, prefix + ".afg") + cmd = "toAmos -s {0} -o {1}".format(mergedfastafile, afgfile) + sh(cmd) + + cwd = os.getcwd() + os.chdir(closuredir) + cmd = "minimus2 {0} -D REFCOUNT=0".format(prefix) + cmd += " -D OVERLAP=100 -D MINID=98" + sh(cmd) + os.chdir(cwd) + + # Analyze output, make sure that: + # + Get the singletons of the original set back + # + Drop any contig that is comprised entirely of pulled set + originalIDs = set(Fasta(ctgfasta).iterkeys()) + minimuscontig = op.join(closuredir, prefix + ".contig") + c = ContigFile(minimuscontig) + excludecontigs = set() + for rec in c.iter_records(): + reads = set(x.id for x in rec.reads) + if reads.isdisjoint(originalIDs): + excludecontigs.add(rec.id) + + logger.debug("Exclude contigs: {0}".format(", ".join(sorted(excludecontigs)))) + + finalfasta = prefix + ".improved.fasta_" + fw = open(finalfasta, "w") + minimusfasta = op.join(closuredir, prefix + ".fasta") + f = Fasta(minimusfasta) + for id, rec in f.iteritems_ordered(): + if id in excludecontigs: + continue + SeqIO.write([rec], fw, "fasta") + + singletonfile = op.join(closuredir, prefix + ".singletons") + singletons = set(x.strip() for x in open(singletonfile)) + leftovers = singletons & originalIDs + + logger.debug("Pull leftover singletons: {0}".format(", ".join(sorted(leftovers)))) + + f = Fasta(ctgfasta) + for id, rec in f.iteritems_ordered(): + if id not in leftovers: + continue + SeqIO.write([rec], fw, "fasta") + + fw.close() + + fastafile = finalfasta + finalfasta = fastafile.rstrip("_") + format( + [fastafile, finalfasta, "--sequential", "--pad0=3", "--prefix={0}_".format(rid)] + ) + + logger.debug("Improved FASTA written 
to `{0}`.".format(finalfasta)) + + n50([ctgfasta]) + n50([finalfasta]) + + errlog = "error.log" + cleanup(fastafile, blastfile, errlog) + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/preprocess.py b/jcvi/assembly/preprocess.py new file mode 100644 index 00000000..54b7659f --- /dev/null +++ b/jcvi/assembly/preprocess.py @@ -0,0 +1,735 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Wrapper to trim and correct sequence data. +""" +import os +import os.path as op +import sys + +from ..apps.base import ( + ActionDispatcher, + OptionParser, + cleanup, + datadir, + download, + logger, + mkdir, + need_update, + sh, +) +from ..formats.base import BaseFile, must_open, write_file +from ..formats.fastq import guessoffset +from ..utils.cbook import depends, human_size + + +class FastQCdata(BaseFile, dict): + def __init__(self, filename, human=False): + super().__init__(filename) + if not op.exists(filename): + logger.debug("File `%s` not found.", filename) + # Sample_RF37-1/RF37-1_GATCAG_L008_R2_fastqc => + # RF37-1_GATCAG_L008_R2 + self["Filename"] = op.basename(op.split(filename)[0]).rsplit("_", 1)[0] + self["Total Sequences"] = self["Sequence length"] = self["Total Bases"] = ( + "na" + ) + return + + fp = open(filename) + for row in fp: + atoms = row.rstrip().split("\t") + if atoms[0] in ("#", ">"): + continue + if len(atoms) != 2: + continue + + a, b = atoms + self[a] = b + + ts = self["Total Sequences"] + sl = self["Sequence length"] + if "-" in sl: + a, b = sl.split("-") + sl = (int(a) + int(b)) / 2 + if a == "30": + sl = int(b) + + ts, sl = int(ts), int(sl) + tb = ts * sl + + self["Total Sequences"] = human_size(ts).rstrip("b") if human else ts + self["Total Bases"] = human_size(tb).rstrip("b") if human else tb + + +def main(): + + actions = ( + ("contamination", "check reads contamination against Ecoli"), + ("correct", "correct reads using ALLPATHS-LG"), + ("count", "count reads based on FASTQC results"), + ("diginorm", "run K-mer based 
normalization"), + ("expand", "expand sequences using short reads"), + ("hetsmooth", "reduce K-mer diversity using het-smooth"), + ("trim", "trim reads using TRIMMOMATIC"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def diginorm(args): + """ + %prog diginorm fastqfile + + Run K-mer based normalization. Based on tutorial: + + + Assume input is either an interleaved pairs file, or two separate files. + + To set up khmer: + $ git clone git://github.com/ged-lab/screed.git + $ git clone git://github.com/ged-lab/khmer.git + $ cd screed + $ python setup.py install + $ cd ../khmer + $ make test + $ export PYTHONPATH=~/export/khmer + """ + from jcvi.formats.fastq import shuffle, pairinplace, split + from jcvi.apps.base import getfilesize + + p = OptionParser(diginorm.__doc__) + p.add_argument( + "--single", default=False, action="store_true", help="Single end reads" + ) + p.add_argument("--tablesize", help="Memory size") + p.add_argument( + "--npass", + default="1", + choices=("1", "2"), + help="How many passes of normalization", + ) + p.set_depth(depth=50) + p.set_home("khmer", default="/usr/local/bin/") + opts, args = p.parse_args(args) + + if len(args) not in (1, 2): + sys.exit(not p.print_help()) + + if len(args) == 2: + fastq = shuffle(args + ["--tag"]) + else: + (fastq,) = args + + kh = opts.khmer_home + depth = opts.depth + PE = not opts.single + sys.path.insert(0, op.join(kh, "python")) + + pf = fastq.rsplit(".", 1)[0] + keepfile = fastq + ".keep" + hashfile = pf + ".kh" + mints = 10000000 + ts = opts.tablesize or ((getfilesize(fastq) / 16 / mints + 1) * mints) + + norm_cmd = op.join(kh, "normalize-by-median.py") + filt_cmd = op.join(kh, "filter-abund.py") + if need_update(fastq, (hashfile, keepfile)): + cmd = norm_cmd + cmd += " -C {0} -k 20 -N 4 -x {1}".format(depth, ts) + if PE: + cmd += " -p" + cmd += " -s {0} {1}".format(hashfile, fastq) + sh(cmd) + + abundfiltfile = keepfile + ".abundfilt" + if need_update((hashfile, keepfile), 
abundfiltfile): + cmd = filt_cmd + cmd += " {0} {1}".format(hashfile, keepfile) + sh(cmd) + + if opts.npass == "1": + seckeepfile = abundfiltfile + else: + seckeepfile = abundfiltfile + ".keep" + if need_update(abundfiltfile, seckeepfile): + cmd = norm_cmd + cmd += " -C {0} -k 20 -N 4 -x {1}".format(depth - 10, ts / 2) + cmd += " {0}".format(abundfiltfile) + sh(cmd) + + if PE: + pairsfile = pairinplace( + [seckeepfile, "--base={0}".format(pf + "_norm"), "--rclip=2"] + ) + split([pairsfile]) + + +def expand(args): + """ + %prog expand bes.fasta reads.fastq + + Expand sequences using short reads. Useful, for example for getting BAC-end + sequences. The template to use, in `bes.fasta` may just contain the junction + sequences, then align the reads to get the 'flanks' for such sequences. + """ + import math + + from jcvi.formats.fasta import Fasta, SeqIO + from jcvi.formats.fastq import readlen, first, fasta + from jcvi.formats.blast import Blast + from jcvi.apps.base import cleanup + from jcvi.apps.bowtie import align, get_samfile + from jcvi.apps.align import blast + + p = OptionParser(expand.__doc__) + p.set_depth(depth=200) + p.set_firstN() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bes, reads = args + size = Fasta(bes).totalsize + rl = readlen([reads]) + expected_size = size + 2 * rl + nreads = expected_size * opts.depth / rl + nreads = int(math.ceil(nreads / 1000.0)) * 1000 + + # Attract reads + samfile, logfile = align( + [bes, reads, "--reorder", "--mapped", "--firstN={0}".format(opts.firstN)] + ) + + samfile, mapped, _ = get_samfile(reads, bes, bowtie=True, mapped=True) + logger.debug("Extract first %d reads from `%s`.", nreads, mapped) + + pf = mapped.split(".")[0] + pf = pf.split("-")[0] + bespf = bes.split(".")[0] + reads = pf + ".expand.fastq" + first([str(nreads), mapped, "-o", reads]) + + # Perform mini-assembly + fastafile = reads.rsplit(".", 1)[0] + ".fasta" + qualfile = "" + if need_update(reads, 
fastafile): + fastafile, qualfile = fasta([reads]) + + contigs = op.join(pf, "454LargeContigs.fna") + if need_update(fastafile, contigs): + cmd = "runAssembly -o {0} -cpu 8 {1}".format(pf, fastafile) + sh(cmd) + assert op.exists(contigs) + + # Annotate contigs + blastfile = blast([bes, contigs]) + mapping = {} + for query, b in Blast(blastfile).iter_best_hit(): + mapping[query] = b + + f = Fasta(contigs, lazy=True) + annotatedfasta = ".".join((pf, bespf, "fasta")) + fw = open(annotatedfasta, "w") + keys = list(Fasta(bes).iterkeys_ordered()) # keep an ordered list + recs = [] + for key, v in f.iteritems_ordered(): + vid = v.id + if vid not in mapping: + continue + b = mapping[vid] + subject = b.subject + rec = v.reverse_complement() if b.orientation == "-" else v + rec.id = rid = "_".join((pf, vid, subject)) + rec.description = "" + recs.append((keys.index(subject), rid, rec)) + + recs = [x[-1] for x in sorted(recs)] + SeqIO.write(recs, fw, "fasta") + fw.close() + + cleanup(samfile, logfile, mapped, reads, fastafile, qualfile, blastfile, pf) + logger.debug("Annotated seqs (n=%d) written to `%s`.", len(recs), annotatedfasta) + + return annotatedfasta + + +def contamination(args): + """ + %prog contamination Ecoli.fasta genome.fasta read.fastq + + Check read contamination on a folder of paired reads. Use bowtie2 to compare + the reads against: + 1. Ecoli.fsata - this will tell us the lower bound of contamination + 2. 
genome.fasta - this will tell us the upper bound of contamination + """ + from jcvi.apps.bowtie import BowtieLogFile, align + + p = OptionParser(contamination.__doc__) + p.set_firstN() + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + ecoli, genome, fq = args + firstN_opt = "--firstN={0}".format(opts.firstN) + samfile, logfile = align([ecoli, fq, firstN_opt]) + bl = BowtieLogFile(logfile) + lowerbound = bl.rate + samfile, logfile = align([genome, fq, firstN_opt]) + bl = BowtieLogFile(logfile) + upperbound = 100 - bl.rate + + median = (lowerbound + upperbound) / 2 + + clogfile = fq + ".Ecoli" + fw = open(clogfile, "w") + lowerbound = "{0:.1f}".format(lowerbound) + upperbound = "{0:.1f}".format(upperbound) + median = "{0:.1f}".format(median) + + print("\t".join((fq, lowerbound, median, upperbound)), file=fw) + print( + "{0}: Ecoli contamination rate {1}-{2}".format(fq, lowerbound, upperbound), + file=sys.stderr, + ) + fw.close() + + +def count(args): + """ + %prog count *.gz + + Count reads based on FASTQC results. FASTQC needs to be run on all the input + data given before running this command. 
+ """ + from jcvi.utils.table import loadtable, write_csv + + p = OptionParser(count.__doc__) + p.add_argument("--dir", help="Sub-directory where FASTQC was run") + p.add_argument( + "--human", + default=False, + action="store_true", + help="Human friendly numbers", + ) + p.set_table() + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + filenames = args + subdir = opts.dir + header = "Filename|Total Sequences|Sequence length|Total Bases".split("|") + rows = [] + human = opts.human + for f in filenames: + folder = f.replace(".gz", "").rsplit(".", 1)[0] + "_fastqc" + if subdir: + folder = op.join(subdir, folder) + summaryfile = op.join(folder, "fastqc_data.txt") + + fqcdata = FastQCdata(summaryfile, human=human) + row = [fqcdata[x] for x in header] + rows.append(row) + + print(loadtable(header, rows), file=sys.stderr) + write_csv(header, rows, sep=opts.sep, filename=opts.outfile, align=opts.align) + + +def hetsmooth(args): + """ + %prog hetsmooth reads_1.fq reads_2.fq jf-23_0 + + Wrapper against het-smooth. Below is the command used in het-smooth manual. 
+ + $ het-smooth --kmer-len=23 --bottom-threshold=38 --top-threshold=220 + --no-multibase-replacements --jellyfish-hash-file=23-mers.jf + reads_1.fq reads_2.fq + """ + p = OptionParser(hetsmooth.__doc__) + p.add_argument("-K", default=23, type=int, help="K-mer size") + p.add_argument("-L", type=int, help="Bottom threshold, first min") + p.add_argument("-U", type=int, help="Top threshold, second min") + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + reads1fq, reads2fq, jfdb = args + K = opts.K + L = opts.L + U = opts.U + + assert L is not None and U is not None, "Please specify -L and -U" + + cmd = "het-smooth --kmer-len={0}".format(K) + cmd += " --bottom-threshold={0} --top-threshold={1}".format(L, U) + cmd += " --no-multibase-replacements --jellyfish-hash-file={0}".format(jfdb) + cmd += " --no-reads-log" + cmd += " " + " ".join((reads1fq, reads2fq)) + + sh(cmd) + + +def trim(args): + """ + %prog trim fastqfiles + + Trim reads using TRIMMOMATIC. If two fastqfiles are given, then it invokes + the paired reads mode. 
See manual: + + + """ + tv = "0.32" + TrimJar = "trimmomatic-{0}.jar".format(tv) + p = OptionParser(trim.__doc__) + p.add_argument( + "--path", + default=op.join("~/bin", TrimJar), + help="Path to trimmomatic jar file", + ) + p.set_phred() + p.add_argument( + "--nofrags", + default=False, + action="store_true", + help="Discard frags file in PE mode", + ) + p.add_argument( + "--minqv", + default=15, + type=int, + help="Average qv after trimming", + ) + p.add_argument( + "--minlen", + default=36, + type=int, + help="Minimum length after trimming", + ) + p.add_argument( + "--adapteronly", + default=False, + action="store_true", + help="Only trim adapters with no qv trimming", + ) + p.add_argument( + "--nogz", + default=False, + action="store_true", + help="Do not write to gzipped files", + ) + p.add_argument( + "--log", + default=None, + dest="trimlog", + help="Specify a `trimlog` file", + ) + p.set_cpus(cpus=4) + opts, args = p.parse_args(args) + + if len(args) not in (1, 2): + sys.exit(not p.print_help()) + + path = op.expanduser(opts.path) + url = "http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-{0}.zip".format( + tv + ) + + if not op.exists(path): + path = download(url) + TrimUnzipped = "Trimmomatic-" + tv + if not op.exists(TrimUnzipped): + sh("unzip " + path) + cleanup(path) + path = op.join(TrimUnzipped, TrimJar) + + assert op.exists(path), "Couldn't find Trimmomatic jar file at `{0}`".format(path) + + adaptersfile = "adapters.fasta" + Adapters = must_open(op.join(datadir, adaptersfile)).read() + write_file(adaptersfile, Adapters, skipcheck=True) + + assert op.exists( + adaptersfile + ), "Please place the illumina adapter sequence in `{0}`".format(adaptersfile) + + if opts.phred is None: + offset = guessoffset([args[0]]) + else: + offset = int(opts.phred) + + phredflag = " -phred{0}".format(offset) + threadsflag = " -threads {0}".format(opts.cpus) + if opts.trimlog: + trimlog = " -trimlog {0}".format(opts.trimlog) + + cmd = "java 
-Xmx4g -jar {0}".format(path) + frags = ".frags.fastq" + pairs = ".pairs.fastq" + if not opts.nogz: + frags += ".gz" + pairs += ".gz" + + get_prefix = lambda x: op.basename(x).replace(".gz", "").rsplit(".", 1)[0] + get_dirname = lambda x: "{0}/".format(op.dirname(x)) if op.dirname(x) else "" + if len(args) == 1: + cmd += " SE" + cmd += phredflag + cmd += threadsflag + if opts.trimlog: + cmd += trimlog + (fastqfile,) = args + prefix = get_prefix(fastqfile) + dirname = get_dirname(fastqfile) + frags1 = dirname + prefix + frags + cmd += " {0}".format(" ".join((fastqfile, frags1))) + else: + cmd += " PE" + cmd += phredflag + cmd += threadsflag + if opts.trimlog: + cmd += trimlog + fastqfile1, fastqfile2 = args + prefix1 = get_prefix(fastqfile1) + dirname1 = get_dirname(fastqfile1) + prefix2 = get_prefix(fastqfile2) + dirname2 = get_dirname(fastqfile2) + pairs1 = dirname1 + prefix1 + pairs + pairs2 = dirname2 + prefix2 + pairs + frags1 = dirname1 + prefix1 + frags + frags2 = dirname2 + prefix2 + frags + if opts.nofrags: + frags1 = "/dev/null" + frags2 = "/dev/null" + cmd += " {0}".format( + " ".join((fastqfile1, fastqfile2, pairs1, frags1, pairs2, frags2)) + ) + + cmd += " ILLUMINACLIP:{0}:2:30:10".format(adaptersfile) + + if not opts.adapteronly: + cmd += " LEADING:3 TRAILING:3" + cmd += " SLIDINGWINDOW:4:{0}".format(opts.minqv) + + cmd += " MINLEN:{0}".format(opts.minlen) + + if offset != 33: + cmd += " TOPHRED33" + sh(cmd) + + +@depends +def run_RemoveDodgyReads( + infile=None, + outfile=None, + removeDuplicates=True, + rc=False, + nthreads=32, +): + # orig.fastb => filt.fastb + assert op.exists(infile) + orig = infile.rsplit(".", 1)[0] + filt = outfile.rsplit(".", 1)[0] + + cmd = "RemoveDodgyReads IN_HEAD={0} OUT_HEAD={1}".format(orig, filt) + if not removeDuplicates: + cmd += " REMOVE_DUPLICATES=False" + if rc: + cmd += " RC=True" + cmd += nthreads + sh(cmd) + + +@depends +def run_FastbAndQualb2Fastq(infile=None, outfile=None, rc=False): + corr = 
op.basename(infile).rsplit(".", 1)[0] + cmd = "FastbQualbToFastq HEAD_IN={0} HEAD_OUT={0}".format(corr) + cmd += " PAIRED=False PHRED_OFFSET=33" + if rc: + cmd += " FLIP=True" + sh(cmd) + + +@depends +def run_pairs(infile=None, outfile=None, suffix=False): + from jcvi.assembly.allpaths import pairs + + args = infile + if suffix: + args.append("--suffix") + pairs(args) + + +def correct(args): + """ + %prog correct *.fastq + + Correct the fastqfile and generated corrected fastqfiles. This calls + assembly.allpaths.prepare() to generate input files for ALLPATHS-LG. The + naming convention for your fastqfiles are important, and are listed below. + + By default, this will correct all PE reads, and remove duplicates of all MP + reads, and results will be placed in `frag_reads.corr.{pairs,frags}.fastq` + and `jump_reads.corr.{pairs,frags}.fastq`. + """ + from jcvi.assembly.allpaths import prepare + from jcvi.assembly.base import FastqNamings + + p = OptionParser(correct.__doc__ + FastqNamings) + p.add_argument("--dir", default="data", help="Working directory") + p.add_argument( + "--fragsdedup", + default=False, + action="store_true", + help="Don't deduplicate the fragment reads", + ) + p.add_argument("--ploidy", default="2", choices=("1", "2"), help="Ploidy") + p.add_argument( + "--haploidify", + default=False, + action="store_true", + help="Set HAPLOIDIFY=True", + ) + p.add_argument( + "--suffix", + default=False, + action="store_true", + help="Add suffix /1, /2 to read names", + ) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + fastq = args + tag, tagj, taglj = "frag_reads", "jump_reads", "long_jump_reads" + + ploidy = opts.ploidy + haploidify = opts.haploidify + suffix = opts.suffix + assert (not haploidify) or (haploidify and ploidy == "2") + + prepare(["Unknown"] + fastq + ["--norun"]) + + datadir = opts.dir + mkdir(datadir) + fullpath = op.join(os.getcwd(), datadir) + nthreads = " 
NUM_THREADS={0}".format(opts.cpus) + phred64 = guessoffset([args[0]]) == 64 + + orig = datadir + "/{0}_orig".format(tag) + origfastb = orig + ".fastb" + if need_update(fastq, origfastb): + cmd = "PrepareAllPathsInputs.pl DATA_DIR={0} HOSTS='{1}' PLOIDY={2}".format( + fullpath, opts.cpus, ploidy + ) + if phred64: + cmd += " PHRED_64=True" + sh(cmd) + + if op.exists(origfastb): + correct_frag( + datadir, + tag, + origfastb, + nthreads, + dedup=opts.fragsdedup, + haploidify=haploidify, + suffix=suffix, + ) + + origj = datadir + "/{0}_orig".format(tagj) + origjfastb = origj + ".fastb" + if op.exists(origjfastb): + correct_jump(datadir, tagj, origjfastb, nthreads, suffix=suffix) + + origlj = datadir + "/{0}_orig".format(taglj) + origljfastb = origlj + ".fastb" + if op.exists(origljfastb): + correct_jump(datadir, taglj, origljfastb, nthreads, suffix=suffix) + + +def export_fastq(datadir, corrfastb, rc=False, suffix=False): + pf = op.basename(corrfastb.rsplit(".", 1)[0]) + + cwd = os.getcwd() + os.chdir(datadir) + corrfastq = pf + ".fastq" + run_FastbAndQualb2Fastq(infile=op.basename(corrfastb), outfile=corrfastq, rc=rc) + os.chdir(cwd) + + pairsfile = pf + ".pairs" + fragsfastq = pf + ".corr.fastq" + run_pairs( + infile=[op.join(datadir, pairsfile), op.join(datadir, corrfastq)], + outfile=fragsfastq, + suffix=suffix, + ) + + +def correct_frag( + datadir, tag, origfastb, nthreads, dedup=False, haploidify=False, suffix=False +): + filt = datadir + "/{0}_filt".format(tag) + filtfastb = filt + ".fastb" + run_RemoveDodgyReads( + infile=origfastb, + outfile=filtfastb, + removeDuplicates=dedup, + rc=False, + nthreads=nthreads, + ) + + filtpairs = filt + ".pairs" + edit = datadir + "/{0}_edit".format(tag) + editpairs = edit + ".pairs" + if need_update(filtpairs, editpairs): + cmd = "ln -sf {0} {1}.pairs".format(op.basename(filtpairs), edit) + sh(cmd) + + editfastb = edit + ".fastb" + if need_update(filtfastb, editfastb): + cmd = "FindErrors HEAD_IN={0} HEAD_OUT={1}".format(filt, 
edit) + cmd += " PLOIDY_FILE=data/ploidy" + cmd += nthreads + sh(cmd) + + corr = datadir + "/{0}_corr".format(tag) + corrfastb = corr + ".fastb" + if need_update(editfastb, corrfastb): + cmd = "CleanCorrectedReads DELETE=True" + cmd += " HEAD_IN={0} HEAD_OUT={1}".format(edit, corr) + cmd += " PLOIDY_FILE={0}/ploidy".format(datadir) + if haploidify: + cmd += " HAPLOIDIFY=True" + cmd += nthreads + sh(cmd) + + export_fastq(datadir, corrfastb, suffix=suffix) + + +def correct_jump(datadir, tagj, origjfastb, nthreads, suffix=False): + # Pipeline for jump reads does not involve correction + filt = datadir + "/{0}_filt".format(tagj) + filtfastb = filt + ".fastb" + run_RemoveDodgyReads( + infile=origjfastb, + outfile=filtfastb, + removeDuplicates=True, + rc=True, + nthreads=nthreads, + ) + + export_fastq(datadir, filtfastb, rc=True, suffix=suffix) + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/sim.py b/jcvi/assembly/sim.py new file mode 100644 index 00000000..124dd5e7 --- /dev/null +++ b/jcvi/assembly/sim.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Simulate Illumina sequencing reads. +""" +import math +import os +import os.path as op +import random +import sys + +from ..apps.base import ActionDispatcher, OptionParser, cleanup, logger, sh +from ..formats.fasta import Fasta + + +def main(): + + actions = ( + ("wgsim", "sample paired end reads using dwgsim"), + ("eagle", "simulate Illumina reads using EAGLE"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def add_sim_options(p): + """ + Add options shared by eagle or wgsim. 
def eagle(args):
    """
    %prog eagle fastafile

    Simulate Illumina reads using EAGLE.
    """
    p = OptionParser(eagle.__doc__)
    p.add_argument(
        "--share", default="/usr/local/share/EAGLE/", help="Default EAGLE share path"
    )
    add_sim_options(p)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastafile,) = args
    share = opts.share
    depth = opts.depth
    readlen = opts.readlen
    distance = opts.distance
    pf = op.basename(fastafile).split(".")[0]

    # Since EAGLE does not natively support read length other than 100bp and
    # 250bp - for an arbitrary read length we need to generate a bunch of
    # support files

    # First file is the Runinfo
    runinfo_readlen = "RunInfo_PairedReads2x{}Cycles1x1Tiles.xml".format(readlen)
    if not op.exists(runinfo_readlen):
        runinfo = op.join(share, "RunInfo/RunInfo_PairedReads2x251Cycles1x1Tiles.xml")
        with open(runinfo) as fp:
            runinfo_xml = fp.read()
        # Rewrite the cycle counts of the 2x251 template for our read length
        runinfo_xml = (
            runinfo_xml.replace("251", str(readlen))
            .replace("252", str(readlen + 1))
            .replace("502", str(2 * readlen))
        )
        with open(runinfo_readlen, "w") as fw:
            print(runinfo_xml.strip(), file=fw)

    # Generate quality profiles
    quality_file1 = "QualityTable.read1.length{}.qval".format(readlen)
    quality_file2 = "QualityTable.read2.length{}.qval".format(readlen)
    if not (op.exists(quality_file1) and op.exists(quality_file2)):
        for i, qq in enumerate([quality_file1, quality_file2]):
            cmd = "/usr/local/libexec/EAGLE/scaleQualityTable.pl"
            cmd += " --input {}".format(
                op.join(
                    share,
                    "QualityTables/DefaultQualityTable.read{}.length101.qval".format(
                        i + 1
                    ),
                )
            )
            cmd += " --cycles {}".format(readlen)
            cmd += " --output {}".format(qq)
            sh(cmd, silent=True)

    # Since distance is different from the default distribution which is
    # centered around 319, we shift our peak to the new peak
    template_lengths = op.join(
        share, "TemplateLengthTables/DefaultTemplateLengthTable.tsv"
    )
    template_distance = "TemplateLengthTable{}.tsv".format(distance)
    shift = distance - 319
    if not op.exists(template_distance):
        with open(template_lengths) as fp, open(template_distance, "w") as fw:
            for row in fp:
                size, counts = row.split()
                size = int(size) + shift
                counts = int(counts)
                # Templates shorter than the read length are dropped
                if size < readlen:
                    continue
                print("\t".join(str(x) for x in (size, counts)), file=fw)

    # All done, let's simulate!
    cmd = "configureEAGLE.pl"
    cmd += " --reference-genome {}".format(fastafile)
    cmd += " --coverage-depth {}".format(depth)
    cmd += " --gc-coverage-fit-table {}".format(
        op.join(share, "GcCoverageFitTables/Homo_sapiens.example1.tsv")
    )
    cmd += " --run-info {}".format(runinfo_readlen)
    cmd += " --quality-table {}".format(quality_file1)
    cmd += " --quality-table {}".format(quality_file2)
    cmd += " --template-length-table {}".format(template_distance)
    cmd += " --random-seed {}".format(random.randint(1, 65535))
    sh(cmd, silent=True)

    # Retrieve results
    from jcvi.formats.sam import fastq

    outpf = opts.outfile or "{0}.{1}bp.{2}x".format(pf, distance, depth)
    outpf += ".bwa"
    cwd = os.getcwd()
    eagle_dir = "EAGLE"
    os.chdir(eagle_dir)
    try:
        sh("make bam", silent=True)

        # Convert BAM to FASTQ
        a, b = fastq(["eagle.bam", outpf])
        sh("mv {} {} ../".format(a, b))
    finally:
        # Always return to the starting directory, even if a step failed
        os.chdir(cwd)

    # Clean-up
    cleanup(eagle_dir)
+ """ + p = OptionParser(wgsim.__doc__) + p.add_argument( + "--erate", + default=0.01, + type=float, + help="Base error rate of the read", + ) + p.add_argument( + "--noerrors", + default=False, + action="store_true", + help="Simulate reads with no errors", + ) + p.add_argument( + "--genomesize", + type=int, + help="Genome size in Mb [default: estimate from data]", + ) + add_sim_options(p) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + pf = op.basename(fastafile).split(".")[0] + + genomesize = opts.genomesize + size = genomesize * 1000000 if genomesize else Fasta(fastafile).totalsize + depth = opts.depth + readlen = opts.readlen + readnum = int(math.ceil(size * depth / (2 * readlen))) + + distance = opts.distance + stdev = distance / 10 + + outpf = opts.outfile or "{0}.{1}bp.{2}x".format(pf, distance, depth) + + logger.debug("Total genome size: {0} bp".format(size)) + logger.debug("Target depth: {0}x".format(depth)) + logger.debug("Number of read pairs (2x{0}): {1}".format(readlen, readnum)) + + if opts.noerrors: + opts.erate = 0 + + cmd = "dwgsim -e {0} -E {0}".format(opts.erate) + if opts.noerrors: + cmd += " -r 0 -R 0 -X 0 -y 0" + + cmd += " -d {0} -s {1}".format(distance, stdev) + cmd += " -N {0} -1 {1} -2 {1}".format(readnum, readlen) + cmd += " {0} {1}".format(fastafile, outpf) + sh(cmd) + + +if __name__ == "__main__": + main() diff --git a/jcvi/assembly/soap.py b/jcvi/assembly/soap.py new file mode 100644 index 00000000..273370f8 --- /dev/null +++ b/jcvi/assembly/soap.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Script to write and assist SOAPdenovo assembly. 
class FillLine(object):
    """
    One record of a GapCloser .fill file.

    The first eight whitespace-separated columns of `row` are parsed as
    integers: start, end, leftextend, rightextend, closed flag (1 means
    closed), extendlength, gap size before and gap size after.
    """

    def __init__(self, row):
        cols = row.split()
        (
            self.start,
            self.end,
            self.leftextend,
            self.rightextend,
            closed_flag,
            self.extendlength,
            self.before,
            self.after,
        ) = (int(cols[idx]) for idx in range(8))
        self.closed = closed_flag == 1
        # `after` may be a negative number stored as unsigned 32-bit;
        # fold it back into the signed range
        if self.after > 0 and (self.after & 0x80000000):
            self.after -= 0x100000000

    @property
    def delta(self):
        """Change in gap size (after minus before)."""
        return self.after - self.before
def clean(args):
    """
    %prog clean 1.fastq 2.fastq [insertsize]

    Clean and dedup paired FASTQ files.
    """
    p = OptionParser(clean.__doc__)
    p.add_argument("-a", default=0, type=int, help="Trim length at 5' end")
    p.add_argument("-b", default=50, type=int, help="Trim length at 3' end")
    p.set_cpus()
    opts, args = p.parse_args(args)

    nargs = len(args)
    if nargs not in (2, 3):
        sys.exit(not p.print_help())

    if nargs == 2:
        # Insert size is looked up via get_size() on the first file name
        p1, p2 = args
        size = get_size(p1)
    else:
        p1, p2, size = args
        size = int(size)

    pf = p1.split(".")[0]
    cpus = opts.cpus

    offset = guessoffset([p1])
    trim5, trim3 = opts.a, opts.b

    p1_clean, p2_clean = p1 + ".clean", p2 + ".clean"
    targets = [p1_clean + ".gz", p2_clean + ".gz"]
    if not need_update([p1, p2], targets):
        return

    pieces = [
        "SOAPfilter_v2.0 -t {0} -m 2000000 -p -y -z -g".format(cpus),
        " -q {0} -w 10 -B 50 -f 0".format(offset),
        " -l {0} -a {1} -b {2} -c {1} -d {2}".format(size, trim5, trim3),
        " {0} {1} {2}.clean.stat {3} {4}".format(p1, p2, pf, p1_clean, p2_clean),
    ]
    sh("".join(pieces))
def prepare(args):
    """
    %prog prepare *.fastq

    Scan input fastq files (see below) and write SOAP config files based
    on inputfiles. Use "--scaffold contigs.fasta" to perform scaffolding.
    """
    from jcvi.formats.base import write_file

    p = OptionParser(prepare.__doc__ + FastqNamings)
    p.add_argument("-K", default=45, type=int, help="K-mer size")
    p.add_argument(
        "--assemble_1st_rank_only",
        default=False,
        action="store_true",
        help="Assemble the first rank only, other libs asm_flags=2",
    )
    p.add_argument("--scaffold", help="Only perform scaffolding")
    p.add_argument("--gapclose", help="Only perform gap closure")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fnames = args
    K = opts.K
    for x in fnames:
        assert op.exists(x), "File `{0}` not found.".format(x)

    a1st = opts.assemble_1st_rank_only

    cfgfile = "soap.config"
    gc_cfgfile = "soap.gc.config"

    libs = get_libs(fnames)
    max_rd_len = max(readlen([f]) for f in fnames)

    # Collect singletons first (libraries with size 0); they are attached to
    # the rank-1 library block below
    singletons = []
    for lib, fs in libs:
        if lib.size == 0:
            singletons += fs

    # Context managers guarantee both config files are flushed and closed,
    # even if a library cannot be processed
    with open(cfgfile, "w") as fw, open(gc_cfgfile, "w") as fw_gc:
        block = "max_rd_len={0}\n".format(max_rd_len)
        for stream in (sys.stderr, fw, fw_gc):
            print(block, file=stream)

        rank = 0
        for lib, fs in libs:
            size = lib.size
            if size == 0:
                continue

            rank += 1
            block = "[LIB]\n"
            block += "avg_ins={0}\n".format(size)
            block += "reverse_seq={0}\n".format(lib.reverse_seq)
            asm_flags = 2 if (rank > 1 and a1st) else lib.asm_flags
            block += "asm_flags={0}\n".format(asm_flags)
            block += "rank={0}\n".format(rank)
            if lib.reverse_seq:
                pair_num_cutoff = 3
                block += "pair_num_cutoff={0}\n".format(pair_num_cutoff)
            block += "map_len=35\n"

            for f in fs:
                if ".1." in f:
                    tag = "q1"
                elif ".2." in f:
                    tag = "q2"
                else:
                    # Previously this fell through with `tag` unbound (or
                    # silently reused from the previous file)
                    raise ValueError(
                        "Cannot tell read 1 from read 2 for `{0}`: paired "
                        "file names must contain `.1.` or `.2.`".format(f)
                    )
                block += "{0}={1}\n".format(tag, f)

            if rank == 1:
                for s in singletons:
                    tag = "q" if is_fastq(s) else "f"
                    block += tag + "={0}\n".format(s)

            print(block, file=sys.stderr)
            print(block, file=fw)

            # Only libraries with asm_flags above 2 go to the gap-closure config
            if asm_flags > 2:
                print(block, file=fw_gc)

    runfile = "run.sh"
    scaffold = opts.scaffold
    # Pick the SOAPdenovo binary by K-mer size
    bb = 63 if K <= 63 else 127
    binary = "SOAPdenovo-{0}mer".format(bb)
    header = SOAPHEADER.format(opts.cpus, K, binary)
    if opts.gapclose:
        gapclose = opts.gapclose
        outfile = gapclose.rsplit(".", 1)[0] + ".closed.fasta"
        template = header + GCRUNG.format(gapclose, outfile)
    else:
        template = header + (SCFRUN % scaffold if scaffold else SOAPRUN)

    write_file(runfile, template)
class OVLLine:
    """
    One overlap record, e.g.

        tig00000004 tig00042923 I -64039 -18713 16592 99.84

    Columns: a, b, orientation ("N" gives bstrand "+", anything else "-"),
    ahang, bhang, overlap length, percent identity.
    See also: assembly.goldenpath.Overlap for another implementation
    """

    def __init__(self, row):
        fields = row.split()
        self.a, self.b = fields[0], fields[1]
        self.bstrand = "-" if fields[2] != "N" else "+"
        self.ahang, self.bhang, self.overlap = (int(fields[k]) for k in (3, 4, 5))
        self.pctid = float(fields[6])
        # Overlap length weighted by identity, truncated to an integer
        self.score = int(self.overlap * self.pctid / 100)
        self.best = None

    @property
    def tag(self):
        """Classify the overlap from the signs of the two hangs."""
        if self.ahang >= 0:
            return "a->b" if self.bhang > 0 else "b in a"
        return "b->a" if self.bhang < 0 else "a in b"
def fromovl(args):
    """
    %prog fromovl nucmer2ovl.ovl fastafile

    Build overlap graph from ovl file which is converted using NUCMER2OVL.
    """
    p = OptionParser(fromovl.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    ovlfile, fastafile = args
    ovl = OVL(ovlfile)
    g = ovl.graph

    # Record the contained contigs; the context manager makes sure the list
    # is flushed to disk before the AGP is built
    with open("contained.ids", "w") as fw:
        print("\n".join(sorted(ovl.contained)), file=fw)

    graph_to_agp(g, ovlfile, fastafile, exclude=ovl.contained, verbose=False)
def happy_nodes(row, prefix=None):
    """
    Return the scaffold names found on one row of a HAPPY mapping file.

    All annotation characters ("[](){}+-") are removed, the row is split on
    ":" and each name is stripped of surrounding whitespace. When `prefix` is
    given it is prepended to every name.
    """
    # Python 3 str.translate takes a single table argument; the third
    # argument of str.maketrans lists the characters to delete
    row = row.translate(str.maketrans("", "", "[](){}+-"))
    scfs = [x.strip() for x in row.split(":")]
    if prefix:
        scfs = [prefix + x for x in scfs]
    return scfs
+ """ + trans = str.maketrans("[](){}", " ") + row = row.strip().strip("+") + row = row.translate(trans) + scfs = [x.strip("+") for x in row.split(":")] + for a, b in pairwise(scfs): + oa = "<" if a.strip()[0] == "-" else ">" + ob = "<" if b.strip()[0] == "-" else ">" + + is_uncertain = a[-1] == " " or b[0] == " " + + a = a.strip().strip("-") + b = b.strip().strip("-") + + if prefix: + a = prefix + a + b = prefix + b + + yield (a, b, oa, ob), is_uncertain + + +def partition(args): + """ + %prog partition happy.txt synteny.graph + + Select edges from another graph and merge it with the certain edges built + from the HAPPY mapping data. + """ + allowed_format = ("png", "ps") + p = OptionParser(partition.__doc__) + p.add_argument("--prefix", help="Add prefix to the name") + p.add_argument( + "--namestart", + default=0, + type=int, + help="Use a shorter name, starting index", + ) + p.add_argument( + "--format", + default="png", + choices=allowed_format, + help="Generate image of format", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + happyfile, graphfile = args + bg = BiGraph() + bg.read(graphfile, color="red") + prefix = opts.prefix + fp = open(happyfile) + for i, row in enumerate(fp): + nns = happy_nodes(row, prefix=prefix) + nodes = set(nns) + edges = happy_edges(row, prefix=prefix) + + small_graph = BiGraph() + for (a, b, oa, ob), is_uncertain in edges: + color = "gray" if is_uncertain else "black" + small_graph.add_edge(a, b, oa, ob, color=color) + + for (u, v), e in bg.edges.items(): + # Grab edge if both vertices are on the same line + if u in nodes and v in nodes: + uv = (str(u), str(v)) + if uv in small_graph.edges: + e = small_graph.edges[uv] + e.color = "blue" # supported by both evidences + else: + small_graph.add_edge(e) + + print(small_graph, file=sys.stderr) + + pngfile = "A{0:02d}.{1}".format(i + 1, opts.format) + telomeres = (nns[0], nns[-1]) + small_graph.draw( + pngfile, namestart=opts.namestart, 
def happy(args):
    """
    %prog happy happy.txt

    Make bi-directed graph from HAPPY mapping data. JCVI encodes uncertainties
    in the order of the contigs / scaffolds.

    : separates scaffolds
    + means telomere (though the telomere repeats may not show because the
    telomere-adjacent sequence is missing)
    - means that the scaffold is in reverse orientation to that shown in the 2003
    TIGR scaffolds.

    Ambiguities are represented as follows, using Paul Dear's description:
    [ ] means undetermined orientation. error quite possible (70% confidence?)
    ( ) means uncertain orientation. small chance of error (90% confidence?)
    { } means uncertain order.

    Example:
    +-8254707:8254647:-8254690:{[8254694]:[8254713]:[8254531]:[8254797]}:8254802:8254788+
    """
    p = OptionParser(happy.__doc__)
    p.add_argument("--prefix", help="Add prefix to the name")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (happyfile,) = args

    certain = "certain.graph"
    uncertain = "uncertain.graph"

    # Edges are routed to one of two files depending on the uncertainty flag
    # reported by happy_edges; the context managers close all three handles
    with open(happyfile) as fp, open(certain, "w") as fw1, open(
        uncertain, "w"
    ) as fw2:
        for row in fp:
            for e, is_uncertain in happy_edges(row, prefix=opts.prefix):
                fw = fw2 if is_uncertain else fw1
                print(e, file=fw)

    logger.debug("Edges written to `{0}`".format(",".join((certain, uncertain))))
def graph_to_agp(g, blastfile, subjectfasta, exclude=None, verbose=False):
    """
    Decompose graph `g` into paths and write them as an AGP file.

    The graph is dumped to `graph.txt`. Every non-singleton path becomes an
    object named `pmol_NNNN` in `<blastfile>.agp`. Contigs listed in
    `subjectfasta` that are neither part of a path nor in `exclude` are
    written as their own single-component objects.

    Args:
        g: graph providing write(), iter_paths() and path().
        blastfile: name used as the prefix of the output AGP file.
        subjectfasta: FASTA file supplying the contig sizes.
        exclude: optional collection of contig names to leave out of the AGP.
        verbose: print each path to stdout when True.
    """
    from jcvi.formats.agp import order_to_agp

    # Build a private set: avoids a shared mutable default and gives O(1)
    # membership tests in the loop over all contigs
    excluded = frozenset(exclude) if exclude else frozenset()

    logger.debug(str(g))
    g.write("graph.txt")
    # g.draw("graph.pdf")

    paths = []
    for path in g.iter_paths():
        m, oo = g.path(path)
        if len(oo) == 1:  # Singleton path
            continue
        paths.append(oo)
        if verbose:
            print(m)
            print(oo)

    npaths = len(paths)
    ntigs = sum(len(x) for x in paths)
    logger.debug(
        "Graph decomposed to {0} paths with {1} components.".format(npaths, ntigs)
    )

    agpfile = blastfile + ".agp"
    sizes = Sizes(subjectfasta)
    scaffolded = set()
    nsingletons = nscaffolded = nexcluded = 0
    with open(agpfile, "w") as fwagp:
        for i, oo in enumerate(paths):
            ctgorder = [(str(ctg), ("+" if strand else "-")) for ctg, strand in oo]
            scaffolded |= set(ctg for ctg, strand in ctgorder)
            object_name = "pmol_{0:04d}".format(i)
            order_to_agp(object_name, ctgorder, sizes.mapping, fwagp)

        # Get the singletons as well
        for ctg, size in sizes.iter_sizes():
            if ctg in scaffolded:
                nscaffolded += 1
                continue
            if ctg in excluded:
                nexcluded += 1
                continue

            ctgorder = [(ctg, "+")]
            order_to_agp(ctg, ctgorder, sizes.mapping, fwagp)
            nsingletons += 1

    logger.debug(
        "scaffolded={} excluded={} singletons={}".format(
            nscaffolded, nexcluded, nsingletons
        )
    )
    logger.debug("AGP file written to `{0}`.".format(agpfile))
+ """ + p = OptionParser(connect.__doc__) + p.add_argument( + "--clip", + default=2000, + type=int, + help="Only consider end of contigs", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, blastfile = args + clip = opts.clip + + sizes = Sizes(fastafile).mapping + blast = Blast(blastfile) + blasts = [] + for b in blast: + seqid = b.subject + size = sizes[seqid] + start, end = b.sstart, b.sstop + cstart, cend = min(size, clip), max(0, size - clip) + if start > cstart and end < cend: + continue + blasts.append(b) + + key = lambda x: x.query + blasts.sort(key=key) + g = BiGraph() + for query, bb in groupby(blasts, key=key): + bb = sorted(bb, key=lambda x: x.qstart) + nsubjects = len(set(x.subject for x in bb)) + if nsubjects == 1: + continue + print("\n".join(str(x) for x in bb)) + for a, b in pairwise(bb): + astart, astop = a.qstart, a.qstop + bstart, bstop = b.qstart, b.qstop + if a.subject == b.subject: + continue + + arange = astart, astop + brange = bstart, bstop + ov = range_intersect(arange, brange) + alen = astop - astart + 1 + blen = bstop - bstart + 1 + if ov: + ostart, ostop = ov + ov = ostop - ostart + 1 + + print(ov, alen, blen) + if ov and (ov > alen / 2 or ov > blen / 2): + print("Too much overlap ({0})".format(ov)) + continue + + asub = a.subject + bsub = b.subject + atag = ">" if a.orientation == "+" else "<" + btag = ">" if b.orientation == "+" else "<" + g.add_edge(asub, bsub, atag, btag) + + graph_to_agp(g, blastfile, fastafile, verbose=False) + + +if __name__ == "__main__": + main() diff --git a/jcvi/compara/__init__.py b/jcvi/compara/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/jcvi/compara/__main__.py b/jcvi/compara/__main__.py new file mode 100644 index 00000000..ef7ff4e8 --- /dev/null +++ b/jcvi/compara/__main__.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +Cluster of comparative genomics analysis methods: SynFind and QUOTA-ALIGN 
class AnchorFile(BaseFile):
    """
    Anchors file held in memory as a list of blocks.

    Each block is a list of rows, and each row is the whitespace-split
    content of one line. Blocks are whatever read_block() yields when given
    "#" as the block header marker.
    """

    def __init__(self, filename, minsize=0):
        super().__init__(filename)
        self.blocks = list(self.iter_blocks(minsize=minsize))

    def iter_blocks(self, minsize=0):
        """Yield each block of the file that has at least `minsize` rows."""
        # The context manager closes the file once the generator is
        # exhausted or discarded
        with open(self.filename) as fp:
            for _, lines in read_block(fp, "#"):
                lines = [x.split() for x in lines]
                if len(lines) >= minsize:
                    yield lines

    def iter_pairs(self, minsize=0):
        """Yield (a, b, block_id) for every row; block_id counts from 0."""
        block_id = -1
        for rows in self.iter_blocks(minsize=minsize):
            block_id += 1
            for row in rows:
                a, b = row[:2]
                yield a, b, block_id

    def make_ranges(self, order, clip=10):
        """Prepare anchors information into a set of ranges for chaining"""
        ranges = []
        block_pairs = defaultdict(dict)
        blocks = self.blocks
        for i, ib in enumerate(blocks):
            q, s, t = zip(*ib)
            # Make sure the first column is the one present in `order`
            if q[0] not in order:
                q, s = s, q

            r = make_range(q, s, t, i, order, block_pairs, clip=clip)
            ranges.append(r)

            assert q[0] in order
            if s[0] not in order:
                continue

            # is_self comparison
            q, s = s, q
            r = make_range(q, s, t, i, order, block_pairs, clip=clip)
            ranges.append(r)
        return ranges, block_pairs

    def filter_blocks(self, accepted: Dict[Tuple[str, str], str]):
        """
        Filter the blocks based on the accepted pairs. This is used to update
        the anchors so that they match the info in the LAST file.
        """
        new_blocks = []
        nremoved = 0
        ncorrected = 0
        nblocks_removed = 0
        for block in self.blocks:
            new_block = []
            for line in block:
                a, b, score = line
                pair = (a, b)
                if pair not in accepted:
                    nremoved += 1
                    continue
                av = accepted[pair]
                # A score carrying a trailing "L" still counts as matching
                if score != av and score != av + "L":
                    score = av
                    ncorrected += 1
                new_block.append((a, b, score))
            if new_block:
                new_blocks.append(new_block)
            else:
                nblocks_removed += 1

        logger.debug("Removed %d existing anchors", nremoved)
        if nblocks_removed:
            logger.debug("Removed %d empty blocks", nblocks_removed)
        logger.debug("Corrected scores for %d anchors", ncorrected)
        self.blocks = new_blocks

    def print_to_file(self, filename="stdout"):
        """
        Print the anchors currently held in `self.blocks` to a file, one
        "###" separator line before each block.
        """
        fw = must_open(filename, "w")
        for block in self.blocks:
            print("###", file=fw)
            for line in block:
                a, b, score = line
                print("\t".join((a, b, score)), file=fw)
        fw.close()

        logger.debug("Anchors written to `%s`", filename)

    def blast(self, blastfile=None, outfile=None):
        """
        convert anchor file to 12 col blast file
        """
        from ..formats.blast import BlastSlow, BlastLineByConversion

        if not outfile:
            outfile = self.filename + ".blast"

        # Without a BLAST file there is nothing to look up; an empty mapping
        # sends every pair down the conversion branch instead of failing on
        # `in None`
        if blastfile is not None:
            blasts = BlastSlow(blastfile).to_dict()
        else:
            blasts = {}

        fw = must_open(outfile, "w", checkexists=True)
        nlines = 0
        for a, b, _ in self.iter_pairs():
            if (a, b) in blasts:
                bline = blasts[(a, b)]
            elif (b, a) in blasts:
                bline = blasts[(b, a)]
            else:
                line = "\t".join((a, b))
                bline = BlastLineByConversion(line, mode="110000000000")

            print(bline, file=fw)
            nlines += 1
        fw.close()

        logger.debug("A total of %d BLAST lines written to `%s`", nlines, outfile)

        return outfile

    @property
    def is_empty(self):
        """True when there are no blocks or the first block has no rows."""
        blocks = self.blocks
        return not blocks or not blocks[0]
def make_range(q, s, t, i, order, block_pairs, clip=10):
    """
    Build the Range covering block `i` on the query side.

    The best pairs from get_best_pair() are merged into `block_pairs[i]` and
    their count becomes the range score. The span runs from the smallest to
    the largest `order[x][0]` among the names in `q`; when it is at least
    `2 * clip` wide, `clip / 2` is trimmed from each end.
    """
    best = get_best_pair(q, s, t)
    block_pairs[i].update(best)

    positions = sorted(order[name][0] for name in q)
    lo, hi = positions[0], positions[-1]
    if hi - lo >= 2 * clip:
        half = clip / 2
        lo, hi = lo + half, hi - half

    return Range("0", lo, hi, score=len(best), id=i)
+""" +import sys +import os.path as op + +from collections import defaultdict +from itertools import groupby + +from ..apps.base import OptionParser, logger +from ..compara.synteny import check_beds +from ..formats.blast import Blast +from ..utils.cbook import gene_name +from ..utils.grouper import Grouper + + +def blastfilter_main(blast_file, p, opts): + + qbed, sbed, qorder, sorder, is_self = check_beds(blast_file, p, opts) + + tandem_Nmax = opts.tandem_Nmax + cscore = opts.cscore + exclude = opts.exclude + + fp = open(blast_file) + total_lines = sum(1 for line in fp if line[0] != "#") + logger.debug( + "Load BLAST file `{}` (total {} lines)".format(blast_file, total_lines) + ) + bl = Blast(blast_file) + blasts = sorted(list(bl), key=lambda b: b.score, reverse=True) + + filtered_blasts = [] + seen = set() + ostrip = opts.strip_names + nwarnings = 0 + for b in blasts: + query, subject = b.query, b.subject + if query == subject: + continue + + if ostrip: + query, subject = gene_name(query), gene_name(subject) + if query not in qorder: + if nwarnings < 100: + logger.warning("{} not in {}".format(query, qbed.filename)) + elif nwarnings == 100: + logger.warning("too many warnings.. suppressed") + nwarnings += 1 + continue + if subject not in sorder: + if nwarnings < 100: + logger.warning("{} not in {}".format(subject, sbed.filename)) + elif nwarnings == 100: + logger.warning("too many warnings.. 
suppressed") + nwarnings += 1 + continue + + qi, q = qorder[query] + si, s = sorder[subject] + + if is_self and qi > si: + # move all hits to same side when doing self-self BLAST + query, subject = subject, query + qi, si = si, qi + q, s = s, q + + key = query, subject + if key in seen: + continue + seen.add(key) + b.query, b.subject = [str(k) for k in key] + + b.qi, b.si = qi, si + b.qseqid, b.sseqid = q.seqid, s.seqid + + filtered_blasts.append(b) + + if exclude: + before_filter = len(filtered_blasts) + logger.debug("running excluded pairs (--exclude `{}`) ..".format(exclude)) + filtered_blasts = list(filter_exclude(filtered_blasts, exclude=exclude)) + logger.debug( + "after filter ({}->{}) ..".format(before_filter, len(filtered_blasts)) + ) + + if cscore: + before_filter = len(filtered_blasts) + logger.debug("running the cscore filter (cscore>=%.2f) .." % cscore) + filtered_blasts = list(filter_cscore(filtered_blasts, cscore=cscore)) + logger.debug( + "after filter ({}->{}) ..".format(before_filter, len(filtered_blasts)) + ) + + if tandem_Nmax: + logger.debug( + "running the local dups filter (tandem_Nmax={}) ..".format(tandem_Nmax) + ) + + qtandems = tandem_grouper(filtered_blasts, flip=True, tandem_Nmax=tandem_Nmax) + standems = tandem_grouper(filtered_blasts, flip=False, tandem_Nmax=tandem_Nmax) + + qdups_fh = ( + open(op.splitext(opts.qbed)[0] + ".localdups", "w") + if opts.tandems_only + else None + ) + + if is_self: + for s in standems: + qtandems.join(*s) + qdups_to_mother = write_localdups(qtandems, qbed, qdups_fh) + sdups_to_mother = qdups_to_mother + else: + qdups_to_mother = write_localdups(qtandems, qbed, qdups_fh) + sdups_fh = ( + open(op.splitext(opts.sbed)[0] + ".localdups", "w") + if opts.tandems_only + else None + ) + sdups_to_mother = write_localdups(standems, sbed, sdups_fh) + + if opts.tandems_only: + # write out new .bed after tandem removal + write_new_bed(qbed, qdups_to_mother) + if not is_self: + write_new_bed(sbed, sdups_to_mother) + + # 
just want to use this script as a tandem finder. + # sys.exit() + + before_filter = len(filtered_blasts) + filtered_blasts = list( + filter_tandem(filtered_blasts, qdups_to_mother, sdups_to_mother) + ) + logger.debug( + "after filter ({}->{}) ..".format(before_filter, len(filtered_blasts)) + ) + + blastfilteredfile = blast_file + ".filtered" + fw = open(blastfilteredfile, "w") + write_new_blast(filtered_blasts, fh=fw) + fw.close() + + +def write_localdups(tandems, bed, dups_fh=None): + + tandem_groups = [] + for group in tandems: + rows = [bed[i] for i in group] + # within the tandem groups, genes are sorted with decreasing size + rows.sort(key=lambda a: (-abs(a.end - a.start), a.accn)) + tandem_groups.append([x.accn for x in rows]) + + dups_to_mother = {} + n = 1 + for accns in sorted(tandem_groups): + if dups_fh: + print("\t".join(accns), file=dups_fh) + if n: + n -= 1 + logger.debug("write local dups to file {}".format(dups_fh.name)) + + for dup in accns[1:]: + dups_to_mother[dup] = accns[0] + + return dups_to_mother + + +def write_new_bed(bed, children): + # generate local dup removed annotation files + out_name = "%s.nolocaldups%s" % op.splitext(bed.filename) + logger.debug("write tandem-filtered bed file %s" % out_name) + fh = open(out_name, "w") + for i, row in enumerate(bed): + if row["accn"] in children: + continue + print(row, file=fh) + fh.close() + + +def write_new_blast(filtered_blasts, fh=sys.stdout): + for b in filtered_blasts: + print(b, file=fh) + + +def filter_exclude(blast_list, exclude=None): + """Filter gene pairs from an excluded list + + Args: + blast_list (List[BlastLine]): List of BlastLines + exclude (str, optional): Path to the excluded anchors file. Defaults to None. 
+ """ + from .base import AnchorFile + + excluded_pairs = set() + ac = AnchorFile(exclude) + for a, b, block in ac.iter_pairs(): + excluded_pairs.add((a, b)) + excluded_pairs.add((b, a)) + for b in blast_list: + if (b.query, b.subject) in excluded_pairs: + continue + yield b + + +def filter_cscore(blast_list, cscore=0.5): + + best_score = defaultdict(float) + for b in blast_list: + if b.score > best_score[b.query]: + best_score[b.query] = b.score + if b.score > best_score[b.subject]: + best_score[b.subject] = b.score + + for b in blast_list: + cur_cscore = b.score / max(best_score[b.query], best_score[b.subject]) + if cur_cscore > cscore: + yield b + + +def filter_tandem(blast_list, qdups_to_mother, sdups_to_mother): + + mother_blast = [] + for b in blast_list: + if b.query in qdups_to_mother: + b.query = qdups_to_mother[b.query] + if b.subject in sdups_to_mother: + b.subject = sdups_to_mother[b.subject] + mother_blast.append(b) + + mother_blast.sort(key=lambda b: b.score, reverse=True) + seen = {} + for b in mother_blast: + if b.query == b.subject: + continue + key = b.query, b.subject + if key in seen: + continue + seen[key] = None + yield b + + +def tandem_grouper(blast_list, tandem_Nmax=10, flip=True): + if not flip: + simple_blast = [ + (b.query, (b.sseqid, b.si)) for b in blast_list if b.evalue < 1e-10 + ] + else: + simple_blast = [ + (b.subject, (b.qseqid, b.qi)) for b in blast_list if b.evalue < 1e-10 + ] + + simple_blast.sort() + + standems = Grouper() + for name, hits in groupby(simple_blast, key=lambda x: x[0]): + # these are already sorted. 
+ hits = [x[1] for x in hits] + for ia, a in enumerate(hits[:-1]): + b = hits[ia + 1] + # on the same chr and rank difference no larger than tandem_Nmax + if b[1] - a[1] <= tandem_Nmax and b[0] == a[0]: + standems.join(a[1], b[1]) + + return standems + + +def main(args): + + p = OptionParser(__doc__) + p.set_beds() + p.set_stripnames() + p.add_argument( + "--tandems_only", + dest="tandems_only", + action="store_true", + default=False, + help="only calculate tandems, write .localdup file and exit.", + ) + p.add_argument( + "--tandem_Nmax", + type=int, + default=10, + help="merge tandem genes within distance", + ) + p.add_argument( + "--cscore", + type=float, + default=0.7, + help="retain hits that have good bitscore. a value of 0.5 means " + "keep all values that are 50% or greater of the best hit. " + "higher is more stringent", + ) + p.add_argument("--exclude", help="Remove anchors from a previous run") + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + blastfilter_main(blastfile, p, opts) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/jcvi/compara/catalog.py b/jcvi/compara/catalog.py new file mode 100644 index 00000000..fe2d4247 --- /dev/null +++ b/jcvi/compara/catalog.py @@ -0,0 +1,982 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +import os.path as op +import sys +import string + +from collections import defaultdict +from itertools import product, combinations + +from ..apps.base import ( + ActionDispatcher, + OptionParser, + glob, + logger, + mkdir, + need_update, + sh, +) +from ..apps.align import last as last_main, diamond_blastp_main, blast_main +from ..compara.blastfilter import main as blastfilter_main +from ..compara.quota import main as quota_main +from ..compara.synteny import scan, mcscan, liftover +from ..formats.base import BaseFile, DictFile, must_open +from ..formats.bed import Bed +from ..formats.blast import ( + BlastLine, + cscore, + filter as 
blast_filter, + filtered_blastfile_name, +) +from ..formats.fasta import Fasta +from ..utils.cbook import gene_name +from ..utils.grouper import Grouper + +from .base import AnchorFile +from .synteny import check_beds + + +class OMGFile(BaseFile): + def __init__(self, filename): + super().__init__(filename) + fp = open(filename) + inblock = False + components = [] + component = [] + for row in fp: + if inblock: + atoms = row.split() + natoms = len(atoms) + assert natoms in (0, 7) + if natoms: + gene, taxa = atoms[0], atoms[5] + component.append((gene, taxa)) + else: + inblock = False + components.append(tuple(component)) + + if row.strip().startswith("---"): + inblock = True + component = [] + + if inblock: + components.append(tuple(component)) + self.components = components + + def best(self): + bb = set() + for component in self.components: + size = len(component) + if size > 1: + bb.add(component) + return bb + + +def main(): + actions = ( + ("tandem", "identify tandem gene groups within certain distance"), + ("ortholog", "run a combined synteny and RBH pipeline to call orthologs"), + ("group", "cluster the anchors into ortho-groups"), + ("omgprepare", "prepare weights file to run Sankoff OMG algorithm"), + ("omg", "generate a series of Sankoff OMG algorithm inputs"), + ("omgparse", "parse the OMG outputs to get gene lists"), + ("enrich", "enrich OMG output by pulling genes missed by OMG"), + ("layout", "layout the gene lists"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def get_weights(weightsfiles=None): + if weightsfiles is None: + weightsfiles = glob("*.weights") + + weights = defaultdict(list) + for row in must_open(weightsfiles): + a, b, c = row.split() + weights[a].append((a, b, c)) + return weights + + +def get_edges(weightsfiles=None): + if weightsfiles is None: + weightsfiles = glob("*.weights") + + edges = {} + for row in must_open(weightsfiles): + a, b, c = row.split() + c = int(c) + edges[(a, b)] = c + edges[(b, a)] = c + 
return edges + + +def get_info(): + infofiles = glob("*.info") + info = {} + for row in must_open(infofiles): + a = row.split()[0] + info[a] = row.rstrip() + return info + + +def enrich(args): + """ + %prog enrich omgfile groups ntaxa > enriched.omg + + Enrich OMG output by pulling genes misses by OMG. + """ + p = OptionParser(enrich.__doc__) + p.add_argument( + "--ghost", + default=False, + action="store_true", + help="Add ghost homologs already used", + ) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + omgfile, groupsfile, ntaxa = args + ntaxa = int(ntaxa) + ghost = opts.ghost + + # Get gene pair => weight mapping + weights = get_edges() + info = get_info() + # Get gene => taxon mapping + info = dict((k, v.split()[5]) for k, v in info.items()) + + groups = Grouper() + + fp = open(groupsfile) + for row in fp: + members = row.strip().split(",") + groups.join(*members) + + logger.debug( + "Imported %d families with %d members.", len(groups), groups.num_members + ) + + seen = set() + omggroups = Grouper() + fp = open(omgfile) + for row in fp: + genes, idxs = row.split() + genes = genes.split(",") + seen.update(genes) + omggroups.join(*genes) + + nmembers = omggroups.num_members + logger.debug("Imported %d OMG families with %d members.", len(omggroups), nmembers) + assert nmembers == len(seen) + + alltaxa = set(str(x) for x in range(ntaxa)) + recruited = [] + fp = open(omgfile) + for row in fp: + genes, idxs = row.split() + genes = genes.split(",") + a = genes[0] + + idxs = set(idxs.split(",")) + missing_taxa = alltaxa - idxs + if not missing_taxa: + print(row.rstrip()) + continue + + leftover = groups[a] + if not ghost: + leftover = set(leftover) - seen + + if not leftover: + print(row.rstrip()) + continue + + leftover_sorted_by_taxa = dict( + (k, [x for x in leftover if info[x] == k]) for k in missing_taxa + ) + + # print genes, leftover + # print leftover_sorted_by_taxa + solutions = [] + for solution in 
product(*leftover_sorted_by_taxa.values()): + score = sum(weights.get((a, b), 0) for a in solution for b in genes) + if score == 0: + continue + score += sum(weights.get((a, b), 0) for a, b in combinations(solution, 2)) + solutions.append((score, solution)) + # print solution, score + + best_solution = max(solutions) if solutions else None + if best_solution is None: + print(row.rstrip()) + continue + + # print "best ==>", best_solution + best_score, best_addition = best_solution + genes.extend(best_addition) + recruited.extend(best_addition) + + genes = sorted([(info[x], x) for x in genes]) + idxs, genes = zip(*genes) + + if ghost: # decorate additions so it's clear that they were added + pgenes = [] + for g in genes: + if g in recruited and g in seen: + pgenes.append("|{0}|".format(g)) + else: + pgenes.append(g) + genes = pgenes + + print("\t".join((",".join(genes), ",".join(idxs)))) + if not ghost: + seen.update(best_addition) + + logger.debug("Recruited %d new genes.", len(recruited)) + + +def pairwise_distance(a, b, threadorder): + d = 0 + for x, y in zip(a, b)[:-1]: # Last column not used + x, y = x.strip("|"), y.strip("|") + if "." in (x, y): + dd = 50 + else: + xi, x = threadorder[x] + yi, y = threadorder[y] + dd = min(abs(xi - yi), 50) + d += dd + return d + + +def insert_into_threaded(atoms, threaded, threadorder): + min_idx, min_d = 0, 1000 + for i, t in enumerate(threaded): + # calculate distance + d = pairwise_distance(atoms, t, threadorder) + if d < min_d: + min_idx = i + min_d = d + + i = min_idx + t = threaded[i] + threaded.insert(i, atoms) + logger.debug("Insert %s before %s (d=%d)", atoms, t, min_d) + + +def sort_layout(thread, listfile, column=0): + """ + Sort the syntelog table according to chromomomal positions. First orient the + contents against threadbed, then for contents not in threadbed, insert to + the nearest neighbor. 
+ """ + outfile = listfile.rsplit(".", 1)[0] + ".sorted.list" + threadorder = thread.order + fw = open(outfile, "w") + lt = DictFile(listfile, keypos=column, valuepos=None) + threaded = [] + imported = set() + for t in thread: + accn = t.accn + if accn not in lt: + continue + + imported.add(accn) + atoms = lt[accn] + threaded.append(atoms) + + assert len(threaded) == len(imported) + + total = sum(1 for x in open(listfile)) + logger.debug("Total: %d, currently threaded: %d", total, len(threaded)) + fp = open(listfile) + for row in fp: + atoms = row.split() + accn = atoms[0] + if accn in imported: + continue + insert_into_threaded(atoms, threaded, threadorder) + + for atoms in threaded: + print("\t".join(atoms), file=fw) + + fw.close() + logger.debug("File `%s` sorted to `%s`.", outfile, thread.filename) + + +def layout(args): + """ + %prog layout omgfile taxa + + Build column formatted gene lists after omgparse(). Use species list + separated by comma in place of taxa, e.g. "BR,BO,AN,CN" + """ + p = OptionParser(layout.__doc__) + p.add_argument("--sort", help="Sort layout file based on bedfile") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + omgfile, taxa = args + listfile = omgfile.rsplit(".", 1)[0] + ".list" + taxa = taxa.split(",") + ntaxa = len(taxa) + fw = open(listfile, "w") + + data = [] + fp = open(omgfile) + for row in fp: + genes, idxs = row.split() + row = ["."] * ntaxa + genes = genes.split(",") + ixs = [int(x) for x in idxs.split(",")] + for gene, idx in zip(genes, ixs): + row[idx] = gene + txs = ",".join(taxa[x] for x in ixs) + print("\t".join(("\t".join(row), txs)), file=fw) + data.append(row) + + coldata = zip(*data) + ngenes = [] + for i, tx in enumerate(taxa): + genes = [x for x in coldata[i] if x != "."] + genes = set(x.strip("|") for x in genes) + ngenes.append((len(genes), tx)) + + details = ", ".join("{0} {1}".format(a, b) for a, b in ngenes) + total = sum(a for a, b in ngenes) + s = "A list of {0} 
orthologous families that collectively".format(len(data)) + s += " contain a total of {0} genes ({1})".format(total, details) + print(s, file=sys.stderr) + + fw.close() + lastcolumn = ntaxa + 1 + cmd = "sort -k{0},{0} {1} -o {1}".format(lastcolumn, listfile) + sh(cmd) + + logger.debug("List file written to `%s`.", listfile) + sort = opts.sort + if sort: + thread = Bed(sort) + sort_layout(thread, listfile) + + +def omgparse(args): + """ + %prog omgparse work + + Parse the OMG outputs to get gene lists. + """ + p = OptionParser(omgparse.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (work,) = args + omgfiles = glob(op.join(work, "gf*.out")) + for omgfile in omgfiles: + omg = OMGFile(omgfile) + best = omg.best() + for bb in best: + genes, taxa = zip(*bb) + print("\t".join((",".join(genes), ",".join(taxa)))) + + +def group(args): + """ + %prog group anchorfiles + + Group the anchors into ortho-groups. Can input multiple anchor files. + """ + p = OptionParser(group.__doc__) + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + anchorfiles = args + groups = Grouper() + for anchorfile in anchorfiles: + ac = AnchorFile(anchorfile) + for a, b, idx in ac.iter_pairs(): + groups.join(a, b) + + logger.debug("Created %d groups with %d members.", len(groups), groups.num_members) + + outfile = opts.outfile + fw = must_open(outfile, "w") + for g in groups: + print(",".join(sorted(g)), file=fw) + fw.close() + + return outfile + + +def omg(args): + """ + %prog omg weightsfile + + Run Sankoff's OMG algorithm to get orthologs. Download OMG code at: + + + This script only writes the partitions, but not launch OMGMec. You may need to: + + $ parallel "java -cp ~/code/OMGMec TestOMGMec {} 4 > {}.out" ::: work/gf????? + + Then followed by omgparse() to get the gene lists. 
+ """ + p = OptionParser(omg.__doc__) + + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + weightsfiles = args + groupfile = group(weightsfiles + ["--outfile=groups"]) + + weights = get_weights(weightsfiles) + info = get_info() + + fp = open(groupfile) + + work = "work" + mkdir(work) + for i, row in enumerate(fp): + gf = op.join(work, "gf{0:05d}".format(i)) + genes = row.rstrip().split(",") + + fw = open(gf, "w") + contents = "" + npairs = 0 + for gene in genes: + gene_pairs = weights[gene] + for a, b, c in gene_pairs: + if b not in genes: + continue + + contents += "weight {0}".format(c) + "\n" + contents += info[a] + "\n" + contents += info[b] + "\n\n" + npairs += 1 + + header = "a group of genes :length ={0}".format(npairs) + print(header, file=fw) + print(contents, file=fw) + + fw.close() + + +def geneinfo(bed, genomeidx, ploidy): + bedfile = bed.filename + p = bedfile.split(".")[0] + idx = genomeidx[p] + pd = ploidy[p] + infofile = p + ".info" + + if not need_update(bedfile, infofile): + return infofile + + fwinfo = open(infofile, "w") + + for s in bed: + chr = "".join(x for x in s.seqid if x in string.digits) + try: + chr = int(chr) + except ValueError: + chr = "0" + + print( + "\t".join(str(x) for x in (s.accn, chr, s.start, s.end, s.strand, idx, pd)), + file=fwinfo, + ) + fwinfo.close() + + logger.debug("Update info file `%s`.", infofile) + + return infofile + + +def omgprepare(args): + """ + %prog omgprepare ploidy anchorsfile blastfile + + Prepare to run Sankoff's OMG algorithm to get orthologs. 
+ """ + p = OptionParser(omgprepare.__doc__) + p.add_argument("--norbh", action="store_true", help="Disable RBH hits") + p.add_argument( + "--pctid", default=0, type=int, help="Percent id cutoff for RBH hits" + ) + p.add_argument("--cscore", default=90, type=int, help="C-score cutoff for RBH hits") + p.set_stripnames() + p.set_beds() + + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + ploidy, anchorfile, blastfile = args + norbh = opts.norbh + pctid = opts.pctid + cs = opts.cscore + qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) + + fp = open(ploidy) + genomeidx = dict((x.split()[0], i) for i, x in enumerate(fp)) + fp.close() + + ploidy = DictFile(ploidy) + + geneinfo(qbed, genomeidx, ploidy) + geneinfo(sbed, genomeidx, ploidy) + + pf = blastfile.rsplit(".", 1)[0] + cscorefile = pf + ".cscore" + cscore([blastfile, "-o", cscorefile, "--cutoff=0", "--pct"]) + ac = AnchorFile(anchorfile) + pairs = set((a, b) for a, b, i in ac.iter_pairs()) + logger.debug("Imported %d pairs from `%s`.", len(pairs), anchorfile) + + weightsfile = pf + ".weights" + fp = open(cscorefile) + fw = open(weightsfile, "w") + npairs = 0 + for row in fp: + a, b, c, pct = row.split() + c, pct = float(c), float(pct) + c = int(c * 100) + if (a, b) not in pairs: + if norbh: + continue + if c < cs: + continue + if pct < pctid: + continue + c /= 10 # This severely penalizes RBH against synteny + + print("\t".join((a, b, str(c))), file=fw) + npairs += 1 + fw.close() + + logger.debug("Write %d pairs to `%s`.", npairs, weightsfile) + + +def make_ortholog(blocksfile, rbhfile, orthofile): + # Generate mapping both ways + adict = DictFile(rbhfile) + bdict = DictFile(rbhfile, keypos=1, valuepos=0) + adict.update(bdict) + + fp = open(blocksfile) + fw = open(orthofile, "w") + nrecruited = 0 + for row in fp: + a, b = row.split() + if b == ".": + if a in adict: + b = adict[a] + nrecruited += 1 + b += "'" + print("\t".join((a, b)), file=fw) + + 
logger.debug("Recruited %d pairs from RBH.", nrecruited) + fp.close() + fw.close() + + +def ortholog(args): + """ + %prog ortholog species_a species_b + + Run a sensitive pipeline to find orthologs between two species a and b. + The pipeline runs LAST and generate .lifted.anchors. + + `--full` mode would assume 1-to-1 quota synteny blocks as the backbone of + such predictions. Extra orthologs will be recruited from reciprocal best + match (RBH). + """ + p = OptionParser(ortholog.__doc__) + p.add_argument( + "--dbtype", + default="nucl", + choices=("nucl", "prot"), + help="Molecule type of subject database", + ) + + p.add_argument( + "--full", + default=False, + action="store_true", + help="Run in full 1x1 mode, including blocks and RBH", + ) + p.add_argument("--cscore", default=0.7, type=float, help="C-score cutoff") + p.add_argument( + "--dist", default=20, type=int, help="Extent of flanking regions to search" + ) + p.add_argument( + "-n", + "--min_size", + dest="n", + type=int, + default=4, + help="minimum number of anchors in a cluster", + ) + p.add_argument("--quota", help="Quota align parameter") + p.add_argument("--exclude", help="Remove anchors from a previous run") + p.add_argument( + "--self_remove", + default=98, + type=float, + help="Remove self hits that are above this percent identity", + ) + p.add_argument( + "--no_strip_names", + default=False, + action="store_true", + help="Do not strip alternative splicing (e.g. At5g06540.1 -> At5g06540)", + ) + p.add_argument( + "--liftover_dist", + type=int, + help="Distance to extend from liftover. 
Defaults to half of --dist", + ) + p.set_cpus() + dotplot_group = p.set_dotplot_opts() + dotplot_group.add_argument( + "--notex", default=False, action="store_true", help="Do not use tex" + ) + dotplot_group.add_argument( + "--no_dotplot", default=False, action="store_true", help="Do not make dotplot" + ) + p.add_argument( + "--ignore_zero_anchor", + default=False, + action="store_true", + help="Ignore this pair of ortholog identification instead of throwing an error when performing many pairs of cataloging.", + ) + p.add_argument( + "--align_soft", + default="last", + choices=("last", "blast", "diamond_blastp"), + help="Sequence alignment software. Default for both and . Users could also use for both and , or for .", + ) + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + a, b = args + dbtype = opts.dbtype + ignore_zero_anchor = opts.ignore_zero_anchor + suffix = ".cds" if dbtype == "nucl" else ".pep" + abed, afasta = a + ".bed", a + suffix + bbed, bfasta = b + ".bed", b + suffix + ccscore = opts.cscore + quota = opts.quota + exclude = opts.exclude + dist = "--dist={0}".format(opts.dist) + minsize_flag = "--min_size={}".format(opts.n) + cpus_flag = "--cpus={}".format(opts.cpus) + align_soft = opts.align_soft + + aprefix = op.basename(a) + bprefix = op.basename(b) + pprefix = ".".join((aprefix, bprefix)) + qprefix = ".".join((bprefix, aprefix)) + last = pprefix + ".last" + if need_update((afasta, bfasta), last, warn=True): + if align_soft == "blast": + blast_main([bfasta, afasta, cpus_flag], dbtype) + elif dbtype == "prot" and align_soft == "diamond_blastp": + diamond_blastp_main([bfasta, afasta, cpus_flag], dbtype) + else: + last_main([bfasta, afasta, cpus_flag], dbtype) + + self_remove = opts.self_remove + if a == b: + lastself = filtered_blastfile_name(last, self_remove, 0, inverse=True) + if need_update(last, lastself, warn=True): + blast_filter( + [last, "--hitlen=0", f"--pctid={self_remove}", "--inverse", "--noself"] 
+ ) + last = lastself + + filtered_last = last + ".filtered" + if need_update(last, filtered_last, warn=True): + # If we are doing filtering based on another file then we don't run cscore anymore + dargs = [last, "--cscore={}".format(ccscore)] + if exclude: + dargs += ["--exclude={}".format(exclude)] + if opts.no_strip_names: + dargs += ["--no_strip_names"] + blastfilter_main(dargs) + + anchors = pprefix + ".anchors" + lifted_anchors = pprefix + ".lifted.anchors" + pdf = pprefix + ".pdf" + if not opts.full: + if need_update(filtered_last, lifted_anchors, warn=True): + dargs = [ + filtered_last, + anchors, + minsize_flag, + dist, + "--liftover={0}".format(last), + ] + if opts.no_strip_names: + dargs += ["--no_strip_names"] + if opts.liftover_dist: + dargs += ["--liftover_dist={}".format(opts.liftover_dist)] + try: + scan(dargs) + except ValueError as e: + if ignore_zero_anchor: + logger.debug(str(e)) + logger.debug("Ignoring this error and continuing...") + return + else: + raise ValueError(e) from e + if quota: + quota_main([lifted_anchors, "--quota={0}".format(quota), "--screen"]) + if need_update(anchors, pdf, warn=True) and not opts.no_dotplot: + from jcvi.graphics.dotplot import dotplot_main + + dargs = [anchors] + if opts.nostdpf: + dargs += ["--nostdpf"] + if opts.nochpf: + dargs += ["--nochpf"] + if opts.skipempty: + dargs += ["--skipempty"] + if opts.genomenames: + dargs += ["--genomenames", opts.genomenames] + if opts.theme: + dargs += ["--theme", opts.theme] + if opts.notex: + dargs += ["--notex"] + dotplot_main(dargs) + return + + if need_update(filtered_last, anchors, warn=True): + if opts.no_strip_names: + scan([filtered_last, anchors, dist, "--no_strip_names"]) + else: + scan([filtered_last, anchors, dist]) + + ooanchors = pprefix + ".1x1.anchors" + if need_update(anchors, ooanchors, warn=True): + quota_main([anchors, "--quota=1:1", "--screen"]) + + lifted_anchors = pprefix + ".1x1.lifted.anchors" + if need_update((last, ooanchors), lifted_anchors, 
warn=True): + if opts.no_strip_names: + liftover([last, ooanchors, dist, "--no_strip_names"]) + else: + liftover([last, ooanchors, dist]) + + pblocks = pprefix + ".1x1.blocks" + qblocks = qprefix + ".1x1.blocks" + if need_update(lifted_anchors, [pblocks, qblocks], warn=True): + mcscan([abed, lifted_anchors, "--iter=1", "-o", pblocks]) + mcscan([bbed, lifted_anchors, "--iter=1", "-o", qblocks]) + + rbh = pprefix + ".rbh" + if need_update(last, rbh, warn=True): + cscore([last, "-o", rbh]) + + portho = pprefix + ".ortholog" + qortho = qprefix + ".ortholog" + if need_update([pblocks, qblocks, rbh], [portho, qortho], warn=True): + make_ortholog(pblocks, rbh, portho) + make_ortholog(qblocks, rbh, qortho) + + +def tandem_main( + blast_file, + cds_file, + bed_file, + N=3, + P=50, + is_self=True, + evalue=0.01, + strip_name=".", + ofile=sys.stderr, + genefam=False, +): + if genefam: + N = 1e5 + + # get the sizes for the CDS first + f = Fasta(cds_file) + sizes = dict(f.itersizes()) + + # retrieve the locations + bed = Bed(bed_file) + order = bed.order + + if is_self: + # filter the blast file + g = Grouper() + fp = open(blast_file) + for row in fp: + b = BlastLine(row) + query_len = sizes[b.query] + subject_len = sizes[b.subject] + if b.hitlen < min(query_len, subject_len) * P / 100.0: + continue + + query = gene_name(b.query, sep=strip_name) + subject = gene_name(b.subject, sep=strip_name) + qi, q = order[query] + si, s = order[subject] + + if abs(qi - si) <= N and b.evalue <= evalue: + if genefam: + g.join(query, subject) + elif q.seqid == s.seqid: + g.join(query, subject) + + else: + homologs = Grouper() + fp = open(blast_file) + for row in fp: + b = BlastLine(row) + query_len = sizes[b.query] + subject_len = sizes[b.subject] + if b.hitlen < min(query_len, subject_len) * P / 100.0: + continue + if b.evalue > evalue: + continue + + query = gene_name(b.query, sep=strip_name) + subject = gene_name(b.subject, sep=strip_name) + homologs.join(query, subject) + + if genefam: + g 
= homologs + else: + g = Grouper() + for i, atom in enumerate(bed): + for x in range(1, N + 1): + if all( + [ + i - x >= 0, + bed[i - x].seqid == atom.seqid, + homologs.joined(bed[i - x].accn, atom.accn), + ] + ): + leni = sizes[bed[i].accn] + lenx = sizes[bed[i - x].accn] + if abs(leni - lenx) > max(leni, lenx) * (1 - P / 100.0): + continue + g.join(bed[i - x].accn, atom.accn) + + # dump the grouper + fw = must_open(ofile, "w") + ngenes, nfamilies = 0, 0 + families = [] + for group in sorted(g): + if len(group) >= 2: + print(",".join(sorted(group)), file=fw) + ngenes += len(group) + nfamilies += 1 + families.append(sorted(group)) + + longest_family = max(families, key=lambda x: len(x)) + + # generate reports + print("Proximal paralogues (dist=%d):" % N, file=sys.stderr) + print("Total %d genes in %d families" % (ngenes, nfamilies), file=sys.stderr) + print( + "Longest families (%d): %s" % (len(longest_family), ",".join(longest_family)), + file=sys.stderr, + ) + + return families + + +def tandem(args): + """ + %prog tandem blast_file cds_file bed_file [options] + + Find tandem gene clusters that are separated by N genes, based on filtered + blast_file by enforcing alignments between any two genes at least 50% + (or user specified value) of either gene. + + pep_file can also be used in same manner. + """ + p = OptionParser(tandem.__doc__) + p.add_argument( + "--tandem_Nmax", + dest="tandem_Nmax", + type=int, + default=3, + help="merge tandem genes within distance", + ) + p.add_argument( + "--percent_overlap", + type=int, + default=50, + help="tandem genes have >=x% aligned sequence, x=0-100", + ) + p.set_align(evalue=0.01) + p.add_argument( + "--not_self", + default=False, + action="store_true", + help="provided is not self blast file", + ) + p.add_argument( + "--strip_gene_name", + dest="sep", + type=str, + default=".", + help="strip alternative splicing. 
Use None for no stripping.", + ) + p.add_argument( + "--genefamily", + dest="genefam", + action="store_true", + help="compile gene families based on similarity", + ) + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + blast_file, cds_file, bed_file = args + N = opts.tandem_Nmax + P = opts.percent_overlap + is_self = not opts.not_self + sep = opts.sep + ofile = opts.outfile + + tandem_main( + blast_file, + cds_file, + bed_file, + N=N, + P=P, + is_self=is_self, + evalue=opts.evalue, + strip_name=sep, + ofile=ofile, + genefam=opts.genefam, + ) + + +if __name__ == "__main__": + main() diff --git a/jcvi/compara/fractionation.py b/jcvi/compara/fractionation.py new file mode 100644 index 00000000..61e59413 --- /dev/null +++ b/jcvi/compara/fractionation.py @@ -0,0 +1,854 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Catalog gene losses, and bites within genes. +""" +import sys + +from itertools import groupby + +from ..apps.base import ActionDispatcher, OptionParser, logger, sh +from ..formats.bed import Bed +from ..formats.blast import Blast +from ..utils.cbook import gene_name +from ..utils.grouper import Grouper +from ..utils.range import range_minmax, range_overlap, range_distance + +from .synteny import check_beds + + +def main(): + + actions = ( + # Identify true gene loss + ("loss", "extract likely gene loss candidates"), + ("validate", "confirm synteny loss against CDS bed overlaps"), + ("summary", "provide summary of fractionation"), + ("gaps", "check gene locations against gaps"), + # Gene specific status + ("gffselect", "dump gff for the missing genes"), + ("genestatus", "tag genes based on translation from GMAP models"), + # Specific study for napus (requires specific datasets) + ("napus", "extract gene loss vs diploid ancestors (napus)"), + ("merge", "merge protein quartets table with registry (napus)"), + ("segment", "merge adjacent gene loss into segmental loss (napus)"), + ("offdiag", 
"find gene pairs that are off diagonal"), + ("diff", "calculate diff of size of syntenic regions"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def offdiag(args): + """ + %prog offdiag diploid.napus.1x1.lifted.anchors + + Find gene pairs that are off diagnoal. "Off diagonal" are the pairs that are + not on the orthologous chromosomes. For example, napus chrA01 and brapa A01. + """ + p = OptionParser(offdiag.__doc__) + p.set_beds() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (anchorsfile,) = args + qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts) + + fp = open(anchorsfile) + pf = "-".join(anchorsfile.split(".")[:2]) + header = "Block-id|Napus|Diploid|Napus-chr|Diploid-chr|RBH?".split("|") + print("\t".join(header)) + i = -1 + for row in fp: + if row[0] == "#": + i += 1 + continue + q, s, score = row.split() + rbh = "no" if score[-1] == "L" else "yes" + qi, qq = qorder[q] + si, ss = sorder[s] + oqseqid = qseqid = qq.seqid + osseqid = sseqid = ss.seqid + sseqid = sseqid.split("_")[0][-3:] + if qseqid[0] == "A": + qseqid = qseqid[-3:] # A09 => A09 + elif qseqid[0] == "C": + qseqid = "C0" + qseqid[-1] # C9 => C09 + else: + continue + if qseqid == sseqid or sseqid[-2:] == "nn": + continue + block_id = pf + "-block-{0}".format(i) + print("\t".join((block_id, q, s, oqseqid, osseqid, rbh))) + + +def diff(args): + """ + %prog diff simplefile + + Calculate difference of pairwise syntenic regions. 
+ """ + from jcvi.utils.cbook import SummaryStats + + p = OptionParser(diff.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (simplefile,) = args + fp = open(simplefile) + data = [x.split() for x in fp] + spans = [] + for block_id, ab in groupby(data[1:], key=lambda x: x[0]): + a, b = list(ab) + aspan, bspan = a[4], b[4] + aspan, bspan = int(aspan), int(bspan) + spans.append((aspan, bspan)) + aspans, bspans = zip(*spans) + dspans = [b - a for a, b, in spans] + s = SummaryStats(dspans) + print("For a total of {0} blocks:".format(len(dspans)), file=sys.stderr) + print("Sum of A: {0}".format(sum(aspans)), file=sys.stderr) + print("Sum of B: {0}".format(sum(bspans)), file=sys.stderr) + print("Sum of Delta: {0} ({1})".format(sum(dspans), s), file=sys.stderr) + + +def estimate_size(accns, bed, order, conservative=True): + """ + Estimate the bp length for the deletion tracks, indicated by the gene accns. + True different levels of estimates vary on conservativeness. + """ + accns = [order[x] for x in accns] + ii, bb = zip(*accns) + mini, maxi = min(ii), max(ii) + if not conservative: # extend one gene + mini -= 1 + maxi += 1 + minb = bed[mini] + maxb = bed[maxi] + assert minb.seqid == maxb.seqid + distmode = "ss" if conservative else "ee" + ra = (minb.seqid, minb.start, minb.end, "+") + rb = (maxb.seqid, maxb.start, maxb.end, "+") + + dist, orientation = range_distance(ra, rb, distmode=distmode) + assert dist != -1 + return dist + + +def segment(args): + """ + %prog segment loss.ids bedfile + + Merge adjacent gene loss into segmental loss. + + Then based on the segmental loss, estimate amount of DNA loss in base pairs. + Two estimates can be given: + - conservative: just within the start and end of a single gene + - aggressive: extend the deletion track to the next gene + + The real deletion size is within these estimates. 
+ """ + from jcvi.formats.base import SetFile + + p = OptionParser(segment.__doc__) + p.add_argument( + "--chain", + default=1, + type=int, + help="Allow next N genes to be chained", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + idsfile, bedfile = args + bed = Bed(bedfile) + order = bed.order + ids = SetFile(idsfile) + losses = Grouper() + skip = opts.chain + for i, a in enumerate(bed): + a = a.accn + for j in range(i + 1, i + 1 + skip): + if j >= len(bed): + break + b = bed[j].accn + if a in ids: + losses.join(a, a) + if a in ids and b in ids: + losses.join(a, b) + + losses = list(losses) + singletons = [x for x in losses if len(x) == 1] + segments = [x for x in losses if len(x) > 1] + ns, nm, nt = len(singletons), len(segments), len(losses) + assert ns + nm == nt + + # Summary for all segments + for x in sorted(singletons) + sorted(segments): + print( + "\t".join( + str(x) + for x in ("|".join(sorted(x)), len(x), estimate_size(x, bed, order)) + ) + ) + + # Find longest segment stretch + if segments: + mx, maxsegment = max([(len(x), x) for x in segments]) + print("Longest stretch: run of {0} genes".format(mx), file=sys.stderr) + print(" {0}".format("|".join(sorted(maxsegment))), file=sys.stderr) + seg_asize = sum(estimate_size(x, bed, order) for x in segments) + seg_bsize = sum( + estimate_size(x, bed, order, conservative=False) for x in segments + ) + else: + seg_asize = seg_bsize = 0 + + sing_asize = sum(estimate_size(x, bed, order) for x in singletons) + sing_bsize = sum( + estimate_size(x, bed, order, conservative=False) for x in singletons + ) + total_asize = sing_asize + seg_asize + total_bsize = sing_bsize + seg_bsize + print( + "Singleton ({0}): {1} - {2} bp".format(ns, sing_asize, sing_bsize), + file=sys.stderr, + ) + print( + "Segment ({0}): {1} - {2} bp".format(nm, seg_asize, seg_bsize), file=sys.stderr + ) + print( + "Total ({0}): {1} - {2} bp".format(nt, total_asize, total_bsize), + file=sys.stderr, + 
) + print( + "Average ({0}): {1} bp".format(nt, (total_asize + total_bsize) / 2), + file=sys.stderr, + ) + + +def merge(args): + """ + %prog merge protein-quartets registry LOST + + Merge protein quartets table with dna quartets registry. This is specific + to the napus project. + """ + from jcvi.formats.base import DictFile + + p = OptionParser(merge.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + quartets, registry, lost = args + qq = DictFile(registry, keypos=1, valuepos=3) + lost = DictFile(lost, keypos=1, valuepos=0, delimiter="|") + qq.update(lost) + fp = open(quartets) + cases = { + "AN,CN": 4, + "BO,AN,CN": 8, + "BO,CN": 2, + "BR,AN": 1, + "BR,AN,CN": 6, + "BR,BO": 3, + "BR,BO,AN": 5, + "BR,BO,AN,CN": 9, + "BR,BO,CN": 7, + } + ip = { + "syntenic_model": "Syntenic_model_excluded_by_OMG", + "complete": "Predictable", + "partial": "Truncated", + "pseudogene": "Pseudogene", + "random": "Match_random", + "real_ns": "Transposed", + "gmap_fail": "GMAP_fail", + "AN LOST": "AN_LOST", + "CN LOST": "CN_LOST", + "BR LOST": "BR_LOST", + "BO LOST": "BO_LOST", + "outside": "Outside_synteny_blocks", + "[NF]": "Not_found", + } + for row in fp: + atoms = row.strip().split("\t") + genes = atoms[:4] + tag = atoms[4] + a, b, c, d = [qq.get(x, ".").rsplit("-", 1)[-1] for x in genes] + qqs = [c, d, a, b] + for i, q in enumerate(qqs): + if atoms[i] != ".": + qqs[i] = "syntenic_model" + # Make comment + comment = "Case{0}".format(cases[tag]) + dots = sum([1 for x in genes if x == "."]) + if dots == 1: + idx = genes.index(".") + status = qqs[idx] + status = ip[status] + comment += "-" + status + print(row.strip() + "\t" + "\t".join(qqs + [comment])) + + +def gffselect(args): + """ + %prog gffselect gmaplocation.bed expectedlocation.bed translated.ids tag + + Try to match up the expected location and gmap locations for particular + genes. translated.ids was generated by fasta.translate --ids. 
tag must be + one of "complete|pseudogene|partial". + """ + from jcvi.formats.bed import intersectBed_wao + + p = OptionParser(gffselect.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 4: + sys.exit(not p.print_help()) + + gmapped, expected, idsfile, tag = args + data = get_tags(idsfile) + completeness = dict((a.replace("mrna", "path"), c) for (a, b, c) in data) + + seen = set() + idsfile = expected.rsplit(".", 1)[0] + ".ids" + fw = open(idsfile, "w") + cnt = 0 + for a, b in intersectBed_wao(expected, gmapped): + if b is None: + continue + aname, bbname = a.accn, b.accn + bname = bbname.split(".")[0] + if completeness[bbname] != tag: + continue + if aname == bname: + if bname in seen: + continue + seen.add(bname) + print(bbname, file=fw) + cnt += 1 + fw.close() + + logger.debug("Total {0} records written to `{1}`.".format(cnt, idsfile)) + + +def gaps(args): + """ + %prog gaps idsfile fractionationfile gapsbed + + Check gene locations against gaps. `idsfile` contains a list of IDs to query + into `fractionationfile` in order to get expected locations. 
+ """ + from jcvi.formats.base import DictFile + from jcvi.apps.base import popen + from jcvi.utils.cbook import percentage + + p = OptionParser(gaps.__doc__) + p.add_argument("--bdist", default=0, type=int, help="Base pair distance") + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + idsfile, frfile, gapsbed = args + bdist = opts.bdist + d = DictFile(frfile, keypos=1, valuepos=2) + bedfile = idsfile + ".bed" + fw = open(bedfile, "w") + fp = open(idsfile) + total = 0 + for row in fp: + id = row.strip() + hit = d[id] + tag, pos = get_tag(hit, None) + seqid, start, end = pos + start, end = max(start - bdist, 1), end + bdist + print("\t".join(str(x) for x in (seqid, start - 1, end, id)), file=fw) + total += 1 + fw.close() + + cmd = "intersectBed -a {0} -b {1} -v | wc -l".format(bedfile, gapsbed) + not_in_gaps = popen(cmd).read() + not_in_gaps = int(not_in_gaps) + in_gaps = total - not_in_gaps + print("Ids in gaps: {1}".format(total, percentage(in_gaps, total)), file=sys.stderr) + + +def get_tags(idsfile): + fp = open(idsfile) + data = [] + for row in fp: + mRNA, label = row.split() + labelatoms = label.split(",") + if label == "complete" or label == "contain_ns,complete": + tag = "complete" + if "cannot_translate" in labelatoms: + tag = "pseudogene" + elif "five_prime_missing" in labelatoms or "three_prime_missing" in labelatoms: + tag = "partial" + data.append((mRNA, label, tag)) + return data + + +def genestatus(args): + """ + %prog genestatus diploid.gff3.exon.ids + + Tag genes based on translation from GMAP models, using fasta.translate() + --ids. 
+ """ + p = OptionParser(genestatus.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (idsfile,) = args + data = get_tags(idsfile) + key = lambda x: x[0].split(".")[0] + for gene, cc in groupby(data, key=key): + cc = list(cc) + tags = [x[-1] for x in cc] + if "complete" in tags: + tag = "complete" + elif "partial" in tags: + tag = "partial" + else: + tag = "pseudogene" + print("\t".join((gene, tag))) + + +def summary(args): + """ + %prog summary diploid.napus.fractionation gmap.status + + Provide summary of fractionation. `fractionation` file is generated with + loss(). `gmap.status` is generated with genestatus(). + """ + from jcvi.formats.base import DictFile + from jcvi.utils.cbook import percentage, Registry + + p = OptionParser(summary.__doc__) + p.add_argument("--extra", help="Cross with extra tsv file") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + frfile, statusfile = args + status = DictFile(statusfile) + fp = open(frfile) + registry = Registry() # keeps all the tags for any given gene + for row in fp: + seqid, gene, tag = row.split() + if tag == ".": + registry[gene].append("outside") + else: + registry[gene].append("inside") + if tag[0] == "[": + registry[gene].append("no_syntenic_model") + if tag.startswith("[S]"): + registry[gene].append("[S]") + gstatus = status.get(gene, None) + if gstatus == "complete": + registry[gene].append("complete") + elif gstatus == "pseudogene": + registry[gene].append("pseudogene") + elif gstatus == "partial": + registry[gene].append("partial") + else: + registry[gene].append("gmap_fail") + elif tag.startswith("[NS]"): + registry[gene].append("[NS]") + if "random" in tag or "Scaffold" in tag: + registry[gene].append("random") + else: + registry[gene].append("real_ns") + elif tag.startswith("[NF]"): + registry[gene].append("[NF]") + else: + registry[gene].append("syntenic_model") + + inside = registry.count("inside") + outside = 
registry.count("outside") + syntenic = registry.count("syntenic_model") + non_syntenic = registry.count("no_syntenic_model") + s = registry.count("[S]") + ns = registry.count("[NS]") + nf = registry.count("[NF]") + complete = registry.count("complete") + pseudogene = registry.count("pseudogene") + partial = registry.count("partial") + gmap_fail = registry.count("gmap_fail") + random = registry.count("random") + real_ns = registry.count("real_ns") + + complete_models = registry.get_tag("complete") + pseudogenes = registry.get_tag("pseudogene") + partial_deletions = registry.get_tag("partial") + + m = "{0} inside synteny blocks\n".format(inside) + m += "{0} outside synteny blocks\n".format(outside) + m += "{0} has syntenic gene\n".format(syntenic) + m += "{0} lack syntenic gene\n".format(non_syntenic) + m += "{0} has sequence match in syntenic location\n".format(s) + m += "{0} has sequence match in non-syntenic location\n".format(ns) + m += "{0} has sequence match in un-ordered scaffolds\n".format(random) + m += "{0} has sequence match in real non-syntenic location\n".format(real_ns) + m += "{0} has no sequence match\n".format(nf) + m += "{0} syntenic sequence - complete model\n".format(percentage(complete, s)) + m += "{0} syntenic sequence - partial model\n".format(percentage(partial, s)) + m += "{0} syntenic sequence - pseudogene\n".format(percentage(pseudogene, s)) + m += "{0} syntenic sequence - gmap fail\n".format(percentage(gmap_fail, s)) + print(m, file=sys.stderr) + + aa = ["complete_models", "partial_deletions", "pseudogenes"] + bb = [complete_models, partial_deletions, pseudogenes] + for a, b in zip(aa, bb): + fw = open(a, "w") + print("\n".join(b), file=fw) + fw.close() + + extra = opts.extra + if extra: + registry.update_from(extra) + + fp.seek(0) + fw = open("registry", "w") + for row in fp: + seqid, gene, tag = row.split() + ts = registry[gene] + print("\t".join((seqid, gene, tag, "-".join(ts))), file=fw) + fw.close() + + logger.debug("Registry 
written.") + + +def get_tag(name, order): + if name[0] == "[": + tag, tname = name[1:].split("]") + seqid, se = tname.split(":") + start, end = se.split("-") + start, end = int(start), int(end) + else: + tag = None + xi, x = order[name] + seqid, start, end = x.seqid, x.start, x.end + return tag, (seqid, start, end) + + +def napus(args): + """ + %prog napus napus.bed brapa.boleracea.i1.blocks diploid.napus.fractionation + + Extract napus gene loss vs diploid ancestors. We are looking specifically + for anything that has the pattern: + + BR - BO or BR - BO + | | + AN CN + + Step 1: extract BR - BO syntenic pairs + Step 2: get diploid gene retention patterns from BR or BO as query + Step 3: look for if AN or CN is NS(non-syntenic) or NF(not found) and + specifically with NS, the NS location is actually the homeologous site. + Step 4: categorize gene losses into singleton, or segmental (defined as + consecutive losses with a maximum skip of 1 + """ + from jcvi.utils.cbook import SummaryStats + + p = OptionParser(napus.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + napusbed, brbo, dpnp = args + retention = {} + fp = open(dpnp) + for row in fp: + seqid, query, hit = row.split() + retention[query] = hit + + order = Bed(napusbed).order + + quartetsfile = "quartets" + fp = open(brbo) + fw = open(quartetsfile, "w") + AL = "AN LOST" + CL = "CN LOST" + for row in fp: + br, bo = row.split() + if "." in (br, bo): + continue + an, cn = retention[br], retention[bo] + row = "\t".join((br, bo, an, cn)) + if "." 
in (an, cn): + # print row + continue + + # label loss candidates + antag, anrange = get_tag(an, order) + cntag, cnrange = get_tag(cn, order) + + if range_overlap(anrange, cnrange): + if (antag, cntag) == ("NS", None): + row = row + "\t{0}|{1}".format(AL, br) + if (antag, cntag) == (None, "NS"): + row = row + "\t{0}|{1}".format(CL, bo) + + print(row, file=fw) + fw.close() + + logger.debug("Quartets and gene losses written to `{0}`.".format(quartetsfile)) + + # Parse the quartets file to extract singletons vs.segmental losses + fp = open(quartetsfile) + fw = open(quartetsfile + ".summary", "w") + data = [x.rstrip().split("\t") for x in fp] + skip = 1 # max distance between losses + + g = Grouper() + losses = [(len(x) == 5) for x in data] + for i, d in enumerate(losses): + if not d: + continue + g.join(i, i) + itag = data[i][-1].split("|")[0] + for j in range(i + 1, i + skip + 1): + jtag = data[j][-1].split("|")[0] + if j < len(losses) and losses[j] and itag == jtag: + g.join(i, j) + + losses = list(g) + singletons = [x for x in losses if len(x) == 1] + segments = [x for x in losses if len(x) > 1] + ns, nm = len(singletons), len(segments) + assert len(losses) == ns + nm + + grab_tag = lambda pool, tag: [ + x for x in pool if all(data[z][-1].startswith(tag) for z in x) + ] + + an_loss_singletons = grab_tag(singletons, AL) + cn_loss_singletons = grab_tag(singletons, CL) + als, cls = len(an_loss_singletons), len(cn_loss_singletons) + + an_loss_segments = grab_tag(segments, AL) + cn_loss_segments = grab_tag(segments, CL) + alm, clm = len(an_loss_segments), len(cn_loss_segments) + mixed = len(segments) - alm - clm + assert mixed == 0 + + logger.debug("Singletons: {0} (AN LOSS: {1}, CN LOSS: {2})".format(ns, als, cls)) + logger.debug("Segments: {0} (AN LOSS: {1}, CN LOSS: {2})".format(nm, alm, clm)) + print(SummaryStats([len(x) for x in losses]), file=sys.stderr) + + for x in singletons + segments: + print("### LENGTH =", len(x), file=fw) + for i in x: + 
print("\t".join(data[i]), file=fw) + fw.close() + + +def region_str(region): + return "{0}:{1}-{2}".format(*region) + + +def loss(args): + """ + %prog loss a.b.i1.blocks [a.b-genomic.blast] + + Extract likely gene loss candidates between genome a and b. + """ + p = OptionParser(loss.__doc__) + p.add_argument( + "--bed", + default=False, + action="store_true", + help="Genomic BLAST is in bed format", + ) + p.add_argument("--gdist", default=20, type=int, help="Gene distance") + p.add_argument( + "--bdist", + default=20000, + type=int, + help="Base pair distance", + ) + p.set_beds() + opts, args = p.parse_args(args) + + if len(args) not in (1, 2): + sys.exit(not p.print_help()) + + blocksfile = args[0] + emptyblast = len(args) == 1 + if emptyblast: + genomicblast = "empty.blast" + sh("touch {0}".format(genomicblast)) + else: + genomicblast = args[1] + + gdist, bdist = opts.gdist, opts.bdist + qbed, sbed, qorder, sorder, is_self = check_beds(blocksfile, p, opts) + blocks = [] + fp = open(blocksfile) + genetrack = {} + proxytrack = {} + for row in fp: + a, b = row.split() + genetrack[a] = b + blocks.append((a, b)) + + data = [] + for key, rows in groupby(blocks, key=lambda x: x[-1]): + rows = list(rows) + data.append((key, rows)) + + imax = len(data) - 1 + for i, (key, rows) in enumerate(data): + if i == 0 or i == imax: + continue + if key != ".": + continue + + before, br = data[i - 1] + after, ar = data[i + 1] + bi, bx = sorder[before] + ai, ax = sorder[after] + dist = abs(bi - ai) + if bx.seqid != ax.seqid or dist > gdist: + continue + + start, end = range_minmax(((bx.start, bx.end), (ax.start, ax.end))) + start, end = max(start - bdist, 1), end + bdist + proxy = (bx.seqid, start, end) + for a, b in rows: + proxytrack[a] = proxy + + tags = {} + if opts.bed: + bed = Bed(genomicblast, sorted=False) + key = lambda x: gene_name(x.accn.rsplit(".", 1)[0]) + for query, bb in groupby(bed, key=key): + bb = list(bb) + if query not in proxytrack: + continue + + proxy = 
proxytrack[query] + tag = "NS" + best_b = bb[0] + for b in bb: + hsp = (b.seqid, b.start, b.end) + if range_overlap(proxy, hsp): + tag = "S" + best_b = b + break + + hsp = (best_b.seqid, best_b.start, best_b.end) + proxytrack[query] = hsp + tags[query] = tag + + else: + blast = Blast(genomicblast) + for query, bb in blast.iter_hits(): + bb = list(bb) + query = gene_name(query) + if query not in proxytrack: + continue + + proxy = proxytrack[query] + tag = "NS" + best_b = bb[0] + for b in bb: + hsp = (b.subject, b.sstart, b.sstop) + if range_overlap(proxy, hsp): + tag = "S" + best_b = b + break + + hsp = (best_b.subject, best_b.sstart, best_b.sstop) + proxytrack[query] = hsp + tags[query] = tag + + for b in qbed: + accn = b.accn + target_region = genetrack[accn] + if accn in proxytrack: + target_region = region_str(proxytrack[accn]) + if accn in tags: + ptag = "[{0}]".format(tags[accn]) + else: + ptag = "[NF]" + target_region = ptag + target_region + + print("\t".join((b.seqid, accn, target_region))) + + if emptyblast: + sh("rm -f {0}".format(genomicblast)) + + +def validate(args): + """ + %prog validate diploid.napus.fractionation cds.bed + + Check whether [S] intervals overlap with CDS. 
+ """ + from jcvi.formats.bed import intersectBed_wao + + p = OptionParser(validate.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + fractionation, cdsbed = args + fp = open(fractionation) + + sbed = "S.bed" + fw = open(sbed, "w") + for row in fp: + a, b, c = row.split() + if not c.startswith("[S]"): + continue + + tag, (seqid, start, end) = get_tag(c, None) + print("\t".join(str(x) for x in (seqid, start - 1, end, b)), file=fw) + + fw.close() + + pairs = {} + for a, b in intersectBed_wao(sbed, cdsbed): + if b is None: + continue + pairs[a.accn] = b.accn + + validated = fractionation + ".validated" + fw = open(validated, "w") + fp.seek(0) + fixed = 0 + for row in fp: + a, b, c = row.split() + if b in pairs: + assert c.startswith("[S]") + c = pairs[b] + fixed += 1 + + print("\t".join((a, b, c)), file=fw) + + logger.debug("Fixed {0} [S] cases in `{1}`.".format(fixed, validated)) + fw.close() + + +if __name__ == "__main__": + main() diff --git a/jcvi/compara/ks.py b/jcvi/compara/ks.py new file mode 100644 index 00000000..2dfcf3e3 --- /dev/null +++ b/jcvi/compara/ks.py @@ -0,0 +1,1176 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Calculation of synonymous substitutions (Ks). 
+""" +import csv +import os +import os.path as op +import sys + +from functools import partial +from itertools import combinations, product +from math import exp, log, pi, sqrt +from typing import Optional + +import numpy as np + +from Bio import AlignIO, SeqIO +from Bio.Align.Applications import ClustalwCommandline, MuscleCommandline + +from ..apps.base import ( + ActionDispatcher, + OptionParser, + Popen, + cleanup, + getpath, + iglob, + logger, + mkdir, + sh, +) +from ..formats.base import LineFile, must_open +from ..graphics.base import AbstractLayout, adjust_spines, markup, plt, savefig +from ..utils.cbook import gene_name +from ..utils.table import write_csv + +CLUSTALW_BIN = partial(getpath, name="CLUSTALW2", warn="warn") +MUSCLE_BIN = partial(getpath, name="MUSCLE", warn="warn") +PAL2NAL_BIN = partial(getpath, name="PAL2NAL", warn="warn") +PAML_BIN = partial(getpath, name="PAML", warn="warn") + + +class AbstractCommandline: + def run(self): + r = Popen(str(self)) + return r.communicate() + + +class YnCommandline(AbstractCommandline): + """Little commandline for yn00.""" + + def __init__(self, ctl_file, command=PAML_BIN("yn00")): + self.ctl_file = ctl_file + self.parameters = [] + self.command = command + + def __str__(self): + return self.command + " %s >/dev/null" % self.ctl_file + + +class MrTransCommandline(AbstractCommandline): + """Simple commandline faker.""" + + def __init__( + self, + prot_align_file, + nuc_file, + output_file, + outfmt="paml", + command=PAL2NAL_BIN("pal2nal.pl"), + ): + self.prot_align_file = prot_align_file + self.nuc_file = nuc_file + self.output_file = output_file + self.outfmt = outfmt + self.command = command + + self.parameters = [] + + def __str__(self): + return self.command + " %s %s -output %s > %s" % ( + self.prot_align_file, + self.nuc_file, + self.outfmt, + self.output_file, + ) + + +def main(): + + actions = ( + ("batch", "compute ks for a set of anchors file"), + ("fromgroups", "flatten the gene families into 
pairs"), + ("prepare", "prepare pairs of sequences"), + ("calc", "calculate Ks between pairs of sequences"), + ("subset", "subset pre-calculated Ks according to pairs file"), + ("gc3", "filter the Ks results to remove high GC3 genes"), + ("report", "generate a distribution of Ks values"), + ("multireport", "generate several Ks value distributions in same figure"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def batch(args): + """ + %prog batch all.cds *.anchors + + Compute Ks values for a set of anchors file. This will generate a bunch of + work directories for each comparisons. The anchorsfile should be in the form + of specie1.species2.anchors. + """ + from jcvi.apps.grid import MakeManager + + p = OptionParser(batch.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + cdsfile = args[0] + anchors = args[1:] + workdirs = [".".join(op.basename(x).split(".")[:2]) for x in anchors] + for wd in workdirs: + mkdir(wd) + + mm = MakeManager() + for wd, ac in zip(workdirs, anchors): + pairscdsfile = wd + ".cds.fasta" + cmd = "python -m jcvi.apps.ks prepare {} {} -o {}".format( + ac, cdsfile, pairscdsfile + ) + mm.add((ac, cdsfile), pairscdsfile, cmd) + ksfile = wd + ".ks" + cmd = "python -m jcvi.apps.ks calc {} -o {} --workdir {}".format( + pairscdsfile, ksfile, wd + ) + mm.add(pairscdsfile, ksfile, cmd) + mm.write() + + +class LayoutLine(object): + def __init__(self, row, delimiter=","): + args = row.rstrip().split(delimiter) + args = [x.strip() for x in args] + self.ksfile = args[0] + self.components = int(args[1]) + self.label = args[2] + self.color = args[3] + self.marker = args[4] + + def __str__(self): + return ", ".join( + str(x) + for x in (self.ksfile, self.components, self.label, self.color, self.marker) + ) + + +class Layout(AbstractLayout): + def __init__(self, filename, delimiter=",", seed: Optional[int] = None): + super().__init__(filename) + if not op.exists(filename): + ksfiles = iglob(".", 
"*.ks") + header = "Ks file|ncomponents|label|color|marker".split("|") + contents = [] + for ksfile in ksfiles: + leg = op.basename(ksfile).rsplit(".", 1)[0] + if leg.count(".") == 1: + leg = leg.replace(".", " *vs.* ") + contents.append((ksfile, "1", leg, "", "")) + write_csv(header, contents, comment=True, filename=filename) + + fp = open(filename) + for row in fp: + if row[0] == "#": + continue + self.append(LayoutLine(row, delimiter=delimiter)) + + self.assign_colors(seed=seed) + self.assign_markers(seed=seed) + + +class KsPlot(object): + def __init__(self, ax, ks_max, bins, legendp="upper left"): + + self.ax = ax + self.ks_max = ks_max + self.interval = ks_max / bins + self.legendp = legendp + self.lines = [] + self.labels = [] + + def add_data( + self, + data, + components=1, + label="Ks", + color="r", + marker=".", + fill=False, + fitted=True, + kde=False, + ): + + ax = self.ax + ks_max = self.ks_max + interval = self.interval + if kde: + marker = None + + line, line_mixture = plot_ks_dist( + ax, + data, + interval, + components, + ks_max, + color=color, + marker=marker, + fill=fill, + fitted=fitted, + kde=kde, + ) + self.lines.append(line) + self.labels.append(label) + + if fitted: + self.lines.append(line_mixture) + self.labels.append(label + " (fitted)") + + def draw(self, title="*Ks* distribution", filename="Ks_plot.pdf"): + + ax = self.ax + ks_max = self.ks_max + lines = self.lines + labels = [markup(x) for x in self.labels] + legendp = self.legendp + if len(lines) > 1: + leg = ax.legend( + lines, + labels, + loc=legendp, + shadow=True, + fancybox=True, + prop={"size": 10}, + ) + leg.get_frame().set_alpha(0.5) + + ax.set_xlim((0, ks_max - self.interval)) + ylim = ax.get_ylim()[-1] + ax.set_ylim(0, ylim) + ax.set_title(markup(title), fontweight="bold") + ax.set_xlabel(markup("Synonymous substitutions per site (*Ks*)")) + ax.set_ylabel("Percentage of gene pairs (bin={})".format(self.interval)) + + ax.set_xticklabels(ax.get_xticks(), family="Helvetica") + 
ax.set_yticklabels(ax.get_yticks(), family="Helvetica") + + adjust_spines(ax, ["left", "bottom"], outward=True) + + if filename: + savefig(filename, dpi=300) + + +def multireport(args): + """ + %prog multireport layoutfile + + Generate several Ks value distributions in the same figure. If the layout + file is missing then a template file listing all ks files will be written. + + The layout file contains the Ks file, number of components, colors, and labels: + + # Ks file, ncomponents, label, color, marker + LAP.sorghum.ks, 1, LAP-sorghum, r, o + SES.sorghum.ks, 1, SES-sorghum, g, + + MOL.sorghum.ks, 1, MOL-sorghum, m, ^ + + If color or marker is missing, then a random one will be assigned. + """ + p = OptionParser(multireport.__doc__) + p.set_outfile(outfile="Ks_plot.pdf") + add_plot_options(p) + opts, args, iopts = p.set_image_options(args, figsize="8x6") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (layoutfile,) = args + ks_min = opts.vmin + ks_max = opts.vmax + bins = opts.bins + fill = opts.fill + layout = Layout(layoutfile, seed=iopts.seed) + print(layout, file=sys.stderr) + + fig = plt.figure(1, (iopts.w, iopts.h)) + ax = fig.add_axes([0.12, 0.13, 0.8, 0.8]) + + kp = KsPlot(ax, ks_max, bins, legendp=opts.legendp) + for lo in layout: + data = KsFile(lo.ksfile) + data = [x.ng_ks for x in data] + data = [x for x in data if ks_min <= x <= ks_max] + kp.add_data( + data, + lo.components, + label=lo.label, + color=lo.color, + marker=lo.marker, + fill=fill, + fitted=opts.fit, + kde=opts.kde, + ) + + kp.draw(title=opts.title, filename=opts.outfile) + + +def get_GC3(cdsfile): + from jcvi.formats.fasta import Fasta + + f = Fasta(cdsfile, lazy=True) + GC3 = {} + for name, rec in f.iteritems_ordered(): + positions = rec.seq[2::3].upper() + gc_counts = sum(1 for x in positions if x in "GC") + gc_ratio = gc_counts * 1.0 / len(positions) + GC3[name] = gc_ratio + + return GC3 + + +def plot_GC3(GC3, cdsfile, fill="white"): + from jcvi.graphics.histogram import 
histogram + + numberfile = "{0}.gc3".format(cdsfile) + fw = must_open(numberfile, "w") + fw.write("\n".join(map(str, GC3.values()))) + fw.close() + histogram( + numberfile, + vmin=0, + vmax=1, + xlabel="GC3", + title=cdsfile, + bins=50, + skip=0, + ascii=False, + fill=fill, + ) + + logger.debug("{0} GC3 values plotted to {1}.pdf".format(len(GC3), numberfile)) + + +def gc3(args): + """ + %prog gc3 ksfile cdsfile [cdsfile2] -o newksfile + + Filter the Ks results to remove high GC3 genes. High GC3 genes are + problematic in Ks calculation - see Tang et al. 2010 PNAS. Specifically, the + two calculation methods produce drastically different results for these + pairs. Therefore we advise to remoeve these high GC3 genes. This is often + the case for studying cereal genes. + + If 2 genomes are involved, the cdsfile of the 2nd genome can be provided + concatenated or separated. + """ + p = OptionParser(gc3.__doc__) + p.add_argument( + "--plot", default=False, action="store_true", help="Also plot the GC3 histogram" + ) + p.set_outfile() + + opts, args = p.parse_args(args) + + outfile = opts.outfile + plot = opts.plot + + if not 1 < len(args) < 4: + sys.exit(not p.print_help()) + + ks_file, cdsfile = args[:2] + GC3 = get_GC3(cdsfile) + if plot: + plot_GC3(GC3, cdsfile, fill="green") + + if len(args) == 3: + cdsfile2 = args[2] + GC3_2 = get_GC3(cdsfile2) + GC3.update(GC3_2) + if plot: + plot_GC3(GC3_2, cdsfile2, fill="lightgreen") + + data = KsFile(ks_file) + noriginals = len(data) + + fw = must_open(outfile, "w") + writer = csv.writer(fw) + writer.writerow(fields.split(",")) + nlines = 0 + cutoff = 0.75 + for d in data: + a, b = d.name.split(";") + aratio, bratio = GC3[a], GC3[b] + if (aratio + bratio) / 2 > cutoff: + continue + writer.writerow(d) + nlines += 1 + logger.debug("{0} records written (from {1}).".format(nlines, noriginals)) + + +def extract_pairs(abed, bbed, groups): + """ + Called by fromgroups(), extract pairs specific to a pair of species. 
def find_first_isoform(a, f, max_isoforms=100):
    """
    Resolve gene name `a` to an identifier that is present in `f`.

    Args:
        a: gene name to look up.
        f: any container supporting `in` (e.g. a Fasta index, dict or set).
        max_isoforms: number of numeric suffixes to try (`a.0` up to
            `a.<max_isoforms - 1>`); defaults to the historical limit of 100.

    Returns:
        `a` itself when it is in `f`; otherwise the lowest-numbered
        `a.<i>` found in `f`; otherwise `a` unchanged (callers then fail on
        the subsequent lookup).
    """
    if a in f:
        return a
    candidates = (f"{a}.{i}" for i in range(max_isoforms))
    return next((ia for ia in candidates if ia in f), a)
+ """ + from jcvi.formats.fasta import Fasta + + p = OptionParser(prepare.__doc__) + p.set_outfile() + + opts, args = p.parse_args(args) + outfile = opts.outfile + + if len(args) == 2: + pairsfile, cdsfile = args + pepfile = None + elif len(args) == 3: + pairsfile, cdsfile, pepfile = args + else: + sys.exit(not p.print_help()) + + f = Fasta(cdsfile) + fp = open(pairsfile) + fw = must_open(outfile, "w") + if pepfile: + assert outfile != "stdout", "Please specify outfile name." + f2 = Fasta(pepfile) + fw2 = must_open(outfile + ".pep", "w") + for row in fp: + if row[0] == "#": + continue + a, b = row.split()[:2] + if a == b: + logger.debug("Self pairs found: {0} - {1}. Ignored".format(a, b)) + continue + + if a not in f: + a = find_first_isoform(a, f) + assert a, a + if b not in f: + b = find_first_isoform(b, f) + assert b, b + + acds = f[a] + bcds = f[b] + SeqIO.write((acds, bcds), fw, "fasta") + if pepfile: + apep = f2[a] + bpep = f2[b] + SeqIO.write((apep, bpep), fw2, "fasta") + fw.close() + if pepfile: + fw2.close() + + +def calc(args): + """ + %prog calc [prot.fasta] cds.fasta > out.ks + + Protein file is optional. If only one file is given, it is assumed to + be CDS sequences with correct frame (frame 0). Results will be written to + stdout. Both protein file and nucleotide file are assumed to be Fasta format, + with adjacent records as the pairs to compare. + + Author: Haibao Tang , Brad Chapman, Jingping Li + Calculate synonymous mutation rates for gene pairs + + This does the following: + 1. Fetches a protein pair. + 2. Aligns the protein pair with clustalw (default) or muscle. + 3. Convert the output to Fasta format. + 4. Use this alignment info to align gene sequences using PAL2NAL + 5. Run PAML yn00 to calculate synonymous mutation rates. + """ + from jcvi.formats.fasta import translate + + p = OptionParser(calc.__doc__) + p.add_argument( + "--longest", + action="store_true", + help="Get longest ORF, only works if no pep file, e.g. 
def find_synonymous(input_file, work_dir):
    """Run yn00 to find the synonymous subsitution rate for the alignment.

    The yn00 control file and report are written inside `work_dir`; the
    process working directory is switched there for the run and is always
    restored afterwards, even when yn00 or the file handling fails.

    Args:
        input_file: alignment file; only its basename is written to the
            control file, so it must be located in `work_dir`.
        work_dir: directory in which yn00 is run.

    Returns:
        Tuple `(ds_yn, dn_yn, ds_ng, dn_ng)`. The Yang-Nielsen values are
        floats, the Nei-Gojobori values are the strings found in the report;
        any value that could not be located is None.
    """
    ctl_file = "yn-input.ctl"
    output_file = "nuc-subs.yn"

    cwd = os.getcwd()
    os.chdir(work_dir)
    try:
        # create the .ctl file
        with open(ctl_file, "w") as ctl_h:
            ctl_h.write(
                "seqfile = %s\noutfile = %s\nverbose = 0\n"
                % (op.basename(input_file), output_file)
            )
            ctl_h.write("icode = 0\nweighting = 0\ncommonf3x4 = 0\n")

        cl = YnCommandline(ctl_file)
        print("\tyn00:", cl, file=sys.stderr)
        cl.run()

        # Read the report once; both parsers below work on these lines
        with open(output_file) as output_h:
            report_lines = output_h.readlines()
    finally:
        os.chdir(cwd)

    ds_value_yn = None
    ds_value_ng = None
    dn_value_yn = None
    dn_value_ng = None

    # Nei-Gojobori: the "(dN dS)" pair sits five lines below the header
    for idx, row in enumerate(report_lines):
        if "Nei & Gojobori" in row:
            try:
                values = report_lines[idx + 5].split("(")[1].split(")")[0].split()
                dn_value_ng, ds_value_ng = values
            except (IndexError, ValueError):
                # Truncated or unexpected report: values stay None and the
                # failure is reported below
                dn_value_ng = ds_value_ng = None
            break

    # Yang: the last data line containing "+-" (and not the "dS" header) wins
    for line in report_lines:
        if "+-" in line and "dS" not in line:
            parts = line.split(" +-")
            ds_value_yn = extract_subs_value(parts[1])
            dn_value_yn = extract_subs_value(parts[0])

    if ds_value_yn is None or ds_value_ng is None:
        print("yn00 didn't work: \n%s" % "".join(report_lines), file=sys.stderr)

    return ds_value_yn, dn_value_yn, ds_value_ng, dn_value_ng
def run_mrtrans(align_fasta, recs, work_dir, outfmt="paml"):
    """Align nucleotide sequences with mrtrans and the protein alignment.

    Args:
        align_fasta: protein alignment in FASTA format (anything whose `str()`
            is the FASTA text).
        recs: nucleotide records, in the same order as the aligned proteins.
            Ids longer than 30 characters are shortened in place.
        work_dir: directory receiving the intermediate and output files.
        outfmt: output format passed on to the pal2nal command line.

    Returns:
        Path of the codon alignment on success, None when pal2nal reported
        an error.
    """
    align_file = op.join(work_dir, "prot-align.fasta")
    nuc_file = op.join(work_dir, "nuc.fasta")
    output_file = op.join(work_dir, "nuc-align.mrtrans")

    # make the prot_align file and nucleotide file
    raw_align_file = align_file + "0"
    with open(raw_align_file, "w") as align_h0:
        align_h0.write(str(align_fasta))
    prot_seqs = dict(
        enumerate(rec.seq for rec in SeqIO.parse(raw_align_file, "fasta"))
    )

    with open(align_file, "w") as align_h:
        for i, rec in enumerate(recs):
            if len(rec.id) > 30:
                # Shorten long ids, keeping them distinct via the record index
                rec.id = rec.id[:28] + "_" + str(i)
                rec.description = ""
            print(">{0}\n{1}".format(rec.id, prot_seqs[i]), file=align_h)

    # Close (and thereby flush) the nucleotide file before the external
    # program reads it
    with open(nuc_file, "w") as nuc_h:
        SeqIO.write(recs, nuc_h, "fasta")

    # run the program
    cl = MrTransCommandline(align_file, nuc_file, output_file, outfmt=outfmt)
    r, e = cl.run()
    if e is None:
        print("\tpal2nal:", cl, file=sys.stderr)
        return output_file

    if e.read().find("could not translate") >= 0:
        print("***pal2nal could not translate", file=sys.stderr)
    # Any error output means there is no usable alignment
    return None
def muscle_align_protein(recs, work_dir, outfmt="fasta", inputorder=True):
    """
    Align given proteins with muscle.
    recs are iterable of Biopython SeqIO objects

    Args:
        recs: protein records to align.
        work_dir: directory receiving `prot-start.fasta` and `prot.aln`.
        outfmt: "fasta" or "clustal"; format of the returned alignment text.
        inputorder: when True, reorder the alignment to match the input order
            via `muscle_inputorder`.

    Returns:
        The alignment formatted as `outfmt`; "" when `muscle_inputorder`
        raises ValueError; None for any other `outfmt` value.
    """
    fasta_file = op.join(work_dir, "prot-start.fasta")
    align_file = op.join(work_dir, "prot.aln")
    # Close (and thereby flush) the input file before muscle reads it
    with open(fasta_file, "w") as fasta_h:
        SeqIO.write(recs, fasta_h, "fasta")

    muscle_cl = MuscleCommandline(
        cmd=MUSCLE_BIN("muscle"),
        input=fasta_file,
        out=align_file,
        seqtype="protein",
        clwstrict=True,
    )
    muscle_cl()
    alignment = AlignIO.read(muscle_cl.out, "clustal")

    if inputorder:
        try:
            muscle_inputorder(muscle_cl.input, muscle_cl.out)
        except ValueError:
            return ""
        # muscle_inputorder rewrote the alignment file in FASTA format
        alignment = AlignIO.read(muscle_cl.out, "fasta")

    print("\tDoing muscle alignment: %s" % muscle_cl, file=sys.stderr)
    if outfmt == "fasta":
        return alignment.format("fasta")
    if outfmt == "clustal":
        return alignment.format("clustal")
    return None
def subset(args):
    """
    %prog subset pairsfile ksfile1 ksfile2 ... -o pairs.ks

    Subset some pre-calculated ks ka values (in ksfile) according to pairs
    in tab delimited pairsfile/anchorfile.
    """
    p = OptionParser(subset.__doc__)
    p.add_argument(
        "--noheader", action="store_true", help="don't write ksfile header line"
    )
    p.add_argument(
        "--block", action="store_true", help="preserve block structure in input"
    )
    p.set_stripnames()
    p.set_outfile()

    opts, args = p.parse_args(args)

    # One pairsfile plus at least one ksfile; several ksfiles are allowed,
    # as the usage line documents
    if len(args) < 2:
        sys.exit(not p.print_help())

    pairsfile, ksfiles = args[0], args[1:]
    noheader = opts.noheader
    block = opts.block
    if block:
        # Block output is tab-delimited, so the csv header does not apply
        noheader = True
    outfile = opts.outfile

    # Later ksfiles override earlier ones on duplicate pair names
    ksvals = {}
    for ksfile in ksfiles:
        ksvals.update(
            dict(
                (line.name, line)
                for line in KsFile(ksfile, strip_names=opts.strip_names)
            )
        )

    fw = must_open(outfile, "w")

    if not noheader:
        print(fields, file=fw)

    i = j = 0  # i: records written, j: pairs without a Ks record
    with open(pairsfile) as fp:
        for row in fp:
            if row[0] == "#":
                if block:
                    print(row.strip(), file=fw)
                continue
            a, b = row.split()[:2]
            name = ";".join((a, b))
            if name not in ksvals:
                # Try the reversed orientation before giving up
                name = ";".join((b, a))
                if name not in ksvals:
                    j += 1
                    print("\t".join((a, b, ".", ".")), file=fw)
                    continue
            ksline = ksvals[name]
            if block:
                print("\t".join(str(x) for x in (a, b, ksline.ks)), file=fw)
            else:
                ksline.name = ";".join((a, b))
                print(ksline, file=fw)
            i += 1
    fw.close()

    logger.debug("{0} pairs not found in ksfiles".format(j))
    logger.debug("{0} ks records written to `{1}`".format(i, outfile))
    return outfile
class KsLine:
    """
    One record of a Ks result file: `name,yn_ks,yn_ka,ng_ks,ng_ka`.

    `name` is normally `geneA;geneB`, in which case `gene_a` and `gene_b` are
    also set. `ks` is an alias of the Nei-Gojobori estimate `ng_ks`. Values
    that cannot be parsed as floats are stored as -1.
    """

    def __init__(self, row, strip_names=False):
        args = row.strip().split(",")
        self.name = args[0]
        self.yn_ks = self.get_float(args[1])
        self.yn_ka = self.get_float(args[2])
        self.ng_ks = self.get_float(args[3])
        self.ng_ka = self.get_float(args[4])
        self.ks = self.ng_ks
        if ";" in self.name:
            self.gene_a, self.gene_b = self.name.split(";")
            if strip_names:
                self.gene_a = gene_name(self.gene_a)
                self.gene_b = gene_name(self.gene_b)

    def get_float(self, x):
        """Return `x` as a float, or -1 when it is not a valid number."""
        try:
            return float(x)
        except (TypeError, ValueError):
            # Only conversion failures are expected here; anything else
            # (e.g. KeyboardInterrupt) must propagate
            return -1

    def __str__(self):
        return ",".join(
            str(x) for x in (self.name, self.yn_ks, self.yn_ka, self.ng_ks, self.ng_ka)
        )

    @property
    def anchorline(self):
        """Tab-separated `gene_a, gene_b, ks` line for anchors-style output."""
        return "\t".join(
            (gene_name(self.gene_a), gene_name(self.gene_b), "{:.3f}".format(self.ks))
        )
def get_mixture(data, components):
    """
    Fit a Gaussian mixture to log-transformed values with the external
    `gmm-bic` program and return the fitted parameters.

    Example of the returned lists for two components:

    probs = [.476, .509]
    mus = [.69069, -.15038]
    variances = [.468982e-1, .959052e-1]

    Args:
        data: iterable of positive values; values <= 0.05 and infinite values
            are ignored.
        components: number of mixture components requested from `gmm-bic`.

    Returns:
        Tuple `(probs, mus, sigmas)` of lists, one entry per reported component,
        taken from columns 2-4 of the `#`-prefixed output rows of `gmm-bic`.
    """
    from tempfile import NamedTemporaryFile

    from jcvi.apps.base import popen

    probs, mus, sigmas = [], [], []
    # Filter before counting so that the number of values announced to
    # gmm-bic matches what is actually written to the file
    log_data = [log(x) for x in data if 0.05 < x < float("inf")]

    # A unique scratch file avoids clobbering an unrelated `tmp` in the
    # working directory and lets several runs proceed in parallel
    with NamedTemporaryFile(
        "w", prefix="ks_mixture.", suffix=".txt", delete=False
    ) as fw:
        fw.write("\n".join("%.4f" % x for x in log_data))

    try:
        cmd = "gmm-bic {0} {1} {2}".format(components, len(log_data), fw.name)
        pipe = popen(cmd)

        for row in pipe:
            if row[0] != "#":
                continue

            atoms = row.split(",")
            a, b, c = (float(x) for x in atoms[1:4])

            mus.append(a)
            sigmas.append(b)
            probs.append(c)
    finally:
        # Remove the scratch file even when gmm-bic output cannot be parsed
        cleanup(fw.name)

    return probs, mus, sigmas
def add_plot_options(p):
    """
    Register the Ks-plot options shared by `report` and `multireport`.

    Args:
        p: option parser exposing an argparse-style `add_argument` method.

    Adds --fit, --kde, --vmin, --vmax, --bins, --legendp, --fill and --title.
    """
    p.add_argument(
        "--fit", default=False, action="store_true", help="Plot fitted lines"
    )
    p.add_argument(
        "--kde", default=False, action="store_true", help="Use KDE smoothing"
    )
    p.add_argument("--vmin", default=0.0, type=float, help="Minimum value, inclusive")
    p.add_argument("--vmax", default=3.0, type=float, help="Maximum value, inclusive")
    p.add_argument(
        "--bins", default=60, type=int, help="Number of bins to plot in the histogram"
    )
    p.add_argument("--legendp", default="upper right", help="Place of the legend")
    # store_true with default False: passing --fill turns filling ON, so the
    # help text must say so
    p.add_argument(
        "--fill",
        default=False,
        action="store_true",
        help="Fill the histogram area",
    )
    p.add_argument("--title", default="*Ks* distribution", help="Title of the plot")
+ describe the median Ks, Ka values, as well as the distribution in stem-leaf plot + """ + from jcvi.utils.cbook import SummaryStats + from jcvi.graphics.histogram import stem_leaf_plot + + p = OptionParser(report.__doc__) + p.add_argument( + "--pdf", + default=False, + action="store_true", + help="Generate graphic output for the histogram", + ) + p.add_argument( + "--components", + default=1, + type=int, + help="Number of components to decompose peaks", + ) + add_plot_options(p) + opts, args, iopts = p.set_image_options(args, figsize="5x5") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (ks_file,) = args + data = KsFile(ks_file) + ks_min = opts.vmin + ks_max = opts.vmax + bins = opts.bins + + for f in fields.split(",")[1:]: + columndata = [getattr(x, f) for x in data] + ks = "ks" in f + if not ks: + continue + + columndata = [x for x in columndata if ks_min <= x <= ks_max] + + st = SummaryStats(columndata) + title = "{0} ({1}): ".format(descriptions[f], ks_file) + title += "Median:{0:.3f} (1Q:{1:.3f}|3Q:{2:.3f}||".format( + st.median, st.firstq, st.thirdq + ) + title += "Mean:{0:.3f}|Std:{1:.3f}||N:{2})".format(st.mean, st.sd, st.size) + + tbins = (0, ks_max, bins) if ks else (0, 0.6, 10) + digit = 2 if (ks_max * 1.0 / bins) < 0.1 else 1 + stem_leaf_plot(columndata, *tbins, digit=digit, title=title) + + if not opts.pdf: + return + + components = opts.components + data = [x.ng_ks for x in data] + data = [x for x in data if ks_min <= x <= ks_max] + + fig = plt.figure(1, (iopts.w, iopts.h)) + ax = fig.add_axes([0.12, 0.1, 0.8, 0.8]) + kp = KsPlot(ax, ks_max, opts.bins, legendp=opts.legendp) + kp.add_data(data, components, fill=opts.fill, fitted=opts.fit, kde=opts.kde) + kp.draw(title=opts.title) + + +if __name__ == "__main__": + main() diff --git a/jcvi/compara/pad.py b/jcvi/compara/pad.py new file mode 100644 index 00000000..dad5fda5 --- /dev/null +++ b/jcvi/compara/pad.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +This 
def make_arrays(blastfile, qpadbed, spadbed, qpadnames, spadnames):
    """
    This function makes three matrices: observed, expected and logmp. The logmp
    contains the statistical significance for each comparison.

    Args:
        blastfile: BLAST tabular file; each hit is assigned to the pair of
            segments containing its query and subject genes.
        qpadbed, spadbed: Bed objects whose seqids are the segment names.
        qpadnames, spadnames: segment names defining the row / column order.

    Returns:
        (m x n) numpy array of `-log(P)` where P is the Poisson probability of
        the observed hit count given the count expected from segment sizes.
        P is floored at 1e-250 and the result is floored at 0.
    """
    from scipy.stats.distributions import poisson

    m, n = len(qpadnames), len(spadnames)
    qpadorder, spadorder = qpadbed.order, spadbed.order
    qpadid = {a: i for i, a in enumerate(qpadnames)}
    spadid = {a: i for i, a in enumerate(spadnames)}
    qpadlen = {a: len(b) for a, b in qpadbed.sub_beds()}
    spadlen = {a: len(b) for a, b in spadbed.sub_beds()}

    qsize, ssize = len(qpadbed), len(spadbed)

    assert sum(qpadlen.values()) == qsize
    assert sum(spadlen.values()) == ssize

    # Populate arrays of observed counts and expected counts
    logger.debug("Initialize array of size ({0} x {1})".format(m, n))
    observed = np.zeros((m, n))
    all_dots = 0
    with open(blastfile) as fp:
        for row in fp:
            b = BlastLine(row)
            _, q = qpadorder[b.query]
            _, s = spadorder[b.subject]
            observed[qpadid[q.seqid], spadid[s.seqid]] += 1
            all_dots += 1

    assert int(round(observed.sum())) == all_dots

    logger.debug("Total area: {0} x {1}".format(qsize, ssize))
    S = qsize * ssize
    # Expected hits are proportional to the area of each (query, subject) cell
    qlens = np.array([qpadlen[a] for a in qpadnames], dtype=float)
    slens = np.array([spadlen[b] for b in spadnames], dtype=float)
    expected = (all_dots * np.outer(qlens, slens)) / S

    assert int(round(expected.sum())) == all_dots

    # Calculate the statistical significance for each cell, evaluating the
    # Poisson pmf on the whole matrix at once instead of cell by cell
    pois = np.maximum(poisson.pmf(observed, expected), 1e-250)  # Underflow
    logmp = np.maximum(-np.log(pois), 0)

    return logmp
+ """ + from jcvi.formats.cdt import CDT + + p = OptionParser(pad.__doc__) + p.set_beds() + p.add_argument( + "--cutoff", + default=0.3, + type=float, + help="The clustering cutoff to call similar", + ) + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + cutoff = opts.cutoff + blastfile, cdtfile = args + qbed, sbed, qorder, sorder, is_self = check_beds(blastfile, p, opts) + + cdt = CDT(cdtfile) + qparts = list(cdt.iter_partitions(cutoff=cutoff)) + sparts = list(cdt.iter_partitions(cutoff=cutoff, gtr=False)) + + qid, sid = {}, {} + for i, part in enumerate(qparts): + qid.update(dict((x, i) for x in part)) + for i, part in enumerate(sparts): + sid.update(dict((x, i) for x in part)) + + # Without writing files, conversion from PAD to merged PAD is done in memory + for q in qbed: + q.seqid = qid[q.seqid] + for s in sbed: + s.seqid = sid[s.seqid] + + qnames = range(len(qparts)) + snames = range(len(sparts)) + + logmp = make_arrays(blastfile, qbed, sbed, qnames, snames) + m, n = logmp.shape + pvalue_cutoff = 1e-30 + cutoff = -log(pvalue_cutoff) + + significant = [] + for i in range(m): + for j in range(n): + score = logmp[i, j] + if score < cutoff: + continue + significant.append((qparts[i], sparts[j], score)) + + for a, b, score in significant: + print("|".join(a), "|".join(b), score) + + logger.debug( + "Collected {0} PAR comparisons significant at (P < {1}).".format( + len(significant), pvalue_cutoff + ) + ) + + return significant + + +def get_segments(ranges, extra, minsegment=40): + """ + Given a list of Range, perform chaining on the ranges and select a highest + scoring subset and cut based on their boundaries. Let's say the projection + of the synteny blocks onto one axis look like the following. + + 1=====10......20====30....35====~~ + + Then the segmentation will yield a block [1, 20), [20, 35), using an + arbitrary right extension rule. Extra are additional end breaks for + chromosomes. 
def _pad_prefix(bedfile):
    """Return `bedfile` with everything after the first dot of its basename removed."""
    dirname, basename = op.split(bedfile)
    return op.join(dirname, basename.split(".")[0])


def cluster(args):
    """
    %prog cluster blastfile anchorfile --qbed qbedfile --sbed sbedfile

    Cluster the segments and form PAD. This is the method described in Tang et
    al. (2010) PNAS paper. The anchorfile defines a list of synteny blocks,
    based on which the genome on one or both axis can be chopped up into pieces
    and clustered.
    """
    from jcvi.utils.range import Range

    p = OptionParser(cluster.__doc__)
    p.set_beds()
    p.add_argument(
        "--minsize", default=10, type=int, help="Only segment using blocks >= size"
    )
    p.add_argument(
        "--path", default="~/scratch/bin", help="Path to the CLUSTER 3.0 binary"
    )

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    blastfile, anchorfile = args
    qbed, sbed, qorder, sorder, is_self = check_beds(blastfile, p, opts)

    minsize = opts.minsize
    ac = AnchorFile(anchorfile)
    qranges, sranges = [], []
    qextra = [x[1:] for x in qbed.get_breaks()]
    sextra = [x[1:] for x in sbed.get_breaks()]

    # Project every synteny block onto both axes as a Range that carries the
    # block's running number and its span as the score
    block_id = 0
    for block in ac.iter_blocks(minsize=minsize):
        q, s = list(zip(*block))[:2]
        q = [qorder[x][0] for x in q]
        s = [sorder[x][0] for x in s]
        minq, maxq = min(q), max(q)
        mins, maxs = min(s), max(s)
        block_id += 1

        qr = Range("0", minq, maxq, maxq - minq, block_id)
        sr = Range("0", mins, maxs, maxs - mins, block_id)
        qranges.append(qr)
        sranges.append(sr)

    qpads = list(get_segments(qranges, qextra))
    spads = list(get_segments(sranges, sextra))

    suffix = ".pad.bed"
    # Strip the extension from the file name only, so that `./q.bed`,
    # `../q.bed` or a dotted directory name do not truncate the prefix
    qpf = _pad_prefix(opts.qbed)
    spf = _pad_prefix(opts.sbed)
    qpadfile = qpf + suffix
    spadfile = spf + suffix
    qnpads, qpadnames = write_PAD_bed(qpadfile, qpf, qpads, qbed)
    snpads, spadnames = write_PAD_bed(spadfile, spf, spads, sbed)

    qpadbed, spadbed = Bed(qpadfile), Bed(spadfile)

    logmp = make_arrays(blastfile, qpadbed, spadbed, qpadnames, spadnames)
    m, n = logmp.shape

    # The matrix lives next to the query PAD file; only the base name of the
    # subject prefix is used so that no directory separator ends up mid-name
    matrixfile = ".".join((qpf, op.basename(spf), "logmp.txt"))
    with open(matrixfile, "w") as fw:
        header = ["o"] + spadnames
        print("\t".join(header), file=fw)
        for i in range(m):
            row = [qpadnames[i]] + ["{0:.1f}".format(x) for x in logmp[i, :]]
            print("\t".join(row), file=fw)

    # Run CLUSTER 3.0 (Pearson correlation, average linkage)
    cmd = op.join(opts.path, "cluster")
    cmd += " -g 2 -e 2 -m a -f {0}".format(matrixfile)
    pf = matrixfile.rsplit(".", 1)[0]
    cdtfile = pf + ".cdt"
    if need_update(matrixfile, cdtfile):
        sh(cmd)
+ """ + + def __init__(self, pedfile: str): + super().__init__(pedfile) + with open(self.filename, encoding="utf-8") as fp: + for row in fp: + row = row.strip() + if row[0] == "#": # header + continue + if not row: + continue + atoms = row.split() + _, name, dad, mom = atoms[:4] + dad = dad if dad != "0" else None + mom = mom if mom != "0" else None + s = Sample(name, dad, mom) + self[s.name] = s + self._check() + + def _check(self): + """ + # Check if all nodes are assigned, including the roots + """ + terminal_nodes = set() + for s in self: + dad, mom = self[s].dad, self[s].mom + if dad and dad not in self: + terminal_nodes.add(dad) + if mom and mom not in self: + terminal_nodes.add(mom) + for s in terminal_nodes: + logger.info("Adding %s to pedigree", s) + self[s] = Sample(s, None, None) + self.terminal_nodes = terminal_nodes + + def to_graph( + self, inbreeding_dict: Dict[str, SampleInbreeding], title: str = "" + ) -> nx.DiGraph: + """ + Convert the pedigree to a graph. + """ + graph_styles = {"labelloc": "b", "label": title, "splines": "curved"} + edge_styles = {"arrowhead": "none", "color": "lightslategray"} + G = nx.DiGraph(**graph_styles) + for s in self: + dad, mom = self[s].dad, self[s].mom + if dad: + G.add_edge(dad, s, **edge_styles) + if mom: + G.add_edge(mom, s, **edge_styles) + # Map colors to terminal nodes + terminal_nodes = [s for s in self if self[s].is_terminal] + colors = dict(zip(terminal_nodes, set3_n(len(terminal_nodes)))) + for s in self: + inb = inbreeding_dict[s] + label = s + if inb.mean_inbreeding > 0.01: + label += f"\n(F={inb.mean_inbreeding:.2f})" + dosage = inb.dosage + fillcolor = [f"{colors[k]};{v:.2f}" for k, v in dosage.items()] + fillcolor = ":".join(fillcolor) + # Hack to make the color appear on the wedge + if fillcolor.count(";") == 1: + fillcolor += ":white" + else: + fillcolor = fillcolor.rsplit(";", 1)[0] + node_styles = { + "color": "none", + "fillcolor": fillcolor, + "fixedsize": "true", + "fontname": "Helvetica", + 
"fontsize": "10", + "height": "0.6", + "label": label, + "shape": "circle", + "style": "wedged", + "width": "0.6", + } + for k, v in node_styles.items(): + G._node[s][k] = v + return G + + +class GenotypeCollection(dict): + """ + Store genotypes for each sample. + """ + + def add(self, s: str, ploidy: int): + """ + Add genotypes for a fixed sample (usually terminal). + """ + self[s] = [f"{s}_{i:02d}" for i in range(ploidy)] + + def cross(self, s: str, dad: str, mom: str, ploidy: int): + """ + Cross two samples to generate genotype for a new sample. + """ + dad_genotype = self[dad] + mom_genotype = self[mom] + gamete_ploidy = ploidy // 2 + dad_gamete = sample(dad_genotype, gamete_ploidy) + mom_gamete = sample(mom_genotype, gamete_ploidy) + sample_genotype = sorted(dad_gamete + mom_gamete) + self[s] = sample_genotype + + def inbreeding_coef(self, s: str) -> float: + """ + Calculate inbreeding coefficient for a sample. + + Traditional inbreeding coefficient (F) is a measure of the probability + that two alleles at a locus are identical by descent. This definition is + not applicable for polyploids. + + Here we use a simpler measure of inbreeding coefficient, which is the + proportion of alleles that are non-unique in a genotype. Or we should + really call it "Proportion inbred". + """ + genotype = self[s] + ploidy = len(genotype) + unique = len(set(genotype)) + return 1 - unique / ploidy + + def dosage(self, s: str) -> Counter: + """ + Calculate dosage for a sample. + """ + genotype = self[s] + return Counter(allele.rsplit("_", 1)[0] for allele in genotype) + + +def simulate_one_iteration(ped: Pedigree, ploidy: int) -> GenotypeCollection: + """ + Simulate one iteration of genotypes. 
def calculate_inbreeding(
    ped: Pedigree,
    ploidy: int,
    N: int,
) -> Dict[str, SampleInbreeding]:
    """
    Run N simulations of the pedigree and summarise every sample.

    For each sample the result holds the mean and standard deviation of its
    inbreeding coefficient across simulations, plus its dosage: allele counts
    pooled over all simulations, divided by `ploidy * N` and rounded to three
    decimals.
    """
    logger.info("Simulating %d samples with ploidy=%d", N, ploidy)
    simulations = [simulate_one_iteration(ped, ploidy) for _ in range(N)]
    total_alleles = ploidy * N

    results = {}
    for name in ped:
        coefs = []
        pooled = Counter()
        for genotypes in simulations:
            coefs.append(genotypes.inbreeding_coef(name))
            pooled = pooled + genotypes.dosage(name)
        # normalize
        dosage = {}
        for origin, count in pooled.items():
            dosage[origin] = round(count / total_alleles, 3)
        results[name] = SampleInbreeding(
            name, float(np.mean(coefs)), float(np.std(coefs)), dosage
        )
    return results
+ """ + p = OptionParser(pedigree.__doc__) + p.add_argument("--ploidy", default=2, type=int, help="Ploidy") + p.add_argument("--N", default=10000, type=int, help="Number of samples") + p.add_argument("--title", default="", help="Title of the graph") + opts, args, iopts = p.set_image_options(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (pedfile,) = args + ped = Pedigree(pedfile) + inb = calculate_inbreeding(ped, opts.ploidy, opts.N) + print("Sample\tProportion Inbreeding\tStd dev.") + for _, v in inb.items(): + print(v) + + G = ped.to_graph(inb, title=opts.title) + A = nx.nx_agraph.to_agraph(G) + image_file = f"{pedfile}.{iopts.format}" + A.draw(image_file, prog="dot") + logger.info("Pedigree graph written to `%s`", image_file) + + +def main(): + actions = (("pedigree", "Plot pedigree and calculate inbreeding coefficients"),) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +if __name__ == "__main__": + main() diff --git a/jcvi/compara/phylogeny.py b/jcvi/compara/phylogeny.py new file mode 100644 index 00000000..5233595f --- /dev/null +++ b/jcvi/compara/phylogeny.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# +# phylogeny.py +# compara +# +# Created by Haibao Tang on 05/21/20 +# Copyright © 2020 Haibao Tang. All rights reserved. 
def lcn(args):
    """
    %prog lcn Orthogroups/Orthogroups.tsv Orthogroup_Sequences/ lcn/

    Select low copy orthogroups from the orthogroup table and write one FASTA
    file per selected orthogroup, keeping a single sequence per species.
    """
    p = OptionParser(lcn.__doc__)
    # type=float is required: a value given on the command line arrives as a
    # string, and comparing the computed float ratio with a str raises
    # TypeError.
    p.add_argument(
        "--min-single-ratio",
        default=0.9,
        type=float,
        help="Single copy ratio must be > ",
    )
    p.add_argument(
        "--max-zero-ratio", default=0, type=float, help="Zero copy ratio must be < "
    )
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    (groups_tsv, sequence_dir, lcn_dir) = args
    selected = []
    species_names = []
    # Read in the orthogroup definition and select based on counts
    with open(groups_tsv) as fp:
        reader = csv.reader(fp, delimiter="\t")
        header = next(reader, None)
        if header is not None:  # an empty table has no header row
            species_names = header[1:]
        for row in reader:
            counts = [len(x.split(", ")) if x.strip() != "" else 0 for x in row[1:]]
            if not counts:  # no species columns, the ratios are undefined
                continue
            single_ratio = sum(x == 1 for x in counts) / len(counts)
            zero_ratio = sum(x == 0 for x in counts) / len(counts)
            if single_ratio < opts.min_single_ratio:
                continue
            if zero_ratio > opts.max_zero_ratio:
                continue
            print(row[0], single_ratio, zero_ratio, counts, file=sys.stderr)
            selected.append(row)

    logger.debug("A total of %d orthogroups selected", len(selected))

    # Collect the FASTA sequences now
    mkdir(lcn_dir)
    for row in selected:
        orthogroup = row[0]
        orthogroup_fasta = "{}.fa".format(orthogroup)
        input_fasta = op.join(sequence_dir, orthogroup_fasta)
        fasta = Fasta(input_fasta)
        selected_seqs = []
        for gene_names, species_name in zip(row[1:], species_names):
            if gene_names.strip() == "":
                # Zero-copy species (possible when --max-zero-ratio > 0):
                # splitting the empty cell would give the gene name "".
                continue
            gene_names = gene_names.split(", ")
            # `best_gene` rather than `selected`, so the list being iterated
            # by the outer loop is not rebound.
            if len(gene_names) == 1:
                (best_gene,) = gene_names
            else:
                # Longest sequence wins; ties are broken by gene name
                _, best_gene = max((len(fasta[x]), x) for x in gene_names)
            selected_seq = fasta[best_gene]
            # Set gene name to species name so we can later combine them in supermatrix
            selected_seq.id = species_name
            selected_seq.name = species_name
            selected_seq.description = ""
            selected_seqs.append(selected_seq)

        output_fasta = op.join(lcn_dir, orthogroup_fasta)
        with open(output_fasta, "w") as fw:
            SeqIO.write(selected_seqs, fw, "fasta")
        print(
            "{}: {} => {} ({})".format(
                orthogroup, len(fasta), len(selected_seqs), output_fasta
            ),
            file=sys.stderr,
        )
def get_1D_overlap(eclusters, depth=1):
    """
    Sweep over block ends to find groups of blocks that overlap in 1D.

    `eclusters` is a sequence of (seqid, left, right) triples; a block id is
    its position in that sequence. Whenever more than `depth` blocks are open
    at once on a seqid, the sorted tuple of their ids is recorded.

    Returns the set of such id tuples.
    """
    OPEN, CLOSE = 0, 1  # OPEN sorts first, so touching blocks count as overlapping
    events = sorted(
        event
        for block_id, (seqid, start, stop) in enumerate(eclusters)
        for event in ((seqid, start, OPEN, block_id), (seqid, stop, CLOSE, block_id))
    )

    conflicts = set()
    open_blocks = set()
    previous_seqid = ""
    for seqid, _, kind, block_id in events:
        if seqid != previous_seqid:
            open_blocks.clear()
        previous_seqid = seqid

        if kind == CLOSE:
            open_blocks.remove(block_id)
        else:
            open_blocks.add(block_id)

        if len(open_blocks) > depth:
            conflicts.add(tuple(sorted(open_blocks)))

    return conflicts
def create_data_model(nodes, constraints_x, qa, constraints_y, qb):
    """
    Assemble the MIP instance: one variable per node, with the node values as
    objective coefficients, and one constraint per conflict group.

    Each group in `constraints_x` gets coefficient 1 for every member and the
    bound `qa`; groups in `constraints_y` are handled the same way with bound
    `qb`, unless `constraints_y` is the very same object as `constraints_x`
    (self comparison), in which case they are not added a second time.

    Example of the resulting program:
        Maximize
          4 x1 + 2 x2 + 3 x3 + x4
        Subject To
          x1 + x2 <= 1
        End
    """
    obj_coeffs = nodes[:]
    num_vars = len(nodes)

    axes = [(constraints_x, qa)]
    # non-self
    if constraints_y is not constraints_x:
        axes.append((constraints_y, qb))

    constraint_coeffs, bounds = [], []
    num_constraints = 0
    for groups, quota in axes:
        for group in groups:
            constraint_coeffs.append(dict.fromkeys(group, 1))
            bounds.append(quota)
        num_constraints += len(groups)

    return MIPDataModel(
        constraint_coeffs, bounds, obj_coeffs, num_vars, num_constraints
    )
OptionParser(__doc__) + + p.set_beds() + p.add_argument( + "--quota", + default="1:1", + help="`quota mapping` procedure -- screen blocks to constrain mapping" + " (useful for orthology), " + "put in the format like (#subgenomes expected for genome X):" + "(#subgenomes expected for genome Y)", + ) + p.add_argument( + "--Nm", + dest="Nmax", + type=int, + default=10, + help="distance cutoff to tolerate two blocks that are " + "slightly overlapping (cutoff for `quota mapping`) " + "[default: %default units (gene or bp dist)]", + ) + + p.add_argument( + "--self", + dest="self_match", + action="store_true", + default=False, + help="you might turn this on when screening paralogous blocks, " + "esp. if you have reduced mirrored blocks into non-redundant set", + ) + p.set_verbose(help="Show verbose solver output") + + p.add_argument( + "--screen", + default=False, + action="store_true", + help="generate new anchors file", + ) + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (qa_file,) = args + _, _, qorder, sorder, _ = check_beds(qa_file, p, opts) + + # sanity check for the quota + if opts.quota: + try: + qa, qb = opts.quota.split(":") + qa, qb = int(qa), int(qb) + except ValueError: + logger.error("quota string should be the form x:x (2:4, 1:3, etc.)") + sys.exit(1) + + if opts.self_match and qa != qb: + raise Exception( + "when comparing genome to itself, " + "quota must be the same number " + "(like 1:1, 2:2) you have %s" % opts.quota + ) + quota = (qa, qb) + + self_match = opts.self_match + + clusters = read_clusters(qa_file, qorder, sorder) + for cluster in clusters: + assert len(cluster) > 0 + + # below runs `quota mapping` + work_dir = op.join(op.dirname(op.abspath(qa_file)), "work") + + selected_ids = solve_lp( + clusters, + quota, + work_dir=work_dir, + Nmax=opts.Nmax, + self_match=self_match, + verbose=opts.verbose, + ) + + logger.debug("Selected %d blocks", len(selected_ids)) + prefix = qa_file.rsplit(".", 1)[0] + 
suffix = "{}x{}".format(qa, qb) + outfile = ".".join((prefix, suffix)) + fw = must_open(outfile, "w") + print(",".join(str(x) for x in selected_ids), file=fw) + fw.close() + logger.debug("Screened blocks ids written to `%s`", outfile) + + if opts.screen: + from jcvi.compara.synteny import screen + + new_qa_file = ".".join((prefix, suffix, "anchors")) + largs = [qa_file, new_qa_file, "--ids", outfile] + if opts.qbed and opts.sbed: + largs += ["--qbed={0}".format(opts.qbed)] + largs += ["--sbed={0}".format(opts.sbed)] + screen(largs) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/jcvi/compara/reconstruct.py b/jcvi/compara/reconstruct.py new file mode 100644 index 00000000..83a9c377 --- /dev/null +++ b/jcvi/compara/reconstruct.py @@ -0,0 +1,379 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +From synteny blocks, reconstruct ancestral order by interleaving the genes in +between the anchors. This is the bottom-up method used first in Bowers (2003), +and in Tang (2010), to reconstruct pre-alpha and pre-rho order, respectively. 
+""" +import sys + +from itertools import zip_longest +from math import sqrt +from more_itertools import pairwise + +from ..apps.base import ActionDispatcher, OptionParser, logger +from ..formats.base import get_number +from ..formats.bed import Bed +from ..utils.grouper import Grouper + +from .base import AnchorFile +from .synteny import check_beds + + +def main(): + + actions = ( + ("collinear", "reduce synteny blocks to strictly collinear"), + ("zipbed", "build ancestral contig from collinear blocks"), + ("pairs", "convert anchorsfile to pairsfile"), + # Sankoff-Zheng reconstruction + ("adjgraph", "construct adjacency graph"), + # Experimental gene order graph for ancestral reconstruction + ("fuse", "fuse gene orders based on anchorsfile"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def add_bed_to_graph(G, bed, families): + for seqid, bs in bed.sub_beds(): + prev_node, prev_strand = None, "+" + for b in bs: + accn = b.accn + strand = b.strand + node = "=".join(families[accn]) + if prev_node: + G.add_edge(prev_node, node, prev_strand, strand) + prev_node, prev_strand = node, strand + + return G + + +def print_edges(bed, families): + """ + Instead of going through the graph construction, just print the edges. + """ + symbols = {"+": ">", "-": "<"} + for seqid, bs in bed.sub_beds(): + prev_node, prev_strand = None, "+" + for b in bs: + accn = b.accn + strand = b.strand + node = "=".join(families[accn]) + if prev_node: + print( + "{}{}--{}{}".format( + prev_node, symbols[prev_strand], symbols[strand], node + ) + ) + prev_node, prev_strand = node, strand + + +def fuse(args): + """ + %prog fuse *.bed *.anchors + + Fuse gene orders based on anchors file. 
def adjgraph(args):
    """
    %prog adjgraph adjacency.txt subgraph.txt

    Construct adjacency graph for graphviz. The file may look like sample below.
    The lines with numbers are chromosomes with gene order information.

    genome 0
    chr 0
    -1 -13 -16 3 4 -6126 -5 17 -6 7 18 5357 8 -5358 5359 -9 -10 -11 5362 5360
    chr 1
    138 6133 -5387 144 -6132 -139 140 141 146 -147 6134 145 -170 -142 -143
    """
    import pygraphviz as pgv

    from jcvi.formats.base import SetFile

    p = OptionParser(adjgraph.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    infile, subgraph = args
    subgraph = SetFile(subgraph)
    subgraph = {x.strip("-") for x in subgraph}

    G = pgv.AGraph(strict=False)  # allow multi-edge
    SG = pgv.AGraph(strict=False)

    palette = ("green", "magenta", "tomato", "peachpuff")
    genome_id = -1
    # Fallback color so gene rows that appear before any `genome` line do not
    # hit an unbound variable.
    gcolor = palette[0]
    key = 0
    with open(infile) as fp:
        for row in fp:
            atoms = row.split()
            if not atoms:  # blank line
                continue

            tag = atoms[0]
            if tag in ("ChrNumber", "chr"):
                continue

            if tag == "genome":
                genome_id += 1
                # Cycle through the palette instead of raising IndexError
                # once there are more genomes than colors.
                gcolor = palette[genome_id % len(palette)]
                continue

            # Every gene contributes a left and a right end; a leading "-"
            # (negative strand) reverses the order of its two ends.
            nodeseq = []
            for gene in atoms:
                name = gene.strip("-")
                nodeL, nodeR = name + "L", name + "R"
                if gene.startswith("-"):  # negative strand
                    nodeseq += [nodeR, nodeL]
                else:
                    nodeseq += [nodeL, nodeR]

            for a, b in pairwise(nodeseq):
                G.add_edge(a, b, key, color=gcolor)
                key += 1

                na, nb = a[:-1], b[:-1]
                if na not in subgraph and nb not in subgraph:
                    continue

                SG.add_edge(a, b, key, color=gcolor)

    G.graph_attr.update(dpi="300")

    with open("graph.dot", "w") as fw:
        G.write(fw)

    with open("subgraph.dot", "w") as fw:
        SG.write(fw)
def interleave_pairs(pairs):
    """
    Walk through consecutive (a, b) anchor pairs and yield a merged stream of
    indices: the two members of each anchor, and between two anchors the
    indices lying strictly in between on each side, taken alternately (first
    side, then second side). The second side is walked upwards or downwards
    depending on whether it increases or decreases between the two anchors.
    """
    prev_q, prev_s = pairs[0]
    yield prev_q
    yield prev_s
    for next_q, next_s in pairs[1:]:
        assert prev_q < next_q
        q_gap = range(prev_q + 1, next_q)
        if prev_s < next_s:
            s_gap = range(prev_s + 1, next_s)
        else:
            s_gap = range(prev_s - 1, next_s, -1)
        for q_idx, s_idx in zip_longest(q_gap, s_gap):
            # The shorter side is padded with None by zip_longest
            for idx in (q_idx, s_idx):
                if idx:
                    yield idx
        prev_q, prev_s = next_q, next_s
        yield prev_q
        yield prev_s
# Non-linear transformation of anchor scores
def score_convert(x):
    return int(sqrt(x))


def print_chain(block, ascending=True, scope=50):
    """
    Find the highest-scoring strictly collinear chain of anchors in a block.

    Args:
        block: Iterable of (a, b, score) anchors. It is not modified.
        ascending: Chain anchors whose `b` increases with `a` when True,
            decreases with `a` when False.
        scope: Each anchor is only chained to the `scope - 1` anchors that
            follow it in sorted order (reduces search complexity).

    Returns:
        (chainscore, solution): the summed converted scores of the chain and
        its anchors in chain order. An empty block gives (0, []).
    """
    if not ascending:
        # Negating b turns the descending problem into an ascending one
        block = [(a, -b, c) for (a, b, c) in block]

    # Sort a copy so the caller's list is left untouched
    block = sorted(block)
    bsize = len(block)
    if bsize == 0:
        return 0, []

    weights = [score_convert(c) for (_, _, c) in block]
    scores = weights[:]  # best chain score ending at each anchor
    fromm = [-1] * bsize  # predecessor in that chain, -1 for a chain start

    for i, (a, b, _) in enumerate(block):
        for j in range(i + 1, min(i + scope, bsize)):
            d, e, _ = block[j]

            # Ensure strictly collinear
            if d == a or b >= e:
                continue

            this_score = scores[i] + weights[j]
            if this_score > scores[j]:
                fromm[j] = i
                scores[j] = this_score

    scoresfromm = list(zip(scores, fromm))
    maxchain = max(scoresfromm)
    chainscore = maxchain[0]

    # Trace back from the chain end until a chain start (-1) is reached. The
    # -1 marker must never be used as an index: doing so appended block[-1]
    # whenever the best chain was a single anchor.
    solution = []
    last = scoresfromm.index(maxchain)
    while last != -1:
        solution.append(last)
        last = fromm[last]

    solution.reverse()
    solution = [block[x] for x in solution]
    if not ascending:
        solution = [(a, -b, c) for (a, b, c) in solution]
    return chainscore, solution
def get_flanker(group, query):
    """
    Find the anchor in `group` that is closest to `query` on the first axis.

    `group` is a list of (x, y) anchors and is sorted in place. Returns
    (flanker, other, flanked): the closer of the two anchors around `query`
    (the right one on ties), the remaining one, and whether `query` lies
    strictly between two anchors, i.e. it is neither outside the group nor
    itself the x position of the chosen anchor.

    >>> get_flanker([(370, 15184), (372, 15178), (373, 15176), (400, 15193)], 385)
    ((373, 15176), (400, 15193), True)

    >>> get_flanker([(124, 13639), (137, 13625)], 138)
    ((137, 13625), (137, 13625), False)

    >>> get_flanker([(10, 1), (20, 2), (30, 3)], 20)
    ((20, 2), (10, 1), False)
    """
    group.sort()
    pos = bisect_left(group, (query, 0))
    left_flanker = group[0] if pos == 0 else group[pos - 1]
    right_flanker = group[-1] if pos == len(group) else group[pos]
    # pick the closest flanker
    if abs(query - left_flanker[0]) < abs(query - right_flanker[0]):
        flanker, other = left_flanker, right_flanker
    else:
        flanker, other = right_flanker, left_flanker

    # Compare the x position, not the whole (x, y) tuple: a tuple never
    # equals the integer query, which made the exact-hit test a no-op.
    flanked = not (pos == 0 or pos == len(group) or flanker[0] == query)

    return flanker, other, flanked
syntenic region is marked by the closest flanker + """ + regions = [] + ysorted = sorted(data, key=lambda x: x[1]) + g = Grouper() + + a, b = tee(ysorted) + next(b, None) + for ia, ib in zip(a, b): + pos1, pos2 = ia[1], ib[1] + if pos2 - pos1 < window and sbed[pos1].seqid == sbed[pos2].seqid: + g.join(ia, ib) + + for group in sorted(g): + (qflanker, syntelog), (far_flanker, far_syntelog), flanked = get_flanker( + group, query + ) + + # run a mini-dagchainer here, take the direction that gives us most anchors + if colinear: + y_indexed_group = [(y, i) for i, (x, y) in enumerate(group)] + lis = longest_increasing_subsequence(y_indexed_group) + lds = longest_decreasing_subsequence(y_indexed_group) + + if len(lis) >= len(lds): + track = lis + orientation = "+" + else: + track = lds + orientation = "-" + + group = [group[i] for (y, i) in track] + + xpos, ypos = zip(*group) + score = min(len(set(xpos)), len(set(ypos))) + + if qflanker == query: + gray = "S" + else: + gray = "G" if not flanked else "F" + score -= 1 # slight penalty for not finding syntelog + + if score < cutoff: + continue + + # y-boundary of the block + left, right = group[0][1], group[-1][1] + # this characterizes a syntenic region (left, right). 
def batch_query(qbed, sbed, all_data, opts, fw=None, c=None, transpose=False):
    """
    Report the syntenic regions found for every gene of the query bed.

    Args:
        qbed, sbed: Query and subject beds.
        all_data: (query index, subject index) anchor pairs. Not modified.
        opts: Parsed options; reads `cutoff`, `window`, `scoring`, `qnote`
            and `snote`.
        fw: Output handle. When given, one tab-separated row is printed per
            region.
        c: Database cursor, used for an `insert` per region when `fw` is not
            given.
        transpose: Swap the roles of query and subject.
    """
    cutoff = int(opts.cutoff * opts.window)
    window = opts.window / 2
    colinear = opts.scoring == "collinear"
    qnote, snote = opts.qnote, opts.snote
    if qnote == "null" or snote == "null":
        qnote = op.basename(qbed.filename).split(".")[0]
        snote = op.basename(sbed.filename).split(".")[0]

    # process all genes present in the bed file
    if transpose:
        all_data = transposed(all_data)
        qbed, sbed = sbed, qbed
        qnote, snote = snote, qnote

    # sorted() rather than .sort(): transposed() hands back a zip iterator,
    # which has no .sort() method, and the caller's list stays untouched.
    all_data = sorted(all_data)

    def simple_bed(x):
        return sbed[x].seqid, sbed[x].start

    qsimplebed = qbed.simple_bed

    for _, ranks in groupby(qsimplebed, key=lambda x: x[0]):
        ranks = [x[1] for x in ranks]
        for r in ranks:
            rmin = max(r - window, ranks[0])
            rmax = min(r + window + 1, ranks[-1])
            rmin_pos = bisect_left(all_data, (rmin, 0))
            rmax_pos = bisect_left(all_data, (rmax, 0))
            anchors = all_data[rmin_pos:rmax_pos]
            regions = find_synteny_region(
                r, sbed, anchors, window, cutoff, colinear=colinear
            )
            for syntelog, _, left, right, gray, orientation, score in regions:
                query = qbed[r].accn

                left_chr, left_pos = simple_bed(left)
                right_chr, right_pos = simple_bed(right)

                anchor = sbed[syntelog].accn
                anchor_chr, anchor_pos = simple_bed(syntelog)
                # below is useful for generating the syntenic region in the coge url
                left_dist = abs(anchor_pos - left_pos) if anchor_chr == left_chr else 0
                right_dist = (
                    abs(anchor_pos - right_pos) if anchor_chr == right_chr else 0
                )
                # Floor division: round up to the next multiple of 10000.
                # True division would just add 10000 and produce a float.
                flank_dist = (max(left_dist, right_dist) // 10000 + 1) * 10000

                pdata = [
                    query,
                    anchor,
                    gray,
                    score,
                    flank_dist,
                    orientation,
                    qnote,
                    snote,
                ]
                if fw:
                    print("\t".join(str(x) for x in pdata), file=fw)
                else:
                    c.execute("insert into synteny values (?,?,?,?,?,?,?,?)", pdata)
default="collinear", + help="Scoring scheme", + ) + + opts, args = p.parse_args() + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + main(blastfile, p, opts) diff --git a/jcvi/compara/synteny.py b/jcvi/compara/synteny.py new file mode 100755 index 00000000..7059d93f --- /dev/null +++ b/jcvi/compara/synteny.py @@ -0,0 +1,1883 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +"""Syntenty inference in comparative genomics +""" + +import os.path as op +import sys + +from collections import defaultdict +from collections.abc import Iterable + +import numpy as np + +from ..algorithms.lis import heaviest_increasing_subsequence as his +from ..apps.base import ActionDispatcher, OptionParser, cleanup, logger +from ..formats.base import BaseFile, SetFile, read_block, must_open +from ..formats.bed import Bed, BedLine +from ..formats.blast import Blast +from ..utils.cbook import gene_name, human_size +from ..utils.grouper import Grouper +from ..utils.range import range_chain + +from .base import AnchorFile + + +class BlockFile(BaseFile): + """Parse .blocks file which is the mcscan output with multiple columns as 'tracks'""" + + def __init__(self, filename, defaultcolor="#fb8072", header=False): + super().__init__(filename) + fp = must_open(filename) + hd = next(fp).rstrip().split("\t") + ncols = len(hd) + if header: + self.header = hd + else: + fp.seek(0) + self.header = range(ncols) + + data = [] + highlight = [] + for row in fp: + hl = "*" in row + # r* highlights the block in red color + if hl: + hl, row = row.split("*", 1) + hl = hl or defaultcolor + atoms = row.rstrip().split("\t") + atoms = [x.strip() for x in atoms] + atoms = ["." 
if x == "" else x for x in atoms] + if len(atoms) > ncols: + atoms = atoms[:ncols] + elif len(atoms) < ncols: + atoms = atoms + ["."] * (ncols - len(atoms)) + data.append(atoms) + highlight.append(hl) + + self.data = data + self.highlight = highlight + self.columns = list(zip(*data)) + self.ncols = ncols + + def get_extent(self, i, order, debug=True): + # Some blocks file, such as ones manually edited, will have garbled + # order, which prompts the hack below + acol = [order[x][0] for x in self.columns[0] if x in order] + bcol = [order[x][0] for x in self.columns[i] if x in order] + elen = min(len(acol), len(bcol)) + ia, ib = acol[:elen], bcol[:elen] + orientation = get_orientation(ia, ib) + + ocol = [order[x] for x in self.columns[i] if x in order] + # orientation = '+' if ocol[0][0] <= ocol[-1][0] else '-' + si, start = min(ocol) + ei, end = max(ocol) + same_chr = start.seqid == end.seqid + chr = start.seqid if same_chr else None + ngenes = ei - si + 1 + if debug: + r = "{0}:{1}-{2}".format(chr, start.start, end.end) + print( + "Column {0}: {1} - {2} ({3})".format(i, start.accn, end.accn, r), + file=sys.stderr, + ) + print( + " {0} .. {1} ({2}) features .. {3}".format( + chr, ngenes, len(ocol), orientation + ), + file=sys.stderr, + ) + + span = abs(start.start - end.end) + + return start, end, si, ei, chr, orientation, span + + def iter_pairs(self, i, j, highlight=False): + for h, d in zip(self.highlight, self.data): + if highlight and not h: + continue + + a, b = d[i], d[j] + if "." 
in (a, b) or "" in (a, b): + continue + + yield a, b, h + + def iter_all_pairs(self): + ncols = self.ncols + for i in range(ncols): + for j in range(i + 1, ncols): + for a, b, h in self.iter_pairs(i, j): + yield a, b, h + + def iter_gene_col(self): + for hd, col in zip(self.header, self.columns): + for g in col: + if g not in (".", ""): + yield g, hd + + def query_gene(self, gene, color=None, invert=False): + """ + Used in mcscanq() for query + """ + qi = self.columns[0].index(gene) + ndata = len(self.data) + for col in self.columns[1:]: + upstream_dist = downstream_dist = 1000 + # search upstream + for i in range(qi - 1, -1, -1): + if col[i] not in (".", ""): + upstream = col[i] + upstream_dist = qi - i + break + # search downstream + for i in range(qi, ndata): + if col[i] not in (".", ""): + downstream = col[i] + downstream_dist = i - qi + break + closest = upstream if upstream_dist < downstream_dist else downstream + # output in .simple format + if invert: + line = "\t".join(str(x) for x in (closest, closest, gene, gene, 0, "+")) + else: + line = "\t".join(str(x) for x in (gene, gene, closest, closest, 0, "+")) + if color is not None: + line = color + "*" + line + yield line + + def grouper(self) -> Grouper: + """Build orthogroup based on the gene matches.""" + grouper = Grouper() + for row in self.data: + if "." 
not in row: + grouper.join(*row) + logger.debug("A total of %d orthogroups formed", len(grouper)) + return grouper + + +class SimpleFile(object): + def __init__(self, simplefile, defaultcolor="#fb8072", order=None): + # Sometimes the simplefile has query and subject wrong + fp = open(simplefile) + self.blocks = [] + check = False + for row in fp: + if row[:2] == "##" or row.startswith("StartGeneA"): + continue + hl = "*" in row + if hl: + hl, row = row.split("*", 1) + hl = hl or defaultcolor + a, b, c, d, score, orientation = row.split() + if order and a not in order: + if c not in order: + check = True + print( + """{} {} {} {} can not found in bed files.""".format( + a, b, c, d + ), + file=sys.stderr, + ) + else: + a, b, c, d = c, d, a, b + if orientation == "-": + c, d = d, c + score = int(score) + self.blocks.append((a, b, c, d, score, orientation, hl)) + if check: + print( + "Error: some genes in blocks can't be found, please rerun after making sure that bed file agree with simple file.", + file=sys.stderr, + ) + exit(1) + + +def _score(cluster): + """ + score of the cluster, in this case, is the number of non-repetitive matches + """ + x, y = list(zip(*cluster))[:2] + return min(len(set(x)), len(set(y))) + + +def get_orientation(ia, ib): + """Infer the orientation of a pairwise block. 
+ + Args: + ia (List[int]): List a + ib (List[int]): List b + + Returns: + str: plus (+) or minus (-) + """ + if len(ia) != len(ib) or len(ia) < 2: + return "+" # Just return a default orientation + + slope, _ = np.polyfit(ia, ib, 1) + return "+" if slope >= 0 else "-" + + +def group_hits(blasts): + if not blasts: + return {"": []} + + # Already in the form of (qi, si, score) + if isinstance(blasts[0], Iterable) and len(blasts[0]) == 3: + return {"": blasts} + + # grouping the hits based on chromosome pair + all_hits = defaultdict(list) + for b in blasts: + all_hits[(b.qseqid, b.sseqid)].append((b.qi, b.si, b.score)) + + return all_hits + + +def read_blast(blast_file, qorder, sorder, is_self=False, ostrip=True): + """Read the blast and convert name into coordinates""" + filtered_blast = [] + seen = set() + bl = Blast(blast_file) + for b in bl: + query, subject = b.query, b.subject + if is_self and query == subject: + continue + if ostrip: + query, subject = gene_name(query), gene_name(subject) + if query not in qorder or subject not in sorder: + continue + + qi, q = qorder[query] + si, s = sorder[subject] + + if is_self: + # remove redundant a<->b to one side when doing self-self BLAST + if qi > si: + query, subject = subject, query + qi, si = si, qi + q, s = s, q + # Too close to diagonal! 
possible tandem repeats + if q.seqid == s.seqid and si - qi < 40: + continue + + key = query, subject + if key in seen: + continue + seen.add(key) + + b.qseqid, b.sseqid = q.seqid, s.seqid + b.qi, b.si = qi, si + b.query, b.subject = query, subject + + filtered_blast.append(b) + + logger.debug( + "A total of %d BLAST imported from `%s`.", len(filtered_blast), blast_file + ) + + return filtered_blast + + +def read_anchors(ac, qorder, sorder, minsize=0): + """ + anchors file are just (geneA, geneB) pairs (with possible deflines) + """ + all_anchors = defaultdict(list) + nanchors = 0 + anchor_to_block = {} + + for a, b, idx in ac.iter_pairs(minsize=minsize): + if a not in qorder or b not in sorder: + continue + qi, q = qorder[a] + si, s = sorder[b] + pair = (qi, si) + + all_anchors[(q.seqid, s.seqid)].append(pair) + anchor_to_block[pair] = idx + nanchors += 1 + + logger.debug("A total of {0} anchors imported.".format(nanchors)) + assert nanchors == len(anchor_to_block) + + return all_anchors, anchor_to_block + + +def synteny_scan(points, xdist, ydist, N, is_self=False, intrabound=300): + """ + This is the core single linkage algorithm which behaves in O(n): + iterate through the pairs, foreach pair we look back on the + adjacent pairs to find links + """ + clusters = Grouper() + n = len(points) + points.sort() + for i in range(n): + for j in range(i - 1, -1, -1): + # x-axis distance + del_x = points[i][0] - points[j][0] + if del_x > xdist: + break + # y-axis distance + del_y = points[i][1] - points[j][1] + if abs(del_y) > ydist: + continue + # In self-comparison, ignore the anchors that are too close to the diagonal + if is_self: + intradist = min( + abs(points[i][0] - points[i][1]), abs(points[j][0] - points[j][1]) + ) + if intradist < intrabound: + continue + # otherwise join + clusters.join(points[i], points[j]) + + # select clusters that are at least >=N + clusters = [sorted(cluster) for cluster in list(clusters) if _score(cluster) >= N] + + return clusters + + 
+def batch_scan(points, xdist=20, ydist=20, N=5, is_self=False, intrabound=300): + """ + runs synteny_scan() per chromosome pair + """ + chr_pair_points = group_hits(points) + + clusters = [] + for chr_pair in sorted(chr_pair_points.keys()): + points = chr_pair_points[chr_pair] + clusters.extend( + synteny_scan( + points, xdist, ydist, N, is_self=is_self, intrabound=intrabound + ) + ) + + return clusters + + +def synteny_liftover(points, anchors, dist): + """ + This is to get the nearest anchors for all the points (useful for the + `liftover` operation below). + """ + from scipy.spatial import cKDTree + + points = np.array(points, dtype=int) + ppoints = points[:, :2] if points.shape[1] > 2 else points + tree = cKDTree(anchors, leafsize=16) + dists, idxs = tree.query(ppoints, p=1, distance_upper_bound=dist) + + for point, dist, idx in zip(points, dists, idxs): + if idx == tree.n: # nearest is out of range + continue + if dist == 0: # already in anchors + continue + + yield point, tuple(anchors[idx]) + + +def get_bed_filenames(hintfile, p, opts): + wd, hintfile = op.split(hintfile) + if not (opts.qbed and opts.sbed): + try: + q, s = hintfile.split(".", 2)[:2] + opts.qbed = op.join(wd, q + ".bed") + opts.sbed = op.join(wd, s + ".bed") + logger.debug("Assuming --qbed={0} --sbed={1}".format(opts.qbed, opts.sbed)) + except: + print("Options --qbed and --sbed are required", file=sys.stderr) + sys.exit(not p.print_help()) + + return opts.qbed, opts.sbed + + +def check_beds(hintfile, p, opts, sorted=True): + qbed_file, sbed_file = get_bed_filenames(hintfile, p, opts) + # is this a self-self blast? 
+ is_self = qbed_file == sbed_file + if is_self: + logger.debug("Looks like self-self comparison.") + + qbed = Bed(opts.qbed, sorted=sorted) + sbed = Bed(opts.sbed, sorted=sorted) + qorder = qbed.order + sorder = sbed.order + + return qbed, sbed, qorder, sorder, is_self + + +def add_arguments(p, args, dist=10): + """ + scan and liftover has similar interfaces, so share common options + returns opts, files + """ + p.set_beds() + p.add_argument( + "--dist", default=dist, type=int, help="Extent of flanking regions to search" + ) + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + blast_file, anchor_file = args + + return blast_file, anchor_file, opts.dist, opts + + +def main(): + + actions = ( + ("scan", "get anchor list using single-linkage algorithm"), + ("summary", "provide statistics for pairwise blocks"), + ("liftover", "given anchor list, pull adjacent pairs from blast file"), + # Multiple synteny blocks inference + ("mcscan", "stack synteny blocks on a reference bed"), + ("mcscanq", "query multiple synteny blocks"), + # Assemble multiple synteny blocks + ("query", "collect matching region based on the query region"), + ("assemble", "build blocks from regions defined by start and end"), + # Filter synteny blocks + ("screen", "extract subset of blocks from anchorfile"), + ("simple", "convert anchorfile to simple block descriptions"), + ("stats", "provide statistics for mscan blocks"), + ("depth", "calculate the depths in the two genomes in comparison"), + ("breakpoint", "identify breakpoints where collinearity ends"), + ("matrix", "make oxford grid based on anchors file"), + ("coge", "convert CoGe file to anchors file"), + ("spa", "convert chr ordering from SPA to simple lists"), + ("layout", "compute layout based on .simple file"), + ("rebuild", "rebuild anchors file from prebuilt blocks file"), + # Formatting + ("fromaligns", "convert aligns file to anchors file"), + ("toaligns", "convert anchors file to aligns file"), 
+ ) + + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def get_region_size(region, bed, order): + """Get a summary of a syntenic region, how many anchors it has and + how many genes it spans. + + Args: + region (List[str]): List of gene ids + order (Dict[str, BedLine]): Bed order to retrieve the positions + + Returns: + Tuple of three strs and two ints, start / end gene / seqid of the + region and total anchor counts and the span (number of genes) + """ + ris = [order[x] for x in region] + min_ri, min_r = min(ris) + max_ri, max_r = max(ris) + anchor_count = len(region) + span = max_ri - min_ri + 1 + min_seqid = min_r.seqid + max_seqid = max_r.seqid + assert min_seqid == max_seqid, "SeqId do not match, region invalid" + return min_r.accn, max_r.accn, min_seqid, span, anchor_count + + +def query(args): + """ + %prog query anchorsfile startGeneId endGeneId + + Collect matching region based on query region as given by startGeneId to + endGeneId. This can be considered a local version of mcscan(). The bedfile + must contain the range from startGeneId to endGeneId. + + Typical pipeline is to extract a set of pairwise syntenic regions to the + selected region of interest and then assemble them into .blocks file for + plotting purposes. 
+ """ + p = OptionParser(query.__doc__) + p.set_beds() + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + anchorsfile, start_gene_id, end_gene_id = args + qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts) + + # Guess which is qbed, which is sbed + if start_gene_id in sorder: # flip query and subject + qbed, sbed = sbed, qbed + qorder, sorder = sorder, qorder + + ac = AnchorFile(anchorsfile) + blocks = ac.blocks + si, s = qorder[start_gene_id] + ei, e = qorder[end_gene_id] + target_region = qbed[si : ei + 1] + target_genes = set(x.accn for x in target_region) + + # Go through all the blocks and pick out all matching regions + regions = [] + for block in blocks: + matching_region = set() + for a, b, score in block: + if not (a in target_genes or b in target_genes): + continue + if a in target_genes: + matching_region.add(b) + else: + matching_region.add(a) + if len(matching_region) < 2: + continue + # Print a summary of the matching region + regions.append(get_region_size(matching_region, sbed, sorder)) + + for min_accn, max_accn, seqid, span, anchor_count in sorted( + regions, key=lambda x: (-x[-1], -x[-2]) # Sort by (anchor_count, span) DESC + ): + print( + "{} {} ({}): span {}, anchors {}".format( + min_accn, max_accn, seqid, span, anchor_count + ) + ) + + +def assemble(args): + """ + %prog assemble regionsfile all.bed all.cds + + Assemble blocks file based on regions file. Regions file may look like: + + amborella evm_27.model.AmTr_v1.0_scaffold00004.87 evm_27.model.AmTr_v1.0_scaffold00004.204 + apostasia Ash010455 Ash010479 (fragScaff_scaffold_5) + apostasia Ash018328 Ash018367 (original_scaffold_2912) + apostasia Ash007533 Ash007562 (fragScaff_scaffold_132) + apostasia Ash002281 Ash002299 (fragScaff_scaffold_86) + + Where each line lists a region, starting with the species name (species.bed + must be present in the current directory). Followed by start and end gene. 
+ Contents after the 3rd field (end gene) are ignored. Using the example + above, the final .blocks file will contain 5 columns, one column for each line. + """ + import shutil + from tempfile import mkdtemp, mkstemp + + from jcvi.apps.align import last + from jcvi.formats.fasta import some + + p = OptionParser(assemble.__doc__) + p.add_argument( + "--no_strip_names", + default=False, + action="store_true", + help="Do not strip alternative splicing (e.g. At5g06540.1 -> At5g06540)", + ) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + strip_names = not opts.no_strip_names + regionsfile, bedfile, cdsfile = args + species_beds = {} + column_genes = [] + pivot = None + with open(regionsfile) as fp: + for row in fp: + species, start, end = row.split()[:3] + if pivot is None: + pivot = species + if species not in species_beds: + species_beds[species] = Bed(species + ".bed") + bed = species_beds[species] + order = bed.order + si, s = order[start] + ei, e = order[end] + genes = set(x.accn for x in bed[si : ei + 1]) + column_genes.append(genes) + + # Write gene ids + workdir = mkdtemp() + fd, idsfile = mkstemp(dir=workdir) + with open(idsfile, "w") as fw: + for genes in column_genes: + print(" ".join(genes), file=fw) + + logger.debug("Gene ids written to `{}`".format(idsfile)) + + # Extract FASTA + fd, fastafile = mkstemp(dir=workdir) + some_args = [cdsfile, idsfile, fastafile] + if not strip_names: + some_args += ["--no_strip_names"] + some(some_args) + + # Perform self-comparison and collect all pairs + last_output = last([fastafile, fastafile, "--outdir", workdir]) + blast = Blast(last_output) + pairs = set() + for b in blast: + query, subject = b.query, b.subject + if strip_names: + query, subject = gene_name(query), gene_name(subject) + pairs.add((query, subject)) + logger.debug("Extracted {} gene pairs from `{}`".format(len(pairs), last_output)) + + # Sort the pairs into columns + N = len(column_genes) + 
all_slots = [] + for i in range(N): + for j in range(i + 1, N): + genes_i = column_genes[i] + genes_j = column_genes[j] + for a, b in pairs: + if not (a in genes_i and b in genes_j): + continue + slots = ["."] * N + slots[i] = a + slots[j] = b + all_slots.append(slots) + + # Compress the pairwise results and merge when possible + # TODO: This is currently not optimized and inefficient + def is_compatible(slots1, slots2): + # At least intersects for one gene + assert len(slots1) == len(slots2) + flag = False + for a, b in zip(slots1, slots2): + if "." in (a, b): + continue + if a == b: + flag = True + else: + return False + return flag + + def merge(slots, processed): + for i, a in enumerate(slots): + if processed[i] == "." and a != ".": + processed[i] = a + + processed_slots = [] + all_slots.sort() + for slots in all_slots: + merged = False + for processed in processed_slots: + if is_compatible(slots, processed): + merge(slots, processed) # Merge into that line + merged = True + break + if not merged: # New information + processed_slots.append(slots) + + logger.debug( + "Before compression: {}, After compression: {}".format( + len(all_slots), len(processed_slots) + ) + ) + + pivot_order = species_beds[pivot].order + pivot_max = len(species_beds[pivot]) + pivot_sort_key = lambda x: pivot_order[x[0]][0] if x[0] != "." else pivot_max + processed_slots.sort(key=pivot_sort_key) + + with must_open(opts.outfile, "w") as fw: + for slots in processed_slots: + print("\t".join(slots), file=fw) + + # Cleanup + cleanup(workdir) + + +def colinear_evaluate_weights(tour, data): + tour = dict((s, i) for i, s in enumerate(tour)) + data = [(tour[x], score) for x, score in data if x in tour] + return (his(data)[-1],) + + +def layout(args): + """ + %prog layout query.subject.simple query.seqids subject.seqids + + Compute optimal seqids order in a second genome, based on seqids on one + genome, given the pairwise blocks in .simple format. 
+ """ + from jcvi.algorithms.ec import GA_setup, GA_run + + p = OptionParser(layout.__doc__) + p.set_beds() + p.set_cpus(cpus=32) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + simplefile, qseqids, sseqids = args + qbed, sbed, qorder, sorder, is_self = check_beds(simplefile, p, opts) + + qseqids = qseqids.strip().split(",") + sseqids = sseqids.strip().split(",") + qseqids_ii = dict((s, i) for i, s in enumerate(qseqids)) + sseqids_ii = dict((s, i) for i, s in enumerate(sseqids)) + + blocks = SimpleFile(simplefile).blocks + scores = defaultdict(int) + for a, b, c, d, score, orientation, hl in blocks: + qi, q = qorder[a] + si, s = sorder[c] + qseqid, sseqid = q.seqid, s.seqid + if sseqid not in sseqids: + continue + scores[sseqids_ii[sseqid], qseqid] += score + + data = [] + for (a, b), score in sorted(scores.items()): + if b not in qseqids_ii: + continue + data.append((qseqids_ii[b], score)) + + tour = range(len(qseqids)) + toolbox = GA_setup(tour) + toolbox.register("evaluate", colinear_evaluate_weights, data=data) + tour, fitness = GA_run(toolbox, ngen=100, npop=100, cpus=opts.cpus) + tour = [qseqids[x] for x in tour] + + print(",".join(tour)) + + +def fromaligns(args): + """ + %prog fromaligns out.aligns + + Convert aligns file (old MCscan output) to anchors file. + """ + p = OptionParser(fromaligns.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (alignsfile,) = args + fp = must_open(alignsfile) + fw = must_open(opts.outfile, "w") + for row in fp: + if row.startswith("## Alignment"): + print("###", file=fw) + continue + if row[0] == "#" or not row.strip(): + continue + atoms = row.split(":")[-1].split() + print("\t".join(atoms[:2]), file=fw) + fw.close() + + +def toaligns(args): + """ + %prog fromaligns input.anchors + + Convert anchors file to tab-separated aligns file, adding the first column + with the Block ID. 
+ """ + p = OptionParser(toaligns.__doc__) + p.add_argument("--prefix", default="b", help="Prefix to the block id") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(p.print_help()) + + (anchorfile,) = args + ac = AnchorFile(anchorfile) + logger.debug("A total of {} blocks imported".format(len(ac.blocks))) + max_block_id_len = len(str(len(ac.blocks) - 1)) + header = "\t".join(("#Block ID", "Gene 1", "Gene 2")) + + with must_open(opts.outfile, "w") as fw: + print(header, file=fw) + for a, b, block_id in ac.iter_pairs(): + block_id = "{}{:0{}d}".format(opts.prefix, block_id, max_block_id_len) + print("\t".join((block_id, a, b)), file=fw) + + +def mcscanq(args): + """ + %prog mcscanq query.ids blocksfile + + Query multiple synteny blocks to get the closest alignment feature. Mostly + used for 'highlighting' the lines in the synteny plot, drawn by + graphics.karyotype and graphics.synteny. + """ + p = OptionParser(mcscanq.__doc__) + p.add_argument("--color", help="Add color highlight, used in plotting") + p.add_argument( + "--invert", default=False, action="store_true", help="Invert query and subject" + ) + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + qids, blocksfile = args + b = BlockFile(blocksfile) + fp = open(qids) + for gene in fp: + gene = gene.strip() + for line in b.query_gene(gene, color=opts.color, invert=opts.invert): + print(line) + + +def spa(args): + """ + %prog spa spafiles + + Convert chromosome ordering from SPA to simple lists. First column is the + reference order. 
+ """ + from jcvi.algorithms.graph import merge_paths + from jcvi.utils.cbook import uniqify + + p = OptionParser(spa.__doc__) + p.add_argument( + "--unmapped", + default=False, + action="store_true", + help="Include unmapped scaffolds in the list", + ) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + spafiles = args + paths = [] + mappings = [] + missings = [] + for spafile in spafiles: + fp = open(spafile) + path = [] + mapping = [] + missing = [] + for row in fp: + if row[0] == "#" or not row.strip(): + continue + + atoms = row.rstrip().split("\t") + if len(atoms) == 2: + a, c2 = atoms + assert a == "unmapped" + missing.append(c2) + continue + + c1, c2, orientation = atoms + path.append(c1) + mapping.append(c2) + + paths.append(uniqify(path)) + mappings.append(mapping) + missings.append(missing) + + ref = merge_paths(paths) + print("ref", len(ref), ",".join(ref)) + for spafile, mapping, missing in zip(spafiles, mappings, missings): + mapping = [x for x in mapping if "random" not in x] + mapping = uniqify(mapping) + if len(mapping) < 50 and opts.unmapped: + mapping = uniqify(mapping + missing) + + print(spafile, len(mapping), ",".join(mapping)) + + +def rebuild(args): + """ + %prog rebuild blocksfile blastfile + + Rebuild anchors file from pre-built blocks file. 
+ """ + p = OptionParser(rebuild.__doc__) + p.add_argument( + "--header", default=False, action="store_true", help="First line is header" + ) + p.add_argument( + "--write_blast", + default=False, + action="store_true", + help="Get blast records of rebuilt anchors", + ) + p.set_beds() + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + blocksfile, blastfile = args + bk = BlockFile(blocksfile, header=opts.header) + fw = open("pairs", "w") + for a, b, h in bk.iter_all_pairs(): + print("\t".join((a, b)), file=fw) + fw.close() + + if opts.write_blast: + AnchorFile("pairs").blast(blastfile, "pairs.blast") + + fw = open("tracks", "w") + for g, col in bk.iter_gene_col(): + print("\t".join(str(x) for x in (g, col)), file=fw) + fw.close() + + +def coge(args): + """ + %prog coge cogefile + + Convert CoGe file to anchors file. + """ + p = OptionParser(coge.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (cogefile,) = args + fp = must_open(cogefile) + cogefile = cogefile.replace(".gz", "") + ksfile = cogefile + ".ks" + anchorsfile = cogefile + ".anchors" + fw_ks = must_open(ksfile, "w") + fw_ac = must_open(anchorsfile, "w") + + tag = "###" + print(tag, file=fw_ks) + for header, lines in read_block(fp, tag): + print(tag, file=fw_ac) + lines = list(lines) + for line in lines: + if line[0] == "#": + continue + ( + ks, + ka, + achr, + a, + astart, + astop, + bchr, + b, + bstart, + bstop, + ev, + ss, + ) = line.split() + a = a.split("||")[3] + b = b.split("||")[3] + print("\t".join((a, b, ev)), file=fw_ac) + print(",".join((";".join((a, b)), ks, ka, ks, ka)), file=fw_ks) + + fw_ks.close() + fw_ac.close() + + +def matrix(args): + """ + %prog matrix all.bed anchorfile matrixfile + + Make oxford grid based on anchors file. 
+ """ + + p = OptionParser(matrix.__doc__) + p.add_argument("--seqids", help="File with seqids") + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + bedfile, anchorfile, matrixfile = args + ac = AnchorFile(anchorfile) + seqidsfile = opts.seqids + if seqidsfile: + seqids = SetFile(seqidsfile, delimiter=",") + + order = Bed(bedfile).order + blocks = ac.blocks + m = defaultdict(int) + fw = open(matrixfile, "w") + aseqids = set() + bseqids = set() + for block in blocks: + a, b, scores = zip(*block) + ai, af = order[a[0]] + bi, bf = order[b[0]] + aseqid = af.seqid + bseqid = bf.seqid + if seqidsfile: + if (aseqid not in seqids) or (bseqid not in seqids): + continue + m[(aseqid, bseqid)] += len(block) + aseqids.add(aseqid) + bseqids.add(bseqid) + + aseqids = list(aseqids) + bseqids = list(bseqids) + print("\t".join(["o"] + bseqids), file=fw) + for aseqid in aseqids: + print("\t".join([aseqid] + [str(m[(aseqid, x)]) for x in bseqids]), file=fw) + + +def get_boundary_bases(start, end, order): + + from jcvi.utils.range import range_minmax + + (i, s), (j, e) = order[start], order[end] + seqid = s.seqid + assert seqid == e.seqid + + startbase, endbase = range_minmax([(s.start, s.end), (e.start, e.end)]) + + return seqid, startbase, endbase + + +def simple(args): + """ + %prog simple anchorfile --qbed=qbedfile --sbed=sbedfile [options] + + Write the block ends for each block in the anchorfile. 
+ GeneA1 GeneA2 GeneB1 GeneB2 +/- score + + Optional additional columns: + orderA1 orderA2 orderB1 orderB2 sizeA sizeB size block_id + + With base coordinates (--coords): + block_id seqidA startA endA bpSpanA GeneA1 GeneA2 geneSpanA + block_id seqidB startB endB bpSpanB GeneB1 GeneB2 geneSpanB + """ + p = OptionParser(simple.__doc__) + p.add_argument( + "--rich", default=False, action="store_true", help="Output additional columns" + ) + p.add_argument( + "--coords", + default=False, + action="store_true", + help="Output columns with base coordinates", + ) + p.add_argument( + "--bed", + default=False, + action="store_true", + help="Generate BED file for the blocks", + ) + p.add_argument( + "--noheader", default=False, action="store_true", help="Don't output header" + ) + p.set_beds() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (anchorfile,) = args + additional = opts.rich + coords = opts.coords + header = not opts.noheader + bed = opts.bed + if bed: + coords = True + bbed = Bed() + + ac = AnchorFile(anchorfile) + simplefile = anchorfile.rsplit(".", 1)[0] + ".simple" + + qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) + pf = "-".join(anchorfile.split(".", 2)[:2]) + if ac.is_empty: + logger.error("No blocks found in `%s`. 
Aborting ..", anchorfile) + return + + if coords: + h = "Block|Chr|Start|End|Span|StartGene|EndGene|GeneSpan|Orientation" + else: + h = "StartGeneA|EndGeneA|StartGeneB|EndGeneB|Orientation|Score" + if additional: + h += "|StartOrderA|EndOrderA|StartOrderB|EndOrderB|SizeA|SizeB|Size|Block" + + fws = open(simplefile, "w") + if header: + print("\t".join(h.split("|")), file=fws) + + blocks = ac.blocks + atotalbase = btotalbase = 0 + for i, block in enumerate(blocks): + a, b, scores = zip(*block) + a = [qorder[x] for x in a] + b = [sorder[x] for x in b] + ia, oa = zip(*a) + ib, ob = zip(*b) + + astarti, aendi = min(ia), max(ia) + bstarti, bendi = min(ib), max(ib) + astart, aend = min(a)[1].accn, max(a)[1].accn + bstart, bend = min(b)[1].accn, max(b)[1].accn + + sizeA = len(set(ia)) + sizeB = len(set(ib)) + size = len(block) + + orientation = get_orientation(ia, ib) + aspan = aendi - astarti + 1 + bspan = bendi - bstarti + 1 + score = int((aspan * bspan) ** 0.5) + score = str(score) + block_id = pf + "-block-{0}".format(i) + + if coords: + + aseqid, astartbase, aendbase = get_boundary_bases(astart, aend, qorder) + bseqid, bstartbase, bendbase = get_boundary_bases(bstart, bend, sorder) + abase = aendbase - astartbase + 1 + bbase = bendbase - bstartbase + 1 + atotalbase += abase + btotalbase += bbase + + # Write dual lines + aargs = [ + block_id, + aseqid, + astartbase, + aendbase, + abase, + astart, + aend, + aspan, + "+", + ] + bargs = [ + block_id, + bseqid, + bstartbase, + bendbase, + bbase, + bstart, + bend, + bspan, + orientation, + ] + + if bed: + bbed.append( + BedLine( + "\t".join( + str(x) + for x in ( + bseqid, + bstartbase - 1, + bendbase, + "{}:{}-{}".format(aseqid, astartbase, aendbase), + size, + orientation, + ) + ) + ) + ) + + for args in (aargs, bargs): + print("\t".join(str(x) for x in args), file=fws) + continue + + args = [astart, aend, bstart, bend, score, orientation] + if additional: + args += [astarti, aendi, bstarti, bendi, sizeA, sizeB, size, 
block_id] + print("\t".join(str(x) for x in args), file=fws) + + fws.close() + logger.debug("A total of {0} blocks written to `{1}`.".format(i + 1, simplefile)) + + if coords: + print( + "Total block span in {0}: {1}".format( + qbed.filename, human_size(atotalbase, precision=2) + ), + file=sys.stderr, + ) + print( + "Total block span in {0}: {1}".format( + sbed.filename, human_size(btotalbase, precision=2) + ), + file=sys.stderr, + ) + print( + "Ratio: {0:.1f}x".format( + max(atotalbase, btotalbase) * 1.0 / min(atotalbase, btotalbase) + ), + file=sys.stderr, + ) + + if bed: + bedfile = simplefile + ".bed" + bbed.print_to_file(filename=bedfile, sorted=True) + logger.debug("Bed file written to `{}`".format(bedfile)) + + +def screen(args): + """ + %prog screen anchorfile newanchorfile --qbed=qbedfile --sbed=sbedfile [options] + + Extract subset of blocks from anchorfile. Provide several options: + + 1. Option --ids: a file with IDs, 0-based, comma separated, all in one line. + 2. Option --seqids: only allow seqids in this file. + 3. Option --seqpairs: only allow seqpairs in this file, one per line, e.g. "Chr01,Chr05". + 4. Option --minspan: remove blocks with less span than this. + 5. Option --minsize: remove blocks with less number of anchors than this. + 6. 
Option --intrabound: remove blocks that are too close to the diagonal on + self dot plot that are typically artifacts + """ + from jcvi.utils.range import range_distance + + p = OptionParser(screen.__doc__) + p.set_beds() + p.add_argument("--ids", help="File with block IDs (0-based)") + p.add_argument("--seqids", help="File with seqids") + p.add_argument("--seqpairs", help="File with seqpairs") + p.add_argument( + "--intrabound", + default=300, + type=int, + help="Lower bound of intra-chromosomal blocks (only for self comparison)", + ) + p.add_argument("--minspan", default=0, type=int, help="Only blocks with span >=") + p.add_argument("--minsize", default=0, type=int, help="Only blocks with anchors >=") + p.add_argument( + "--simple", action="store_true", help="Write simple anchorfile with block ends" + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + anchorfile, newanchorfile = args + ac = AnchorFile(anchorfile) + idsfile = opts.ids + seqidsfile = opts.seqids + seqpairsfile = opts.seqpairs + minspan = opts.minspan + minsize = opts.minsize + osimple = opts.simple + intrabound = opts.intrabound + ids, seqids, seqpairs = None, None, None + + if idsfile: + ids = SetFile(idsfile, delimiter=",") + ids = set(int(x) for x in ids) + if seqidsfile: + seqids = SetFile(seqidsfile, delimiter=",") + if seqpairsfile: + fp = open(seqpairsfile) + seqpairs = set() + for row in fp: + a, b = row.strip().split(",") + seqpairs.add((a, b)) + seqpairs.add((b, a)) + + qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) + blocks = ac.blocks + selected = 0 + fw = open(newanchorfile, "w") + + for i, block in enumerate(blocks): + if ids and i not in ids: + continue + + a, b, scores = zip(*block) + a = [qorder[x] for x in a] + b = [sorder[x] for x in b] + ia, oa = zip(*a) + ib, ob = zip(*b) + min_ia, max_ia = min(ia), max(ia) + min_ib, max_ib = min(ib), max(ib) + aspan = max_ia - min_ia + 1 + bspan = max_ib - min_ib + 1 + aseqid = 
oa[0].seqid + bseqid = ob[0].seqid + + if seqids: + if (aseqid not in seqids) or (bseqid not in seqids): + continue + + if seqpairs: + if (aseqid, bseqid) not in seqpairs: + continue + + same_chromosome = is_self and (aseqid == bseqid) + + if same_chromosome: + dist, _ = range_distance( + (aseqid, min_ia, max_ia, "?"), (bseqid, min_ib, max_ib, "?") + ) + if dist < intrabound: + continue + + if minsize: + if len(block) < minsize: + continue + + if minspan: + if aspan < minspan or bspan < minspan: + continue + + selected += 1 + print("###", file=fw) + for line in block: + print("\t".join(line), file=fw) + + fw.close() + + if osimple: + simple( + [ + newanchorfile, + "--noheader", + "--qbed=" + qbed.filename, + "--sbed=" + sbed.filename, + ] + ) + + logger.debug("Before: {0} blocks, After: {1} blocks".format(len(blocks), selected)) + + +def summary(args): + """ + %prog summary anchorfile + + Provide statistics for pairwise blocks. + """ + from jcvi.utils.cbook import SummaryStats + + p = OptionParser(summary.__doc__) + p.add_argument("--prefix", help="Generate per block stats") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (anchorfile,) = args + ac = AnchorFile(anchorfile) + clusters = ac.blocks + if clusters == [[]]: + logger.debug("A total of 0 anchor was found. Aborted.") + raise ValueError("A total of 0 anchor was found. 
def stats(args):
    """
    %prog stats blocksfile

    Provide statistics for MCscan-style blocks. The count of homologs in each
    pivot gene is recorded.
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser), so it is intentionally left untouched.
    from jcvi.utils.cbook import percentage

    p = OptionParser(stats.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (blocksfile,) = args
    counts = defaultdict(int)  # number of non-"." tracks => number of rows
    total = orthologous = 0
    # Context manager so the handle is released even if a row fails to parse
    with open(blocksfile) as fp:
        for row in fp:
            atoms = row.rstrip().split("\t")
            # Column 0 is the pivot gene; every later column is one track,
            # with "." marking "no homolog in this track"
            hits = [x for x in atoms[1:] if x != "."]
            counts[len(hits)] += 1
            total += 1
            # The first track is the one reported as "orthologous". Rows that
            # carry no track column at all (e.g. blank lines) used to raise
            # IndexError here; they simply have no ortholog.
            if len(atoms) > 1 and atoms[1] != ".":
                orthologous += 1

    print("Total lines: {0}".format(total), file=sys.stderr)
    for i, n in sorted(counts.items()):
        print("Count {0}: {1}".format(i, percentage(n, total)), file=sys.stderr)

    print(file=sys.stderr)

    # Second table: same counts, but relative to rows with at least one hit
    matches = sum(n for i, n in counts.items() if i != 0)
    print(
        "Total lines with matches: {0}".format(percentage(matches, total)),
        file=sys.stderr,
    )
    for i, n in sorted(counts.items()):
        if i == 0:
            continue

        print("Count {0}: {1}".format(i, percentage(n, matches)), file=sys.stderr)

    print(file=sys.stderr)
    print(
        "Orthologous matches: {0}".format(percentage(orthologous, matches)),
        file=sys.stderr,
    )
The first column in + the output is the reference order, given in the bedfile. Then each column + next to it are separate 'tracks'. + + If --mergetandem=tandem_file is specified, tandem_file should have each + tandem cluster as one line, tab separated. + """ + p = OptionParser(mcscan.__doc__) + p.add_argument( + "--iter", default=100, type=int, help="Max number of chains to output" + ) + p.add_argument( + "--ascii", + default=False, + action="store_true", + help="Output symbols rather than gene names", + ) + p.add_argument( + "--Nm", default=10, type=int, help="Clip block ends to allow slight overlaps" + ) + p.add_argument( + "--trackids", action="store_true", help="Track block IDs in separate file" + ) + p.add_argument( + "--mergetandem", + default=None, + help="merge tandems genes in output acoording to PATH-TO-TANDEM_FILE, " + "cannot be used with --ascii", + ) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bedfile, anchorfile = args + ascii = opts.ascii + clip = opts.Nm + trackids = opts.trackids + ofile = opts.outfile + mergetandem = opts.mergetandem + bed = Bed(bedfile) + order = bed.order + + if trackids: + olog = ofile + ".tracks" + fwlog = must_open(olog, "w") + + if mergetandem: + assert not ascii + tandems = {} + for row in open(mergetandem): + row = row.split() + s = ";".join(row) + for atom in row: + tandems[atom] = s + + ac = AnchorFile(anchorfile) + ranges, block_pairs = ac.make_ranges(order, clip=clip) + + fw = must_open(ofile, "w") + + tracks = [] + print("Chain started: {0} blocks".format(len(ranges)), file=sys.stderr) + iteration = 0 + while ranges: + if iteration >= opts.iter: + break + + selected, score = range_chain(ranges) + tracks.append(selected) + selected = set(x.id for x in selected) + if trackids: + print(",".join(str(x) for x in sorted(selected)), file=fwlog) + + ranges = [x for x in ranges if x.id not in selected] + msg = "Chain {0}: score={1}".format(iteration, score) + 
def write_details(fw, details, bed):
    """
    Write one `accn<TAB>depth` row per gene to `fw`.

    `details` yields (start, stop, depth) triples; every index in
    range(start, stop) is looked up in `bed` and its `.accn` is reported
    together with that depth.
    """
    for start, stop, depth in details:
        depth_field = str(depth)
        for index in range(start, stop):
            row = (bed[index].accn, depth_field)
            print("\t".join(row), file=fw)
1:1, 1:2 ...") + p.set_beds() + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (anchorfile,) = args + qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) + depthfile = opts.depthfile + ac = AnchorFile(anchorfile) + qranges = [] + sranges = [] + blocks = ac.blocks + for ib in blocks: + q, s, t = zip(*ib) + q = [qorder[x] for x in q] + s = [sorder[x] for x in s] + qrange = (min(q)[0], max(q)[0]) + srange = (min(s)[0], max(s)[0]) + qranges.append(qrange) + sranges.append(srange) + if is_self: + qranges.append(srange) + + qgenome = op.basename(qbed.filename).split(".")[0] + sgenome = op.basename(sbed.filename).split(".")[0] + qtag = "Genome {0} depths".format(qgenome) + print("{}:".format(qtag), file=sys.stderr) + dsq, details = range_depth(qranges, len(qbed)) + if depthfile: + fw = open(depthfile, "w") + write_details(fw, details, qbed) + + if is_self: + return + + stag = "Genome {0} depths".format(sgenome) + print("{}:".format(stag), file=sys.stderr) + dss, details = range_depth(sranges, len(sbed)) + if depthfile: + write_details(fw, details, sbed) + fw.close() + logger.debug("Depth written to `{0}`.".format(depthfile)) + + if not opts.histogram: + return + + from jcvi.graphics.base import plt, quickplot_ax, savefig, normalize_axes + + # Plot two histograms one for query genome, one for subject genome + plt.figure(1, (6, 3)) + f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) + + xmax = opts.xmax or max(4, max(list(dsq.keys()) + list(dss.keys()))) + if opts.quota: + speak, qpeak = opts.quota.split(":") + qpeak, speak = int(qpeak), int(speak) + else: + qpeak = find_peak(dsq) + speak = find_peak(dss) + + qtag = "# of {} blocks per {} gene".format(sgenome, qgenome) + stag = "# of {} blocks per {} gene".format(qgenome, sgenome) + quickplot_ax( + ax1, + dss, + 0, + xmax, + stag, + ylabel="Percentage of genome", + highlight=range(1, speak + 1), + ) + quickplot_ax(ax2, dsq, 0, xmax, qtag, ylabel=None, 
def find_peak(data, cutoff=0.9):
    """
    Return the smallest key at which the running total of counts (taken in
    ascending key order) strictly exceeds `cutoff` times the grand total.
    Returns None when that never happens (e.g. `data` is empty).

    >>> find_peak({0: 27, 1: 71, 2: 1})
    1
    """
    threshold = cutoff * sum(data.values())
    running = 0
    for key in sorted(data):
        running += data[key]
        if running > threshold:
            return key
    return None
def scan(args):
    """
    %prog scan blastfile anchor_file [options]

    pull out syntenic anchors from blastfile based on single-linkage algorithm
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser), so it is intentionally left untouched.
    p = OptionParser(scan.__doc__)
    p.add_argument(
        "-n",
        "--min_size",
        dest="n",
        type=int,
        default=4,
        help="minimum number of anchors in a cluster",
    )
    p.add_argument(
        "--intrabound",
        default=300,
        type=int,
        help="Lower bound of intra-chromosomal blocks (only for self comparison)",
    )
    p.add_argument("--liftover", help="Scan BLAST file to find extra anchors")
    p.add_argument(
        "--liftover_dist",
        type=int,
        help="Distance to extend from liftover. Defaults to half of --dist",
    )
    p.set_stripnames()

    blast_file, anchor_file, dist, opts = add_arguments(p, args, dist=20)
    qbed, sbed, qorder, sorder, is_self = check_beds(blast_file, p, opts)

    intrabound = opts.intrabound
    filtered_blast = read_blast(
        blast_file, qorder, sorder, is_self=is_self, ostrip=False
    )

    logger.debug("Chaining distance = {0}".format(dist))

    clusters = batch_scan(
        filtered_blast,
        xdist=dist,
        ydist=dist,
        N=opts.n,
        is_self=is_self,
        intrabound=intrabound,
    )
    # Write the anchors inside a context manager: the handle is released even
    # if a bed lookup below raises, and the file is guaranteed to be flushed
    # and closed before summary() re-reads it.
    with open(anchor_file, "w") as fw:
        for cluster in clusters:
            print("###", file=fw)  # block separator of the anchors format
            for qi, si, score in cluster:
                query, subject = qbed[qi].accn, sbed[si].accn
                print("\t".join((query, subject, str(int(score)))), file=fw)

    summary([anchor_file])

    lo = opts.liftover
    if not lo:
        return anchor_file

    # Optional second pass: recruit extra pairs near the anchors from the
    # BLAST file given via --liftover, forwarding the relevant options
    dargs = ["--qbed=" + opts.qbed, "--sbed=" + opts.sbed]
    if not opts.strip_names:
        dargs += ["--no_strip_names"]
    liftover_dist = opts.liftover_dist or dist // 2
    dargs += ["--dist={}".format(liftover_dist)]
    newanchorfile = liftover([lo, anchor_file] + dargs)
    return newanchorfile
+ + geneA geneB + geneC geneD + """ + p = OptionParser(liftover.__doc__) + p.set_stripnames() + + blast_file, anchor_file, dist, opts = add_arguments(p, args) + qbed, sbed, qorder, sorder, is_self = check_beds(blast_file, p, opts) + + filtered_blast = read_blast( + blast_file, qorder, sorder, is_self=is_self, ostrip=opts.strip_names + ) + blast_to_score = dict(((b.qi, b.si), int(b.score)) for b in filtered_blast) + accepted = dict(((b.query, b.subject), str(int(b.score))) for b in filtered_blast) + + ac = AnchorFile(anchor_file) + all_hits = group_hits(filtered_blast) + all_anchors, anchor_to_block = read_anchors(ac, qorder, sorder) + + # select hits that are close to the anchor list + lifted = 0 + for chr_pair in sorted(all_anchors.keys()): + hits = np.array(all_hits[chr_pair]) + anchors = np.array(all_anchors[chr_pair]) + + if not len(hits): + continue + + for point, nearest in synteny_liftover(hits, anchors, dist): + qi, si = point[:2] + block_id = anchor_to_block[nearest] + query, subject = qbed[qi].accn, sbed[si].accn + score = blast_to_score[(qi, si)] + + ac.blocks[block_id].append((query, subject, str(score) + "L")) + lifted += 1 + + logger.debug("%d new pairs found (dist=%d).", lifted, dist) + newanchorfile = anchor_file.rsplit(".", 1)[0] + ".lifted.anchors" + if accepted: + ac.filter_blocks(accepted) + ac.print_to_file(filename=newanchorfile) + summary([newanchorfile]) + + return newanchorfile + + +if __name__ == "__main__": + main() diff --git a/jcvi/formats/__init__.py b/jcvi/formats/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/jcvi/formats/__main__.py b/jcvi/formats/__main__.py new file mode 100644 index 00000000..483e4cb9 --- /dev/null +++ b/jcvi/formats/__main__.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +Array of data parsers for bioinformatics file formats, such as: GFF3, BED, SAM/BAM, VCF, PSL, AGP, FASTA/FASTQ, BLAST, etc. 
class AGPLine(object):
    """
    One tab-separated record of an AGP file.

    A record is either a sequence component or a gap (component_type "N" or
    "U"); the two flavours populate different attributes:

    - component: component_id, component_beg, component_end, component_span,
      orientation
    - gap: gap_length, gap_type, linkage, linkage_evidence (list), with
      orientation fixed to "na" and a synthetic component_id

    When `validate` is true the record is checked with `validate()`; a failed
    check is logged as an error rather than raised.
    """

    def __init__(self, row, validate=True):

        atoms = row.split("\t")
        atoms[-1] = atoms[-1].strip()  # drop the trailing newline
        self.object = atoms[0]
        self.object_beg = int(atoms[1])
        self.object_end = int(atoms[2])
        self.object_span = self.object_end - self.object_beg + 1
        self.part_number = atoms[3]
        self.component_type = atoms[4]
        self.is_gap = self.component_type in ("N", "U")

        if not self.is_gap:
            self.component_id = atoms[5]
            self.component_beg = int(atoms[6])
            self.component_end = int(atoms[7])
            self.component_span = self.component_end - self.component_beg + 1
            self.orientation = atoms[8].strip()
        else:
            self.gap_length = int(atoms[5])
            self.gap_type = atoms[6]
            self.linkage = atoms[7]
            self.linkage_evidence = []
            # The evidence column is optional and ";"-separated when present
            if len(atoms) > 8:
                linkage_evidence = atoms[8].strip()
                if linkage_evidence:
                    self.linkage_evidence = linkage_evidence.split(";")
            self.orientation = "na"
            # Gaps have no real id; synthesize one from type and part number
            self.component_id = "{0}.gap{1:03d}".format(
                self.gap_type, int(self.part_number)
            )

        if validate:
            try:
                self.validate()
            except AssertionError as b:
                # Best effort: report the offending row but keep the record
                logger.error("%s\nerror when validating this line:\n%s", b, row)

        # Numeric strand; None for orientations other than "+", "-", "?"
        self.sign = {"+": 1, "-": -1, "?": 0}.get(self.orientation)

    def __str__(self):
        """Serialize back to a tab-separated AGP row (no trailing newline)."""

        fields = [
            self.object,
            self.object_beg,
            self.object_end,
            self.part_number,
            self.component_type,
        ]

        if not self.is_gap:
            fields += [
                self.component_id,
                self.component_beg,
                self.component_end,
                self.orientation,
            ]
        else:
            fields += [
                self.gap_length,
                self.gap_type,
                self.linkage,
                ";".join(self.linkage_evidence),
            ]

        return "\t".join(str(x) for x in fields)

    __repr__ = __str__

    @property
    def bedline(self):
        # bed formatted line
        gid = self.component_id if not self.is_gap else self.gap_type
        return "\t".join(
            (
                self.object,
                str(self.object_beg - 1),  # BED starts are 0-based
                str(self.object_end),
                gid,
                self.component_type,
                self.orientation,
            )
        )

    @property
    def bedextra(self):
        # extra lines for bed12
        # component_RGB only lists a few component types; fall back to black
        # for the others (e.g. "W") instead of raising KeyError
        rgb = component_RGB.get(self.component_type, "0,0,0")
        return "\t".join(
            str(x)
            for x in (
                self.object_beg - 1,
                self.object_end,
                rgb,
                1,
                str(self.object_end - self.object_beg + 1) + ",",
                "0,",
            )
        )

    @property
    def bed12line(self):
        # bed12 formatted line
        return self.bedline + "\t" + self.bedextra

    def gffline(self, gff_source="MGSC", gff_feat_type="golden_path_fragment"):
        # gff3 formatted line
        gff_feat_id = "".join(
            str(x) for x in (self.object, ".", "{0:03d}".format(int(self.part_number)))
        )
        attributes = ";".join(
            (
                "ID=" + gff_feat_id,
                "Name=" + self.component_id,
                "phase=" + self.component_type,
            )
        )
        # Gap records always get the "gap" feature type
        gff_feat_type = "gap" if self.component_type in ["N", "U"] else gff_feat_type
        orientation = "." if self.orientation == "na" else self.orientation

        return "\t".join(
            str(x)
            for x in (
                self.object,
                gff_source,
                gff_feat_type,
                str(self.object_beg),
                str(self.object_end),
                ".",
                orientation,
                ".",
                attributes,
            )
        )

    @property
    def isCloneGap(self):
        """True for a gap whose gap_type is anything other than "fragment"."""
        return self.is_gap and self.gap_type != "fragment"

    def validate(self):
        """
        Check the record against the module-level Valid_* tables and basic
        coordinate consistency. Raises AssertionError on the first violation.
        """
        assert (
            self.orientation in Valid_orientation
        ), "orientation must be one of {0}".format("|".join(Valid_orientation))
        assert (
            self.component_type in Valid_component_type
        ), "component_type must be one of {0}".format("|".join(Valid_component_type))
        assert (
            self.object_beg <= self.object_end
        ), "object_beg needs to be <= object_end"

        if not self.is_gap:
            assert (
                self.component_beg <= self.component_end
            ), "component_begin must be <= component_end"
            assert (
                self.object_span == self.component_span
            ), f"object_span ({self.object_span}) must be same as component_span ({self.component_span})"
        else:
            assert self.gap_length >= 1, "gap_length must be >= 1"
            assert (
                self.object_span == self.gap_length
            ), "object span (%d) must be same as gap_length (%d)" % (
                self.object_span,
                self.gap_length,
            )
            assert (
                self.gap_type in Valid_gap_type
            ), "gap_type must be one of {}, you have {}".format(
                "|".join(Valid_gap_type), self.gap_type
            )

            assert all(
                x in Valid_evidence for x in self.linkage_evidence
            ), "linkage_evidence must be one of {0}, you have {1}".format(
                "|".join(Valid_evidence), self.linkage_evidence
            )

            # An unlinked gap may only carry empty / "na" evidence
            if self.linkage == "no":
                assert not self.linkage_evidence or self.linkage_evidence[0] in (
                    "",
                    "na",
                ), "linkage no is incompatible with evidence {0}".format(
                    self.linkage_evidence
                )

    @classmethod
    def agpline(cls, tuple):
        """Build an unvalidated AGPLine from a sequence of field values."""
        return AGPLine("\t".join(str(x) for x in tuple), validate=False)

    @classmethod
    def cline(cls, object, cid, sizes, o):
        """
        Build a "W" component record covering all of `cid` (1..sizes[cid]) in
        orientation `o`. Object coordinates and part number are left as 0.
        """
        line = [object, 0, 0, 0]
        line += ["W", cid, 1, sizes[cid], o]
        return AGPLine.agpline(line)

    @classmethod
    def gline(cls, object, gap, unknown=100):
        """
        Build a scaffold gap record of length `gap`. Gaps shorter than
        `unknown` become type "U" with their length reset to `unknown`.
        Object coordinates and part number are left as 0.
        """
        line = [object, 0, 0, 0]
        gtype = "N"
        if gap < unknown:
            gtype = "U"
            gap = unknown  # Reset it to 100
        line += [gtype, gap, "scaffold", "yes", "paired-ends"]
        return AGPLine.agpline(line)
+ """ + rr = range(i + 1, len(self)) if south else range(i - 1, -1, -1) + a = self[i] + for ix in rr: + x = self[ix] + if x.object != a.object: + break + if x.is_gap: + if x.isCloneGap: + return x + else: + continue + else: + return x + return None + + def getNorthSouthClone(self, i): + """ + Returns the adjacent clone name from both sides. + """ + north = self.getAdjacentClone(i, south=False) + south = self.getAdjacentClone(i) + return north, south + + def transfer_header(self, fw=sys.stdout): + """ + transfer_header() copies header to a new file. + print_header() creates a new header. + """ + print("\n".join(self.header), file=fw) + + @classmethod + def print_header( + cls, fw=sys.stdout, organism=None, taxid=None, source=None, comment=None + ): + print(AGP_Version_Pragma, file=fw) + # these comments are entirely optional, modeled after maize AGP + if organism: + print("# ORGANISM: {0}".format(organism), file=fw) + if taxid: + print("# TAX_ID: {0}".format(taxid), file=fw) + if source: + print("# GENOME CENTER: {0}".format(source), file=fw) + if comment: + print("# COMMENT: {0}".format(comment), file=fw) + fields = ( + "object object_beg object_end part_number component_type " + "component_id/gap_length component_beg/gap_type " + "component_end/linkage orientation/linkage_evidence" + ) + print("# FIELDS: {0}".format(", ".join(fields.split())), file=fw) + + def rstats(self, object, bacs, components, scaffold_sizes, length): + from jcvi.utils.cbook import human_size + + nbacs = len(bacs) + nscaffolds = len(scaffold_sizes) + a50, l50, n50 = calculate_A50(scaffold_sizes) + l50 = human_size(l50) + length = human_size(length) + + return (object, nbacs, components, nscaffolds, n50, l50, length) + + def iter_object(self): + for ob, lines_with_same_ob in groupby(self, key=lambda x: x.object): + yield ob, list(lines_with_same_ob) + + def iter_paired_components(self): + for object, lines in self.iter_object(): + lines = [x for x in lines if not x.is_gap] + for a, b in 
pairwise(lines): + qreverse = a.orientation == "-" + yield a, b, qreverse + + def print_to_file(self, filename, index=True): + fw = open(filename, "w") + for a in self: + print(a, file=fw) + fw.close() + logger.debug("AGP file written to `%s`.", filename) + if index: + reindex([filename, "--inplace"]) + + def summary_one(self, object, lines): + bacs = set() + components = 0 + scaffold_sizes = [] + _scaffold_key = lambda x: x.is_gap and x.linkage == "no" + length = max(x.object_end for x in lines) + + for is_gap, scaffold in groupby(lines, key=_scaffold_key): + if is_gap: + continue + + scaffold = list(scaffold) + scaffold_size = 0 + for b in scaffold: + if b.is_gap: + scaffold_size += b.gap_length + else: + bacs.add(b.component_id) + components += 1 + scaffold_size += b.component_span + + scaffold_sizes.append(scaffold_size) + + return ( + self.rstats(object, bacs, components, scaffold_sizes, length), + (bacs, components, scaffold_sizes, length), + ) + + def summary_all(self): + + all_bacs = set() + all_scaffold_sizes = [] + all_components = 0 + all_length = 0 + for ob, lines in self.iter_object(): + s, bstats = self.summary_one(ob, lines) + yield s + + bacs, components, scaffold_sizes, length = bstats + all_components += components + all_bacs |= bacs + all_scaffold_sizes.extend(scaffold_sizes) + all_length += length + + yield self.rstats( + "Total", all_bacs, all_components, all_scaffold_sizes, all_length + ) + + def validate_one(self, object, lines): + object_beg = lines[0].object_beg + assert object_beg == 1, "object %s must start at 1 (instead of %d)" % ( + object, + object_beg, + ) + + for a, b in pairwise(lines): + assert ( + b.object_beg - a.object_end == 1 + ), "lines not continuous coords between:\n%s\n%s" % (a, b) + + def validate_all(self): + for ob, lines in self.iter_object(): + self.validate_one(ob, lines) + + def build_one(self, object, lines, fasta, fw, newagp=None): + """ + Construct molecule using component fasta sequence + """ + components = [] + 
+ total_bp = 0 + for line in lines: + + if line.is_gap: + seq = "N" * line.gap_length + if newagp: + print(line, file=newagp) + else: + seq = fasta.sequence( + dict( + chr=line.component_id, + start=line.component_beg, + stop=line.component_end, + strand=line.orientation, + ) + ) + # Check for dangling N's + if newagp: + trimNs(seq, line, newagp) + + components.append(seq) + total_bp += len(seq) + + if self.validate: + assert ( + total_bp == line.object_end + ), "cumulative base pairs (%d) does not match (%d)" % ( + total_bp, + line.object_end, + ) + + if not newagp: + rec = SeqRecord(Seq("".join(components)), id=object, description="") + SeqIO.write([rec], fw, "fasta") + if len(rec) > 1000000: + logger.debug("Write object %s to `%s`", object, fw.name) + + def build_all(self, componentfasta, targetfasta, newagp=None): + f = Fasta(componentfasta, index=False) + fw = open(targetfasta, "w") + + for ob, lines in self.iter_object(): + self.build_one(ob, lines, f, fw, newagp=newagp) + + @property + def graph(self): + from jcvi.algorithms.graph import BiGraph + + g = BiGraph() + for ob, lines in self.iter_object(): + components = [x for x in lines if not x.is_gap] + gaps = [x for x in lines if x.is_gap] + for i, (a, b) in enumerate(pairwise(components)): + g.add_edge( + a.component_id, + b.component_id, + a.orientation, + b.orientation, + length=gaps[i].gap_length, + ) + if len(components) == 1: # Singleton object + a = components[0] + g.add_node(a.component_id) + + return g + + def get_line(self, cid): + for i, a in enumerate(self): + if not a.is_gap and a.component_id == cid: + return i, a + return None, None + + # Update AGP on the fly + def delete_line(self, a, verbose=False): + ai, ax = self.get_line(a) + if ai is None: + return + + if verbose: + msg = "* Delete line:\n{0}".format(ax) + print(msg, file=sys.stderr) + + del self[ai] + + def delete_lines(self, lines, verbose=False): + deleted = set() + for r in lines: + if r.is_gap: + continue + cid = r.component_id + 
class TPFLine(object):
    """
    One whitespace-separated record of a TPF (Tiling Path File):
    component_id, object, orientation. A record whose first column is one of
    `Valid_gap_type` is a gap, and additionally gets a `gap_type` attribute.
    """

    def __init__(self, line):
        args = line.split()
        self.component_id = args[0]
        self.object = args[1]
        if self.is_gap:
            # For gap rows the first column names the gap type
            self.gap_type = self.component_id
        self.orientation = args[2]

    def __str__(self):
        # The attribute is `object`; the previous `self.object_id` did not
        # exist, so str() on any TPFLine raised AttributeError
        return "\t".join((self.component_id, self.object, self.orientation))

    @property
    def is_gap(self):
        """True when the first column is a recognized gap type."""
        return self.component_id in Valid_gap_type

    @property
    def isCloneGap(self):
        """True for a gap whose gap_type is anything other than "fragment"."""
        return self.is_gap and self.gap_type != "fragment"
def trimNs(seq, line, newagp):
    """
    Test if the sequences contain dangling N's on both sides. This component
    needs to be adjusted to the 'actual' sequence range.

    Args:
        seq: component sequence; it is iterated base by base and reversed by
            slicing, so a plain string works.
        line: AGP component line providing `object`, `component_id`,
            `component_beg`, `component_end`, `component_type` and
            `orientation`.
        newagp: writable handle that receives the replacement AGP rows.

    When nothing needs trimming, `line` is printed unchanged. Otherwise up to
    three rows are written: a leading "fragment" gap, the shortened component
    and a trailing "fragment" gap. These rows carry `0` in the three columns
    that follow the object name.
    """
    start, end = line.component_beg, line.component_end
    size = end - start + 1
    leftNs, rightNs = 0, 0
    lid, lo = line.component_id, line.orientation
    for s in seq:
        if s in "nN":
            leftNs += 1
        else:
            break
    for s in seq[::-1]:
        if s in "nN":
            rightNs += 1
        else:
            break

    # On the minus strand the leading N's of `seq` are taken off the component
    # end and the trailing N's off the component start.
    if lo == "-":
        trimstart = start + rightNs
        trimend = end - leftNs
    else:
        trimstart = start + leftNs
        trimend = end - rightNs

    trimrange = (trimstart, trimend)
    oldrange = (start, end)

    if trimrange != oldrange:
        logger.debug("{0} trimmed of N's: {1} => {2}".format(lid, oldrange, trimrange))

        if leftNs:
            print(
                "\t".join(
                    str(x)
                    for x in (line.object, 0, 0, 0, "N", leftNs, "fragment", "yes", "")
                ),
                file=newagp,
            )
        # Coordinates are inclusive, so trimstart == trimend is a valid
        # one-base component; `>` would silently drop it.
        if trimend >= trimstart:
            print(
                "\t".join(
                    str(x)
                    for x in (
                        line.object,
                        0,
                        0,
                        0,
                        line.component_type,
                        lid,
                        trimstart,
                        trimend,
                        lo,
                    )
                ),
                file=newagp,
            )
        # When the whole component is N's the leading gap already covers it
        if rightNs and rightNs != size:
            print(
                "\t".join(
                    str(x)
                    for x in (line.object, 0, 0, 0, "N", rightNs, "fragment", "yes", "")
                ),
                file=newagp,
            )
    else:
        print(line, file=newagp)
def fromcsv(args):
    """
    %prog fromcsv contigs.fasta map.csv map.agp

    Convert csv which contains list of scaffolds/contigs to AGP file.
    """
    import csv
    from jcvi.formats.sizes import Sizes

    p = OptionParser(fromcsv.__doc__)
    p.add_argument("--evidence", default="map", help="Linkage evidence to add in AGP")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    contigsfasta, mapcsv, mapagp = args
    sizes = Sizes(contigsfasta).mapping
    o = OO()
    with open(mapcsv, newline="") as fp:
        reader = csv.reader(fp)
        next(reader, None)  # Header; tolerate an empty file
        for row in reader:
            if not row:
                continue  # blank line
            if len(row) == 2:
                obj, ctg = row
                strand = "?"
            elif len(row) == 3:
                obj, ctg, strand = row
            else:
                # Falling through would silently re-add the previous row's
                # values (or raise NameError on the first data row).
                logger.error(
                    "Skip line %d of `%s`: expect 2 or 3 columns, got %d",
                    reader.line_num,
                    mapcsv,
                    len(row),
                )
                continue
            size = sizes[ctg]
            o.add(obj, ctg, size, strand)

    fwagp = must_open(mapagp, "w")
    try:
        o.write_AGP(
            fwagp, gapsize=100, gaptype="scaffold", phases={}, evidence=opts.evidence
        )
    finally:
        fwagp.close()
def map_one_scaffold_1way(scaffold, genome, orientation="+"):
    """
    Look for `scaffold` as an exact substring of the sequences in `genome`.

    Args:
        scaffold: sequence to locate; converted with `str()`. When
            `orientation` is "-", its `reverse_complement()` is searched
            instead.
        genome: mapping of sequence name to a string-like sequence supporting
            `find()`. Objects exposing `iteritems()` and plain dicts are both
            accepted.
        orientation: "+" or "-", returned unchanged as the third element.

    Returns:
        `(name, index, orientation)` for the first sequence containing the
        scaffold, with `index` as reported by `find()`; `(-1, -1, orientation)`
        when nothing matches.
    """
    if orientation == "-":
        scaffold = scaffold.reverse_complement()

    scaffold = str(scaffold)
    # Plain dicts have no iteritems() on Python 3; fall back to items()
    pairs = genome.iteritems() if hasattr(genome, "iteritems") else genome.items()
    for obj_name, obj in pairs:
        obj_idx = obj.find(scaffold)
        if obj_idx != -1:
            return obj_name, obj_idx, orientation
    return -1, -1, orientation  # unmapped scaffolds
def format(args):
    """
    %prog format oldagpfile newagpfile

    Reformat AGP file. --switch will replace the ids in the AGP file.
    """
    from jcvi.formats.base import DictFile

    p = OptionParser(format.__doc__)
    p.add_argument("--switchcomponent", help="Switch component id based on")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    oldagpfile, newagpfile = args
    # Without --switchcomponent there is nothing to rename. An empty mapping
    # keeps the membership test below valid; testing `in None` raises TypeError.
    if opts.switchcomponent:
        switchcomponent = DictFile(opts.switchcomponent)
    else:
        switchcomponent = {}

    agp = AGP(oldagpfile)
    nconverts = 0
    # Context manager guarantees the rewritten file is flushed and closed
    with open(newagpfile, "w") as fw:
        for i, a in enumerate(agp):
            if not a.is_gap and a.component_id in switchcomponent:
                oldid = a.component_id
                newid = switchcomponent[a.component_id]
                a.component_id = newid
                logger.debug(
                    "Covert {0} to {1} on line {2}".format(oldid, newid, i + 1)
                )
                nconverts += 1
            print(a, file=fw)

    logger.debug("Total converted records: {0}".format(nconverts))
def swap(args):
    """
    %prog swap agpfile

    Swap objects and components. Will add gap lines. This is often used in
    conjunction with formats.chain.fromagp() to convert between different
    coordinate systems.
    """
    from jcvi.utils.range import range_interleave

    p = OptionParser(swap.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (agpfile,) = args

    agp = AGP(agpfile, nogaps=True, validate=False)
    # groupby below needs lines of the same component to be contiguous
    agp.sort(key=lambda x: (x.component_id, x.component_beg))

    newagpfile = agpfile.rsplit(".", 1)[0] + ".swapped.agp"
    # Context manager closes the output even if a line fails to serialize
    with open(newagpfile, "w") as fw:
        agp.transfer_header(fw)
        for cid, aa in groupby(agp, key=(lambda x: x.component_id)):
            aa = list(aa)
            aranges = [(x.component_id, x.component_beg, x.component_end) for x in aa]
            gaps = range_interleave(aranges)
            for a, g in zip_longest(aa, gaps):
                # The old component becomes the object and vice versa; the new
                # component range is the old object range.
                a.object, a.component_id = a.component_id, a.object
                a.component_beg = a.object_beg
                a.component_end = a.object_end
                print(a, file=fw)
                if not g:
                    continue

                # Fill the space up to the next range with a gap line;
                # coordinates are placeholders fixed by reindex() below.
                aline = [cid, 0, 0, 0]
                gseq, ga, gb = g
                cspan = gb - ga + 1
                aline += ["N", cspan, "fragment", "yes"]
                print("\t".join(str(x) for x in aline), file=fw)

    # Reindex
    reindex([newagpfile, "--inplace"])

    return newagpfile
def cut(args):
    """
    %prog cut agpfile bedfile

    Cut at the boundaries of the ranges in the bedfile.
    """
    p = OptionParser(cut.__doc__)
    p.add_argument("--sep", default=".", help="Separator for splits")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    agpfile, bedfile = args
    sep = opts.sep

    agp = AGP(agpfile)
    bed = Bed(bedfile)
    simple_agp = agp.order
    newagpfile = agpfile.replace(".agp", ".cut.agp")

    # Replacement AGP rows, keyed by the component they split
    agp_fixes = defaultdict(list)
    for component, intervals in bed.sub_beds():
        _, agpline = simple_agp[component]
        obj = agpline.object
        component_span = agpline.component_span
        orientation = agpline.orientation

        # `assert x, y` only tests x and treats y as the failure message, so
        # both coordinates have to be combined explicitly.
        assert (
            agpline.component_beg and agpline.component_end
        ), "Component {0} lacks coordinates".format(component)

        cuts = set()
        for iv in intervals:
            start, end = iv.start, iv.end
            end -= 1

            assert start <= end
            cuts.add(start)
            cuts.add(end)

        cuts.add(0)
        cuts.add(component_span)
        cuts = sorted(cuts)

        sum_of_spans = 0
        for idx, (left, right) in enumerate(pairwise(cuts)):
            oid = obj + "{0}{1}".format(sep, idx + 1)
            # Object coordinates are placeholders, filled in by reindex()
            row = [oid, 0, 0, 0, "D", component, left + 1, right, orientation]
            sum_of_spans += right - left
            agp_fixes[component].append("\t".join(str(x) for x in row))

        # The pieces must tile the component exactly
        assert component_span == sum_of_spans

    # Finally write the cut agp
    with open(newagpfile, "w") as fw:
        for a in agp:
            if not a.is_gap and a.component_id in agp_fixes:
                print("\n".join(agp_fixes[a.component_id]), file=fw)
            else:
                print(a, file=fw)

    # Reindex
    reindex([newagpfile, "--inplace"])

    return newagpfile
+ """ + p = OptionParser(mask.__doc__) + p.add_argument( + "--splitobject", + default=False, + action="store_true", + help="Create new names for object", + ) + p.add_argument( + "--splitcomponent", + default=False, + action="store_true", + help="Create new names for component", + ) + p.add_argument( + "--splitsingle", + default=False, + action="store_true", + help="Do not remove base on single point", + ) + p.add_argument( + "--gaptype", + default="scaffold", + help="Masked region has gap type of", + ) + p.add_argument( + "--noretain", + default=False, + action="store_true", + help="Do not retain old names for non-split objects", + ) + p.add_argument("--sep", default=".", help="Separator for splits") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(p.print_help()) + + agpfile, bedfile = args + gaptype = opts.gaptype + splitobject = opts.splitobject + splitcomponent = opts.splitcomponent + sep = opts.sep + + assert not ( + splitobject and splitcomponent + ), "Options --splitobject and --splitcomponent conflict" + + agp = AGP(agpfile) + bed = Bed(bedfile) + simple_agp = agp.order + # agp lines to replace original ones, keyed by the component + agp_fixes = defaultdict(list) + + newagpfile = agpfile.replace(".agp", ".masked.agp") + fw = open(newagpfile, "w") + + if splitcomponent: + componentindex = defaultdict(int) + + for component, intervals in bed.sub_beds(): + i, a = simple_agp[component] + object = a.object + orientation = a.orientation + + assert a.component_beg, a.component_end + arange = a.component_beg, a.component_end + + # Make sure `ivs` contain DISJOINT ranges, and located within `arange` + ivs = [] + points = set() + for i in intervals: + start, end = i.start, i.end + if opts.splitsingle: + points.add(start) + iv = range_intersect(arange, (start, end)) + if iv is not None: + ivs.append(iv) + + # Sort the ends of `ivs` as well as the arange + arange = a.component_beg - 1, a.component_end + 1 + endpoints = sorted(flatten(ivs + [arange])) 
def reindex(args):
    """
    %prog reindex agpfile

    assume the component line order is correct, modify coordinates, this is
    necessary mostly due to manual edits (insert/delete) that disrupts
    the target coordinates.
    """
    p = OptionParser(reindex.__doc__)
    p.add_argument(
        "--nogaps",
        default=False,
        action="store_true",
        help="Remove all gap lines",
    )
    p.add_argument(
        "--inplace",
        default=False,
        action="store_true",
        help="Replace input file",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        # Non-zero exit status on a usage error, like the sibling commands
        sys.exit(not p.print_help())

    (agpfile,) = args
    inplace = opts.inplace
    agp = AGP(agpfile, validate=False)
    pf = agpfile.rsplit(".", 1)[0]
    newagpfile = pf + ".reindexed.agp"

    with open(newagpfile, "w") as fw:
        agp.transfer_header(fw)
        for chr, chr_agp in groupby(agp, lambda x: x.object):
            object_beg = 1
            part_number = 0
            for b in chr_agp:
                if opts.nogaps and b.is_gap:
                    continue

                # Number only the lines actually written, so dropping gaps
                # does not leave holes in the part numbers.
                part_number += 1
                b.object_beg = object_beg
                b.part_number = part_number

                if b.is_gap:
                    b.object_end = object_beg + b.gap_length - 1
                else:
                    b.object_end = object_beg + b.component_span - 1

                object_beg = b.object_end + 1

                print(str(b), file=fw)

    # Last step: validate the new agpfile
    AGP(newagpfile, validate=True)

    if inplace:
        shutil.move(newagpfile, agpfile)
        logger.debug("Rename file `{0}` to `{1}`".format(newagpfile, agpfile))
        newagpfile = agpfile

    return newagpfile
# One or more digits, so that e.g. "chromosome 12" is not truncated to "1"
chr_pat = re.compile(r"chromosome (\d+)", re.I)
# The clone name ends in a digit and is followed by a space, a comma or the
# end of the description
clone_pat = re.compile(r"clone ([^, ]*\d)(?:[ ,]|$)", re.I)


def get_clone(rec):
    """
    Extract the chromosome number and clone name from a definition line.

    `rec` is either a record exposing `.description` or the description string
    itself. Returns a `(chr, clone)` tuple of strings; an element is "" when
    the corresponding pattern does not match.

    >>> get_clone("Medicago truncatula chromosome 2 clone mth2-48e18")
    ('2', 'mth2-48e18')
    """
    s = rec if isinstance(rec, str) else rec.description
    chr_match = re.search(chr_pat, s)
    clone_match = re.search(clone_pat, s)
    chr = chr_match.group(1) if chr_match else ""
    clone = clone_match.group(1) if clone_match else ""

    return chr, clone
def tpf(args):
    """
    %prog tpf agpfile

    Print out a list of ids, one per line. Also known as the Tiling Path.

    AC225490.9 chr6

    Can optionally output scaffold gaps.
    """
    p = OptionParser(tpf.__doc__)
    p.add_argument(
        "--noversion",
        default=False,
        action="store_true",
        help="Remove trailing accession versions",
    )
    p.add_argument(
        "--gaps",
        default=False,
        action="store_true",
        help="Include gaps in the output",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (agpfile,) = args
    for record in AGP(agpfile):
        target = record.object

        # Gap lines: reported only on request, and only for clone gaps
        if record.is_gap:
            if opts.gaps and record.isCloneGap:
                fields = (record.gap_type, target, "na")
                print("\t".join(fields))
            continue

        # Component lines: accession, object, orientation
        accession = record.component_id
        strand = record.orientation
        if opts.noversion:
            # Drop everything after the last "." (the accession version)
            accession = accession.rsplit(".", 1)[0]

        fields = (accession, target, strand)
        print("\t".join(fields))
def extendbed(args):
    """
    %prog extendbed agpfile componentfasta

    Extend the components to fill the component range. For example, a bed/gff3 file
    that was converted from the agp will contain only the BAC sequence intervals
    that are 'represented' - sometimes leaving the 5` and 3` out (those that
    overlap with adjacent sequences). This script fills up those ranges,
    potentially to make graphics for tiling path.
    """
    from jcvi.formats.sizes import Sizes

    p = OptionParser(extendbed.__doc__)
    p.add_argument(
        "--nogaps",
        default=False,
        action="store_true",
        help="Do not print bed lines for gaps",
    )
    p.add_argument(
        "--bed12",
        default=False,
        action="store_true",
        help="Produce bed12 formatted output",
    )
    p.add_argument(
        "--gff",
        default=False,
        action="store_true",
        help="Produce gff3 formatted output. By default, ignores " + " AGP gap lines.",
    )
    p.set_outfile()

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    # If output format is GFF3, ignore AGP gap lines.
    if opts.gff:
        opts.nogaps = True

    agpfile, fastafile = args
    agp = AGP(agpfile)
    fw = must_open(opts.outfile, "w")
    try:
        if opts.gff:
            print("##gff-version 3", file=fw)

        ranges = defaultdict(list)
        thickCoords = []  # These are the coordinates before modify ranges
        # Make the first pass to record all the component ranges
        for a in agp:
            thickCoords.append((a.object_beg, a.object_end))
            if a.is_gap:
                continue
            ranges[a.component_id].append(a)

        # Modify the ranges
        sizes = Sizes(fastafile).mapping
        for accn, rr in ranges.items():
            alen = sizes[accn]

            # First line of the component: extend its start by the unused
            # bases at the leading end (depends on orientation)
            a = rr[0]
            if a.orientation == "+":
                hang = a.component_beg - 1
            else:
                hang = alen - a.component_end
            a.object_beg -= hang

            # Last line of the component: extend its end likewise
            a = rr[-1]
            if a.orientation == "+":
                hang = alen - a.component_end
            else:
                hang = a.component_beg - 1
            a.object_end += hang

        for a, (ts, te) in zip(agp, thickCoords):
            if opts.nogaps and a.is_gap:
                continue
            if opts.bed12:
                # Extended range first, then restore the original coordinates
                # before generating the extra bed12 columns
                line = a.bedline
                a.object_beg, a.object_end = ts, te
                line += "\t" + a.bedextra
                print(line, file=fw)
            elif opts.gff:
                print(a.gffline(), file=fw)
            else:
                print(a.bedline, file=fw)
    finally:
        # Mirror bed(): flush and release the output handle
        fw.close()
+ """ + from jcvi.graphics.histogram import loghistogram + + p = OptionParser(gaps.__doc__) + p.add_argument( + "--merge", + dest="merge", + default=False, + action="store_true", + help="Merge adjacent gaps (to conform to AGP specification)", + ) + p.add_argument( + "--header", + default=False, + action="store_true", + help="Produce an AGP header", + ) + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + merge = opts.merge + (agpfile,) = args + + if merge: + merged_agpfile = agpfile.replace(".agp", ".merged.agp") + fw = open(merged_agpfile, "w") + + agp = AGP(agpfile) + sizes = [] + data = [] # store merged AGPLine's + priorities = ("centromere", "telomere", "scaffold", "contig", "clone", "fragment") + + for is_gap, alines in groupby(agp, key=lambda x: (x.object, x.is_gap)): + alines = list(alines) + is_gap = is_gap[1] + if is_gap: + gap_size = sum(x.gap_length for x in alines) + gap_types = set(x.gap_type for x in alines) + for gtype in ("centromere", "telomere"): + if gtype in gap_types: + gap_size = gtype + + sizes.append(gap_size) + b = deepcopy(alines[0]) + b.object_beg = min(x.object_beg for x in alines) + b.object_end = max(x.object_end for x in alines) + b.gap_length = sum(x.gap_length for x in alines) + + assert b.gap_length == b.object_end - b.object_beg + 1 + b.component_type = "U" if b.gap_length == 100 else "N" + + gtypes = [x.gap_type for x in alines] + for gtype in priorities: + if gtype in gtypes: + b.gap_type = gtype + break + + linkages = [x.linkage for x in alines] + for linkage in ("no", "yes"): + if linkage in linkages: + b.linkage = linkage + break + + alines = [b] + + data.extend(alines) + + loghistogram(sizes) + + if opts.header: + AGP.print_header( + fw, + organism="Medicago truncatula", + taxid=3880, + source="J. 
def tidy(args):
    """
    %prog tidy agpfile componentfasta

    Given an agp file, run through the following steps:
    1. Trim components with dangling N's
    2. Merge adjacent gaps
    3. Trim gaps at the end of an object
    4. Reindex the agp

    Final output is in `.tidy.agp`.
    """
    p = OptionParser(tidy.__doc__)
    p.add_argument(
        "--nogaps",
        default=False,
        action="store_true",
        help="Remove all gap lines",
    )
    opts, args = p.parse_args(args)

    if len(args) != 2:
        # Non-zero exit status on a usage error, like the sibling commands
        sys.exit(not p.print_help())

    agpfile, componentfasta = args
    originalagpfile = agpfile

    # Step 1: Trim terminal Ns
    tmpfasta = "tmp.fasta"
    trimmed_agpfile = build(
        [agpfile, componentfasta, tmpfasta, "--newagp", "--novalidate"]
    )
    cleanup(tmpfasta)
    agpfile = trimmed_agpfile
    agpfile = reindex([agpfile, "--inplace"])

    # Step 2: Merge adjacent gaps
    merged_agpfile = gaps([agpfile, "--merge"])
    cleanup(agpfile)

    # Step 3: Trim gaps at the end of object
    agpfile = merged_agpfile
    agp = AGP(agpfile)
    newagpfile = agpfile.replace(".agp", ".fixed.agp")
    with open(newagpfile, "w") as fw:
        for object, a in groupby(agp, key=lambda x: x.object):
            a = list(a)
            if a[0].is_gap:
                g, a = a[0], a[1:]
                logger.debug(
                    "Trim beginning Ns({0}) of {1}".format(g.gap_length, object)
                )
            if a and a[-1].is_gap:
                a, g = a[:-1], a[-1]
                logger.debug(
                    "Trim trailing Ns({0}) of {1}".format(g.gap_length, object)
                )
            if not a:
                # The object held nothing but gaps; printing the empty join
                # would write a blank line into the AGP file.
                continue
            print("\n".join(str(x) for x in a), file=fw)
    cleanup(agpfile)

    # Step 4: Final reindex
    agpfile = newagpfile
    reindex_opts = [agpfile, "--inplace"]
    if opts.nogaps:
        reindex_opts += ["--nogaps"]
    agpfile = reindex(reindex_opts)

    tidyagpfile = originalagpfile.replace(".agp", ".tidy.agp")
    shutil.move(agpfile, tidyagpfile)

    logger.debug("File written to `%s`.", tidyagpfile)
    return tidyagpfile
class LineFile(BaseFile, list):
    """
    Base class for parsers of line-oriented files.

    The instance is itself a list; subclasses decide what gets appended to it.
    When `load` is true, the stripped lines whose first character differs from
    `comment` are additionally stored in `self.lines`.
    """

    def __init__(self, filename, comment=None, load=False):
        super().__init__(filename)
        if not load:
            return

        kept = []
        for text in must_open(filename):
            # Compare the raw first character, before any stripping
            if text[0] != comment:
                kept.append(text.strip())
        self.lines = kept
        logger.debug("Load %d lines from `%s`", len(self.lines), filename)
+ """ + + def __init__( + self, + filename, + keypos=0, + valuepos=1, + delimiter=None, + strict=True, + keycast=None, + cast=None, + ): + BaseFile.__init__(self, filename) + OrderedDict.__init__(self) + self.keypos = keypos + + fp = must_open(filename) + ncols = (max(keypos, valuepos) if valuepos else keypos) + 1 + thiscols = 0 + for lineno, row in enumerate(fp): + row = row.rstrip() + atoms = row.split(delimiter) + atoms = [x.strip() for x in atoms] + thiscols = len(atoms) + if thiscols < ncols: + action = "Aborted" if strict else "Skipped" + + msg = "Must contain >= {0} columns. {1}.\n".format(ncols, action) + msg += " --> Line {0}: {1}".format(lineno + 1, row) + logger.error(msg) + if strict: + sys.exit(1) + else: + continue + + key = atoms[keypos] + value = atoms[valuepos] if (valuepos is not None) else atoms + if keycast: + key = keycast(key) + if cast: + value = cast(value) + self[key] = value + + assert thiscols, "File empty" + self.ncols = thiscols + logger.debug("Imported %d records from `%s`", len(self), filename) + + @classmethod + def num_columns(cls, filename, delimiter=None): + """Return the column number of the csv file. + + Args: + filename (str): Path to the file. + delimiter (str, optional): Separator of the csv file. Defaults to None. + + Returns: + int: Column number. 
class SetFile(BaseFile, set):
    """
    A set of the identifiers found in a text file.

    Blank lines are ignored. Every other line is split on `delimiter` and the
    stripped fields are added to the set; when `column` is >= 0 only the field
    at that index is added.
    """

    def __init__(self, filename, column=-1, delimiter=None):
        super().__init__(filename)
        # Use a context manager so the handle is released once parsing is done
        with open(filename) as fp:
            for row in fp:
                if not row.strip():
                    continue
                keys = [x.strip() for x in row.split(delimiter)]
                if column >= 0:
                    keys = [keys[column]]
                self.update(keys)
if ext in FastaExt: + format = "fasta" + elif ext in FastqExt: + format = "fastq" + else: + format = "txt" + return format + + def _batch_iterator(self, N=1): + """Returns N lists of records. + + This can be used on any iterator, for example to batch up + SeqRecord objects from Bio.SeqIO.parse(...), or to batch + Alignment objects from Bio.AlignIO.parse(...), or simply + lines from a file handle. + + This is a generator function, and it returns lists of the + entries from the supplied iterator. Each list will have + batch_size entries, although the final list may be shorter. + """ + batch_size = math.ceil(self.num_records / float(N)) + handle = self._open(self.filename) + while True: + batch = list(islice(handle, batch_size)) + if not batch: + break + yield batch + + @classmethod + def get_names(cls, filename, N): + root, ext = op.splitext(op.basename(filename)) + + names = [] + pad0 = len(str(int(N - 1))) + for i in range(N): + name = "{0}_{1:0{2}d}{3}".format(root, i, pad0, ext) + names.append(name) + + return names + + def write(self, fw, batch): + if self.klass == "seqio": + SeqIO.write(batch, fw, self.format) + elif self.klass == "clust": + for b in batch: + print(b, file=fw) + else: + for line in batch: + fw.write(line) + return len(batch) + + def split(self, N, force=False): + """ + There are two modes of splitting the records + - batch: splitting is sequentially to records/N chunks + - cycle: placing each record in the splitted files and cycles + + use `cycle` if the len of the record is not evenly distributed + """ + mode = self.mode + assert mode in ("batch", "cycle", "optimal") + logger.debug("set split mode=%s", mode) + + self.names = self.__class__.get_names(self.filename, N) + if self.outputdir: + self.names = [op.join(self.outputdir, x) for x in self.names] + + if not need_update(self.filename, self.names) and not force: + logger.error("file %s already existed, skip file splitting", self.names[0]) + return + + filehandles = [open(x, "w") for x in 
def longest_unique_prefix(query, targets, remove_self=True):
    """
    Find the longest unique prefix for filename, when compared against a list of
    filenames. Useful to simplify file names in a pool of files. See usage in
    formats.fasta.pool().

    Only base names are compared. The result is the part of `query` shared
    with its closest target, plus one more character to tell them apart.

    Args:
        query: File name (or path) to abbreviate.
        targets: File names (or paths) to compare against.
        remove_self: When true, one exact match of `query` in `targets` is
            ignored.

    Returns:
        The abbreviated base name. When there is nothing left to compare
        against, this is the first character of the base name.
    """
    query = op.basename(query)
    targets = [op.basename(x) for x in targets]
    prefix_lengths = [len(op.commonprefix([query, name])) for name in targets]
    if remove_self and len(query) in prefix_lengths:
        prefix_lengths.remove(len(query))
    # `default` covers an empty pool (no targets, or only the query itself),
    # where a bare max() would raise ValueError
    longest_length = max(prefix_lengths, default=0)
    return query[: longest_length + 1]
def timestamp():
    """
    Return the current local date as a compact string: year, then
    zero-padded month and day (e.g. `20230219`).
    """
    from datetime import datetime as dt

    # Read the clock once; separate now() calls for year, month and day could
    # straddle midnight and produce a date that never existed.
    now = dt.now()
    return "{0}{1:02d}{2:02d}".format(now.year, now.month, now.day)
def read_until(handle, start):
    """
    Advance `handle` to the next line that begins with `start`.

    The matching line is not consumed: the handle is rewound to the beginning
    of that line so the caller reads it next. If no line matches, the handle
    is left at end of file.
    """
    while True:
        bookmark = handle.tell()
        text = handle.readline()
        if not text:
            # End of file reached without finding the tag
            return
        if text.startswith(start):
            # Put the tag line back for the caller
            handle.seek(bookmark)
            return
def is_number(s, cast=float):
    """
    Check if a string is a number. Use cast=int to check if s is an integer.

    Args:
        s: Value to test, normally a string.
        cast: Callable used for the conversion attempt (default `float`).

    Returns:
        True when `cast(s)` succeeds, False when it raises ValueError or
        TypeError.
    """
    try:
        cast(s)  # for int, long and float
    except (ValueError, TypeError):
        # TypeError covers non-string input such as None, which the numeric
        # constructors reject with a different exception than bad text
        return False

    return True
def truncate(args):
    """
    %prog truncate linecount filename

    Remove linecount lines from the end of the file in-place. Borrowed from:

    """
    # NOTE: the docstring above is the usage text shown by OptionParser.
    #
    # Returns the number of removed lines, or -1 when the file is left as is.
    p = OptionParser(truncate.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    number, filename = args
    number = int(number)
    count = 0

    # Binary mode is required for relative seeks; every read therefore yields
    # `bytes`, and the newline must be compared as bytes too.
    with open(filename, "r+b") as f:
        f.seek(0, os.SEEK_END)
        # Walk backwards one byte at a time, counting newlines from the end
        while f.tell() > 0:
            f.seek(-1, os.SEEK_CUR)
            char = f.read(1)
            if char == b"\n":
                count += 1
            if count == number + 1:
                # The position is just past the (number + 1)-th newline from
                # the end: everything after it is the tail to drop
                f.truncate()
                print(
                    "Removed {0} lines from end of file".format(number),
                    file=sys.stderr,
                )
                return number

            f.seek(-1, os.SEEK_CUR)

    if count < number + 1:
        print("No change: requested removal would leave empty file", file=sys.stderr)
        return -1
+ """ + p = OptionParser(unflatten.__doc__) + p.add_argument("--sep", default=",", help="Separator when joining ids") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (idsfile,) = args + ids = must_open(idsfile).read().split() + with must_open(opts.outfile, "w") as fw: + print(opts.sep.join(ids), file=fw) + + +def group(args): + """ + %prog group tabfile > tabfile.grouped + + Given a tab-delimited file, either group all elements within the file or + group the elements in the value column(s) based on the key (groupby) column + + For example, convert this | into this + --------------------------------------- + a 2 3 4 | a,2,3,4,5,6 + a 5 6 | b,7,8 + b 7 8 | c,9,10,11 + c 9 | + c 10 11 | + + If grouping by a particular column, + convert this | into this: + --------------------------------------------- + a 2 3 4 | a 2,5 3,6 4 + a 5 6 | b 7 8 + b 7 8 | c 9,10 11 + c 9 | + c 10 11 | + + By default, it uniqifies all the grouped elements + """ + from jcvi.utils.cbook import AutoVivification + from jcvi.utils.grouper import Grouper + + p = OptionParser(group.__doc__) + p.set_sep() + p.add_argument( + "--groupby", default=None, type=int, help="Default column to groupby" + ) + p.add_argument( + "--groupsep", default=",", help="Separator to join the grouped elements" + ) + p.add_argument( + "--nouniq", + default=False, + action="store_true", + help="Do not uniqify the grouped elements", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (tabfile,) = args + sep = opts.sep + groupby = opts.groupby + groupsep = opts.groupsep + + cols = [] + grouper = AutoVivification() if groupby is not None else Grouper() + fp = must_open(tabfile) + for row in fp: + row = row.rstrip() + atoms = row.split(sep) + if groupby is not None: + if len(cols) < len(atoms): + cols = [x for x in range(len(atoms))] + if groupby not in cols: + logger.error("groupby col index `%s` is out of range", 
def reorder(args):
    """
    %prog reorder tabfile 1,2,4,3 > newtabfile

    Reorder columns in tab-delimited files. The above syntax will print out a
    new file with col-1,2,4,3 from the old file.
    """
    import csv

    p = OptionParser(reorder.__doc__)
    p.set_sep()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    tabfile, order = args
    sep = opts.sep
    # Columns are given 1-based on the command line; convert to list indices
    picks = [int(token) - 1 for token in order.split(",")]
    source = csv.reader(must_open(tabfile), delimiter=sep)
    sink = csv.writer(sys.stdout, delimiter=sep)
    sink.writerows([fields[idx] for idx in picks] for fields in source)
def join(args):
    """
    %prog join file1.txt(pivotfile) file2.txt ..

    Join tabular-like files based on common column.
    --column specifies the column index to pivot on.
      Use comma to separate multiple values if the pivot column is different
      in each file. Maintain the order in the first file.
    --sep specifies the column separators, default to tab.
      Use comma to separate multiple values if the column separator is different
      in each file.
    """
    # NOTE: the docstring above is the usage text shown by OptionParser.
    p = OptionParser(join.__doc__)
    p.add_argument(
        "--column", default="0", help="0-based column id, multiple values allowed"
    )
    p.set_sep(multiple=True)
    p.add_argument(
        "--noheader", default=False, action="store_true", help="Do not print header"
    )
    p.add_argument("--na", default="na", help="Value for unjoined data")
    p.add_argument(
        "--compact",
        default=False,
        action="store_true",
        help="Do not repeat pivotal columns in output",
    )
    p.add_argument(
        "--keysep",
        default=",",
        help="specify separator joining multiple elements in the key column"
        + " of the pivot file",
    )
    p.set_outfile()

    opts, args = p.parse_args(args)
    nargs = len(args)

    keysep = opts.keysep
    compact = opts.compact

    if len(args) < 2:
        sys.exit(not p.print_help())

    na = opts.na
    c = opts.column
    if "," in c:
        cc = [int(x) for x in c.split(",")]
    else:
        cc = [int(c)] * nargs

    assert len(cc) == nargs, "Column index number != File number"

    s = opts.sep
    if "," in s:
        ss = s.split(",")
    else:
        ss = [s] * nargs

    assert len(ss) == nargs, "column separator number != File number"

    # Maintain the first file line order, and combine other files into it
    pivotfile = args[0]
    files = [
        DictFile(f, keypos=c, valuepos=None, delimiter=s)
        for f, c, s in zip(args, cc, ss)
    ]
    otherfiles = files[1:]
    # The header contains filenames
    headers = []
    for i, x in enumerate(files):
        ncols = x.ncols
        if i and compact:
            ncols -= 1
        headers += [op.basename(x.filename)] * ncols
    header = "\t".join(headers)

    fp = must_open(pivotfile)
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(header, file=fw)

    for row in fp:
        row = row.rstrip()
        atoms = row.split(ss[0])
        newrow = atoms
        key = atoms[cc[0]]
        # A pivot cell may hold several keys joined by `keysep`
        keys = key.split(keysep) if keysep in key else [key]
        for d in otherfiles:
            drows = []
            for key in keys:
                # Work on a copy: popping from the list stored in `d` would
                # strip one more column every time the same key is looked up
                krow = list(d.get(key, [na] * d.ncols))
                if compact:
                    krow.pop(d.keypos)
                drows.append(krow)
            # Merge the rows of all keys column by column
            drow = [keysep.join(x) for x in zip(*drows)]
            newrow += drow
        print("\t".join(newrow), file=fw)
def setop(args):
    """
    %prog setop "fileA & fileB" > newfile

    Perform set operations, except on files. The files (fileA and fileB) contain
    list of ids. The operator is one of the four:

    |: union (elements found in either file)
    &: intersection (elements found in both)
    -: difference (elements in fileA but not in fileB)
    ^: symmetric difference (elementes found in either set but not both)

    Please quote the argument to avoid shell interpreting | and &.
    """
    from natsort import natsorted

    p = OptionParser(setop.__doc__)
    p.add_argument(
        "--column",
        default=0,
        type=int,
        help="The column to extract, 0-based, -1 to disable",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    # The single argument is "<fileA> <operator> <fileB>"
    (statement,) = args
    left, symbol, right = statement.split()
    assert symbol in ("|", "&", "-", "^")

    column = opts.column
    fa = SetFile(left, column=column)
    fb = SetFile(right, column=column)

    # One handler per supported operator symbol
    handlers = {
        "|": lambda a, b: a | b,
        "&": lambda a, b: a & b,
        "-": lambda a, b: a - b,
        "^": lambda a, b: a ^ b,
    }
    combined = handlers[symbol](fa, fb)

    for member in natsorted(combined):
        print(member)
class BedLine(object):
    """
    One record of a BED file.

    The input line is tab-separated with 0-based, half-open coordinates. The
    instance stores `start` 1-based, so that `start..end` is inclusive, and
    converts back when rendered with str().
    """

    # the Bed format supports more columns. we only need
    # the first 4, but keep the information in 'extra'.
    __slots__ = (
        "seqid",
        "start",
        "end",
        "accn",
        "extra",
        "score",
        "strand",
        "args",
        "nargs",
    )

    def __init__(self, sline):
        """Parse the tab-delimited BED line `sline`."""
        args = sline.strip().split("\t")
        self.nargs = nargs = len(args)
        self.seqid = args[0]
        self.start = int(args[1]) + 1  # 0-based in the file, 1-based here
        self.end = int(args[2])
        assert self.start <= self.end, "start={0} end={1}".format(self.start, self.end)
        self.extra = self.accn = self.score = self.strand = None

        # Optional columns stay None when the line does not provide them
        if nargs > 3:
            self.accn = args[3]
        if nargs > 4:
            self.score = args[4]
        if nargs > 5:
            self.strand = args[5]
        if nargs > 6:
            self.extra = args[6:]

        self.args = args

    def __str__(self):
        """Render the record as a BED line, omitting columns that are None."""
        args = [self.seqid, self.start - 1, self.end]
        if self.accn is not None:
            args += [self.accn]
        if self.score is not None:
            args += [self.score]
        if self.strand is not None:
            args += [self.strand]
        if self.extra is not None:
            args += self.extra

        s = "\t".join(str(x) for x in args)
        return s

    __repr__ = __str__

    def __getitem__(self, key):
        """Allow attribute lookup by name, e.g. `b["seqid"]`."""
        return getattr(self, key)

    @property
    def span(self):
        """Number of bases covered by the feature."""
        return self.end - self.start + 1

    @property
    def range(self):
        """Tuple (seqid, start, end, strand); strand defaults to "+"."""
        strand = self.strand or "+"
        return self.seqid, self.start, self.end, strand

    @property
    def tag(self):
        """Location string of the form `seqid:start-end`."""
        return "{0}:{1}-{2}".format(self.seqid, self.start, self.end)

    def reverse_complement(self, sizes):
        """
        Flip the feature onto the opposite strand of its sequence, in place.

        `sizes` must provide `get_size(seqid)` returning the sequence length.
        Coordinates are mirrored around the sequence; "+" and "-" strands are
        swapped, any other strand value is left as is.
        """
        size = sizes.get_size(self.seqid)

        start = size - self.end + 1
        end = size - self.start + 1
        self.start, self.end = start, end
        assert self.start <= self.end, "start={0} end={1}".format(self.start, self.end)

        if self.strand:
            # Store the flipped strand on the record. Values other than +/-
            # (e.g. ".") carry no orientation and are kept unchanged.
            self.strand = {"+": "-", "-": "+"}.get(self.strand, self.strand)

    def gffline(self, type="match", source="default"):
        """Return the feature as a tab-delimited GFF line."""
        # A missing or non-numeric score is written as "."
        score = (
            "."
            if not self.score or (self.score and not is_number(self.score))
            else self.score
        )
        strand = "." if not self.strand else self.strand
        row = "\t".join(
            (
                self.seqid,
                source,
                type,
                str(self.start),
                str(self.end),
                score,
                strand,
                ".",
                f"ID={self.accn}",
            )
        )
        return row
Reset start for `%s`.", b.accn) + b.start = 1 + print(b, file=fw) + fw.close() + + def sum(self, seqid=None, unique=True): + return bed_sum(self, seqid=seqid, unique=unique) + + @property + def seqids(self): + return natsorted(set(b.seqid for b in self)) + + @property + def accns(self): + return natsorted(set(b.accn for b in self)) + + @property + def order(self): + # get the gene order given a Bed object + return dict((f.accn, (i, f)) for (i, f) in enumerate(self)) + + @property + def order_in_chr(self): + # get the gene order on a particular seqid + res = {} + self.sort(key=self.nullkey) + for seqid, beds in groupby(self, key=lambda x: x.seqid): + for i, f in enumerate(beds): + res[f.accn] = (seqid, i, f) + return res + + @property + def bp_in_chr(self): + # get the bp position on a particular seqid + res = {} + self.sort(key=self.nullkey) + for seqid, beds in groupby(self, key=lambda x: x.seqid): + for i, f in enumerate(beds): + res[f.accn] = (seqid, (f.start + f.end) / 2, f) + return res + + @property + def max_bp_in_chr(self): + # Get the maximum bp position on a particular seqid + res = OrderedDict() + self.sort(key=self.nullkey) + for seqid, beds in groupby(self, key=lambda x: x.seqid): + res[seqid] = max(x.end for x in beds) + return res + + @property + def simple_bed(self): + return [(b.seqid, i) for (i, b) in enumerate(self)] + + @property + def links(self): + r = [] + for s, sb in self.sub_beds(): + for a, b in pairwise(sb): + r.append(((a.accn, a.strand), (b.accn, b.strand))) + return r + + def extract(self, seqid, start, end): + # get all features within certain range + for b in self: + if b.seqid != seqid: + continue + if b.start < start or b.end > end: + continue + yield b + + def sub_bed(self, seqid): + # get all the beds on one chromosome + for b in self: + if b.seqid == seqid: + yield b + + def sub_beds(self): + self.sort(key=self.nullkey) + # get all the beds on all chromosomes, emitting one at a time + for bs, sb in groupby(self, key=lambda x: 
x.seqid): + yield bs, list(sb) + + def get_breaks(self): + # get chromosome break positions + simple_bed = self.simple_bed + for seqid, ranks in groupby(simple_bed, key=lambda x: x[0]): + ranks = list(ranks) + # chromosome, extent of the chromosome + yield seqid, ranks[0][1], ranks[-1][1] + + +class BedpeLine(object): + def __init__(self, sline): + args = sline.strip().split("\t") + self.seqid1 = args[0] + self.start1 = int(args[1]) + 1 + self.end1 = int(args[2]) + self.seqid2 = args[3] + self.start2 = int(args[4]) + 1 + self.end2 = int(args[5]) + self.accn = args[6] + self.score = args[7] + self.strand1 = args[8] + self.strand2 = args[9] + self.isdup = False + + @property + def innerdist(self): + if self.seqid1 != self.seqid2: + return -1 + return abs(self.start2 - self.end1) + + @property + def outerdist(self): + if self.seqid1 != self.seqid2: + return -1 + return abs(self.end2 - self.start1) + + @property + def is_innie(self): + return (self.strand1, self.strand2) == ("+", "-") + + def rc(self): + self.strand1 = "+" if self.strand1 == "-" else "-" + self.strand2 = "+" if self.strand2 == "-" else "-" + + def _extend(self, rlen, size, start, end, strand): + if strand == "+": + end = start + rlen - 1 + if end > size: + end = size + start = end - rlen + 1 + else: + start = end - rlen + 1 + if start < 1: + start = 1 + end = start + rlen - 1 + return start, end, strand + + def extend(self, rlen, size): + self.start1, self.end1, self.strand1 = self._extend( + rlen, size, self.start1, self.end1, self.strand1 + ) + self.start2, self.end2, self.strand2 = self._extend( + rlen, size, self.start2, self.end2, self.strand2 + ) + + def __str__(self): + args = ( + self.seqid1, + self.start1 - 1, + self.end1, + self.seqid2, + self.start2 - 1, + self.end2, + self.accn, + self.score, + self.strand1, + self.strand2, + ) + return "\t".join(str(x) for x in args) + + @property + def bedline(self): + assert self.seqid1 == self.seqid2 + assert self.start1 <= self.end2 + args = 
(self.seqid1, self.start1 - 1, self.end2, self.accn) + return "\t".join(str(x) for x in args) + + +class BedEvaluate(object): + def __init__(self, TPbed, FPbed, FNbed, TNbed): + self.TP = Bed(TPbed).sum(unique=True) + self.FP = Bed(FPbed).sum(unique=True) + self.FN = Bed(FNbed).sum(unique=True) + self.TN = Bed(TNbed).sum(unique=True) + + def __str__(self): + from jcvi.utils.table import tabulate + + table = { + ("Prediction-True", "Reality-True"): self.TP, + ("Prediction-True", "Reality-False"): self.FP, + ("Prediction-False", "Reality-True"): self.FN, + ("Prediction-False", "Reality-False"): self.TN, + } + msg = str(tabulate(table)) + + msg += "\nSensitivity [TP / (TP + FN)]: {0:.1f} %\n".format( + self.sensitivity * 100 + ) + msg += "Specificity [TP / (TP + FP)]: {0:.1f} %\n".format( + self.specificity * 100 + ) + msg += "Accuracy [(TP + TN) / (TP + FP + FN + TN)]: {0:.1f} %".format( + self.accuracy * 100 + ) + return msg + + @property + def sensitivity(self): + if self.TP + self.FN == 0: + return 0 + return self.TP * 1.0 / (self.TP + self.FN) + + @property + def specificity(self): + if self.TP + self.FP == 0: + return 0 + return self.TP * 1.0 / (self.TP + self.FP) + + @property + def accuracy(self): + if self.TP + self.FP + self.FN + self.TN == 0: + return 0 + return (self.TP + self.TN) * 1.0 / (self.TP + self.FP + self.FN + self.TN) + + @property + def score(self): + return "|".join( + ( + "{0:.3f}".format(x) + for x in (self.sensitivity, self.specificity, self.accuracy) + ) + ) + + +class BedSummary(object): + def __init__(self, bed): + mspans = [(x.span, x.accn) for x in bed] + spans, accns = zip(*mspans) + self.mspans = mspans + self.stats = SummaryStats(spans) + self.nseqids = len(set(x.seqid for x in bed)) + self.nfeats = len(bed) + self.total_bases = bed_sum(bed, unique=False) + self.unique_bases = bed_sum(bed) + self.coverage = self.total_bases * 1.0 / self.unique_bases + + def report(self): + print("Total seqids: {0}".format(self.nseqids), 
file=sys.stderr) + print("Total ranges: {0}".format(self.nfeats), file=sys.stderr) + print( + "Total unique bases: {0} bp".format(thousands(self.unique_bases)), + file=sys.stderr, + ) + print( + "Total bases: {0} bp".format(thousands(self.total_bases)), file=sys.stderr + ) + print("Estimated coverage: {0:.1f}x".format(self.coverage), file=sys.stderr) + print(self.stats, file=sys.stderr) + maxspan, maxaccn = max(self.mspans) + minspan, minaccn = min(self.mspans) + print("Longest: {0} ({1})".format(maxaccn, maxspan), file=sys.stderr) + print("Shortest: {0} ({1})".format(minaccn, minspan), file=sys.stderr) + + def __str__(self): + return "\t".join(str(x) for x in (self.nfeats, self.unique_bases)) + + +def bed_sum(beds, seqid=None, unique=True): + if seqid: + ranges = [(x.seqid, x.start, x.end) for x in beds if x.seqid == seqid] + else: + ranges = [(x.seqid, x.start, x.end) for x in beds] + + unique_sum = range_union(ranges) + raw_sum = sum(x.span for x in beds) + return unique_sum if unique else raw_sum + + +def main(): + actions = ( + ("bedpe", "convert to bedpe format"), + ("bins", "bin bed lengths into each window"), + ("chain", "chain bed segments together"), + ("closest", "find closest BED feature"), + ("density", "calculates density of features per seqid"), + ("depth", "calculate average depth per feature using coverageBed"), + ("distance", "calculate distance between bed features"), + ("evaluate", "make truth table and calculate sensitivity and specificity"), + ("filter", "filter bedfile to retain records between size range"), + ("filterbedgraph", "filter bedgraph to extract unique regions"), + ("fix", "fix non-standard bed files"), + ("flanking", "get n flanking features for a given position"), + ("format", "reformat BED file"), + ("gaps", "define gaps in BED file using complementBed"), + ("index", "index bed file using tabix"), + ("juncs", "trim junctions.bed overhang to get intron, merge multiple beds"), + ("longest", "select longest feature within 
overlapping piles"), + ("mates", "print paired reads from bedfile"), + ("merge", "merge bed files"), + ("mergebydepth", "returns union of features beyond certain depth"), + ("pairs", "estimate insert size between paired reads from bedfile"), + ("pile", "find the ids that intersect"), + ("random", "extract a random subset of features"), + ("refine", "refine bed file using a second bed file"), + ("sample", "sample bed file and remove high-coverage regions"), + ("seqids", "print out all seqids on one line"), + ("sizes", "infer the sizes for each seqid"), + ("some", "get a subset of bed features given a list"), + ("sort", "sort bed file"), + ("summary", "summarize the lengths of the intervals"), + ("tiling", "compute the minimum tiling path"), + ("uniq", "remove overlapping features with higher scores"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def gaps(args): + """ + %prog gaps bedfile reference.fasta + + This is used to define gaps in BED file using complementBed. One use case is + to define gaps in a BED file that was derived from a pairwise BLAST, for + example between two genomes. The reference.fasta is the reference genome. + The bedfile contains 'covered' features by BLAST hits, while the output + bedfile will contain 'uncovered' (i.e. gap) features, in that case use + --missing to note if gap is missing in one or more seqids. + """ + from pybedtools import BedTool + + p = OptionParser(gaps.__doc__) + p.add_argument( + "--na_in", + help="Add '_na_in_xxx' to gap name, use comma to separate, " + + "e.g. --na_in=chr1,chr2 to note if gap is missing in chr1 or " + + "chr2, default is to not add anything. 
Note that if one of the " + + "missing seqids happens to be the seqid of the current feature, " + + "it will not be reported.", + ) + p.add_argument("--minsize", default=1000, type=int, help="Minimum gap size") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + inputbed, ref_fasta = args + ref_sizes = Sizes(ref_fasta).mapping + minsize = opts.minsize + fw = must_open(opts.outfile, "w") + na_in = set(opts.na_in.split(",")) if opts.na_in else set() + comp = BedTool(inputbed).complement(genome=ref_fasta, L=True, stream=True) + n_gaps = 0 + all_gaps = defaultdict(list) + for f in comp: + seqid = f[0] + start = f[1] + end = f[2] + size = int(end) - int(start) + if size < minsize: + continue + all_gaps[seqid].append(size) + gap_name = f"{seqid}_{start}_L{size}" + miss = "_".join(na_in - set([seqid])) + if miss: + gap_name += f"_na_in_{miss}" + print("\t".join((seqid, start, end, gap_name)), file=fw) + n_gaps += 1 + for seqid, gap_sizes in all_gaps.items(): + total_gap_size = sum(gap_sizes) + logger.debug( + "Total gaps in %s: %d, %s", + seqid, + len(gap_sizes), + percentage(total_gap_size, ref_sizes[seqid]), + ) + + +def closest(args): + """ + %prog closest input.bed features.bed + + Find the closest feature in `features.bed` to `input.bed`. + `features.bed` must be sorted using `jcvi.formats.bed sort`. 
+ """ + from pybedtools import BedTool + + p = OptionParser(closest.__doc__) + p.add_argument("--maxdist", default=5000, help="Maximum distance") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + inputbed, featuresbed = args + maxdist = opts.maxdist + sort([inputbed, "-i"]) + inputs = BedTool(inputbed) + features = BedTool(featuresbed) + nearby = inputs.closest(features, d=True, t="first", stream=True) + accn_column = inputs.field_count() + features.field_count() - 3 + for f in nearby: + seqid = f[0] + start = f[1] + end = f[2] + accn = f[3] + feat = f[accn_column].split(":")[0] + dist = int(f[-1]) + if dist > maxdist: + feat = "." + print("\t".join((seqid, start, end, "{}:{}".format(accn, feat)))) + + +def format(args): + """ + %prog format input.bed + + Re-format BED file, e.g. switch sequence ids. + """ + p = OptionParser(format.__doc__) + p.add_argument("--chrprefix", help="Add prefix to seqid") + p.add_argument("--prefix", help="Add prefix to name column (4th)") + p.add_argument("--switch", help="Switch seqids based on two-column file") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + switch = DictFile(opts.switch, delimiter="\t") if opts.switch else None + prefix = opts.prefix + chrprefix = opts.chrprefix + bed = Bed(bedfile) + with must_open(opts.outfile, "w") as fw: + for b in bed: + if prefix: + b.accn = prefix + b.accn + if chrprefix: + b.seqid = chrprefix + b.seqid + if switch and b.seqid in switch: + b.seqid = switch[b.seqid] + print(b, file=fw) + + +def filterbedgraph(args): + """ + %prog filterbedgraph a.bedgraph 1 + + Filter the bedGraph, typically from the gem-mappability pipeline. Unique + regions are 1, two copies .5, etc. 
+ """ + p = OptionParser(filterbedgraph.__doc__) + _, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bedgraphfile, cutoff = args + c = float(cutoff) + fp = open(bedgraphfile) + pf = bedgraphfile.rsplit(".", 1)[0] + filteredbed = pf + ".filtered-{}.bed".format(cutoff) + fw = open(filteredbed, "w") + nfiltered = ntotal = 0 + for row in fp: + b = BedLine(row) + ntotal += 1 + if float(b.accn) >= c: + print(b, file=fw) + nfiltered += 1 + fw.close() + logger.debug( + "A total of %s intervals (score >= %.2f) written to `%s`", + percentage(nfiltered, ntotal), + cutoff, + filteredbed, + ) + + mergeBed(filteredbed, sorted=True, delim=None) + + +def tiling(args): + """ + %prog tiling bedfile + + Compute minimum tiling path using as few clones as possible. Implemented + with dynamic programming. Greedy algorithm may also work according a + stackoverflow source. + """ + p = OptionParser(tiling.__doc__) + p.add_argument( + "--overlap", + default=3000, + type=int, + help="Minimum amount of overlaps required", + ) + p.set_verbose() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + ov = opts.overlap + + bed = Bed(bedfile) + inf = len(bed) + selected = Bed() + for seqid, sbed in bed.sub_beds(): + g = Grouper() + current = sbed[0] + # Partition connected features + for a in sbed: + g.join(a) + # requires a real overlap + if a.start < current.end - ov: + g.join(a, current) + if a.end > current.end: + current = a + + # Process per partition + for gbed in g: + end = max(x.end for x in gbed) + gbed.sort(key=lambda x: (x.start, -x.end)) + entries = len(gbed) + counts = [inf] * entries + counts[0] = 1 + traceback = [-1] * entries + for i, a in enumerate(gbed): + for j in range(i + 1, entries): + b = gbed[j] + if b.start >= a.end - ov: + break + # Two ranges overlap! 
+ if counts[i] + 1 < counts[j]: + counts[j] = counts[i] + 1 + traceback[j] = i + endi = [i for i, a in enumerate(gbed) if a.end == end] + last = min((traceback[i], i) for i in endi)[1] + chain = [] + while last != -1: + chain.append(last) + last = traceback[last] + chain = chain[::-1] + selected.extend([gbed[x] for x in chain]) + + if opts.verbose: + print(counts) + print(traceback) + print(chain) + print("\n".join(str(x) for x in gbed)) + print("*" * 30) + print("\n".join(str(gbed[x]) for x in chain)) + print() + + tilingbedfile = bedfile.rsplit(".", 1)[0] + ".tiling.bed" + selected.print_to_file(filename=tilingbedfile, sorted=True) + logger.debug( + "A total of %d tiling features written to `%s`", len(selected), tilingbedfile + ) + + +def chain(args): + """ + %prog chain bedfile + + Chain BED segments together. + """ + p = OptionParser(chain.__doc__) + p.add_argument("--dist", default=100000, help="Chaining distance") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + cmd = "sort -k4,4 -k1,1 -k2,2n -k3,3n {0} -o {0}".format(bedfile) + sh(cmd) + bed = Bed(bedfile, sorted=False) + newbed = Bed() + for accn, bb in groupby(bed, key=lambda x: x.accn): + bb = list(bb) + g = Grouper() + for a in bb: + g.join(a) + for a, b in pairwise(bb): + if a.seqid == b.seqid and b.start - a.end < opts.dist: + g.join(a, b) + data = [] + for p in g: + seqid = p[0].seqid + start = min(x.start for x in p) + end = max(x.end for x in p) + score = sum(x.span for x in p) + data.append((seqid, start - 1, end, accn, score)) + + d = max(data, key=lambda x: x[-1]) + newbed.append(BedLine("\t".join(str(x) for x in d))) + + newbed.print_to_file(opts.outfile, sorted=True) + + +def density(args): + """ + %prog density bedfile ref.fasta + + Calculates density of features per seqid. 
+ """ + p = OptionParser(density.__doc__) + _, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bedfile, fastafile = args + bed = Bed(bedfile) + sizes = Sizes(fastafile).mapping + header = "seqid features size density_per_Mb".split() + print("\t".join(header)) + for seqid, bb in bed.sub_beds(): + nfeats = len(bb) + size = sizes[seqid] + ds = nfeats * 1e6 / size + print("\t".join(str(x) for x in (seqid, nfeats, size, "{0:.1f}".format(ds)))) + + +def sfa_to_fq(sfa, qvchar): + fq = sfa.rsplit(".", 1)[0] + ".fq" + fp = must_open(sfa) + fw = must_open(fq, "w") + total = 0 + for row in fp: + total += 1 + name, seq = row.split() + qual = len(seq) * qvchar + print("\n".join(("@" + name, seq, "+", qual)), file=fw) + logger.debug("A total of %d sequences written to `%s`.", total, fq) + return fq + + +def filter_bedpe(bedpe, filtered, ref, rc=False, rlen=None, minlen=2000, maxlen=8000): + tag = " after RC" if rc else "" + logger.debug( + "Filter criteria: innie%s, %d <= insertsize <= %d", tag, minlen, maxlen + ) + sizes = Sizes(ref).mapping + fp = must_open(bedpe) + fw = must_open(filtered, "w") + retained = total = 0 + for row in fp: + b = BedpeLine(row) + total += 1 + if rc: + b.rc() + if not b.is_innie: + continue + b.score = b.outerdist + if not minlen <= b.score <= maxlen: + continue + retained += 1 + if rlen: + b.extend(rlen, sizes[b.seqid1]) + print(b, file=fw) + logger.debug( + "A total of %d mates written to `%s`.", percentage(retained, total), filtered + ) + fw.close() + + +def rmdup_bedpe(filtered, rmdup, dupwiggle=10): + sortedfiltered = filtered + ".sorted" + if need_update(filtered, sortedfiltered): + sh("sort -k1,1 -k2,2n -i {0} -o {1}".format(filtered, sortedfiltered)) + + logger.debug("Rmdup criteria: wiggle <= %d", dupwiggle) + fp = must_open(sortedfiltered) + fw = must_open(rmdup, "w") + data = [BedpeLine(x) for x in fp] + retained = total = 0 + for _, ss in groupby(data, key=lambda x: x.seqid1): + ss = list(ss) + for i, a 
in enumerate(ss): + if a.isdup: + continue + for b in ss[i + 1 :]: + if b.start1 > a.start1 + dupwiggle: + break + if b.isdup: + continue + if ( + a.seqid2 == b.seqid2 + and a.start2 - dupwiggle <= b.start2 <= a.start2 + dupwiggle + ): + b.isdup = True + for a in ss: + total += 1 + if a.isdup: + continue + retained += 1 + print(a, file=fw) + logger.debug( + "A total of %s mates written to `%s`.", percentage(retained, total), rmdup + ) + fw.close() + + +def seqids(args): + """ + %prog seqids bedfile + + Print out all seqids on one line. Useful for graphics.karyotype. + """ + p = OptionParser(seqids.__doc__) + p.add_argument("--maxn", default=100, type=int, help="Maximum number of seqids") + p.add_argument("--prefix", help="Seqids must start with") + p.add_argument("--exclude", default="random", help="Seqids should not contain") + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + pf = opts.prefix + exclude = opts.exclude + bed = Bed(bedfile) + s = bed.seqids + if pf: + s = [x for x in s if x.startswith(pf)] + if exclude: + s = [x for x in s if exclude not in x] + s = s[: opts.maxn] + print(",".join(s)) + + +def juncs(args): + """ + %prog junctions junctions1.bed [junctions2.bed ...] 
+ + Given a TopHat junctions.bed file, trim the read overhang to get intron span + + If more than one junction bed file is provided, uniq the junctions and + calculate cumulative (sum) junction support + """ + from tempfile import mkstemp + from pybedtools import BedTool + + p = OptionParser(juncs.__doc__) + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + fh, trimbed = mkstemp(suffix=".bed") + fw = must_open(trimbed, "w") + for i, juncbed in enumerate(args): + bed = Bed(juncbed, juncs=True) + for b in bed: + ovh = [int(x) for x in b.extra[-2].split(",")] + b.start += ovh[0] + b.end -= ovh[1] + b.accn = "{0}-{1}".format(b.accn, i) + b.extra = None + print(b, file=fw) + fw.close() + + if len(args) > 1: + sh("sort -k1,1 -k2,2n {0} -o {0}".format(trimbed)) + + tbed = BedTool(trimbed) + grouptbed = tbed.groupby(g=[1, 2, 3, 6], c=5, ops=["sum"]) + + cmd = """awk -F $'\t' 'BEGIN { OFS = FS } { ID = sprintf("mJUNC%07d", NR); print $1,$2,$3,ID,$5,$4; }'""" + infile = grouptbed.fn + sh(cmd, infile=infile, outfile=opts.outfile) + else: + sort([trimbed, "-o", opts.outfile]) + + os.unlink(trimbed) + + +def random(args): + """ + %prog random bedfile number_of_features + + Extract a random subset of features. Number of features can be an integer + number, or a fractional number in which case a random fraction (for example + 0.1 = 10% of all features) will be extracted. 
+ """ + from random import sample + from jcvi.formats.base import flexible_cast + + p = OptionParser(random.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bedfile, N = args + assert is_number(N) + + b = Bed(bedfile) + NN = flexible_cast(N) + if NN < 1: + NN = int(round(NN * len(b))) + + beds = sample(b, NN) + new_bed = Bed() + new_bed.extend(beds) + + outfile = bedfile.rsplit(".", 1)[0] + ".{0}.bed".format(N) + new_bed.print_to_file(outfile) + logger.debug("Write %d features to `%s`", NN, outfile) + + +def filter(args): + """ + %prog filter bedfile + + Filter the bedfile to retain records between certain size range. + """ + p = OptionParser(filter.__doc__) + p.add_argument("--minsize", default=0, type=int, help="Minimum feature length") + p.add_argument( + "--maxsize", default=1000000000, type=int, help="Minimum feature length" + ) + p.add_argument( + "--minaccn", + type=int, + help="Minimum value of accn, useful to filter based on coverage", + ) + p.add_argument("--minscore", type=int, help="Minimum score") + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + fp = must_open(bedfile) + fw = must_open(opts.outfile, "w") + minsize, maxsize = opts.minsize, opts.maxsize + minaccn = opts.minaccn + minscore = opts.minscore + total = [] + keep = [] + for row in fp: + try: + b = BedLine(row) + except IndexError: + print(row.strip(), file=fw) + continue + span = b.span + total.append(span) + if not minsize <= span <= maxsize: + continue + if minaccn and int(b.accn) < minaccn: + continue + if minscore and int(b.score) < minscore: + continue + print(b, file=fw) + keep.append(span) + + logger.debug("Stats: %s features kept.", percentage(len(keep), len(total))) + logger.debug("Stats: %s bases kept.", percentage(sum(keep), sum(total))) + + +def make_bedgraph(bedfile, fastafile): + sizesfile = Sizes(fastafile).filename + pf = bedfile.rsplit(".", 
1)[0] + bedfile = sort([bedfile]) + bedgraph = pf + ".bedgraph" + if need_update(bedfile, bedgraph): + cmd = "genomeCoverageBed" + cmd += " -i {0} -g {1} -bga".format(bedfile, sizesfile) + sh(cmd, outfile=bedgraph) + + return bedgraph + + +def mergebydepth(args): + """ + %prog mergebydepth reads.bed genome.fasta + + Similar to mergeBed, but only returns regions beyond certain depth. + """ + p = OptionParser(mergebydepth.__doc__) + p.add_argument("--mindepth", default=3, type=int, help="Minimum depth required") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bedfile, fastafile = args + mindepth = opts.mindepth + bedgraph = make_bedgraph(bedfile, fastafile) + + bedgraphfiltered = bedgraph + ".d{0}".format(mindepth) + if need_update(bedgraph, bedgraphfiltered): + filter( + [ + bedgraph, + "--minaccn={0}".format(mindepth), + "--outfile={0}".format(bedgraphfiltered), + ] + ) + + merged = bedgraphfiltered + ".merge.fasta" + if need_update(bedgraphfiltered, merged): + mergeBed(bedgraphfiltered, sorted=True) + + +def depth(args): + """ + %prog depth reads.bed features.bed + + Calculate depth depth per feature using coverageBed. + """ + p = OptionParser(depth.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + readsbed, featsbed = args + fp = open(featsbed) + nargs = len(fp.readline().split("\t")) + keepcols = ",".join(str(x) for x in range(1, nargs + 1)) + cmd = "coverageBed -a {0} -b {1} -d".format(readsbed, featsbed) + cmd += " | groupBy -g {0} -c {1} -o mean".format(keepcols, nargs + 2) + sh(cmd, outfile=opts.outfile) + + +def remove_isoforms(ids): + """ + This is more or less a hack to remove the GMAP multiple mappings. Multiple + GMAP mappings can be seen given the names .mrna1, .mrna2, etc. 
+ """ + key = lambda x: x.rsplit(".", 1)[0] + iso_number = lambda x: get_number(x.split(".")[-1]) + ids = sorted(ids, key=key) + newids = [] + for k, ii in groupby(ids, key=key): + min_i = min(list(ii), key=iso_number) + newids.append(min_i) + return newids + + +def longest(args): + """ + %prog longest bedfile fastafile + + Select longest feature within overlapping piles. + """ + from jcvi.formats.sizes import Sizes + + p = OptionParser(longest.__doc__) + p.add_argument("--maxsize", default=20000, type=int, help="Limit max size") + p.add_argument("--minsize", default=60, type=int, help="Limit min size") + p.add_argument( + "--precedence", default="Medtr", help="Accessions with prefix take precedence" + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bedfile, fastafile = args + maxsize = opts.maxsize + minsize = opts.minsize + prec = opts.precedence + mergedbed = mergeBed(bedfile, nms=True) + sizes = Sizes(fastafile).mapping + bed = Bed(mergedbed) + + pf = bedfile.rsplit(".", 1)[0] + ids = set() + for b in bed: + accns = b.accn.split(";") + prec_accns = [x for x in accns if x.startswith(prec)] + if prec_accns: + accns = prec_accns + accn_sizes = [(sizes.get(x, 0), x) for x in accns] + accn_sizes = [(size, x) for size, x in accn_sizes if size < maxsize] + if not accn_sizes: + continue + max_size, max_accn = max(accn_sizes) + if max_size < minsize: + continue + ids.add(max_accn) + + newids = remove_isoforms(ids) + logger.debug("Remove isoforms: before=%d after=%d", len(ids), len(newids)) + + longestidsfile = pf + ".longest.ids" + fw = open(longestidsfile, "w") + print("\n".join(newids), file=fw) + fw.close() + logger.debug("A total of %d records written to `%s`.", len(newids), longestidsfile) + + longestbedfile = pf + ".longest.bed" + some( + [ + bedfile, + longestidsfile, + "--outfile={0}".format(longestbedfile), + "--no_strip_names", + ] + ) + + +def merge(args): + """ + %prog merge bedfiles > newbedfile + + Concatenate 
bed files together. Performing seqid and name changes to avoid + conflicts in the new bed file. + """ + p = OptionParser(merge.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + bedfiles = args + fw = must_open(opts.outfile, "w") + for bedfile in bedfiles: + bed = Bed(bedfile) + pf = op.basename(bedfile).split(".")[0] + for b in bed: + b.seqid = "_".join((pf, b.seqid)) + print(b, file=fw) + + +def fix(args): + """ + %prog fix bedfile > newbedfile + + Fix non-standard bed files. One typical problem is start > end. + """ + p = OptionParser(fix.__doc__) + p.add_argument("--minspan", default=0, type=int, help="Enforce minimum span") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + minspan = opts.minspan + fp = open(bedfile) + fw = must_open(opts.outfile, "w") + nfixed = nfiltered = ntotal = 0 + for row in fp: + atoms = row.strip().split("\t") + assert len(atoms) >= 3, "Must be at least 3 columns" + seqid, start, end = atoms[:3] + start, end = int(start), int(end) + orientation = "+" + if start > end: + start, end = end, start + orientation = "-" + nfixed += 1 + + atoms[1:3] = [str(start), str(end)] + if len(atoms) > 6: + atoms[6] = orientation + line = "\t".join(atoms) + b = BedLine(line) + + if b.span >= minspan: + print(b, file=fw) + nfiltered += 1 + + ntotal += 1 + + if nfixed: + logger.debug("Total fixed: %s".format(percentage(nfixed, ntotal))) + if nfiltered: + logger.debug("Total filtered: %s".format(percentage(nfiltered, ntotal))) + + +def some(args): + """ + %prog some bedfile idsfile > newbedfile + + Retrieve a subset of bed features given a list of ids. 
+ """ + from jcvi.formats.base import SetFile + from jcvi.utils.cbook import gene_name + + p = OptionParser(some.__doc__) + p.add_argument( + "-v", + dest="inverse", + default=False, + action="store_true", + help="Get the inverse, like grep -v", + ) + p.set_outfile() + p.set_stripnames() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bedfile, idsfile = args + inverse = opts.inverse + ostrip = opts.strip_names + fw = must_open(opts.outfile, "w") + + ids = SetFile(idsfile) + if ostrip: + ids = set(gene_name(x) for x in ids) + bed = Bed(bedfile) + ntotal = nkeep = 0 + for b in bed: + ntotal += 1 + keep = b.accn in ids + if inverse: + keep = not keep + + if keep: + nkeep += 1 + print(b, file=fw) + + fw.close() + logger.debug("Stats: %s features kept.".format(percentage(nkeep, ntotal))) + + +def uniq(args): + """ + %prog uniq bedfile + + Remove overlapping features with higher scores. + """ + from jcvi.formats.sizes import Sizes + + p = OptionParser(uniq.__doc__) + p.add_argument("--sizes", help="Use sequence length as score") + p.add_argument( + "--mode", default="span", choices=("span", "score"), help="Pile mode" + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + uniqbedfile = bedfile.split(".")[0] + ".uniq.bed" + bed = Bed(bedfile) + + if opts.sizes: + sizes = Sizes(opts.sizes).mapping + ranges = [ + Range(x.seqid, x.start, x.end, sizes[x.accn], i) for i, x in enumerate(bed) + ] + else: + if opts.mode == "span": + ranges = [ + Range(x.seqid, x.start, x.end, x.end - x.start + 1, i) + for i, x in enumerate(bed) + ] + else: + ranges = [ + Range(x.seqid, x.start, x.end, float(x.score), i) + for i, x in enumerate(bed) + ] + + selected, score = range_chain(ranges) + selected = [x.id for x in selected] + selected_ids = set(selected) + selected = [bed[x] for x in selected] + notselected = [x for i, x in enumerate(bed) if i not in selected_ids] + + newbed = Bed() + 
newbed.extend(selected) + newbed.print_to_file(uniqbedfile, sorted=True) + + if notselected: + leftoverfile = bedfile.split(".")[0] + ".leftover.bed" + leftoverbed = Bed() + leftoverbed.extend(notselected) + leftoverbed.print_to_file(leftoverfile, sorted=True) + + logger.debug("Imported: %d, Exported: %d", len(bed), len(newbed)) + + return uniqbedfile + + +def subtractbins(binfile1, binfile2): + from jcvi.graphics.landscape import BinFile + + abin = BinFile(binfile1) + bbin = BinFile(binfile2) + + assert len(abin) == len(bbin) + + fw = open(binfile1, "w") + + for a, b in zip(abin, bbin): + assert a.chr == b.chr + assert a.binlen == b.binlen + + a.subtract(b) + print(a, file=fw) + + fw.close() + + return binfile1 + + +def get_nbins(clen: int, shift: int) -> Tuple[int, int]: + """ + Get the number of bins for a given chromosome length and shift. + """ + nbins, last_bin = divmod(clen, shift) + if last_bin: + nbins += 1 + return nbins, last_bin + + +def bins(args): + """ + %prog bins bedfile fastafile + + Bin bed lengths into each consecutive window. Use --subtract to remove bases + from window, e.g. --subtract gaps.bed ignores the gap sequences. 
+ """ + + p = OptionParser(bins.__doc__) + p.add_argument("--binsize", default=100000, type=int, help="Size of the bins") + p.add_argument("--subtract", help="Subtract bases from window") + p.add_argument( + "--mode", + default="span", + choices=("span", "count", "score"), + help="Accumulate feature based on", + ) + p.add_argument( + "--nomerge", default=False, action="store_true", help="Do not merge features" + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bedfile, fastafile = args + subtract = opts.subtract + mode = opts.mode + assert op.exists(bedfile), "File `{0}` not found".format(bedfile) + + binsize = opts.binsize + binfile = bedfile + ".{0}".format(binsize) + binfile += ".{0}.bins".format(mode) + + if not need_update(bedfile, binfile): + return binfile + + sz = Sizes(fastafile) + sizesfile = sz.filename + sizes = sz.mapping + fw = open(binfile, "w") + scores = "median" if mode == "score" else None + if not opts.nomerge: + bedfile = mergeBed(bedfile, nms=True, scores=scores) + if subtract: + subtractmerge = mergeBed(subtract) + subtract_complement = complementBed(subtractmerge, sizesfile) + bedfile = intersectBed(bedfile, subtract_complement) + + bedfile = sort([bedfile, "-i"]) + + bed = Bed(bedfile) + sbdict = dict(bed.sub_beds()) + for chr, chr_len in sorted(sizes.items()): + chr_len = sizes[chr] + subbeds = sbdict.get(chr, []) + nbins, last_bin = get_nbins(chr_len, binsize) + + a = np.zeros(nbins) # values + b = np.zeros(nbins, dtype=int) # bases + c = np.zeros(nbins, dtype=int) # count + b[:-1] = binsize + b[-1] = last_bin + + for bb in subbeds: + start, end = bb.start, bb.end + startbin = start // binsize + endbin = end // binsize + + assert startbin <= endbin + c[startbin : endbin + 1] += 1 + + if mode == "score": + a[startbin : endbin + 1] += float(bb.score) + + elif mode == "span": + if startbin == endbin: + a[startbin] += end - start + 1 + + if startbin < endbin: + firstsize = (startbin + 1) * binsize 
- start + 1 + lastsize = end - endbin * binsize + a[startbin] += firstsize + if startbin + 1 < endbin: + a[startbin + 1 : endbin] += binsize + a[endbin] += lastsize + + if mode == "count": + a = c + + for xa, xb in zip(a, b): + print("\t".join(str(x) for x in (chr, xa, xb)), file=fw) + + fw.close() + + if subtract: + subtractbinfile = bins([subtract, fastafile, "--binsize={0}".format(binsize)]) + binfile = subtractbins(binfile, subtractbinfile) + + return binfile + + +def pile(args): + """ + %prog pile abedfile bbedfile > piles + + Call intersectBed on two bedfiles. + """ + from jcvi.utils.grouper import Grouper + + p = OptionParser(pile.__doc__) + p.add_argument("--minOverlap", default=0, type=int, help="Minimum overlap required") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + abedfile, bbedfile = args + iw = intersectBed_wao(abedfile, bbedfile, minOverlap=opts.minOverlap) + groups = Grouper() + for a, b in iw: + groups.join(a.accn, b.accn) + + ngroups = 0 + for group in groups: + if len(group) > 1: + ngroups += 1 + print("|".join(group)) + + logger.debug("A total of %d piles (>= 2 members)", ngroups) + + +def index(args): + """ + %prog index bedfile + + Compress and index bedfile using `tabix`. Use --fasta to give a FASTA file + so that a bedgraph file can be generated and indexed. 
+ """ + p = OptionParser(index.__doc__) + p.add_argument("--fasta", help="Generate bedgraph and index") + p.add_argument("--query", help="Chromosome location") + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + fastafile = opts.fasta + if fastafile: + bedfile = make_bedgraph(bedfile, fastafile) + + bedfile = sort([bedfile]) + + gzfile = bedfile + ".gz" + if need_update(bedfile, gzfile): + cmd = "bgzip {0}".format(bedfile) + sh(cmd) + + tbifile = gzfile + ".tbi" + if need_update(gzfile, tbifile): + cmd = "tabix -p bed {0}".format(gzfile) + sh(cmd) + + query = opts.query + if not query: + return + + cmd = "tabix {0} {1}".format(gzfile, query) + sh(cmd, outfile=opts.outfile) + + +def fastaFromBed(bedfile, fastafile, name=False, tab=False, stranded=False): + suffix = ".sfa" if tab else ".fasta" + outfile = op.basename(bedfile).rsplit(".", 1)[0] + suffix + cmd = "fastaFromBed -fi {0} -bed {1} -fo {2}".format(fastafile, bedfile, outfile) + if name: + cmd += " -name" + if tab: + cmd += " -tab" + if stranded: + cmd += " -s" + + if need_update([bedfile, fastafile], outfile): + sh(cmd, outfile=outfile) + + return outfile + + +def mergeBed( + bedfile: str, + d: int = 0, + sorted: bool = False, + nms: bool = False, + s: bool = False, + scores: Optional[str] = None, + delim: str = ";", + inplace: bool = False, +): + if not sorted: + bedfile = sort([bedfile, "-i"]) + cmd = "mergeBed -i {0}".format(bedfile) + if d: + cmd += " -d {0}".format(d) + if nms: + nargs = len(open(bedfile).readline().split()) + if nargs <= 3: + logger.debug("Only %d columns detected... 
set nms=True", nargs) + else: + cmd += " -c 4 -o collapse" + if s: + cmd += " -s" + if scores: + valid_opts = ( + "sum", + "min", + "max", + "mean", + "median", + "mode", + "antimode", + "collapse", + ) + if scores not in valid_opts: + scores = "mean" + cmd += " -scores {0}".format(scores) + + if nms and delim: + cmd += ' -delim "{0}"'.format(delim) + + pf = bedfile.rsplit(".", 1)[0] if bedfile.endswith(".bed") else bedfile + mergebedfile = op.basename(pf) + ".merge.bed" + + if need_update(bedfile, mergebedfile): + sh(cmd, outfile=mergebedfile) + + if inplace: + shutil.move(mergebedfile, bedfile) + return mergebedfile + + +def complementBed(bedfile, sizesfile): + cmd = "complementBed" + cmd += " -i {0} -g {1}".format(bedfile, sizesfile) + complementbedfile = "complement_" + op.basename(bedfile) + + if need_update([bedfile, sizesfile], complementbedfile): + sh(cmd, outfile=complementbedfile) + return complementbedfile + + +def intersectBed(bedfile1, bedfile2): + cmd = "intersectBed" + cmd += " -a {0} -b {1}".format(bedfile1, bedfile2) + suffix = ".intersect.bed" + + intersectbedfile = ( + ".".join( + (op.basename(bedfile1).split(".")[0], op.basename(bedfile2).split(".")[0]) + ) + + suffix + ) + + if need_update([bedfile1, bedfile2], intersectbedfile): + sh(cmd, outfile=intersectbedfile) + return intersectbedfile + + +def query_to_range(query, sizes): + # chr1:1-10000 => (chr1, 0, 10000) + if ":" in query: + a, bc = query.split(":", 1) + b, c = [int(x) for x in bc.split("-", 1)] + b -= 1 + else: + a = query + b, c = 0, sizes.mapping[a] + + return a, b, c + + +def evaluate(args): + """ + %prog evaluate prediction.bed reality.bed fastafile + + Make a truth table like: + True False --- Reality + True TP FP + False FN TN + |----Prediction + + Sn = TP / (all true in reality) = TP / (TP + FN) + Sp = TP / (all true in prediction) = TP / (TP + FP) + Ac = (TP + TN) / (TP + FP + FN + TN) + """ + from jcvi.formats.sizes import Sizes + + p = OptionParser(evaluate.__doc__) + 
p.add_argument("--query", help="Chromosome location") + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + prediction, reality, fastafile = args + query = opts.query + prediction = mergeBed(prediction) + reality = mergeBed(reality) + sizes = Sizes(fastafile) + sizesfile = sizes.filename + + prediction_complement = complementBed(prediction, sizesfile) + reality_complement = complementBed(reality, sizesfile) + + TPbed = intersectBed(prediction, reality) + FPbed = intersectBed(prediction, reality_complement) + FNbed = intersectBed(prediction_complement, reality) + TNbed = intersectBed(prediction_complement, reality_complement) + beds = (TPbed, FPbed, FNbed, TNbed) + + if query: + subbeds = [] + rr = query_to_range(query, sizes) + ce = 'echo "{0}"'.format("\t".join(str(x) for x in rr)) + for b in beds: + subbed = ".".join((b, query)) + cmd = ce + " | intersectBed -a stdin -b {0}".format(b) + sh(cmd, outfile=subbed) + subbeds.append(subbed) + beds = subbeds + + be = BedEvaluate(*beds) + print(be, file=sys.stderr) + + if query: + cleanup(subbeds) + + return be + + +def intersectBed_wao(abedfile, bbedfile, minOverlap=0): + abed = Bed(abedfile) + bbed = Bed(bbedfile) + print("`{0}` has {1} features.".format(abedfile, len(abed)), file=sys.stderr) + print("`{0}` has {1} features.".format(bbedfile, len(bbed)), file=sys.stderr) + + cmd = "intersectBed -wao -a {0} -b {1}".format(abedfile, bbedfile) + acols = abed[0].nargs + bcols = bbed[0].nargs + fp = popen(cmd) + for row in fp: + atoms = row.split() + aline = "\t".join(atoms[:acols]) + bline = "\t".join(atoms[acols : acols + bcols]) + c = int(atoms[-1]) + if c < minOverlap: + continue + a = BedLine(aline) + try: + b = BedLine(bline) + except AssertionError: + b = None + + yield a, b + + +def refine(args): + """ + %prog refine bedfile1 bedfile2 refinedbed + + Refine bed file using a second bed file. 
The final bed is keeping all the + intervals in bedfile1, but refined by bedfile2 whenever they have + intersection. + """ + p = OptionParser(refine.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + abedfile, bbedfile, refinedbed = args + fw = open(refinedbed, "w") + intersected = refined = 0 + for a, b in intersectBed_wao(abedfile, bbedfile): + if b is None: + print(a, file=fw) + continue + + intersected += 1 + aspan_before = a.span + arange = (a.start, a.end) + brange = (b.start, b.end) + irange = range_intersect(arange, brange) + a.start, a.end = irange + aspan_after = a.span + if aspan_before > aspan_after: + refined += 1 + print(a, file=fw) + + fw.close() + print("Total intersected: {0}".format(intersected), file=sys.stderr) + print("Total refined: {0}".format(refined), file=sys.stderr) + summary([abedfile]) + summary([refinedbed]) + + +def distance(args): + """ + %prog distance bedfile + + Calculate distance between bed features. The output file is a list of + distances, which can be used to plot histogram, etc. + """ + p = OptionParser(distance.__doc__) + p.add_argument( + "--distmode", + default="ss", + choices=("ss", "ee"), + help="Distance mode between paired reads. ss is outer distance, " + "ee is inner distance", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + sortedbedfile = sort([bedfile]) + valid = total = 0 + fp = open(sortedbedfile) + for a, b in pairwise(fp): + a = BedLine(a) + b = BedLine(b) + ar = (a.seqid, a.start, a.end, "+") + br = (b.seqid, b.start, b.end, "+") + dist, oo = range_distance(ar, br, distmode=opts.distmode) + total += 1 + if dist > 0: + print(dist) + valid += 1 + + logger.debug("Total valid (> 0) distances: %s.", percentage(valid, total)) + + +def sample(args): + """ + %prog sample bedfile sizesfile + + Sample bed file and remove high-coverage regions. 
+ + When option --targetsize is used, this program uses a differnent mode. It + first calculates the current total bases from all ranges and then compare to + targetsize, if more, then sample down as close to targetsize as possible. + + Selection via --raindrop has the effect of making coverage even. Selected + reads have the property that their end points are not within a certain + window from one another. One sweep goes from left to right, the other in + the reverse direction. + """ + import random + from jcvi.assembly.coverage import Coverage + + p = OptionParser(sample.__doc__) + p.add_argument( + "--raindrop", + default=0, + type=int, + help="Raindrop selection, ignores all other options", + ) + p.add_argument("--max", default=10, type=int, help="Max depth allowed") + p.add_argument( + "--targetsize", type=int, help="Sample bed file to get target base number" + ) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bedfile, sizesfile = args + pf = bedfile.rsplit(".", 1)[0] + raindrop = opts.raindrop + + # Raindrop method + if raindrop: + bed = Bed(bedfile) + forward = [] + for b in bed: + if not forward or abs(b.start - forward[-1].start) >= raindrop: + forward.append(b) + + reverse = [] + bed.sort(key=lambda x: -x.end) + for b in bed: + if not reverse or abs(b.end - reverse[-1].end) >= raindrop: + reverse.append(b) + + for tag, L in zip(("forward", "reverse"), (forward, reverse)): + logger.debug( + "Selected %d features in %s direction, span: %d", + len(L), + tag, + sum(x.span for x in L), + ) + + selected = Bed() + selected.extend(set(forward + reverse)) + selected.print_to_file(opts.outfile, sorted=True) + return + + targetsize = opts.targetsize + if targetsize: + bed = Bed(bedfile) + samplebed = pf + ".sample.bed" + fw = open(samplebed, "w") + nfeats = len(bed) + nbases = bed.sum(unique=False) + targetfeats = int(round(nfeats * targetsize / nbases)) + sub_bed = random.sample(bed, targetfeats) + for b 
in sub_bed: + print(b, file=fw) + + logger.debug("File written to `%s`.", samplebed) + return + + c = Coverage(bedfile, sizesfile) + coveragefile = c.filename + samplecoveragefile = pf + ".sample.coverage" + fw = open(samplecoveragefile, "w") + fp = open(coveragefile) + for row in fp: + seqid, start, end, cov = row.split() + cov = int(cov) + if cov <= opts.max: + fw.write(row) + fw.close() + + samplebedfile = pf + ".sample.bed" + cmd = "intersectBed -a {0} -b {1} -wa -u".format(bedfile, samplecoveragefile) + sh(cmd, outfile=samplebedfile) + logger.debug("Sampled bedfile written to `%s`.", samplebedfile) + + +def bedpe(args): + """ + %prog bedpe bedfile + + Convert to bedpe format. Use --span to write another bed file that contain + the span of the read pairs. + """ + from jcvi.assembly.coverage import bed_to_bedpe + + p = OptionParser(bedpe.__doc__) + p.add_argument( + "--span", default=False, action="store_true", help="Write span bed file" + ) + p.add_argument( + "--strand", default=False, action="store_true", help="Write the strand columns" + ) + p.add_argument("--mates", help="Check the library stats from .mates file") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + pf = bedfile.rsplit(".", 1)[0] + bedpefile = pf + ".bedpe" + bedspanfile = pf + ".spans.bed" if opts.span else None + bed_to_bedpe( + bedfile, + bedpefile, + pairsbedfile=bedspanfile, + matesfile=opts.mates, + strand=opts.strand, + ) + return bedpefile, bedspanfile + + +def sizes(args): + """ + %prog sizes bedfile + + Infer the sizes for each seqid. Useful before dot plots. 
+ """ + p = OptionParser(sizes.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + assert op.exists(bedfile) + + sizesfile = bedfile.rsplit(".", 1)[0] + ".sizes" + + fw = must_open(sizesfile, "w", checkexists=True, skipcheck=True) + if fw: + b = Bed(bedfile) + for s, sbeds in b.sub_beds(): + print("{0}\t{1}".format(s, max(x.end for x in sbeds)), file=fw) + logger.debug("Sizes file written to `%s`.", sizesfile) + + return sizesfile + + +def analyze_dists(dists, cutoff=1000, alpha=0.1): + """ + The dists can show bimodal distribution if they come from a mate-pair + library. Assume bimodal distribution and then separate the two peaks. Based + on the percentage in each peak, we can decide if it is indeed one peak or + two peaks, and report the median respectively. + """ + peak0 = [d for d in dists if d < cutoff] + peak1 = [d for d in dists if d >= cutoff] + c0, c1 = len(peak0), len(peak1) + logger.debug("Component counts: %d %d", c0, c1) + if c0 == 0 or c1 == 0 or float(c1) / len(dists) < alpha: + logger.debug("Single peak identified (%d / %d < %.1f)", c1, len(dists), alpha) + return np.median(dists) + + peak0_median = np.median(peak0) + peak1_median = np.median(peak1) + logger.debug( + "Dual peaks identified: %dbp (%d), %dbp (%d) (selected)", + int(peak0_median), + c0, + int(peak1_median), + c1, + ) + + return peak1_median + + +def report_pairs( + data, + cutoff=0, + mateorientation=None, + pairsfile=None, + insertsfile=None, + rclip=1, + ascii=False, + bins=20, + distmode="ss", + mpcutoff=1000, +): + """ + This subroutine is used by the pairs function in blast.py and cas.py. 
+ Reports number of fragments and pairs as well as linked pairs + """ + allowed_mateorientations = ("++", "--", "+-", "-+") + + if mateorientation: + assert mateorientation in allowed_mateorientations + + num_fragments, num_pairs = 0, 0 + + all_dist = [] + linked_dist = [] + # +- (forward-backward) is `innie`, -+ (backward-forward) is `outie` + orientations = defaultdict(int) + + # clip how many chars from end of the read name to get pair name + key = (lambda x: x.accn[:-rclip]) if rclip else (lambda x: x.accn) + data.sort(key=key) + + if pairsfile: + pairsfw = open(pairsfile, "w") + if insertsfile: + insertsfw = open(insertsfile, "w") + + for pe, lines in groupby(data, key=key): + lines = list(lines) + if len(lines) != 2: + num_fragments += len(lines) + continue + + num_pairs += 1 + a, b = lines + + asubject, astart, astop = a.seqid, a.start, a.end + bsubject, bstart, bstop = b.seqid, b.start, b.end + + aquery, bquery = a.accn, b.accn + astrand, bstrand = a.strand, b.strand + + dist, orientation = range_distance( + (asubject, astart, astop, astrand), + (bsubject, bstart, bstop, bstrand), + distmode=distmode, + ) + + if dist >= 0: + all_dist.append((dist, orientation, aquery, bquery)) + + # select only pairs with certain orientations - e.g. innies, outies, etc. 
+ if mateorientation: + all_dist = [x for x in all_dist if x[1] == mateorientation] + + # try to infer cutoff as twice the median until convergence + if cutoff <= 0: + dists = np.array([x[0] for x in all_dist], dtype=int) + p0 = analyze_dists(dists, cutoff=mpcutoff) + cutoff = int(2 * p0) # initial estimate + cutoff = int(math.ceil(cutoff / bins)) * bins + logger.debug("Insert size cutoff set to %d, use '--cutoff' to override", cutoff) + + for dist, orientation, aquery, bquery in all_dist: + if dist > cutoff: + continue + if cutoff > 2 * mpcutoff and dist < mpcutoff: + continue + + linked_dist.append(dist) + if pairsfile: + print("{0}\t{1}\t{2}".format(aquery, bquery, dist), file=pairsfw) + orientations[orientation] += 1 + + print( + "{0} fragments, {1} pairs ({2} total)".format( + num_fragments, num_pairs, num_fragments + num_pairs * 2 + ), + file=sys.stderr, + ) + + s = SummaryStats(linked_dist, dtype=int) + num_links = s.size + + meandist, stdev = s.mean, s.sd + p0, p1, p2 = s.median, s.p1, s.p2 + + print( + "%d pairs (%.1f%%) are linked (cutoff=%d)" + % (num_links, num_links * 100.0 / num_pairs, cutoff), + file=sys.stderr, + ) + print( + "mean distance between mates: {0} +/- {1}".format(meandist, stdev), + file=sys.stderr, + ) + print("median distance between mates: {0}".format(p0), file=sys.stderr) + print("95% distance range: {0} - {1}".format(p1, p2), file=sys.stderr) + print("\nOrientations:", file=sys.stderr) + + orientation_summary = [] + for orientation, count in sorted(orientations.items()): + o = "{0}:{1}".format(orientation, percentage(count, num_links, mode=1)) + orientation_summary.append(o.split()[0]) + print(o, file=sys.stderr) + + if insertsfile: + from jcvi.graphics.histogram import histogram + + print("\n".join(str(x) for x in linked_dist), file=insertsfw) + insertsfw.close() + prefix = insertsfile.rsplit(".", 1)[0] + if prefix > 10: + prefix = prefix.split("-")[0] + osummary = " ".join(orientation_summary) + title = "{0} ({1}; median:{2} 
bp)".format(prefix, osummary, p0) + histogram( + insertsfile, + vmin=0, + vmax=cutoff, + bins=bins, + xlabel="Insertsize", + title=title, + ascii=ascii, + ) + cleanup(insertsfile) + + return s + + +def pairs(args): + """ + See __doc__ for OptionParser.set_pairs(). + """ + p = OptionParser(pairs.__doc__) + p.set_pairs() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + + basename = bedfile.split(".")[0] + insertsfile = ".".join((basename, "inserts")) + bedfile = sort([bedfile, "--accn"]) + + fp = open(bedfile) + data = [BedLine(row) for i, row in enumerate(fp) if i < opts.nrows] + + ascii = not opts.pdf + return ( + bedfile, + report_pairs( + data, + opts.cutoff, + opts.mateorientation, + pairsfile=opts.pairsfile, + insertsfile=insertsfile, + rclip=opts.rclip, + ascii=ascii, + bins=opts.bins, + distmode=opts.distmode, + ), + ) + + +def summary(args): + """ + %prog summary bedfile + + Sum the total lengths of the intervals. + """ + p = OptionParser(summary.__doc__) + p.add_argument( + "--sizes", default=False, action="store_true", help="Write .sizes file" + ) + p.add_argument( + "--all", + default=False, + action="store_true", + help="Write summary stats per seqid", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + bed = Bed(bedfile) + bs = BedSummary(bed) + if opts.sizes: + sizesfile = bedfile + ".sizes" + fw = open(sizesfile, "w") + for span, accn in bs.mspans: + print(span, file=fw) + fw.close() + logger.debug("Spans written to `%s`.", sizesfile) + return bs + + if not opts.all: + bs.report() + return bs + + for seqid, subbeds in bed.sub_beds(): + bs = BedSummary(subbeds) + print("\t".join((seqid, str(bs)))) + + +def sort(args): + """ + %prog sort bedfile + + Sort bed file to have ascending order of seqid, then start. It uses the + `sort` command. 
+ """ + p = OptionParser(sort.__doc__) + p.add_argument( + "-i", + "--inplace", + dest="inplace", + default=False, + action="store_true", + help="Sort bed file in place", + ) + p.add_argument( + "-u", + dest="unique", + default=False, + action="store_true", + help="Uniqify the bed file", + ) + p.add_argument( + "--accn", + default=False, + action="store_true", + help="Sort based on the accessions", + ) + p.add_argument( + "--num", + default=False, + action="store_true", + help="Numerically sort seqid column, e.g. chr1,chr2,...", + ) + p.set_outfile(outfile=None) + p.set_tmpdir() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + inplace = opts.inplace + + if opts.num: + bed = Bed(bedfile) + bed.print_to_file(opts.outfile or "stdout", sorted=True) + return + + if not inplace and ".sorted." in bedfile: + return bedfile + + sortedbed = opts.outfile + if inplace: + sortedbed = bedfile + elif opts.outfile is None: + pf, sf = op.basename(bedfile).rsplit(".", 1) + sortedbed = pf + ".sorted." + sf + + sortopt = ( + "-k1,1 -k2,2n -k3,3n -k4,4" if not opts.accn else "-k4,4 -k1,1 -k2,2n -k3,3n" + ) + cmd = "sort" + if opts.tmpdir: + cmd += " -T {0}".format(opts.tmpdir) + if opts.unique: + cmd += " -u" + cmd += " {0} {1} -o {2}".format(sortopt, bedfile, sortedbed) + + if inplace or need_update(bedfile, sortedbed): + sh(cmd) + + return sortedbed + + +def mates(args): + """ + %prog mates bedfile + + Generate the mates file by inferring from the names. 
+ """ + p = OptionParser(mates.__doc__) + p.add_argument( + "--lib", + default=False, + action="store_true", + help="Output library information along with pairs", + ) + p.add_argument( + "--nointra", + default=False, + action="store_true", + help="Remove mates that are intra-scaffold", + ) + p.add_argument( + "--prefix", + default=False, + action="store_true", + help="Only keep links between IDs with same prefix", + ) + p.set_mates() + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + rclip = opts.rclip + + key = (lambda x: x.accn[:-rclip]) if rclip else (lambda x: x.accn) + bed = Bed(bedfile, key=key) + + pf = bedfile.rsplit(".", 1)[0] + matesfile = pf + ".mates" + lib = pf if opts.lib else None + fw = open(matesfile, "w") + if lib: + bedfile, stats = pairs( + [bedfile, "--rclip={0}".format(rclip), "--cutoff={0}".format(opts.cutoff)] + ) + sv = int(2 * stats.sd) + mindist = max(stats.mean - sv, 1) + maxdist = stats.mean + sv + print("\t".join(str(x) for x in ("library", pf, mindist, maxdist)), file=fw) + + num_fragments = num_pairs = 0 + matesbedfile = matesfile + ".bed" + fwm = open(matesbedfile, "w") + for _, lines in groupby(bed, key=key): + lines = list(lines) + if len(lines) != 2: + num_fragments += len(lines) + continue + + a, b = lines + + if opts.nointra and a.seqid == b.seqid: + continue + + # Use --prefix to limit the links between seqids with the same prefix + # For example, contigs of the same BAC, mth2-23j10_001, mth-23j10_002 + if opts.prefix: + aprefix = a.seqid.split("_")[0] + bprefix = b.seqid.split("_")[0] + if aprefix != bprefix: + continue + + num_pairs += 1 + pair = [a.accn, b.accn] + if lib: + pair.append(lib) + print("\t".join(pair), file=fw) + + print(a, file=fwm) + print(b, file=fwm) + + logger.debug( + "Discard %d frags and write %d pairs to `%s` and `%s`.", + num_fragments, + num_pairs, + matesfile, + matesbedfile, + ) + + fw.close() + fwm.close() + + return matesfile, 
matesbedfile + + +def flanking(args): + """ + %prog flanking bedfile [options] + + Get up to n features (upstream or downstream or both) flanking a given position. + """ + from numpy import array, argsort + + p = OptionParser(flanking.__doc__) + p.add_argument( + "--chrom", + default=None, + type=str, + help="chrom name of the position in query. Make sure it matches bedfile.", + ) + p.add_argument( + "--coord", default=None, type=int, help="coordinate of the position in query." + ) + p.add_argument( + "-n", default=10, type=int, help="number of flanking features to get" + ) + p.add_argument( + "--side", + default="both", + choices=("upstream", "downstream", "both"), + help="which side to get flanking features", + ) + p.add_argument( + "--max_d", default=None, type=int, help="features <= max_d away from position" + ) + p.set_outfile() + + opts, args = p.parse_args(args) + + if any([len(args) != 1, opts.chrom is None, opts.coord is None]): + sys.exit(not p.print_help()) + + (bedfile,) = args + position = (opts.chrom, opts.coord) + n, side, maxd = opts.n, opts.side, opts.max_d + + chrombed = Bed(bedfile).sub_bed(position[0]) + + if side == "upstream": + data = [ + (abs(f.start - position[1]), f) for f in chrombed if f.start <= position[1] + ] + elif side == "downstream": + data = [ + (abs(f.start - position[1]), f) for f in chrombed if f.start >= position[1] + ] + else: + data = [(abs(f.start - position[1]), f) for f in chrombed] + + if maxd: + data = [f for f in data if f[0] <= maxd] + + n += 1 # not counting self + n = min(n, len(data)) + distances, subbed = zip(*data) + distances = array(distances) + idx = argsort(distances)[:n] + flankingbed = [f for (i, f) in enumerate(subbed) if i in idx] + + fw = must_open(opts.outfile, "w") + for atom in flankingbed: + print(str(atom), file=fw) + + return position, flankingbed + + +if __name__ == "__main__": + main() diff --git a/jcvi/formats/blast.py b/jcvi/formats/blast.py new file mode 100644 index 00000000..54b2ede0 --- 
/dev/null +++ b/jcvi/formats/blast.py @@ -0,0 +1,1543 @@ +""" +parses tabular BLAST -m8 (-format 6 in BLAST+) format +""" + +import os.path as op +import sys + +from itertools import groupby +from collections import defaultdict + +from ..apps.base import ActionDispatcher, OptionParser, logger, popen, sh +from ..assembly.base import calculate_A50 +from ..compara.base import AnchorFile +from ..utils.cbook import percentage +from ..utils.grouper import Grouper +from ..utils.orderedcollections import OrderedDict +from ..utils.range import range_distance + +from .base import LineFile, BaseFile, must_open +from .bed import Bed +from .sizes import Sizes + + +try: + from .cblast import BlastLine +except ImportError as e: + logger.error(f"Failed to import cblast: {e}") + from .pyblast import BlastLine + logger.warning("Fall back to Python implementation of BlastLine") + + +class BlastSlow(LineFile): + """ + Load entire blastfile into memory + """ + + def __init__(self, filename, sorted=False): + super().__init__(filename) + fp = must_open(filename) + for row in fp: + self.append(BlastLine(row)) + self.sorted = sorted + if not sorted: + self.sort(key=lambda x: x.query) + + def iter_hits(self): + for query, blines in groupby(self, key=lambda x: x.query): + yield query, blines + + def iter_hits_pair(self): + key = lambda x: (x.query, x.subject) + if not self.sorted: + self.sort(key=key) + for qs, blines in groupby(self, key=key): + yield qs, blines + + def to_dict(self): + # for multiple HSPs pick the one with highest score + d = OrderedDict() + for line in self: + if (line.query, line.subject) not in d: + d[(line.query, line.subject)] = line + else: + cur_score = d[(line.query, line.subject)].score + if line.score > cur_score: + d[(line.query, line.subject)] = line + return d + + +class Blast(BaseFile): + """ + We can have a Blast class that loads entire file into memory, this is + not very efficient for big files (BlastSlow); when the BLAST file is + generated by BLAST/BLAT, 
the file is already sorted + """ + + def __init__(self, filename): + super().__init__(filename) + self.fp = must_open(filename) + + def __iter__(self): + self.fp.seek(0) + for row in self.fp: + if row[0] == "#": + continue + yield BlastLine(row) + + def iter_hits(self): + for query, blines in groupby(self.fp, key=lambda x: BlastLine(x).query): + blines = [BlastLine(x) for x in blines] + blines.sort(key=lambda x: -x.score) # descending score + yield query, blines + + def iter_best_hit(self, N=1, hsps=False, ref="query"): + if ref == "query": + ref, hit = "query", "subject" + elif ref == "subject": + ref, hit = "subject", "query" + else: + sys.exit("`ref` must be either `query` or `subject`.") + + for bref, blines in groupby(self.fp, key=lambda x: getattr(BlastLine(x), ref)): + blines = [BlastLine(x) for x in blines] + blines.sort(key=lambda x: -x.score) + counter = 0 + selected = set() + for b in blines: + if hsps: + selected.add(getattr(b, hit)) + counter = len(selected) + if counter > N: + selected.remove(getattr(b, hit)) + continue + else: + counter += 1 + if counter > N: + break + + yield bref, b + + @property + def hits(self): + """ + returns a dict with query => blastline + """ + return dict(self.iter_hits()) + + @property + def best_hits(self): + """ + returns a dict with query => best blasthit + """ + return dict(self.iter_best_hit()) + + +class BlastLineByConversion(BlastLine): + """ + make BlastLine object from tab delimited line objects with + BlastLine-like up to 12 fields formats + """ + + def __init__(self, sline, mode="1" * 12): + if int(mode, 2) == 4095: + super().__init__(sline) + elif 3072 <= int(mode, 2) < 4095: + args = sline.split("\t") + atoms = args[:2] + mode = list(mode) + if len(args) == 12: + for i in range(2, 12): + if mode[i] == "1": + atoms.append(args[i]) + else: + atoms.append("-1") + if len(args) < 12: + for i in range(2, 12): + if mode[i] == "1": + atoms.append(args[i - mode[:i].count("0")]) + else: + atoms.append("-1") + sline = 
"\t".join(atoms) + super().__init__(sline) + else: + m = "mode can only contain 0 or 1 \n" + m += "first two fields (query, subject) cannot be empty" + sys.exit(m) + + +class AlignStats: + """ + Stores the alignment statistics that is used in formats.blast.summary() + and formats.coords.summary() + """ + + def __init__( + self, filename, qrycovered, refcovered, qryspan, refspan, identicals, AL50 + ): + self.filename = filename + self.qrycovered = qrycovered + self.refcovered = refcovered + self.qryspan = qryspan + self.refspan = refspan + self.identicals = identicals + self.AL50 = AL50 + + def __str__(self): + pp = lambda x, d: "{:.2f}".format(x * 100.0 / d) + return "\t".join( + str(x) + for x in ( + self.filename, + self.identicals, + self.qrycovered, + pp(self.identicals, self.qrycovered), + self.refcovered, + pp(self.identicals, self.refcovered), + self.qryspan, + pp(self.identicals, self.qryspan), + self.refspan, + pp(self.identicals, self.refspan), + ) + ) + + def print_stats(self): + qrycovered = self.qrycovered + refcovered = self.refcovered + qryspan = self.qryspan + refspan = self.refspan + m0 = "AL50 (>=50% of bases in alignment blocks >= this size): {}".format( + self.AL50 + ) + m1 = "Query coverage: {}".format(percentage(self.identicals, qrycovered)) + m2 = "Reference coverage: {}".format(percentage(self.identicals, refcovered)) + m3 = "Query span: {}".format(percentage(self.identicals, qryspan)) + m4 = "Reference span: {}".format(percentage(self.identicals, refspan)) + print("\n".join((m0, m1, m2, m3, m4)), file=sys.stderr) + + +def get_stats(blastfile, strict=False): + from jcvi.utils.range import range_union, range_span + from .pyblast import BlastLine + + logger.debug("Report stats on `%s`" % blastfile) + fp = open(blastfile) + ref_ivs = [] + qry_ivs = [] + identicals = 0 + ngaps = 0 + alignlens = [] + + for row in fp: + c = BlastLine(row) + qstart, qstop = c.qstart, c.qstop + if qstart > qstop: + qstart, qstop = qstop, qstart + 
qry_ivs.append((c.query, qstart, qstop)) + + sstart, sstop = c.sstart, c.sstop + if sstart > sstop: + sstart, sstop = sstop, sstart + ref_ivs.append((c.subject, sstart, sstop)) + + alen = c.hitlen + ngaps += c.ngaps + identicals += c.hitlen - c.nmismatch - c.ngaps + alignlens.append(alen) + + qrycovered = range_union(qry_ivs) + refcovered = range_union(ref_ivs) + if strict: + # We discount gaps in counting covered bases, since we + # did not track individually gaps in qry and ref, we assume + # the gaps are opened evenly in the two sequences + qrycovered -= ngaps / 2 + refcovered -= ngaps / 2 + qryspan = range_span(qry_ivs) + refspan = range_span(ref_ivs) + _, AL50, _ = calculate_A50(alignlens) + filename = op.basename(blastfile) + alignstats = AlignStats( + filename, qrycovered, refcovered, qryspan, refspan, identicals, AL50 + ) + + return alignstats + + +def filtered_blastfile_name( + blastfile: str, + pctid: float, + hitlen: int, + inverse: bool = False, +) -> str: + """ + Return a filtered filename for LAST output, with the given similarity cutoff. + """ + pctid_str = f"{pctid:.1f}".replace(".", "_").replace("_0", "") + newblastfile = blastfile + ".P{0}L{1}".format(pctid_str, hitlen) + if inverse: + newblastfile += ".inverse" + return newblastfile + + +def filter(args): + """ + %prog filter test.blast + + Produce a new blast file and filter based on: + - score: >= cutoff + - pctid: >= cutoff + - hitlen: >= cutoff + - evalue: <= cutoff + - ids: valid ids + + Use --inverse to obtain the complementary records for the criteria above. 
+ + - noself: remove self-self hits + """ + p = OptionParser(filter.__doc__) + p.add_argument("--score", dest="score", default=0, type=int, help="Score cutoff") + p.set_align(pctid=95, hitlen=100, evalue=0.01) + p.add_argument( + "--noself", default=False, action="store_true", help="Remove self-self hits" + ) + p.add_argument("--ids", help="Path to file with ids to retain") + p.add_argument( + "--inverse", + default=False, + action="store_true", + help="Similar to grep -v, inverse", + ) + p.set_outfile(outfile=None) + + opts, args = p.parse_args(args) + if len(args) != 1: + sys.exit(not p.print_help()) + + if opts.ids: + ids = set() + for row in must_open(opts.ids): + if row[0] == "#": + continue + row = row.replace(",", "\t") + ids.update(row.split()) + else: + ids = None + + (blastfile,) = args + inverse = opts.inverse + fp = must_open(blastfile) + + score, pctid, hitlen, evalue, noself = ( + opts.score, + opts.pctid, + opts.hitlen, + opts.evalue, + opts.noself, + ) + blastfile = opts.outfile or blastfile + newblastfile = filtered_blastfile_name(blastfile, pctid, hitlen, inverse) + fw = must_open(newblastfile, "w") + for row in fp: + if row[0] == "#": + continue + c = BlastLine(row) + + if ids: + if c.query in ids and c.subject in ids: + noids = False + else: + noids = True + else: + noids = None + + remove = ( + c.score < score + or c.pctid < pctid + or c.hitlen < hitlen + or c.evalue > evalue + or noids + ) + + if inverse: + remove = not remove + + remove = remove or (noself and c.query == c.subject) + + if not remove: + print(row.rstrip(), file=fw) + + fw.close() + + return newblastfile + + +def main(): + actions = ( + ("summary", "provide summary on id% and cov%"), + ("completeness", "print completeness statistics for each query"), + ("annotation", "create tabular file with the annotations"), + ("top10", "count the most frequent 10 hits"), + ("filter", "filter BLAST file (based on score, id%, alignlen)"), + ("covfilter", "filter BLAST file (based on id% and 
cov%)"), + ("cscore", "calculate C-score for BLAST pairs"), + ("best", "get best BLAST hit per query"), + ("anchors", "keep only the BLAST pairs that are in the anchors file"), + ("pairs", "print paired-end reads of BLAST tabular file"), + ("bed", "get bed file from BLAST tabular file"), + ("condense", "group HSPs together for same query-subject pair"), + ("chain", "chain adjacent HSPs together"), + ("swap", "swap query and subjects in BLAST tabular file"), + ("sort", "sort lines so that query grouped together and scores desc"), + ("subset", "extract hits from some query and subject chrs"), + ("mismatches", "print out histogram of mismatches of HSPs"), + ("annotate", "annotate overlap types in BLAST tabular file"), + ("score", "add up the scores for each query seq"), + ("rbbh", "find reciprocal-best blast hits"), + ("gaps", "find distribution of gap sizes between adjacent HSPs"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def collect_gaps(blast, use_subject=False): + """ + Collect the gaps between adjacent HSPs in the BLAST file. + """ + key = lambda x: x.sstart if use_subject else x.qstart + blast.sort(key=key) + + for a, b in zip(blast, blast[1:]): + if use_subject: + if a.sstop < b.sstart: + yield b.sstart - a.sstop + else: + if a.qstop < b.qstart: + yield b.qstart - a.qstop + + +def gaps(args): + """ + %prog gaps A_vs_B.blast + + Find distribution of gap sizes between adjacent HSPs. 
+ """ + p = OptionParser(gaps.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + blast = BlastSlow(blastfile) + logger.debug("A total of {} records imported".format(len(blast))) + + query_gaps = list(collect_gaps(blast)) + subject_gaps = list(collect_gaps(blast, use_subject=True)) + logger.debug( + "Query gaps: {} Subject gaps: {}".format(len(query_gaps), len(subject_gaps)) + ) + + from jcvi.graphics.base import savefig + import seaborn as sns + + sns.distplot(query_gaps) + savefig("query_gaps.pdf") + + +def rbbh(args): + """ + %prog rbbh A_vs_B.blast B_vs_A.blast + + Identify the reciprocal best blast hit for each query sequence in set A + when compared to set B. + + This program assumes that the BLAST results have already been filtered + based on a combination of %id, %cov, e-value cutoffs. BLAST output should + be in tabular `-m 8` format. + """ + p = OptionParser(rbbh.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + ( + abfile, + bafile, + ) = args + ab = Blast(abfile) + ba = Blast(bafile) + + ab_hits = ab.best_hits + ba_hits = ba.best_hits + + for aquery in ab_hits: + ahit = ab_hits[aquery].subject + ba_bline = ba_hits.get(ahit) + if ba_bline: + bhit = ba_bline.subject + if bhit == aquery: + print("\t".join(str(x) for x in (aquery, ahit))) + + +def score(args): + """ + %prog score blastfile query.fasta A.ids + + Add up the scores for each query seq. Go through the lines and for each + query sequence, add up the scores when subject is in each pile by A.ids. 
+ """ + from jcvi.formats.base import SetFile + from jcvi.formats.fasta import Fasta + + p = OptionParser(score.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + blastfile, fastafile, idsfile = args + ids = SetFile(idsfile) + + blast = Blast(blastfile) + scores = defaultdict(int) + for b in blast: + query = b.query + subject = b.subject + if subject not in ids: + continue + scores[query] += b.score + + logger.debug("A total of {0} ids loaded.".format(len(ids))) + + f = Fasta(fastafile) + for s in f.iterkeys_ordered(): + sc = scores.get(s, 0) + print("\t".join((s, str(sc)))) + + +def annotation(args): + """ + %prog annotation blastfile > annotations + + Create simple two column files from the first two columns in blastfile. Use + --queryids and --subjectids to switch IDs or descriptions. + """ + from jcvi.formats.base import DictFile + + p = OptionParser(annotation.__doc__) + p.add_argument("--queryids", help="Query IDS file to switch") + p.add_argument("--subjectids", help="Subject IDS file to switch") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + + d = "\t" + qids = DictFile(opts.queryids, delimiter=d) if opts.queryids else None + sids = DictFile(opts.subjectids, delimiter=d) if opts.subjectids else None + blast = Blast(blastfile) + for b in blast: + query, subject = b.query, b.subject + if qids: + query = qids[query] + if sids: + subject = sids[subject] + print("\t".join((query, subject))) + + +def completeness(args): + """ + %prog completeness blastfile ref.fasta > outfile + + Print statistics for each gene, the coverage of the alignment onto the best hit, + as an indicator for completeness of the gene model. For example, one might + BLAST sugarcane ESTs against sorghum annotations as reference, to find + full-length transcripts. 
+ """ + from jcvi.utils.range import range_minmax + from jcvi.utils.cbook import SummaryStats + + p = OptionParser(completeness.__doc__) + p.add_argument("--ids", help="Save ids that are over 50% complete") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + blastfile, fastafile = args + idsfile = opts.ids + f = Sizes(fastafile).mapping + + b = BlastSlow(blastfile) + valid = [] + data = [] + cutoff = 50 + for query, blines in groupby(b, key=lambda x: x.query): + blines = list(blines) + ranges = [(x.sstart, x.sstop) for x in blines] + b = blines[0] + query, subject = b.query, b.subject + + rmin, rmax = range_minmax(ranges) + subject_len = f[subject] + + nterminal_dist = rmin - 1 + cterminal_dist = subject_len - rmax + covered = (rmax - rmin + 1) * 100 / subject_len + if covered > cutoff: + valid.append(query) + + data.append((nterminal_dist, cterminal_dist, covered)) + print( + "\t".join( + str(x) + for x in (query, subject, nterminal_dist, cterminal_dist, covered) + ) + ) + + nd, cd, cv = zip(*data) + m = "Total: {0}, Coverage > {1}%: {2}\n".format(len(data), cutoff, len(valid)) + m += "N-terminal: {0}\n".format(SummaryStats(nd)) + m += "C-terminal: {0}\n".format(SummaryStats(cd)) + m += "Coverage: {0}".format(SummaryStats(cv)) + print(m, file=sys.stderr) + + if idsfile: + fw = open(idsfile, "w") + print("\n".join(valid), file=fw) + logger.debug( + "A total of {0} ids (cov > {1} %) written to `{2}`.".format( + len(valid), cutoff, idsfile + ) + ) + fw.close() + + +def annotate(args): + """ + %prog annotate blastfile query.fasta subject.fasta + + Annotate overlap types (dovetail, contained, etc) in BLAST tabular file. 
+ """ + from jcvi.assembly.goldenpath import Cutoff, Overlap, Overlap_types + + p = OptionParser(annotate.__doc__) + p.set_align(pctid=94, hitlen=500) + p.add_argument("--hang", default=500, type=int, help="Maximum overhang length") + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + blastfile, afasta, bfasta = args + fp = must_open(blastfile) + asizes = Sizes(afasta).mapping + bsizes = Sizes(bfasta).mapping + cutoff = Cutoff(opts.pctid, opts.hitlen, opts.hang) + logger.debug(str(cutoff)) + for row in fp: + b = BlastLine(row) + asize = asizes[b.query] + bsize = bsizes[b.subject] + if b.query == b.subject: + continue + ov = Overlap(b, asize, bsize, cutoff) + if ov.otype: + ov.print_graphic() + print("{0}\t{1}".format(b, Overlap_types[ov.otype])) + + +def top10(args): + """ + %prog top10 blastfile.best + + Count the most frequent 10 hits. Usually the BLASTFILE needs to be screened + to get the best match. You can also provide an .ids file to query the ids. + For example the ids file can contain the seqid to species mapping. + + The ids file is two-column, and can sometimes be generated by + `jcvi.formats.fasta ids --description`. 
+ """ + from jcvi.formats.base import DictFile + + p = OptionParser(top10.__doc__) + p.add_argument( + "--top", + default=10, + type=int, + help="Top N taxa to extract", + ) + p.add_argument( + "--ids", + default=None, + help="Two column ids file to query seqid", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + mapping = DictFile(opts.ids, delimiter="\t") if opts.ids else {} + + cmd = "cut -f2 {0}".format(blastfile) + cmd += " | sort | uniq -c | sort -k1,1nr | head -n {0}".format(opts.top) + fp = popen(cmd) + for row in fp: + count, seqid = row.split() + nseqid = mapping.get(seqid, seqid) + print("\t".join((count, nseqid))) + + +def sort(args): + """ + %prog sort + + Sort lines so that same query grouped together with scores descending. The + sort is 'in-place'. + """ + p = OptionParser(sort.__doc__) + p.add_argument( + "--query", + default=False, + action="store_true", + help="Sort by query position", + ) + p.add_argument( + "--ref", + default=False, + action="store_true", + help="Sort by reference position", + ) + p.add_argument( + "--refscore", + default=False, + action="store_true", + help="Sort by reference name, then score descending", + ) + p.add_argument( + "--coords", + default=False, + action="store_true", + help="File is .coords generated by NUCMER", + ) + p.set_tmpdir() + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + + if opts.coords: + if opts.query: + key = "-k13,13 -k3,3n" + elif opts.ref: + key = "-k12,12 -k1,1n" + + else: + if opts.query: + key = "-k1,1 -k7,7n" + elif opts.ref: + key = "-k2,2 -k9,9n" + elif opts.refscore: + key = "-k2,2 -k12,12gr" + else: + key = "-k1,1 -k12,12gr" + + cmd = "sort" + if opts.tmpdir: + cmd += " -T {0}".format(opts.tmpdir) + cmd += " {0} {1} -o {1}".format(key, blastfile) + sh(cmd) + + +def cscore(args): + """ + %prog cscore blastfile > cscoreOut + + See supplementary info for sea 
anemone genome paper, C-score formula: + + cscore(A,B) = score(A,B) / + max(best score for A, best score for B) + + A C-score of one is the same as reciprocal best hit (RBH). + + Output file will be 3-column (query, subject, cscore). Use --cutoff to + select a different cutoff. + """ + from jcvi.utils.cbook import gene_name + + p = OptionParser(cscore.__doc__) + p.add_argument( + "--cutoff", + default=0.9999, + type=float, + help="Minimum C-score to report", + ) + p.add_argument( + "--pct", + default=False, + action="store_true", + help="Also include pct as last column", + ) + p.add_argument( + "--writeblast", + default=False, + action="store_true", + help="Also write filtered blast file", + ) + p.set_stripnames() + p.set_outfile() + + opts, args = p.parse_args(args) + ostrip = opts.strip_names + writeblast = opts.writeblast + outfile = opts.outfile + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + + blast = Blast(blastfile) + logger.debug("Register best scores ..") + best_score = defaultdict(float) + for b in blast: + query, subject = b.query, b.subject + if ostrip: + query, subject = gene_name(query), gene_name(subject) + + score = b.score + if score > best_score[query]: + best_score[query] = score + if score > best_score[subject]: + best_score[subject] = score + + blast = Blast(blastfile) + pairs = {} + cutoff = opts.cutoff + for b in blast: + query, subject = b.query, b.subject + if ostrip: + query, subject = gene_name(query), gene_name(subject) + + score = b.score + pctid = b.pctid + s = score / max(best_score[query], best_score[subject]) + if s > cutoff: + pair = (query, subject) + if pair not in pairs or s > pairs[pair][0]: + pairs[pair] = (s, pctid, b) + + fw = must_open(outfile, "w") + if writeblast: + fwb = must_open(outfile + ".filtered.blast", "w") + pct = opts.pct + for (query, subject), (s, pctid, b) in sorted(pairs.items()): + args = [query, subject, "{0:.2f}".format(s)] + if pct: + args.append("{0:.1f}".format(pctid)) + 
print("\t".join(args), file=fw) + if writeblast: + print(b, file=fwb) + fw.close() + if writeblast: + fwb.close() + + +def get_distance(a, b, xaxis=True): + """ + Returns the distance between two blast HSPs. + """ + if xaxis: + arange = ("0", a.qstart, a.qstop, a.orientation) # 0 is the dummy chromosome + brange = ("0", b.qstart, b.qstop, b.orientation) + else: + arange = ("0", a.sstart, a.sstop, a.orientation) + brange = ("0", b.sstart, b.sstop, b.orientation) + + dist, oo = range_distance(arange, brange, distmode="ee") + dist = abs(dist) + + return dist + + +def combine_HSPs(a): + """ + Combine HSPs into a single BlastLine. + """ + m = a[0] + if len(a) == 1: + return m + + for b in a[1:]: + assert m.query == b.query + assert m.subject == b.subject + m.hitlen += b.hitlen + m.nmismatch += b.nmismatch + m.ngaps += b.ngaps + m.qstart = min(m.qstart, b.qstart) + m.qstop = max(m.qstop, b.qstop) + m.sstart = min(m.sstart, b.sstart) + m.sstop = max(m.sstop, b.sstop) + if m.has_score: + m.score += b.score + + m.pctid = 100 - (m.nmismatch + m.ngaps) * 100.0 / m.hitlen + return m + + +def chain_HSPs(blast, xdist=100, ydist=100): + """ + Take a list of BlastLines (or a BlastSlow instance), and returns a list of + BlastLines. 
+ """ + key = lambda x: (x.query, x.subject) + blast.sort(key=key) + + clusters = Grouper() + for qs, points in groupby(blast, key=key): + points = sorted( + list(points), key=lambda x: (x.qstart, x.qstop, x.sstart, x.sstop) + ) + + n = len(points) + for i in range(n): + a = points[i] + clusters.join(a) + for j in range(i + 1, n): + b = points[j] + + # x-axis distance + del_x = get_distance(a, b) + if del_x > xdist: + break + # y-axis distance + del_y = get_distance(a, b, xaxis=False) + if del_y > ydist: + continue + # otherwise join + clusters.join(a, b) + + chained_hsps = [combine_HSPs(x) for x in clusters] + key = lambda x: (x.query, -x.score if x.has_score else 0) + chained_hsps = sorted(chained_hsps, key=key) + + return chained_hsps + + +def chain(args): + """ + %prog chain blastfile + + Chain adjacent HSPs together to form larger HSP. + """ + p = OptionParser(chain.__doc__) + p.add_argument( + "--dist", + dest="dist", + default=100, + type=int, + help="extent of flanking regions to search", + ) + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + dist = opts.dist + assert dist > 0 + + blast = BlastSlow(blastfile) + logger.debug("A total of {} records imported".format(len(blast))) + chained_hsps = chain_HSPs(blast, xdist=dist, ydist=dist) + logger.debug("A total of {} records after chaining".format(len(chained_hsps))) + + for b in chained_hsps: + print(b) + + +def condense(args): + """ + %prog condense blastfile > blastfile.condensed + + Condense HSPs that belong to the same query-subject pair into one. 
+ """ + p = OptionParser(condense.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + blast = BlastSlow(blastfile) + key = lambda x: x.query + blast.sort(key=key) + + clusters = [] + for q, lines in groupby(blast, key=key): + lines = list(lines) + condenser = defaultdict(list) + + for b in lines: + condenser[(b.subject, b.orientation)].append(b) + + for bs in condenser.values(): + clusters.append(bs) + + chained_hsps = [combine_HSPs(x) for x in clusters] + chained_hsps = sorted(chained_hsps, key=lambda x: (x.query, -x.score)) + for b in chained_hsps: + print(b) + + +def mismatches(args): + """ + %prog mismatches blastfile + + Print out histogram of mismatches of HSPs, usually for evaluating SNP level. + """ + from jcvi.utils.cbook import percentage + from jcvi.graphics.histogram import stem_leaf_plot + + p = OptionParser(mismatches.__doc__) + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + + data = [] + b = Blast(blastfile) + for query, bline in b.iter_best_hit(): + mm = bline.nmismatch + bline.ngaps + data.append(mm) + + nonzeros = [x for x in data if x != 0] + title = "Polymorphic sites: {0}".format(percentage(len(nonzeros), len(data))) + stem_leaf_plot(data, 0, 20, 20, title=title) + + +def covfilter(args): + """ + %prog covfilter blastfile fastafile + + Fastafile is used to get the sizes of the queries. Two filters can be + applied, the id% and cov%. 
+ """ + from jcvi.algorithms.supermap import supermap + from jcvi.utils.range import range_union + + allowed_iterby = ("query", "query_sbjct") + + p = OptionParser(covfilter.__doc__) + p.set_align(pctid=95, pctcov=50) + p.add_argument( + "--scov", + default=False, + action="store_true", + help="Subject coverage instead of query", + ) + p.add_argument( + "--supermap", action="store_true", help="Use supermap instead of union" + ) + p.add_argument( + "--ids", + dest="ids", + default=None, + help="Print out the ids that satisfy", + ) + p.add_argument( + "--list", + dest="list", + default=False, + action="store_true", + help="List the id% and cov% per gene", + ) + p.add_argument( + "--iterby", + dest="iterby", + default="query", + choices=allowed_iterby, + help="Choose how to iterate through BLAST", + ) + p.set_outfile(outfile=None) + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + blastfile, fastafile = args + pctid = opts.pctid + pctcov = opts.pctcov + union = not opts.supermap + scov = opts.scov + sz = Sizes(fastafile) + sizes = sz.mapping + iterby = opts.iterby + qspair = iterby == "query_sbjct" + + if not union: + querysupermap = blastfile + ".query.supermap" + if not op.exists(querysupermap): + supermap(blastfile, filter="query") + + blastfile = querysupermap + + assert op.exists(blastfile) + + covered = 0 + mismatches = 0 + gaps = 0 + alignlen = 0 + queries = set() + valid = set() + blast = BlastSlow(blastfile) + iterator = blast.iter_hits_pair if qspair else blast.iter_hits + + covidstore = {} + for query, blines in iterator(): + blines = list(blines) + queries.add(query) + + # per gene report + this_covered = 0 + this_alignlen = 0 + this_mismatches = 0 + this_gaps = 0 + this_identity = 0 + + ranges = [] + for b in blines: + if scov: + s, start, stop = b.subject, b.sstart, b.sstop + else: + s, start, stop = b.query, b.qstart, b.qstop + cov_id = s + + if b.pctid < pctid: + continue + + if start > stop: + start, stop = 
stop, start + this_covered += stop - start + 1 + this_alignlen += b.hitlen + this_mismatches += b.nmismatch + this_gaps += b.ngaps + ranges.append(("1", start, stop)) + + if ranges: + this_identity = ( + 100.0 - (this_mismatches + this_gaps) * 100.0 / this_alignlen + ) + + if union: + this_covered = range_union(ranges) + + this_coverage = this_covered * 100.0 / sizes[cov_id] + covidstore[query] = (this_identity, this_coverage) + if this_identity >= pctid and this_coverage >= pctcov: + valid.add(query) + + covered += this_covered + mismatches += this_mismatches + gaps += this_gaps + alignlen += this_alignlen + + if opts.list: + if qspair: + allpairs = defaultdict(list) + for q, s in covidstore: + allpairs[q].append((q, s)) + allpairs[s].append((q, s)) + + for id, size in sz.iter_sizes(): + if id not in allpairs: + print("\t".join((id, "na", "0", "0"))) + else: + for qs in allpairs[id]: + this_identity, this_coverage = covidstore[qs] + print( + "{0}\t{1:.1f}\t{2:.1f}".format( + "\t".join(qs), this_identity, this_coverage + ) + ) + else: + for query, size in sz.iter_sizes(): + this_identity, this_coverage = covidstore.get(query, (0, 0)) + print( + "{0}\t{1:.1f}\t{2:.1f}".format(query, this_identity, this_coverage) + ) + + mapped_count = len(queries) + valid_count = len(valid) + cutoff_message = "(id={0.pctid}% cov={0.pctcov}%)".format(opts) + + m = "Identity: {0} mismatches, {1} gaps, {2} alignlen\n".format( + mismatches, gaps, alignlen + ) + total = len(sizes.keys()) + m += "Total mapped: {0} ({1:.1f}% of {2})\n".format( + mapped_count, mapped_count * 100.0 / total, total + ) + m += "Total valid {0}: {1} ({2:.1f}% of {3})\n".format( + cutoff_message, valid_count, valid_count * 100.0 / total, total + ) + m += "Average id = {0:.2f}%\n".format(100 - (mismatches + gaps) * 100.0 / alignlen) + + queries_combined = sz.totalsize + m += "Coverage: {0} covered, {1} total\n".format(covered, queries_combined) + m += "Average coverage = {0:.2f}%".format(covered * 100.0 / 
queries_combined) + + logfile = blastfile + ".covfilter.log" + fw = open(logfile, "w") + for f in (sys.stderr, fw): + print(m, file=f) + fw.close() + + if opts.ids: + filename = opts.ids + fw = must_open(filename, "w") + for id in valid: + print(id, file=fw) + logger.debug( + "Queries beyond cutoffs {0} written to `{1}`.".format( + cutoff_message, filename + ) + ) + + outfile = opts.outfile + if not outfile: + return + + fw = must_open(outfile, "w") + blast = Blast(blastfile) + for b in blast: + query = (b.query, b.subject) if qspair else b.query + if query in valid: + print(b, file=fw) + + +def swap(args): + """ + %prog swap blastfile + + Print out a new blast file with query and subject swapped. + """ + p = OptionParser(swap.__doc__) + + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + swappedblastfile = blastfile + ".swapped" + fp = must_open(blastfile) + fw = must_open(swappedblastfile, "w") + for row in fp: + b = BlastLine(row) + print(b.swapped, file=fw) + + fw.close() + sort([swappedblastfile]) + + +def bed(args): + """ + %prog bed blastfile + + Print out bed file based on coordinates in BLAST report. By default, write + out subject positions. Use --swap to write query positions. 
+ """ + from .bed import sort as bed_sort, mergeBed + + p = OptionParser(bed.__doc__) + p.add_argument( + "--swap", + default=False, + action="store_true", + help="Write query positions", + ) + p.add_argument( + "--both", + default=False, + action="store_true", + help="Generate one line for each of query and subject", + ) + p.add_argument( + "--merge", + default=None, + type=int, + help="Merge hits within this distance", + ) + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(p.print_help()) + + (blastfile,) = args + positive = (not opts.swap) or opts.both + negative = opts.swap or opts.both + + fp = must_open(blastfile) + bedfile = ( + "{0}.bed".format(blastfile.rsplit(".", 1)[0]) + if blastfile.endswith(".blast") + else "{0}.bed".format(blastfile) + ) + fw = open(bedfile, "w") + for row in fp: + b = BlastLine(row) + if positive: + print(b.bedline, file=fw) + if negative: + print(b.swapped.bedline, file=fw) + + logger.debug("File written to `%s`.", bedfile) + fw.close() + bed_sort([bedfile, "-i"]) + if opts.merge: + mergeBed(bedfile, sorted=True, d=opts.merge, inplace=True) + + return bedfile + + +def pairs(args): + """ + See __doc__ for OptionParser.set_pairs(). + """ + import jcvi.formats.bed + + p = OptionParser(pairs.__doc__) + p.set_pairs() + opts, targs = p.parse_args(args) + + if len(targs) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = targs + bedfile = bed([blastfile]) + args[args.index(blastfile)] = bedfile + + return jcvi.formats.bed.pairs(args) + + +def anchors(args): + """ + %prog anchors blastfile anchorsfile + + Extract a subset of the BLAST file based on the anchors file. The anchors + file is a tab-delimited file with two columns, likely generated from synteny + pipeline. This is useful to filter down BLAST. 
+ """ + p = OptionParser(anchors.__doc__) + p.set_outfile() + p.add_argument( + "--best", default=False, action="store_true", help="Keep only the best hit" + ) + opts, args = p.parse_args(args) + if len(args) != 2: + sys.exit(not p.print_help()) + + blastfile, anchorsfile = args + anchor_file = AnchorFile(anchorsfile) + anchor_pairs = set((a, b) for a, b, _ in anchor_file.iter_pairs()) + blast = Blast(blastfile) + found, total = 0, 0 + fw = must_open(opts.outfile, "w") + seen = set() + for rec in blast: + pp = (rec.query, rec.subject) + if pp in anchor_pairs: + found += 1 + if opts.best and pp in seen: + continue + print(rec, file=fw) + seen.add(pp) + total += 1 + logger.info("Found %s", percentage(found, total)) + + +def best(args): + """ + %prog best blastfile + + print the best hit for each query in the blastfile + """ + p = OptionParser(best.__doc__) + + p.add_argument("-n", default=1, type=int, help="get best N hits") + p.add_argument( + "--nosort", + default=False, + action="store_true", + help="assume BLAST is already sorted", + ) + p.add_argument( + "--hsps", + default=False, + action="store_true", + help="get all HSPs for the best pair", + ) + p.add_argument( + "--subject", + default=False, + action="store_true", + help="get best hit(s) for subject genome instead", + ) + p.set_tmpdir() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + n = opts.n + hsps = opts.hsps + tmpdir = opts.tmpdir + ref = "query" if not opts.subject else "subject" + + if not opts.nosort: + sargs = [blastfile] + if tmpdir: + sargs += ["-T {0}".format(tmpdir)] + if ref != "query": + sargs += ["--refscore"] + sort(sargs) + else: + logger.debug("Assuming sorted BLAST") + + if not opts.subject: + bestblastfile = blastfile + ".best" + else: + bestblastfile = blastfile + ".subject.best" + fw = open(bestblastfile, "w") + + b = Blast(blastfile) + for q, bline in b.iter_best_hit(N=n, hsps=hsps, ref=ref): + print(bline, file=fw) + 
+ return bestblastfile + + +def summary(args): + """ + %prog summary blastfile + + Provide summary on id% and cov%, for both query and reference. Often used in + comparing genomes (based on NUCMER results). + + Columns: + filename, identicals, qrycovered, pct_qrycovered, refcovered, pct_refcovered, + qryspan, pct_qryspan, refspan, pct_refspan + """ + p = OptionParser(summary.__doc__) + p.add_argument( + "--strict", + default=False, + action="store_true", + help="Strict 'gapless' mode. Exclude gaps from covered base.", + ) + p.add_argument( + "--tabular", + default=False, + action="store_true", + help="Print succint tabular output", + ) + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (blastfile,) = args + + alignstats = get_stats(blastfile, strict=opts.strict) + if opts.tabular: + print(str(alignstats)) + else: + alignstats.print_stats() + + +def subset(args): + """ + %prog subset blastfile qbedfile sbedfile + + Extract blast hits between given query and subject chrs. + + If --qchrs or --schrs is not given, then all chrs from q/s genome will + be included. However one of --qchrs and --schrs must be specified. + Otherwise the script will do nothing. + """ + p = OptionParser(subset.__doc__) + p.add_argument( + "--qchrs", + default=None, + help="query chrs to extract, comma sep", + ) + p.add_argument( + "--schrs", + default=None, + help="subject chrs to extract, comma sep", + ) + p.add_argument( + "--convert", + default=False, + action="store_true", + help="convert accns to chr_rank", + ) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + blastfile, qbedfile, sbedfile = args + qchrs = opts.qchrs + schrs = opts.schrs + assert qchrs or schrs, p.print_help() + convert = opts.convert + + outfile = blastfile + "." + if qchrs: + outfile += qchrs + "." 
+ qchrs = set(qchrs.split(",")) + else: + qchrs = set(Bed(qbedfile).seqids) + if schrs: + schrs = set(schrs.split(",")) + if qbedfile != sbedfile or qchrs != schrs: + outfile += ",".join(schrs) + "." + else: + schrs = set(Bed(sbedfile).seqids) + outfile += "blast" + + qo = Bed(qbedfile).order + so = Bed(sbedfile).order + + fw = must_open(outfile, "w") + for b in Blast(blastfile): + q, s = b.query, b.subject + if qo[q][1].seqid in qchrs and so[s][1].seqid in schrs: + if convert: + b.query = qo[q][1].seqid + "_" + "{0:05d}".format(qo[q][0]) + b.subject = so[s][1].seqid + "_" + "{0:05d}".format(so[s][0]) + print(b, file=fw) + fw.close() + logger.debug("Subset blastfile written to `{0}`".format(outfile)) + + +if __name__ == "__main__": + main() diff --git a/jcvi/formats/cblast.c b/jcvi/formats/cblast.c new file mode 100644 index 00000000..8b92b238 --- /dev/null +++ b/jcvi/formats/cblast.c @@ -0,0 +1,16862 @@ +/* Generated by Cython 3.0.11 */ + +/* BEGIN: Cython Metadata +{ + "distutils": { + "depends": [], + "extra_compile_args": [ + "-O3" + ], + "name": "jcvi.formats.cblast", + "sources": [ + "src/jcvi/formats/cblast.pyx" + ] + }, + "module_name": "jcvi.formats.cblast" +} +END: Cython Metadata */ + +#ifndef PY_SSIZE_T_CLEAN +#define PY_SSIZE_T_CLEAN +#endif /* PY_SSIZE_T_CLEAN */ +#if defined(CYTHON_LIMITED_API) && 0 + #ifndef Py_LIMITED_API + #if CYTHON_LIMITED_API+0 > 0x03030000 + #define Py_LIMITED_API CYTHON_LIMITED_API + #else + #define Py_LIMITED_API 0x03030000 + #endif + #endif +#endif + +#include "Python.h" +#ifndef Py_PYTHON_H + #error Python headers needed to compile C extensions, please install development version of Python. +#elif PY_VERSION_HEX < 0x02070000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) + #error Cython requires Python 2.7+ or Python 3.3+. 
+#else +#if defined(CYTHON_LIMITED_API) && CYTHON_LIMITED_API +#define __PYX_EXTRA_ABI_MODULE_NAME "limited" +#else +#define __PYX_EXTRA_ABI_MODULE_NAME "" +#endif +#define CYTHON_ABI "3_0_11" __PYX_EXTRA_ABI_MODULE_NAME +#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI +#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "." +#define CYTHON_HEX_VERSION 0x03000BF0 +#define CYTHON_FUTURE_DIVISION 0 +#include +#ifndef offsetof + #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(_WIN32) && !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#define __PYX_COMMA , +#ifndef HAVE_LONG_LONG + #define HAVE_LONG_LONG +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#define __PYX_LIMITED_VERSION_HEX PY_VERSION_HEX +#if defined(GRAALVM_PYTHON) + /* For very preliminary testing purposes. Most variables are set the same as PyPy. 
+ The existence of this section does not imply that anything works or is even tested */ + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 1 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + 
#undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(PYPY_VERSION) + #define CYTHON_COMPILING_IN_PYPY 1 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + #endif + #if PY_VERSION_HEX < 0x03090000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00) + #undef 
CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(CYTHON_LIMITED_API) + #ifdef Py_LIMITED_API + #undef __PYX_LIMITED_VERSION_HEX + #define __PYX_LIMITED_VERSION_HEX Py_LIMITED_API + #endif + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 1 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_CLINE_IN_TRACEBACK + #define CYTHON_CLINE_IN_TRACEBACK 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 1 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #endif + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_MODULE_STATE + #define 
CYTHON_USE_MODULE_STATE 1 + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(Py_GIL_DISABLED) || defined(Py_NOGIL) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 1 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #ifndef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 1 + #endif + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #ifndef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define 
CYTHON_USE_MODULE_STATE 0 + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 + #endif +#else + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 1 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #ifndef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 1 + #endif + #if PY_MAJOR_VERSION < 3 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 1 + #endif + #ifndef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 1 + #endif + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #elif !defined(CYTHON_USE_UNICODE_WRITER) + #define CYTHON_USE_UNICODE_WRITER 1 + #endif + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #ifndef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 1 + #endif + #ifndef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL (PY_MAJOR_VERSION < 3 || PY_VERSION_HEX >= 0x03060000 && PY_VERSION_HEX < 0x030C00A6) + #endif + 
#ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL (PY_VERSION_HEX >= 0x030700A1) + #endif + #ifndef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 1 + #endif + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #endif + #if PY_VERSION_HEX < 0x030400a1 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #elif !defined(CYTHON_USE_TP_FINALIZE) + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #if PY_VERSION_HEX < 0x030600B1 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #elif !defined(CYTHON_USE_DICT_VERSIONS) + #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX < 0x030C00A5) + #endif + #if PY_VERSION_HEX < 0x030700A3 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #elif !defined(CYTHON_USE_EXC_INFO_STACK) + #define CYTHON_USE_EXC_INFO_STACK 1 + #endif + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 1 + #endif +#endif +#if !defined(CYTHON_FAST_PYCCALL) +#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) +#endif +#if !defined(CYTHON_VECTORCALL) +#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1) +#endif +#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1) +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_MAJOR_VERSION < 3 + #include "longintrepr.h" + #endif + #undef SHIFT + #undef BASE + #undef MASK + #ifdef SIZEOF_VOID_P + enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; + #endif +#endif +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#ifndef 
__has_cpp_attribute + #define __has_cpp_attribute(x) 0 +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#ifndef CYTHON_UNUSED + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(maybe_unused) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(maybe_unused) + #define CYTHON_UNUSED [[maybe_unused]] + #endif + #endif + #endif +#endif +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_UNUSED_VAR +# if defined(__cplusplus) + template void CYTHON_UNUSED_VAR( const T& ) { } +# else +# define CYTHON_UNUSED_VAR(x) (void)(x) +# endif +#endif +#ifndef CYTHON_MAYBE_UNUSED_VAR + #define CYTHON_MAYBE_UNUSED_VAR(x) CYTHON_UNUSED_VAR(x) +#endif +#ifndef CYTHON_NCP_UNUSED +# if CYTHON_COMPILING_IN_CPYTHON +# define CYTHON_NCP_UNUSED +# else +# define CYTHON_NCP_UNUSED CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_USE_CPP_STD_MOVE + #if defined(__cplusplus) && (\ + __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1600)) + #define CYTHON_USE_CPP_STD_MOVE 1 + #else + #define CYTHON_USE_CPP_STD_MOVE 0 + #endif +#endif +#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) +#ifdef _MSC_VER + #ifndef _MSC_STDINT_H_ + #if _MSC_VER < 
1300 + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + #else + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; + #endif + #endif + #if _MSC_VER < 1300 + #ifdef _WIN64 + typedef unsigned long long __pyx_uintptr_t; + #else + typedef unsigned int __pyx_uintptr_t; + #endif + #else + #ifdef _WIN64 + typedef unsigned __int64 __pyx_uintptr_t; + #else + typedef unsigned __int32 __pyx_uintptr_t; + #endif + #endif +#else + #include <stdint.h> /* declares uintptr_t, used by the typedef below */ + typedef uintptr_t __pyx_uintptr_t; +#endif +#ifndef CYTHON_FALLTHROUGH + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(fallthrough) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(fallthrough) + #define CYTHON_FALLTHROUGH [[fallthrough]] + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_cpp_attribute(clang::fallthrough) + #define CYTHON_FALLTHROUGH [[clang::fallthrough]] + #elif __has_cpp_attribute(gnu::fallthrough) + #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] + #endif + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_attribute(fallthrough) + #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) + #else + #define CYTHON_FALLTHROUGH + #endif + #endif + #if defined(__clang__) && defined(__apple_build_version__) + #if __apple_build_version__ < 7000000 + #undef CYTHON_FALLTHROUGH + #define CYTHON_FALLTHROUGH + #endif + #endif +#endif +#ifdef __cplusplus /* C++: compile-time signedness test; T(0) < T(-1) holds only when -1 wraps to a large unsigned value */ + template<typename T> + struct __PYX_IS_UNSIGNED_IMPL {static const bool value = T(0) < T(-1);}; + #define __PYX_IS_UNSIGNED(type) (__PYX_IS_UNSIGNED_IMPL<type>::value) +#else + #define __PYX_IS_UNSIGNED(type) (((type)-1) > 0) +#endif +#if CYTHON_COMPILING_IN_PYPY == 1 + #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x030A0000) +#else + #define __PYX_NEED_TP_PRINT_SLOT 
(PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000) +#endif +#define __PYX_REINTERPRET_FUNCION(func_pointer, other_pointer) ((func_pointer)(void(*)(void))(other_pointer)) /* casts via void(*)(void) to the target function-pointer type */ + +#ifndef CYTHON_INLINE /* pick the compiler's inline keyword, or nothing */ + #if defined(__clang__) + #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) + #elif defined(__GNUC__) + #define CYTHON_INLINE __inline__ + #elif defined(_MSC_VER) + #define CYTHON_INLINE __inline + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_INLINE inline + #else + #define CYTHON_INLINE + #endif +#endif + +#define __PYX_BUILD_PY_SSIZE_T "n" +#define CYTHON_FORMAT_SSIZE_T "z" +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_DefaultClassType PyClass_Type + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_BUILTIN_MODULE_NAME "builtins" + #define __Pyx_DefaultClassType PyType_Type +#if CYTHON_COMPILING_IN_LIMITED_API + /* Limited-API code-object constructor: imports the types module, looks up CodeType and calls it with an argument list chosen by the interpreter's minor version (<= 7, 8..10, 11+). An exception pending on entry is fetched first and, if one was set, restored before returning. Returns the new object from the call, or NULL on failure. */ static CYTHON_INLINE PyObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyObject *exception_table = NULL; + PyObject *types_module=NULL, *code_type=NULL, *result=NULL; + #if __PYX_LIMITED_VERSION_HEX < 0x030B0000 + PyObject *version_info; + PyObject *py_minor_version = NULL; + #endif + long minor_version = 0; + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000 + minor_version = 11; + #else + if (!(version_info = PySys_GetObject("version_info"))) goto end; + if (!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end; + minor_version = PyLong_AsLong(py_minor_version); + Py_DECREF(py_minor_version); + if (minor_version == -1 && PyErr_Occurred()) goto end; + #endif + if 
(!(types_module = PyImport_ImportModule("types"))) goto end; + if (!(code_type = PyObject_GetAttrString(types_module, "CodeType"))) goto end; + if (minor_version <= 7) { + (void)p; + /* one format unit per argument: 5 ints, 6 objects, 1 int, 3 objects (lnos, fv, cell) */ result = PyObject_CallFunction(code_type, "iiiiiOOOOOOiOOO", a, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else if (minor_version <= 10) { + /* 6 ints (adds p), 6 objects, 1 int, 3 objects */ result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOiOOO", a,p, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else { + if (!(exception_table = PyBytes_FromStringAndSize(NULL, 0))) goto end; + /* 6 ints, 7 objects (name is passed twice), 1 int, 4 objects (lnos, exception_table, fv, cell) */ result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOOiOOOO", a,p, k, l, s, f, code, + c, n, v, fn, name, name, fline, lnos, exception_table, fv, cell); + } + end: + Py_XDECREF(code_type); + Py_XDECREF(exception_table); + Py_XDECREF(types_module); + if (type) { + PyErr_Restore(type, value, traceback); + } + return result; + } + #ifndef CO_OPTIMIZED /* code-object flag values, defined only when the headers do not provide them */ + #define CO_OPTIMIZED 0x0001 + #endif + #ifndef CO_NEWLOCALS + #define CO_NEWLOCALS 0x0002 + #endif + #ifndef CO_VARARGS + #define CO_VARARGS 0x0004 + #endif + #ifndef CO_VARKEYWORDS + #define CO_VARKEYWORDS 0x0008 + #endif + #ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x0200 + #endif + #ifndef CO_GENERATOR + #define CO_GENERATOR 0x0020 + #endif + #ifndef CO_COROUTINE + #define CO_COROUTINE 0x0080 + #endif +#elif PY_VERSION_HEX >= 0x030B0000 + /* 3.11+ variant: calls (PyUnstable_)Code_NewWithPosOnlyArgs, passing name twice and a freshly created empty bytes object as the last argument; returns NULL if that bytes object cannot be created. */ static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyCodeObject *result; + PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); + if (!empty_bytes) return NULL; + result = + #if PY_VERSION_HEX >= 0x030C0000 + PyUnstable_Code_NewWithPosOnlyArgs + #else + PyCode_NewWithPosOnlyArgs + #endif + (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, empty_bytes); + Py_DECREF(empty_bytes); + return result; + } 
+#elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_NewWithPosOnlyArgs(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#endif +#endif +#if PY_VERSION_HEX >= 0x030900A4 || defined(Py_IS_TYPE) /* use the header's Py_IS_TYPE when present, else compare ob_type directly */ + #define __Pyx_IS_TYPE(ob, type) Py_IS_TYPE(ob, type) +#else + #define __Pyx_IS_TYPE(ob, type) (((const PyObject*)(ob))->ob_type == (type)) /* ob is parenthesised so the cast applies to the whole argument expression */ +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_Is) /* identity helpers: header versions when present, else pointer comparison */ + #define __Pyx_Py_Is(x, y) Py_Is(x, y) +#else + #define __Pyx_Py_Is(x, y) ((x) == (y)) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsNone) + #define __Pyx_Py_IsNone(ob) Py_IsNone(ob) +#else + #define __Pyx_Py_IsNone(ob) __Pyx_Py_Is((ob), Py_None) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsTrue) + #define __Pyx_Py_IsTrue(ob) Py_IsTrue(ob) +#else + #define __Pyx_Py_IsTrue(ob) __Pyx_Py_Is((ob), Py_True) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsFalse) + #define __Pyx_Py_IsFalse(ob) Py_IsFalse(ob) +#else + #define __Pyx_Py_IsFalse(ob) __Pyx_Py_Is((ob), Py_False) +#endif +#define __Pyx_NoneAsNull(obj) (__Pyx_Py_IsNone(obj) ? 
NULL : (obj)) +#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY /* PyObject_GC_IsFinalized() here, the _PyGC_FINALIZED() macro otherwise */ + #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o) +#else + #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o) +#endif +#ifndef CO_COROUTINE /* code-object flag values, defined only when the headers lack them */ + #define CO_COROUTINE 0x80 +#endif +#ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x200 +#endif +#ifndef Py_TPFLAGS_CHECKTYPES /* type and method flags missing from the headers are defined as 0 */ + #define Py_TPFLAGS_CHECKTYPES 0 +#endif +#ifndef Py_TPFLAGS_HAVE_INDEX + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#ifndef Py_TPFLAGS_HAVE_NEWBUFFER + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#ifndef Py_TPFLAGS_HAVE_FINALIZE + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#ifndef Py_TPFLAGS_SEQUENCE + #define Py_TPFLAGS_SEQUENCE 0 +#endif +#ifndef Py_TPFLAGS_MAPPING + #define Py_TPFLAGS_MAPPING 0 +#endif +#ifndef METH_STACKLESS + #define METH_STACKLESS 0 +#endif +#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) /* declare the fast-call function-pointer types locally */ + #ifndef METH_FASTCALL + #define METH_FASTCALL 0x80 + #endif + typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); + typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames); +#else /* alias the names to the typedefs from the Python headers (underscore-prefixed before 0x030d00A4) */ + #if PY_VERSION_HEX >= 0x030d00A4 + # define __Pyx_PyCFunctionFast PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords PyCFunctionFastWithKeywords + #else + # define __Pyx_PyCFunctionFast _PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords + #endif +#endif +#if CYTHON_METH_FASTCALL /* map the __Pyx_ names to the FASTCALL flag and types, or to METH_VARARGS and the classic PyCFunction types */ + #define __Pyx_METH_FASTCALL METH_FASTCALL + #define __Pyx_PyCFunction_FastCall __Pyx_PyCFunctionFast + #define __Pyx_PyCFunction_FastCallWithKeywords __Pyx_PyCFunctionFastWithKeywords +#else + #define __Pyx_METH_FASTCALL METH_VARARGS + #define __Pyx_PyCFunction_FastCall PyCFunction + #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords +#endif +#if CYTHON_VECTORCALL + #define __pyx_vectorcallfunc vectorcallfunc + 
#define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET + #define __Pyx_PyVectorcall_NARGS(n) PyVectorcall_NARGS((size_t)(n)) +#elif CYTHON_BACKPORT_VECTORCALL /* local vectorcall typedef; the top bit of size_t is the offset flag and is masked out of the argument count */ + typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args, + size_t nargsf, PyObject *kwnames); + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1)) + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(((size_t)(n)) & ~__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)) +#else + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET 0 + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(n)) +#endif +#if PY_VERSION_HEX >= 0x030900B1 /* compare the full hex version; PY_MAJOR_VERSION is only the major number, so the old test was never true */ +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_CheckExact(func) +#else +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_Check(func) +#endif +#define __Pyx_CyOrPyCFunction_Check(func) PyCFunction_Check(func) +#if CYTHON_COMPILING_IN_CPYTHON /* CPython reads ml_meth from the struct; other non-Limited-API builds use PyCFunction_GET_FUNCTION */ +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) (((PyCFunctionObject*)(func))->m_ml->ml_meth) +#elif !CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) PyCFunction_GET_FUNCTION(func) +#endif +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CyOrPyCFunction_GET_FLAGS(func) (((PyCFunctionObject*)(func))->m_ml->ml_flags) +/* Returns the m_self field of the function object, or NULL when its flags include METH_STATIC. */ static CYTHON_INLINE PyObject* __Pyx_CyOrPyCFunction_GET_SELF(PyObject *func) { + return (__Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_STATIC) ? 
NULL : ((PyCFunctionObject*)func)->m_self; +} +#endif +/* Non-zero when func passes PyCFunction_Check and its C function pointer equals cfunc. */ static CYTHON_INLINE int __Pyx__IsSameCFunction(PyObject *func, void *cfunc) { +#if CYTHON_COMPILING_IN_LIMITED_API + return PyCFunction_Check(func) && PyCFunction_GetFunction(func) == (PyCFunction) cfunc; +#else + return PyCFunction_Check(func) && PyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +#endif +} +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCFunction(func, cfunc) +#if __PYX_LIMITED_VERSION_HEX < 0x030900B1 /* before 3.9: drop the module argument and declare the PyCMethod signature locally */ + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) ((void)m, PyType_FromSpecWithBases(s, b)) + typedef PyObject *(*__Pyx_PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, size_t, PyObject *); +#else + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) PyType_FromModuleAndSpec(m, s, b) + #define __Pyx_PyCMethod PyCMethod +#endif +#ifndef METH_METHOD + #define METH_METHOD 0x200 +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) /* route the object allocator names to the PyMem_* functions */ + #define PyObject_Malloc(s) PyMem_Malloc(s) + #define PyObject_Free(p) PyMem_Free(p) + #define PyObject_Realloc(p, n) PyMem_Realloc(p, n) /* realloc takes the pointer and the new size */ +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) +#else + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API /* choose the thread-state accessor available for this build and Python version */ + #define __Pyx_PyThreadState_Current PyThreadState_Get() +#elif !CYTHON_FAST_THREAD_STATE + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#elif PY_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyThreadState_Current PyThreadState_GetUnchecked() +#elif PY_VERSION_HEX >= 0x03060000 + #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() +#elif PY_VERSION_HEX >= 0x03000000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#else + #define __Pyx_PyThreadState_Current _PyThreadState_Current +#endif +#if 
CYTHON_COMPILING_IN_LIMITED_API +/* Returns PyModule_GetState(op); calls Py_FatalError when that result is NULL. */ static CYTHON_INLINE void *__Pyx_PyModule_GetState(PyObject *op) +{ + void *result; + result = PyModule_GetState(op); + if (!result) + Py_FatalError("Couldn't find the module state"); + return result; +} +#endif +#define __Pyx_PyObject_GetSlot(obj, name, func_ctype) __Pyx_PyType_GetSlot(Py_TYPE(obj), name, func_ctype) +#if CYTHON_COMPILING_IN_LIMITED_API /* Limited API goes through PyType_GetSlot(); otherwise the struct member is read directly */ + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((func_ctype) PyType_GetSlot((type), Py_##name)) +#else + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((type)->name) +#endif +#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) /* PyThread_tss_* functions implemented on top of the PyThread_*_key functions; a key is a plain int and 0 means not created */ +#include "pythread.h" +#define Py_tss_NEEDS_INIT 0 +typedef int Py_tss_t; +static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { + *key = PyThread_create_key(); + return 0; +} +/* Allocates one key with PyObject_Malloc and marks it as not created; returns NULL when the allocation fails. */ static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { + Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); + if (key) *key = Py_tss_NEEDS_INIT; /* do not write through a failed allocation */ + return key; +} +static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { + PyObject_Free(key); +} +static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { + return *key != Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { + PyThread_delete_key(*key); + *key = Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { + return PyThread_set_key_value(*key, value); +} +static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { + return PyThread_get_key_value(*key); +} +#endif +#if PY_MAJOR_VERSION < 3 + #if CYTHON_COMPILING_IN_PYPY + #if PYPY_VERSION_NUM < 0x07030600 + #if defined(__cplusplus) && __cplusplus >= 201402L + [[deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")]] + #elif defined(__GNUC__) || defined(__clang__) + __attribute__ ((__deprecated__("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6"))) + 
#elif defined(_MSC_VER) + __declspec(deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")) + #endif + static CYTHON_INLINE int PyGILState_Check(void) { + return 0; + } + #else // PYPY_VERSION_NUM < 0x07030600 + #endif // PYPY_VERSION_NUM < 0x07030600 + #else + static CYTHON_INLINE int PyGILState_Check(void) { + PyThreadState * tstate = _PyThreadState_Current; + return tstate && (tstate == PyGILState_GetThisThreadState()); + } + #endif +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 || defined(_PyDict_NewPresized) +#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) +#else +#define __Pyx_PyDict_NewPresized(n) PyDict_New() +#endif +#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX > 0x030600B4 && PY_VERSION_HEX < 0x030d0000 && CYTHON_USE_UNICODE_INTERNALS +#define __Pyx_PyDict_GetItemStrWithError(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStr(PyObject *dict, PyObject *name) { + PyObject *res = __Pyx_PyDict_GetItemStrWithError(dict, name); + if (res == NULL) PyErr_Clear(); + return res; +} +#elif PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000) +#define __Pyx_PyDict_GetItemStrWithError PyDict_GetItemWithError +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#else +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStrWithError(PyObject *dict, PyObject *name) { +#if CYTHON_COMPILING_IN_PYPY + return PyDict_GetItem(dict, name); +#else + PyDictEntry *ep; + PyDictObject *mp = (PyDictObject*) dict; + long hash = 
((PyStringObject *) name)->ob_shash; + assert(hash != -1); + ep = (mp->ma_lookup)(mp, name, hash); + if (ep == NULL) { + return NULL; + } + return ep->me_value; +#endif +} +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#endif +#if CYTHON_USE_TYPE_SLOTS + #define __Pyx_PyType_GetFlags(tp) (((PyTypeObject *)tp)->tp_flags) + #define __Pyx_PyType_HasFeature(type, feature) ((__Pyx_PyType_GetFlags(type) & (feature)) != 0) + #define __Pyx_PyObject_GetIterNextFunc(obj) (Py_TYPE(obj)->tp_iternext) +#else + #define __Pyx_PyType_GetFlags(tp) (PyType_GetFlags((PyTypeObject *)tp)) + #define __Pyx_PyType_HasFeature(type, feature) PyType_HasFeature(type, feature) + #define __Pyx_PyObject_GetIterNextFunc(obj) PyIter_Next +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyObject_GenericSetAttr((PyObject*)tp, k, v) +#else + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyDict_SetItem(tp->tp_dict, k, v) +#endif +#if CYTHON_USE_TYPE_SPECS && PY_VERSION_HEX >= 0x03080000 +#define __Pyx_PyHeapTypeObject_GC_Del(obj) {\ + PyTypeObject *type = Py_TYPE((PyObject*)obj);\ + assert(__Pyx_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE));\ + PyObject_GC_Del(obj);\ + Py_DECREF(type);\ +} +#else +#define __Pyx_PyHeapTypeObject_GC_Del(obj) PyObject_GC_Del(obj) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define CYTHON_PEP393_ENABLED 1 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GetLength(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_ReadChar(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((void)u, 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((void)u, (0)) + #define __Pyx_PyUnicode_DATA(u) ((void*)u) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)k, PyUnicode_ReadChar((PyObject*)(d), i)) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GetLength(u)) +#elif PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #if PY_VERSION_HEX >= 0x030C0000 + #define 
__Pyx_PyUnicode_READY(op) (0) + #else + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #endif + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) + #define __Pyx_PyUnicode_KIND(u) ((int)PyUnicode_KIND(u)) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, (Py_UCS4) ch) + #if PY_VERSION_HEX >= 0x030C0000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) + #else + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) + #else + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) + #endif + #endif +#else + #define CYTHON_PEP393_ENABLED 0 + #define PyUnicode_1BYTE_KIND 1 + #define PyUnicode_2BYTE_KIND 2 + #define PyUnicode_4BYTE_KIND 4 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 
65535U : 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((int)sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = (Py_UNICODE) ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #if !defined(PyUnicode_DecodeUnicodeEscape) + #define PyUnicode_DecodeUnicodeEscape(s, size, errors) PyUnicode_Decode(s, size, "unicode_escape", errors) + #endif + #if !defined(PyUnicode_Contains) || (PY_MAJOR_VERSION == 2 && PYPY_VERSION_NUM < 0x07030500) + #undef PyUnicode_Contains + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) + #endif + #if !defined(PyByteArray_Check) + #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) + #endif + #if !defined(PyObject_Format) + #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) + #endif +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? 
PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) + #define PyObject_ASCII(o) PyObject_Repr(o) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#ifndef PyObject_Unicode + #define PyObject_Unicode PyObject_Str +#endif +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#if CYTHON_COMPILING_IN_CPYTHON + #define __Pyx_PySequence_ListKeepNew(obj)\ + (likely(PyList_CheckExact(obj) && Py_REFCNT(obj) == 1) ? 
__Pyx_NewRef(obj) : PySequence_List(obj)) +#else + #define __Pyx_PySequence_ListKeepNew(obj) PySequence_List(obj) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) __Pyx_IS_TYPE(obj, &PySet_Type) +#endif +#if PY_VERSION_HEX >= 0x030900A4 + #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) +#else + #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) +#endif +#if CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_PySequence_ITEM(o, i) PySequence_ITEM(o, i) + #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) (PyTuple_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyList_SET_ITEM(o, i, v) (PyList_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_GET_SIZE(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_GET_SIZE(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_GET_SIZE(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_GET_SIZE(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_GET_SIZE(o) +#else + #define __Pyx_PySequence_ITEM(o, i) PySequence_GetItem(o, i) + #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) PyTuple_SetItem(o, i, v) + #define __Pyx_PyList_SET_ITEM(o, i, v) PyList_SetItem(o, i, v) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_Size(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_Size(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_Size(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_Size(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_Size(o) +#endif +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyImport_AddModuleRef(name) PyImport_AddModuleRef(name) +#else + static CYTHON_INLINE PyObject *__Pyx_PyImport_AddModuleRef(const char *name) { + PyObject *module = PyImport_AddModule(name); + Py_XINCREF(module); + return module; + } +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define 
PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define __Pyx_Py3Int_Check(op) PyLong_Check(op) + #define __Pyx_Py3Int_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#else + #define __Pyx_Py3Int_Check(op) (PyLong_Check(op) || PyInt_Check(op)) + #define __Pyx_Py3Int_CheckExact(op) (PyLong_CheckExact(op) || PyInt_CheckExact(op)) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t +#endif +#if CYTHON_USE_ASYNC_SLOTS + #if PY_VERSION_HEX >= 0x030500B1 + #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods + #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) + #else + #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) + #endif +#else + #define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef __Pyx_PyAsyncMethodsStruct + typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; + } __Pyx_PyAsyncMethodsStruct; +#endif + +#if defined(_WIN32) || defined(WIN32) || 
defined(MS_WINDOWS) + #if !defined(_USE_MATH_DEFINES) + #define _USE_MATH_DEFINES + #endif +#endif +#include +#ifdef NAN +#define __PYX_NAN() ((float) NAN) +#else +static CYTHON_INLINE float __PYX_NAN() { + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif +#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) +#define __Pyx_truncl trunc +#else +#define __Pyx_truncl truncl +#endif + +#define __PYX_MARK_ERR_POS(f_index, lineno) \ + { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } +#define __PYX_ERR(f_index, lineno, Ln_error) \ + { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } + +#ifdef CYTHON_EXTERN_C + #undef __PYX_EXTERN_C + #define __PYX_EXTERN_C CYTHON_EXTERN_C +#elif defined(__PYX_EXTERN_C) + #ifdef _MSC_VER + #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") + #else + #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. 
+ #endif +#else + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#define __PYX_HAVE__jcvi__formats__cblast +#define __PYX_HAVE_API__jcvi__formats__cblast +/* Early includes */ +#include +#include +#ifdef _OPENMP +#include +#endif /* _OPENMP */ + +#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define __Pyx_uchar_cast(c) ((unsigned char)c) +#define __Pyx_long_cast(x) ((long)x) +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ + (sizeof(type) < sizeof(Py_ssize_t)) ||\ + (sizeof(type) > sizeof(Py_ssize_t) &&\ + likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX) &&\ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ + v == (type)PY_SSIZE_T_MIN))) ||\ + (sizeof(type) == sizeof(Py_ssize_t) &&\ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX))) ) +static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { + return (size_t) i < (size_t) limit; +} +#if defined (__cplusplus) && __cplusplus >= 201103L + #include + #define __Pyx_sst_abs(value) std::abs(value) +#elif SIZEOF_INT >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) abs(value) +#elif SIZEOF_LONG >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) labs(value) +#elif defined (_MSC_VER) + #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) +#elif 
defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define __Pyx_sst_abs(value) llabs(value) +#elif defined (__GNUC__) + #define __Pyx_sst_abs(value) __builtin_llabs(value) +#else + #define __Pyx_sst_abs(value) ((value<0) ? -value : value) +#endif +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s); +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char*); +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyObject_AsWritableString(s) ((char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableSString(s) ((signed char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsSString(s) ((const 
signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +#define __Pyx_PyUnicode_FromOrdinal(o) PyUnicode_FromOrdinal((int)o) +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +#define __Pyx_PySequence_Tuple(obj)\ + (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); +#if CYTHON_ASSUME_SAFE_MACROS +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) +#else +#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? 
__Pyx_NewRef(x) : PyNumber_Int(x)) +#endif +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_VERSION_HEX >= 0x030C00A7 + #ifndef _PyLong_SIGN_MASK + #define _PyLong_SIGN_MASK 3 + #endif + #ifndef _PyLong_NON_SIZE_BITS + #define _PyLong_NON_SIZE_BITS 3 + #endif + #define __Pyx_PyLong_Sign(x) (((PyLongObject*)x)->long_value.lv_tag & _PyLong_SIGN_MASK) + #define __Pyx_PyLong_IsNeg(x) ((__Pyx_PyLong_Sign(x) & 2) != 0) + #define __Pyx_PyLong_IsNonNeg(x) (!__Pyx_PyLong_IsNeg(x)) + #define __Pyx_PyLong_IsZero(x) (__Pyx_PyLong_Sign(x) & 1) + #define __Pyx_PyLong_IsPos(x) (__Pyx_PyLong_Sign(x) == 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) (__Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) ((Py_ssize_t) (((PyLongObject*)x)->long_value.lv_tag >> _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_SignedDigitCount(x)\ + ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * __Pyx_PyLong_DigitCount(x)) + #if defined(PyUnstable_Long_IsCompact) && defined(PyUnstable_Long_CompactValue) + #define __Pyx_PyLong_IsCompact(x) PyUnstable_Long_IsCompact((PyLongObject*) x) + #define __Pyx_PyLong_CompactValue(x) PyUnstable_Long_CompactValue((PyLongObject*) x) + #else + #define __Pyx_PyLong_IsCompact(x) (((PyLongObject*)x)->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_CompactValue(x) ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * (Py_ssize_t) __Pyx_PyLong_Digits(x)[0]) + #endif + typedef Py_ssize_t __Pyx_compact_pylong; + typedef size_t __Pyx_compact_upylong; + #else + #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0) + #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0) + #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0) + #define __Pyx_PyLong_IsPos(x) (Py_SIZE(x) > 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) ((Py_SIZE(x) == 0) ? 
0 : __Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) __Pyx_sst_abs(Py_SIZE(x)) + #define __Pyx_PyLong_SignedDigitCount(x) Py_SIZE(x) + #define __Pyx_PyLong_IsCompact(x) (Py_SIZE(x) == 0 || Py_SIZE(x) == 1 || Py_SIZE(x) == -1) + #define __Pyx_PyLong_CompactValue(x)\ + ((Py_SIZE(x) == 0) ? (sdigit) 0 : ((Py_SIZE(x) < 0) ? -(sdigit)__Pyx_PyLong_Digits(x)[0] : (sdigit)__Pyx_PyLong_Digits(x)[0])) + typedef sdigit __Pyx_compact_pylong; + typedef digit __Pyx_compact_upylong; + #endif + #if PY_VERSION_HEX >= 0x030C00A5 + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit) + #else + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->ob_digit) + #endif +#endif +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +#include +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = (char) c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default 
encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#include +static char* __PYX_DEFAULT_STRING_ENCODING; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); + if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ +static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } + +#if !CYTHON_USE_MODULE_STATE +static PyObject *__pyx_m = NULL; +#endif +static int __pyx_lineno; +static int __pyx_clineno = 0; 
+static const char * __pyx_cfilenm = __FILE__; +static const char *__pyx_filename; + +/* #### Code section: filename_table ### */ + +static const char *__pyx_f[] = { + "src/jcvi/formats/cblast.pyx", + "", +}; +/* #### Code section: utility_code_proto_before_types ### */ +/* ForceInitThreads.proto */ +#ifndef __PYX_FORCE_INIT_THREADS + #define __PYX_FORCE_INIT_THREADS 0 +#endif + +/* #### Code section: numeric_typedefs ### */ +/* #### Code section: complex_type_declarations ### */ +/* #### Code section: type_declarations ### */ + +/*--- Type declarations ---*/ +struct __pyx_obj_4jcvi_7formats_6cblast_Blast; +struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine; +struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; +struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; + +/* "jcvi/formats/cblast.pyx":21 + * + * + * cdef class Blast: # <<<<<<<<<<<<<< + * cdef: + * FILE* fh + */ +struct __pyx_obj_4jcvi_7formats_6cblast_Blast { + PyObject_HEAD + FILE *fh; + PyObject *filename; +}; + + +/* "jcvi/formats/cblast.pyx":66 + * + * + * cdef class BlastLine: # <<<<<<<<<<<<<< + * """ + * Given a string of tab-delimited (-m 8) blast output, parse it and create + */ +struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine { + PyObject_HEAD + char _query[0x80]; + char _subject[0x80]; + int hitlen; + int nmismatch; + int ngaps; + int qstart; + int qstop; + int sstart; + int sstop; + float pctid; + float score; + double evalue; + PyObject *qseqid; + PyObject *sseqid; + int qi; + int si; + char orientation; +}; + + +/* "jcvi/formats/cblast.pyx":172 + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + * b = "\t".join(str(x) for x in args) # <<<<<<<<<<<<<< + * return BlastLine(b) + * + */ +struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr { + PyObject_HEAD + PyObject *__pyx_genexpr_arg_0; + PyObject *__pyx_v_x; + PyObject 
*__pyx_t_0; + Py_ssize_t __pyx_t_1; +}; + + +/* "cfunc.to_py":66 + * + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + */ +struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc { + PyObject_HEAD + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*__pyx_v_f)(char *, char *, float, int, int, int, int, int, int, int, float, float); +}; + +/* #### Code section: utility_code_proto ### */ + +/* --- Runtime support code (head) --- */ +/* Refnanny.proto */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, Py_ssize_t); + void (*DECREF)(void*, PyObject*, Py_ssize_t); + void (*GOTREF)(void*, PyObject*, Py_ssize_t); + void (*GIVEREF)(void*, PyObject*, Py_ssize_t); + void* (*SetupContext)(const char*, Py_ssize_t, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define 
__Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + } + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__)) + #define __Pyx_RefNannyFinishContextNogil() __Pyx_RefNannyFinishContext() +#endif + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_XINCREF(r) do { if((r) == NULL); else {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) == NULL); else {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) == NULL); else {__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) == NULL); else {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContextNogil() + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + 
#define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_Py_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; Py_XDECREF(tmp);\ + } while (0) +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +/* PyErrExceptionMatches.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) +static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); +#else +#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) +#endif + +/* PyThreadStateGet.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; +#if PY_VERSION_HEX >= 0x030C00A6 +#define __Pyx_PyErr_Occurred() (__pyx_tstate->current_exception != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->current_exception ? 
(PyObject*) Py_TYPE(__pyx_tstate->current_exception) : (PyObject*) NULL) +#else +#define __Pyx_PyErr_Occurred() (__pyx_tstate->curexc_type != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->curexc_type) +#endif +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#define __Pyx_PyErr_Occurred() (PyErr_Occurred() != NULL) +#define __Pyx_PyErr_CurrentExceptionType() PyErr_Occurred() +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A6 +#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) +#else +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#endif +#else +#define __Pyx_PyErr_Clear() PyErr_Clear() +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) 
PyErr_Fetch(type, value, tb) +#endif + +/* PyObjectGetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +/* PyObjectGetAttrStrNoError.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name); + +/* GetBuiltinName.proto */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + +/* TupleAndListFromArray.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n); +static CYTHON_INLINE PyObject* __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n); +#endif + +/* IncludeStringH.proto */ +#include + +/* BytesEquals.proto */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); + +/* UnicodeEquals.proto */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); + +/* fastcall.proto */ +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_VARARGS(args, i) PySequence_GetItem(args, i) +#elif CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GET_ITEM(args, i) +#else + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GetItem(args, i) +#endif +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg) + #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg) +#else + #define __Pyx_Arg_NewRef_VARARGS(arg) arg + #define __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds) +#define __Pyx_KwValues_VARARGS(args, nargs) NULL +#define __Pyx_GetKwValue_VARARGS(kw, kwvalues, s) __Pyx_PyDict_GetItemStrWithError(kw, s) +#define __Pyx_KwargsAsDict_VARARGS(kw, kwvalues) PyDict_Copy(kw) +#if CYTHON_METH_FASTCALL + #define __Pyx_Arg_FASTCALL(args, i) args[i] + #define __Pyx_NumKwargs_FASTCALL(kwds) PyTuple_GET_SIZE(kwds) + #define 
__Pyx_KwValues_FASTCALL(args, nargs) ((args) + (nargs)) + static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 + CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues); + #else + #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw) + #endif + #define __Pyx_Arg_NewRef_FASTCALL(arg) arg /* no-op, __Pyx_Arg_FASTCALL is direct and this needs + to have the same reference counting */ + #define __Pyx_Arg_XDECREF_FASTCALL(arg) +#else + #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS + #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS + #define __Pyx_KwValues_FASTCALL __Pyx_KwValues_VARARGS + #define __Pyx_GetKwValue_FASTCALL __Pyx_GetKwValue_VARARGS + #define __Pyx_KwargsAsDict_FASTCALL __Pyx_KwargsAsDict_VARARGS + #define __Pyx_Arg_NewRef_FASTCALL(arg) __Pyx_Arg_NewRef_VARARGS(arg) + #define __Pyx_Arg_XDECREF_FASTCALL(arg) __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_VARARGS(args, start), stop - start) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_FASTCALL(args, start), stop - start) +#else +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) PyTuple_GetSlice(args, start, stop) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) PyTuple_GetSlice(args, start, stop) +#endif + +/* RaiseArgTupleInvalid.proto */ +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* RaiseDoubleKeywords.proto */ +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, 
PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, + const char* function_name); + +/* IncludeStructmemberH.proto */ +#include + +/* FixUpExtensionType.proto */ +#if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type); +#endif + +/* FetchSharedCythonModule.proto */ +static PyObject *__Pyx_FetchSharedCythonABIModule(void); + +/* FetchCommonType.proto */ +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type); +#else +static PyTypeObject* __Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases); +#endif + +/* PyMethodNew.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + PyObject *typesModule=NULL, *methodType=NULL, *result=NULL; + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + typesModule = PyImport_ImportModule("types"); + if (!typesModule) return NULL; + methodType = PyObject_GetAttrString(typesModule, "MethodType"); + Py_DECREF(typesModule); + if (!methodType) return NULL; + result = PyObject_CallFunctionObjArgs(methodType, func, self, NULL); + Py_DECREF(methodType); + return result; +} +#elif PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + return PyMethod_New(func, self); +} +#else + #define __Pyx_PyMethod_New PyMethod_New +#endif + +/* PyVectorcallFastCallDict.proto */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw); +#endif + +/* CythonFunctionShared.proto */ +#define __Pyx_CyFunction_USED +#define __Pyx_CYFUNCTION_STATICMETHOD 0x01 +#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02 +#define __Pyx_CYFUNCTION_CCLASS 0x04 
+#define __Pyx_CYFUNCTION_COROUTINE 0x08 +#define __Pyx_CyFunction_GetClosure(f)\ + (((__pyx_CyFunctionObject *) (f))->func_closure) +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_CyFunction_GetClassObj(f)\ + (((__pyx_CyFunctionObject *) (f))->func_classobj) +#else + #define __Pyx_CyFunction_GetClassObj(f)\ + ((PyObject*) ((PyCMethodObject *) (f))->mm_class) +#endif +#define __Pyx_CyFunction_SetClassObj(f, classobj)\ + __Pyx__CyFunction_SetClassObj((__pyx_CyFunctionObject *) (f), (classobj)) +#define __Pyx_CyFunction_Defaults(type, f)\ + ((type *)(((__pyx_CyFunctionObject *) (f))->defaults)) +#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\ + ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g) +typedef struct { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject_HEAD + PyObject *func; +#elif PY_VERSION_HEX < 0x030900B1 + PyCFunctionObject func; +#else + PyCMethodObject func; +#endif +#if CYTHON_BACKPORT_VECTORCALL + __pyx_vectorcallfunc func_vectorcall; +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_weakreflist; +#endif + PyObject *func_dict; + PyObject *func_name; + PyObject *func_qualname; + PyObject *func_doc; + PyObject *func_globals; + PyObject *func_code; + PyObject *func_closure; +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_classobj; +#endif + void *defaults; + int defaults_pyobjects; + size_t defaults_size; + int flags; + PyObject *defaults_tuple; + PyObject *defaults_kwdict; + PyObject *(*defaults_getter)(PyObject *); + PyObject *func_annotations; + PyObject *func_is_coroutine; +} __pyx_CyFunctionObject; +#undef __Pyx_CyOrPyCFunction_Check +#define __Pyx_CyFunction_Check(obj) __Pyx_TypeCheck(obj, __pyx_CyFunctionType) +#define __Pyx_CyOrPyCFunction_Check(obj) __Pyx_TypeCheck2(obj, __pyx_CyFunctionType, &PyCFunction_Type) +#define __Pyx_CyFunction_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_CyFunctionType) +static CYTHON_INLINE int 
__Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc); +#undef __Pyx_IsSameCFunction +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCyOrCFunction(func, cfunc) +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj); +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m, + size_t size, + int pyobjects); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m, + PyObject *tuple); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m, + PyObject *dict); +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m, + PyObject *dict); +static int __pyx_CyFunction_init(PyObject *module); +#if CYTHON_METH_FASTCALL +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +#if CYTHON_BACKPORT_VECTORCALL +#define __Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall) +#else +#define __Pyx_CyFunction_func_vectorcall(f) (((PyCFunctionObject*)f)->vectorcall) +#endif +#endif + +/* CythonFunction.proto */ +static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); + +/* GetTopmostException.proto */ +#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE 
+static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate); +#endif + +/* SaveResetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +#else +#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) +#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) +#endif + +/* FastTypeChecks.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) __Pyx_IsAnySubtype2(Py_TYPE(obj), (PyTypeObject *)type1, (PyTypeObject *)type2) +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); +#else +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) (PyObject_TypeCheck(obj, (PyTypeObject *)type1) || PyObject_TypeCheck(obj, (PyTypeObject *)type2)) +#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) +#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) +#endif +#define __Pyx_PyErr_ExceptionMatches2(err1, err2) 
__Pyx_PyErr_GivenExceptionMatches2(__Pyx_PyErr_CurrentExceptionType(), err1, err2) +#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) + +/* KeywordStringCheck.proto */ +static int __Pyx_CheckKeywordStrings(PyObject *kw, const char* function_name, int kw_allowed); + +/* RaiseException.proto */ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); + +/* PyObjectCall.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +/* UnpackUnboundCMethod.proto */ +typedef struct { + PyObject *type; + PyObject **method_name; + PyCFunction func; + PyObject *method; + int flag; +} __Pyx_CachedCFunction; + +/* CallUnboundCMethod1.proto */ +static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg); +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg); +#else +#define __Pyx_CallUnboundCMethod1(cfunc, self, arg) __Pyx__CallUnboundCMethod1(cfunc, self, arg) +#endif + +/* RaiseUnexpectedTypeError.proto */ +static int __Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj); + +/* decode_c_string_utf16.proto */ +static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16(const char *s, Py_ssize_t size, const char *errors) { + int byteorder = 0; + return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); +} +static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16LE(const char *s, Py_ssize_t size, const char *errors) { + int byteorder = -1; + return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); +} +static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16BE(const char *s, Py_ssize_t size, const char *errors) { + int byteorder = 1; + return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); +} 
+ +/* decode_c_bytes.proto */ +static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes( + const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop, + const char* encoding, const char* errors, + PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)); + +/* decode_bytes.proto */ +static CYTHON_INLINE PyObject* __Pyx_decode_bytes( + PyObject* string, Py_ssize_t start, Py_ssize_t stop, + const char* encoding, const char* errors, + PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) { + char* as_c_string; + Py_ssize_t size; +#if CYTHON_ASSUME_SAFE_MACROS + as_c_string = PyBytes_AS_STRING(string); + size = PyBytes_GET_SIZE(string); +#else + if (PyBytes_AsStringAndSize(string, &as_c_string, &size) < 0) { + return NULL; + } +#endif + return __Pyx_decode_c_bytes( + as_c_string, size, + start, stop, encoding, errors, decode_func); +} + +/* ArgTypeTest.proto */ +#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ + ((likely(__Pyx_IS_TYPE(obj, type) | (none_allowed && (obj == Py_None)))) ? 
1 :\ + __Pyx__ArgTypeTest(obj, type, name, exact)) +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); + +/* PyFunctionFastCall.proto */ +#if CYTHON_FAST_PYCALL +#if !CYTHON_VECTORCALL +#define __Pyx_PyFunction_FastCall(func, args, nargs)\ + __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); +#endif +#define __Pyx_BUILD_ASSERT_EXPR(cond)\ + (sizeof(char [1 - 2*!(cond)]) - 1) +#ifndef Py_MEMBER_SIZE +#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) +#endif +#if !CYTHON_VECTORCALL +#if PY_VERSION_HEX >= 0x03080000 + #include "frameobject.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif + #define __Pxy_PyFrame_Initialize_Offsets() + #define __Pyx_PyFrame_GetLocalsplus(frame) ((frame)->f_localsplus) +#else + static size_t __pyx_pyframe_localsplus_offset = 0; + #include "frameobject.h" + #define __Pxy_PyFrame_Initialize_Offsets()\ + ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ + (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) + #define __Pyx_PyFrame_GetLocalsplus(frame)\ + (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) +#endif +#endif +#endif + +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +/* PyObjectFastCall.proto */ +#define __Pyx_PyObject_FastCall(func, args, nargs) __Pyx_PyObject_FastCallDict(func, args, (size_t)(nargs), NULL) +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, 
PyObject **args, size_t nargs, PyObject *kwargs); + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + +/* SliceObject.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice( + PyObject* obj, Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** py_start, PyObject** py_stop, PyObject** py_slice, + int has_cstart, int has_cstop, int wraparound); + +/* ListCompAppend.proto */ +#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS +static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) { + PyListObject* L = (PyListObject*) list; + Py_ssize_t len = Py_SIZE(list); + if (likely(L->allocated > len)) { + Py_INCREF(x); + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 + L->ob_item[len] = x; + #else + PyList_SET_ITEM(list, len, x); + #endif + __Pyx_SET_SIZE(list, len + 1); + return 0; + } + return PyList_Append(list, x); +} +#else +#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x) +#endif + +/* GetAttr.proto */ +static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *, PyObject *); + +/* SetItemInt.proto */ +#define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) :\ + (is_list ? 
(PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) :\ + __Pyx_SetItemInt_Generic(o, to_py_func(i), v))) +static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v); +static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, + int is_list, int wraparound, int boundscheck); + +/* HasAttr.proto */ +static CYTHON_INLINE int __Pyx_HasAttr(PyObject *, PyObject *); + +/* RaiseUnboundLocalError.proto */ +static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname); + +/* PyObject_Str.proto */ +#define __Pyx_PyObject_Str(obj)\ + (likely(PyString_CheckExact(obj)) ? __Pyx_NewRef(obj) : PyObject_Str(obj)) + +/* SliceObject.proto */ +#define __Pyx_PyObject_DelSlice(obj, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound)\ + __Pyx_PyObject_SetSlice(obj, (PyObject*)NULL, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound) +static CYTHON_INLINE int __Pyx_PyObject_SetSlice( + PyObject* obj, PyObject* value, Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** py_start, PyObject** py_stop, PyObject** py_slice, + int has_cstart, int has_cstop, int wraparound); + +/* PyObjectCall2Args.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2); + +/* PyObjectGetMethod.proto */ +static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method); + +/* PyObjectCallMethod1.proto */ +static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg); + +/* StringJoin.proto */ +#if PY_MAJOR_VERSION < 3 +#define __Pyx_PyString_Join __Pyx_PyBytes_Join +#define __Pyx_PyBaseString_Join(s, v) (PyUnicode_CheckExact(s) ? 
PyUnicode_Join(s, v) : __Pyx_PyBytes_Join(s, v)) +#else +#define __Pyx_PyString_Join PyUnicode_Join +#define __Pyx_PyBaseString_Join PyUnicode_Join +#endif +static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values); + +/* PyObjectSetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +#define __Pyx_PyObject_DelAttrStr(o,n) __Pyx_PyObject_SetAttrStr(o, n, NULL) +static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value); +#else +#define __Pyx_PyObject_DelAttrStr(o,n) PyObject_DelAttr(o,n) +#define __Pyx_PyObject_SetAttrStr(o,n,v) PyObject_SetAttr(o,n,v) +#endif + +/* PyObjectCallNoArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); + +/* PyObjectCallMethod0.proto */ +static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name); + +/* ValidateBasesTuple.proto */ +#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS +static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases); +#endif + +/* PyType_Ready.proto */ +CYTHON_UNUSED static int __Pyx_PyType_Ready(PyTypeObject *t); + +/* PyObject_GenericGetAttrNoDict.proto */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GenericGetAttrNoDict PyObject_GenericGetAttr +#endif + +/* PyObject_GenericGetAttr.proto */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GenericGetAttr PyObject_GenericGetAttr +#endif + +/* SetupReduce.proto */ +#if !CYTHON_COMPILING_IN_LIMITED_API +static int __Pyx_setup_reduce(PyObject* type_obj); +#endif + +/* Import.proto */ +static PyObject *__Pyx_Import(PyObject *name, PyObject 
*from_list, int level); + +/* ImportDottedModule.proto */ +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple); +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple); +#endif + +/* ImportDottedModuleRelFirst.proto */ +static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject *name, PyObject *parts_tuple); + +/* PyDictVersioning.proto */ +#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) +#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ + (version_var) = __PYX_GET_DICT_VERSION(dict);\ + (cache_var) = (value); +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ + (VAR) = __pyx_dict_cached_value;\ + } else {\ + (VAR) = __pyx_dict_cached_value = (LOOKUP);\ + __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ + }\ +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); +#else +#define __PYX_GET_DICT_VERSION(dict) (0) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); +#endif + +/* CLineInTraceback.proto */ +#ifdef CYTHON_CLINE_IN_TRACEBACK +#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? 
c_line : 0) +#else +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); +#endif + +/* CodeObjectCache.proto */ +#if !CYTHON_COMPILING_IN_LIMITED_API +typedef struct { + PyCodeObject* code_object; + int code_line; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); +#endif + +/* AddTraceback.proto */ +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +/* GCCDiagnostics.proto */ +#if !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) +#define __Pyx_HAS_GCC_DIAGNOSTIC +#endif + +/* CIntFromPy.proto */ +static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +/* CIntFromPy.proto */ +static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_char(char value); + +/* FormatTypeName.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +typedef PyObject *__Pyx_TypeName; +#define __Pyx_FMT_TYPENAME "%U" +static __Pyx_TypeName __Pyx_PyType_GetName(PyTypeObject* tp); +#define __Pyx_DECREF_TypeName(obj) Py_XDECREF(obj) +#else +typedef const char *__Pyx_TypeName; +#define __Pyx_FMT_TYPENAME "%.200s" +#define __Pyx_PyType_GetName(tp) ((tp)->tp_name) +#define __Pyx_DECREF_TypeName(obj) +#endif + +/* CIntFromPy.proto */ +static 
CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +/* SwapException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_ExceptionSwap(type, value, tb) __Pyx__ExceptionSwap(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#else +static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb); +#endif + +/* CoroutineBase.proto */ +struct __pyx_CoroutineObject; +typedef PyObject *(*__pyx_coroutine_body_t)(struct __pyx_CoroutineObject *, PyThreadState *, PyObject *); +#if CYTHON_USE_EXC_INFO_STACK +#define __Pyx_ExcInfoStruct _PyErr_StackItem +#else +typedef struct { + PyObject *exc_type; + PyObject *exc_value; + PyObject *exc_traceback; +} __Pyx_ExcInfoStruct; +#endif +typedef struct __pyx_CoroutineObject { + PyObject_HEAD + __pyx_coroutine_body_t body; + PyObject *closure; + __Pyx_ExcInfoStruct gi_exc_state; + PyObject *gi_weakreflist; + PyObject *classobj; + PyObject *yieldfrom; + PyObject *gi_name; + PyObject *gi_qualname; + PyObject *gi_modulename; + PyObject *gi_code; + PyObject *gi_frame; + int resume_label; + char is_running; +} __pyx_CoroutineObject; +static __pyx_CoroutineObject *__Pyx__Coroutine_New( + PyTypeObject *type, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, + PyObject *name, PyObject *qualname, PyObject *module_name); +static __pyx_CoroutineObject *__Pyx__Coroutine_NewInit( + __pyx_CoroutineObject *gen, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, + PyObject *name, PyObject *qualname, PyObject *module_name); +static CYTHON_INLINE void __Pyx_Coroutine_ExceptionClear(__Pyx_ExcInfoStruct *self); +static int __Pyx_Coroutine_clear(PyObject *self); +static PyObject *__Pyx_Coroutine_Send(PyObject *self, PyObject *value); +static PyObject *__Pyx_Coroutine_Close(PyObject *self); +static PyObject *__Pyx_Coroutine_Throw(PyObject *gen, PyObject *args); +#if CYTHON_USE_EXC_INFO_STACK 
+#define __Pyx_Coroutine_SwapException(self) +#define __Pyx_Coroutine_ResetAndClearException(self) __Pyx_Coroutine_ExceptionClear(&(self)->gi_exc_state) +#else +#define __Pyx_Coroutine_SwapException(self) {\ + __Pyx_ExceptionSwap(&(self)->gi_exc_state.exc_type, &(self)->gi_exc_state.exc_value, &(self)->gi_exc_state.exc_traceback);\ + __Pyx_Coroutine_ResetFrameBackpointer(&(self)->gi_exc_state);\ + } +#define __Pyx_Coroutine_ResetAndClearException(self) {\ + __Pyx_ExceptionReset((self)->gi_exc_state.exc_type, (self)->gi_exc_state.exc_value, (self)->gi_exc_state.exc_traceback);\ + (self)->gi_exc_state.exc_type = (self)->gi_exc_state.exc_value = (self)->gi_exc_state.exc_traceback = NULL;\ + } +#endif +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyGen_FetchStopIterationValue(pvalue)\ + __Pyx_PyGen__FetchStopIterationValue(__pyx_tstate, pvalue) +#else +#define __Pyx_PyGen_FetchStopIterationValue(pvalue)\ + __Pyx_PyGen__FetchStopIterationValue(__Pyx_PyThreadState_Current, pvalue) +#endif +static int __Pyx_PyGen__FetchStopIterationValue(PyThreadState *tstate, PyObject **pvalue); +static CYTHON_INLINE void __Pyx_Coroutine_ResetFrameBackpointer(__Pyx_ExcInfoStruct *exc_state); + +/* PatchModuleWithCoroutine.proto */ +static PyObject* __Pyx_Coroutine_patch_module(PyObject* module, const char* py_code); + +/* PatchGeneratorABC.proto */ +static int __Pyx_patch_abc(void); + +/* Generator.proto */ +#define __Pyx_Generator_USED +#define __Pyx_Generator_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_GeneratorType) +#define __Pyx_Generator_New(body, code, closure, name, qualname, module_name)\ + __Pyx__Coroutine_New(__pyx_GeneratorType, body, code, closure, name, qualname, module_name) +static PyObject *__Pyx_Generator_Next(PyObject *self); +static int __pyx_Generator_init(PyObject *module); + +/* CheckBinaryVersion.proto */ +static unsigned long __Pyx_get_runtime_version(void); +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer); + 
+/* InitStrings.proto */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + +/* #### Code section: module_declarations ### */ + +/* Module declarations from "libc.string" */ + +/* Module declarations from "libc.stdio" */ + +/* Module declarations from "jcvi.formats.cblast" */ +static char const *__pyx_v_4jcvi_7formats_6cblast_blast_format; +static char const *__pyx_v_4jcvi_7formats_6cblast_blast_format_line; +static char const *__pyx_v_4jcvi_7formats_6cblast_blast_output; +static char const *__pyx_v_4jcvi_7formats_6cblast_bed_output; +static PyObject *__pyx_f_4jcvi_7formats_6cblast_c_str(PyObject *); /*proto*/ +static PyObject *__pyx_f_4jcvi_7formats_6cblast_py_str(PyObject *); /*proto*/ +static struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_f_4jcvi_7formats_6cblast_create_blast_line(char *, char *, float, int, int, int, int, int, int, int, float, float); /*proto*/ +static PyObject *__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*)(char *, char *, float, int, int, int, int, int, int, int, float, float)); /*proto*/ +static int __Pyx_carray_from_py_char(PyObject *, char *, Py_ssize_t); /*proto*/ +/* #### Code section: typeinfo ### */ +/* #### Code section: before_global_var ### */ +#define __Pyx_MODULE_NAME "jcvi.formats.cblast" +extern int __pyx_module_is_main_jcvi__formats__cblast; +int __pyx_module_is_main_jcvi__formats__cblast = 0; + +/* Implementation of "jcvi.formats.cblast" */ +/* #### Code section: global_var ### */ +static PyObject *__pyx_builtin_StopIteration; +static PyObject *__pyx_builtin_TypeError; +static PyObject *__pyx_builtin_id; +static PyObject *__pyx_builtin_OverflowError; +static PyObject *__pyx_builtin_enumerate; +static PyObject *__pyx_builtin_IndexError; +/* #### Code section: string_decls ### */ +static const char __pyx_k_s[] = "s"; +static const char __pyx_k__5[] = "\t"; +static 
const char __pyx_k__6[] = "*"; +static const char __pyx_k_gc[] = "gc"; +static const char __pyx_k_id[] = "id"; +static const char __pyx_k_qi[] = "qi"; +static const char __pyx_k_si[] = "si"; +static const char __pyx_k__13[] = "?"; +static const char __pyx_k_sys[] = "sys"; +static const char __pyx_k_args[] = "args"; +static const char __pyx_k_join[] = "join"; +static const char __pyx_k_main[] = "__main__"; +static const char __pyx_k_name[] = "__name__"; +static const char __pyx_k_self[] = "self"; +static const char __pyx_k_send[] = "send"; +static const char __pyx_k_spec[] = "__spec__"; +static const char __pyx_k_test[] = "__test__"; +static const char __pyx_k_wrap[] = "wrap"; +static const char __pyx_k_Blast[] = "Blast"; +static const char __pyx_k_UTF_8[] = "UTF-8"; +static const char __pyx_k_close[] = "close"; +static const char __pyx_k_ngaps[] = "ngaps"; +static const char __pyx_k_pctid[] = "pctid"; +static const char __pyx_k_qstop[] = "qstop"; +static const char __pyx_k_query[] = "query"; +static const char __pyx_k_score[] = "score"; +static const char __pyx_k_slots[] = "__slots__"; +static const char __pyx_k_sstop[] = "sstop"; +static const char __pyx_k_throw[] = "throw"; +static const char __pyx_k_enable[] = "enable"; +static const char __pyx_k_encode[] = "encode"; +static const char __pyx_k_evalue[] = "evalue"; +static const char __pyx_k_hitlen[] = "hitlen"; +static const char __pyx_k_import[] = "__import__"; +static const char __pyx_k_qseqid[] = "qseqid"; +static const char __pyx_k_qstart[] = "qstart"; +static const char __pyx_k_reduce[] = "__reduce__"; +static const char __pyx_k_sseqid[] = "sseqid"; +static const char __pyx_k_sstart[] = "sstart"; +static const char __pyx_k_Blast_s[] = "Blast('%s')"; +static const char __pyx_k_disable[] = "disable"; +static const char __pyx_k_genexpr[] = "genexpr"; +static const char __pyx_k_richcmp[] = "__richcmp__"; +static const char __pyx_k_subject[] = "subject"; +static const char __pyx_k_filename[] = "filename"; 
+static const char __pyx_k_getstate[] = "__getstate__"; +static const char __pyx_k_setstate[] = "__setstate__"; +static const char __pyx_k_BlastLine[] = "BlastLine"; +static const char __pyx_k_TypeError[] = "TypeError"; +static const char __pyx_k_enumerate[] = "enumerate"; +static const char __pyx_k_isenabled[] = "isenabled"; +static const char __pyx_k_nmismatch[] = "nmismatch"; +static const char __pyx_k_pyx_state[] = "__pyx_state"; +static const char __pyx_k_reduce_ex[] = "__reduce_ex__"; +static const char __pyx_k_IndexError[] = "IndexError"; +static const char __pyx_k_cfunc_to_py[] = "cfunc.to_py"; +static const char __pyx_k_orientation[] = "orientation"; +static const char __pyx_k_initializing[] = "_initializing"; +static const char __pyx_k_is_coroutine[] = "_is_coroutine"; +static const char __pyx_k_stringsource[] = ""; +static const char __pyx_k_OverflowError[] = "OverflowError"; +static const char __pyx_k_StopIteration[] = "StopIteration"; +static const char __pyx_k_reduce_cython[] = "__reduce_cython__"; +static const char __pyx_k_setstate_cython[] = "__setstate_cython__"; +static const char __pyx_k_BlastLine___reduce[] = "BlastLine.__reduce__"; +static const char __pyx_k_asyncio_coroutines[] = "asyncio.coroutines"; +static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; +static const char __pyx_k_jcvi_formats_cblast[] = "jcvi.formats.cblast"; +static const char __pyx_k_Blast___reduce_cython[] = "Blast.__reduce_cython__"; +static const char __pyx_k_Blast___setstate_cython[] = "Blast.__setstate_cython__"; +static const char __pyx_k_src_jcvi_formats_cblast_pyx[] = "src/jcvi/formats/cblast.pyx"; +static const char __pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma[] = "__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc..wrap"; +static const char __pyx_k_Cythonized_fast_version_of_Blas[] = "\nCythonized (fast) version of BlastLine\n\nStolen from brentp's biostuff 
(thanks):\n\n"; +static const char __pyx_k_that_comparison_not_implemented[] = "that comparison not implemented"; +static const char __pyx_k_BlastLine___get___locals_genexpr[] = "BlastLine.__get__..genexpr"; +static const char __pyx_k_BlastLine_s_to_s_eval_3f_score_1[] = "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)"; +static const char __pyx_k_no_default___reduce___due_to_non[] = "no default __reduce__ due to non-trivial __cinit__"; +/* #### Code section: decls ### */ +static PyObject *__pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(PyObject *__pyx_self, char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, char *__pyx_v_filename); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ +static void __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, 
CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_s); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_other, size_t __pyx_v_op); /* proto */ +static Py_hash_t __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_genexpr_arg_0); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(struct 
__pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, 
PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); 
/* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject 
*__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_Blast(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ +static PyObject *__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ +static __Pyx_CachedCFunction __pyx_umethod_PyString_Type_encode = {0, 0, 0, 0, 0}; +/* #### Code section: late_includes ### */ +/* #### Code section: module_state ### */ +typedef struct { + PyObject *__pyx_d; + PyObject *__pyx_b; + PyObject *__pyx_cython_runtime; + PyObject *__pyx_empty_tuple; + PyObject *__pyx_empty_bytes; + PyObject *__pyx_empty_unicode; + #ifdef __Pyx_CyFunction_USED + PyTypeObject *__pyx_CyFunctionType; + #endif + #ifdef __Pyx_FusedFunction_USED + PyTypeObject *__pyx_FusedFunctionType; + #endif + #ifdef __Pyx_Generator_USED + PyTypeObject *__pyx_GeneratorType; + #endif + #ifdef __Pyx_IterableCoroutine_USED + PyTypeObject *__pyx_IterableCoroutineType; + #endif + #ifdef __Pyx_Coroutine_USED + PyTypeObject *__pyx_CoroutineAwaitType; + #endif + #ifdef __Pyx_Coroutine_USED + PyTypeObject *__pyx_CoroutineType; + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + PyObject *__pyx_type_4jcvi_7formats_6cblast_Blast; + PyObject 
*__pyx_type_4jcvi_7formats_6cblast_BlastLine; + PyObject *__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; + PyObject *__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; + #endif + PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast_Blast; + PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast_BlastLine; + PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; + PyTypeObject *__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; + PyObject *__pyx_n_s_Blast; + PyObject *__pyx_n_s_BlastLine; + PyObject *__pyx_n_s_BlastLine___get___locals_genexpr; + PyObject *__pyx_n_s_BlastLine___reduce; + PyObject *__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1; + PyObject *__pyx_n_s_Blast___reduce_cython; + PyObject *__pyx_n_s_Blast___setstate_cython; + PyObject *__pyx_kp_s_Blast_s; + PyObject *__pyx_n_s_IndexError; + PyObject *__pyx_n_s_OverflowError; + PyObject *__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma; + PyObject *__pyx_n_s_StopIteration; + PyObject *__pyx_n_s_TypeError; + PyObject *__pyx_kp_s_UTF_8; + PyObject *__pyx_n_s__13; + PyObject *__pyx_kp_s__5; + PyObject *__pyx_n_s__6; + PyObject *__pyx_n_s_args; + PyObject *__pyx_n_s_asyncio_coroutines; + PyObject *__pyx_n_s_cfunc_to_py; + PyObject *__pyx_n_s_cline_in_traceback; + PyObject *__pyx_n_s_close; + PyObject *__pyx_kp_u_disable; + PyObject *__pyx_kp_u_enable; + PyObject *__pyx_n_s_encode; + PyObject *__pyx_n_s_enumerate; + PyObject *__pyx_n_s_evalue; + PyObject *__pyx_n_s_filename; + PyObject *__pyx_kp_u_gc; + PyObject *__pyx_n_s_genexpr; + PyObject *__pyx_n_s_getstate; + PyObject *__pyx_n_s_hitlen; + PyObject *__pyx_n_s_id; + PyObject *__pyx_n_s_import; + PyObject *__pyx_n_s_initializing; + PyObject *__pyx_n_s_is_coroutine; + PyObject *__pyx_kp_u_isenabled; + PyObject 
*__pyx_n_s_jcvi_formats_cblast; + PyObject *__pyx_n_s_join; + PyObject *__pyx_n_s_main; + PyObject *__pyx_n_s_name; + PyObject *__pyx_n_s_ngaps; + PyObject *__pyx_n_s_nmismatch; + PyObject *__pyx_kp_s_no_default___reduce___due_to_non; + PyObject *__pyx_n_s_orientation; + PyObject *__pyx_n_s_pctid; + PyObject *__pyx_n_s_pyx_state; + PyObject *__pyx_n_s_qi; + PyObject *__pyx_n_s_qseqid; + PyObject *__pyx_n_s_qstart; + PyObject *__pyx_n_s_qstop; + PyObject *__pyx_n_s_query; + PyObject *__pyx_n_s_reduce; + PyObject *__pyx_n_s_reduce_cython; + PyObject *__pyx_n_s_reduce_ex; + PyObject *__pyx_n_s_richcmp; + PyObject *__pyx_n_s_s; + PyObject *__pyx_n_s_score; + PyObject *__pyx_n_s_self; + PyObject *__pyx_n_s_send; + PyObject *__pyx_n_s_setstate; + PyObject *__pyx_n_s_setstate_cython; + PyObject *__pyx_n_s_si; + PyObject *__pyx_n_s_slots; + PyObject *__pyx_n_s_spec; + PyObject *__pyx_kp_s_src_jcvi_formats_cblast_pyx; + PyObject *__pyx_n_s_sseqid; + PyObject *__pyx_n_s_sstart; + PyObject *__pyx_n_s_sstop; + PyObject *__pyx_kp_s_stringsource; + PyObject *__pyx_n_s_subject; + PyObject *__pyx_n_s_sys; + PyObject *__pyx_n_s_test; + PyObject *__pyx_kp_s_that_comparison_not_implemented; + PyObject *__pyx_n_s_throw; + PyObject *__pyx_n_s_wrap; + PyObject *__pyx_int_2; + PyObject *__pyx_int_12; + PyObject *__pyx_tuple_; + PyObject *__pyx_slice__4; + PyObject *__pyx_tuple__3; + PyObject *__pyx_tuple__7; + PyObject *__pyx_tuple__9; + PyObject *__pyx_tuple__11; + PyObject *__pyx_codeobj__2; + PyObject *__pyx_codeobj__8; + PyObject *__pyx_codeobj__10; + PyObject *__pyx_codeobj__12; +} __pyx_mstate; + +#if CYTHON_USE_MODULE_STATE +#ifdef __cplusplus +namespace { + extern struct PyModuleDef __pyx_moduledef; +} /* anonymous namespace */ +#else +static struct PyModuleDef __pyx_moduledef; +#endif + +#define __pyx_mstate(o) ((__pyx_mstate *)__Pyx_PyModule_GetState(o)) + +#define __pyx_mstate_global (__pyx_mstate(PyState_FindModule(&__pyx_moduledef))) + +#define __pyx_m 
(PyState_FindModule(&__pyx_moduledef)) +#else +static __pyx_mstate __pyx_mstate_global_static = +#ifdef __cplusplus + {}; +#else + {0}; +#endif +static __pyx_mstate *__pyx_mstate_global = &__pyx_mstate_global_static; +#endif +/* #### Code section: module_state_clear ### */ +#if CYTHON_USE_MODULE_STATE +static int __pyx_m_clear(PyObject *m) { + __pyx_mstate *clear_module_state = __pyx_mstate(m); + if (!clear_module_state) return 0; + Py_CLEAR(clear_module_state->__pyx_d); + Py_CLEAR(clear_module_state->__pyx_b); + Py_CLEAR(clear_module_state->__pyx_cython_runtime); + Py_CLEAR(clear_module_state->__pyx_empty_tuple); + Py_CLEAR(clear_module_state->__pyx_empty_bytes); + Py_CLEAR(clear_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_CLEAR(clear_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_CLEAR(clear_module_state->__pyx_FusedFunctionType); + #endif + Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast_Blast); + Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast_Blast); + Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); + Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast_BlastLine); + Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); + Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); + Py_CLEAR(clear_module_state->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); + Py_CLEAR(clear_module_state->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); + Py_CLEAR(clear_module_state->__pyx_n_s_Blast); + Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine); + Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine___get___locals_genexpr); + 
Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine___reduce); + Py_CLEAR(clear_module_state->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1); + Py_CLEAR(clear_module_state->__pyx_n_s_Blast___reduce_cython); + Py_CLEAR(clear_module_state->__pyx_n_s_Blast___setstate_cython); + Py_CLEAR(clear_module_state->__pyx_kp_s_Blast_s); + Py_CLEAR(clear_module_state->__pyx_n_s_IndexError); + Py_CLEAR(clear_module_state->__pyx_n_s_OverflowError); + Py_CLEAR(clear_module_state->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma); + Py_CLEAR(clear_module_state->__pyx_n_s_StopIteration); + Py_CLEAR(clear_module_state->__pyx_n_s_TypeError); + Py_CLEAR(clear_module_state->__pyx_kp_s_UTF_8); + Py_CLEAR(clear_module_state->__pyx_n_s__13); + Py_CLEAR(clear_module_state->__pyx_kp_s__5); + Py_CLEAR(clear_module_state->__pyx_n_s__6); + Py_CLEAR(clear_module_state->__pyx_n_s_args); + Py_CLEAR(clear_module_state->__pyx_n_s_asyncio_coroutines); + Py_CLEAR(clear_module_state->__pyx_n_s_cfunc_to_py); + Py_CLEAR(clear_module_state->__pyx_n_s_cline_in_traceback); + Py_CLEAR(clear_module_state->__pyx_n_s_close); + Py_CLEAR(clear_module_state->__pyx_kp_u_disable); + Py_CLEAR(clear_module_state->__pyx_kp_u_enable); + Py_CLEAR(clear_module_state->__pyx_n_s_encode); + Py_CLEAR(clear_module_state->__pyx_n_s_enumerate); + Py_CLEAR(clear_module_state->__pyx_n_s_evalue); + Py_CLEAR(clear_module_state->__pyx_n_s_filename); + Py_CLEAR(clear_module_state->__pyx_kp_u_gc); + Py_CLEAR(clear_module_state->__pyx_n_s_genexpr); + Py_CLEAR(clear_module_state->__pyx_n_s_getstate); + Py_CLEAR(clear_module_state->__pyx_n_s_hitlen); + Py_CLEAR(clear_module_state->__pyx_n_s_id); + Py_CLEAR(clear_module_state->__pyx_n_s_import); + Py_CLEAR(clear_module_state->__pyx_n_s_initializing); + Py_CLEAR(clear_module_state->__pyx_n_s_is_coroutine); + Py_CLEAR(clear_module_state->__pyx_kp_u_isenabled); + Py_CLEAR(clear_module_state->__pyx_n_s_jcvi_formats_cblast); + Py_CLEAR(clear_module_state->__pyx_n_s_join); + 
Py_CLEAR(clear_module_state->__pyx_n_s_main); + Py_CLEAR(clear_module_state->__pyx_n_s_name); + Py_CLEAR(clear_module_state->__pyx_n_s_ngaps); + Py_CLEAR(clear_module_state->__pyx_n_s_nmismatch); + Py_CLEAR(clear_module_state->__pyx_kp_s_no_default___reduce___due_to_non); + Py_CLEAR(clear_module_state->__pyx_n_s_orientation); + Py_CLEAR(clear_module_state->__pyx_n_s_pctid); + Py_CLEAR(clear_module_state->__pyx_n_s_pyx_state); + Py_CLEAR(clear_module_state->__pyx_n_s_qi); + Py_CLEAR(clear_module_state->__pyx_n_s_qseqid); + Py_CLEAR(clear_module_state->__pyx_n_s_qstart); + Py_CLEAR(clear_module_state->__pyx_n_s_qstop); + Py_CLEAR(clear_module_state->__pyx_n_s_query); + Py_CLEAR(clear_module_state->__pyx_n_s_reduce); + Py_CLEAR(clear_module_state->__pyx_n_s_reduce_cython); + Py_CLEAR(clear_module_state->__pyx_n_s_reduce_ex); + Py_CLEAR(clear_module_state->__pyx_n_s_richcmp); + Py_CLEAR(clear_module_state->__pyx_n_s_s); + Py_CLEAR(clear_module_state->__pyx_n_s_score); + Py_CLEAR(clear_module_state->__pyx_n_s_self); + Py_CLEAR(clear_module_state->__pyx_n_s_send); + Py_CLEAR(clear_module_state->__pyx_n_s_setstate); + Py_CLEAR(clear_module_state->__pyx_n_s_setstate_cython); + Py_CLEAR(clear_module_state->__pyx_n_s_si); + Py_CLEAR(clear_module_state->__pyx_n_s_slots); + Py_CLEAR(clear_module_state->__pyx_n_s_spec); + Py_CLEAR(clear_module_state->__pyx_kp_s_src_jcvi_formats_cblast_pyx); + Py_CLEAR(clear_module_state->__pyx_n_s_sseqid); + Py_CLEAR(clear_module_state->__pyx_n_s_sstart); + Py_CLEAR(clear_module_state->__pyx_n_s_sstop); + Py_CLEAR(clear_module_state->__pyx_kp_s_stringsource); + Py_CLEAR(clear_module_state->__pyx_n_s_subject); + Py_CLEAR(clear_module_state->__pyx_n_s_sys); + Py_CLEAR(clear_module_state->__pyx_n_s_test); + Py_CLEAR(clear_module_state->__pyx_kp_s_that_comparison_not_implemented); + Py_CLEAR(clear_module_state->__pyx_n_s_throw); + Py_CLEAR(clear_module_state->__pyx_n_s_wrap); + Py_CLEAR(clear_module_state->__pyx_int_2); + 
Py_CLEAR(clear_module_state->__pyx_int_12); + Py_CLEAR(clear_module_state->__pyx_tuple_); + Py_CLEAR(clear_module_state->__pyx_slice__4); + Py_CLEAR(clear_module_state->__pyx_tuple__3); + Py_CLEAR(clear_module_state->__pyx_tuple__7); + Py_CLEAR(clear_module_state->__pyx_tuple__9); + Py_CLEAR(clear_module_state->__pyx_tuple__11); + Py_CLEAR(clear_module_state->__pyx_codeobj__2); + Py_CLEAR(clear_module_state->__pyx_codeobj__8); + Py_CLEAR(clear_module_state->__pyx_codeobj__10); + Py_CLEAR(clear_module_state->__pyx_codeobj__12); + return 0; +} +#endif +/* #### Code section: module_state_traverse ### */ +#if CYTHON_USE_MODULE_STATE +static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) { + __pyx_mstate *traverse_module_state = __pyx_mstate(m); + if (!traverse_module_state) return 0; + Py_VISIT(traverse_module_state->__pyx_d); + Py_VISIT(traverse_module_state->__pyx_b); + Py_VISIT(traverse_module_state->__pyx_cython_runtime); + Py_VISIT(traverse_module_state->__pyx_empty_tuple); + Py_VISIT(traverse_module_state->__pyx_empty_bytes); + Py_VISIT(traverse_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_VISIT(traverse_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_VISIT(traverse_module_state->__pyx_FusedFunctionType); + #endif + Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast_Blast); + Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast_Blast); + Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); + Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast_BlastLine); + Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); + Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); + 
Py_VISIT(traverse_module_state->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); + Py_VISIT(traverse_module_state->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); + Py_VISIT(traverse_module_state->__pyx_n_s_Blast); + Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine); + Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine___get___locals_genexpr); + Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine___reduce); + Py_VISIT(traverse_module_state->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1); + Py_VISIT(traverse_module_state->__pyx_n_s_Blast___reduce_cython); + Py_VISIT(traverse_module_state->__pyx_n_s_Blast___setstate_cython); + Py_VISIT(traverse_module_state->__pyx_kp_s_Blast_s); + Py_VISIT(traverse_module_state->__pyx_n_s_IndexError); + Py_VISIT(traverse_module_state->__pyx_n_s_OverflowError); + Py_VISIT(traverse_module_state->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma); + Py_VISIT(traverse_module_state->__pyx_n_s_StopIteration); + Py_VISIT(traverse_module_state->__pyx_n_s_TypeError); + Py_VISIT(traverse_module_state->__pyx_kp_s_UTF_8); + Py_VISIT(traverse_module_state->__pyx_n_s__13); + Py_VISIT(traverse_module_state->__pyx_kp_s__5); + Py_VISIT(traverse_module_state->__pyx_n_s__6); + Py_VISIT(traverse_module_state->__pyx_n_s_args); + Py_VISIT(traverse_module_state->__pyx_n_s_asyncio_coroutines); + Py_VISIT(traverse_module_state->__pyx_n_s_cfunc_to_py); + Py_VISIT(traverse_module_state->__pyx_n_s_cline_in_traceback); + Py_VISIT(traverse_module_state->__pyx_n_s_close); + Py_VISIT(traverse_module_state->__pyx_kp_u_disable); + Py_VISIT(traverse_module_state->__pyx_kp_u_enable); + Py_VISIT(traverse_module_state->__pyx_n_s_encode); + Py_VISIT(traverse_module_state->__pyx_n_s_enumerate); + Py_VISIT(traverse_module_state->__pyx_n_s_evalue); + 
Py_VISIT(traverse_module_state->__pyx_n_s_filename); + Py_VISIT(traverse_module_state->__pyx_kp_u_gc); + Py_VISIT(traverse_module_state->__pyx_n_s_genexpr); + Py_VISIT(traverse_module_state->__pyx_n_s_getstate); + Py_VISIT(traverse_module_state->__pyx_n_s_hitlen); + Py_VISIT(traverse_module_state->__pyx_n_s_id); + Py_VISIT(traverse_module_state->__pyx_n_s_import); + Py_VISIT(traverse_module_state->__pyx_n_s_initializing); + Py_VISIT(traverse_module_state->__pyx_n_s_is_coroutine); + Py_VISIT(traverse_module_state->__pyx_kp_u_isenabled); + Py_VISIT(traverse_module_state->__pyx_n_s_jcvi_formats_cblast); + Py_VISIT(traverse_module_state->__pyx_n_s_join); + Py_VISIT(traverse_module_state->__pyx_n_s_main); + Py_VISIT(traverse_module_state->__pyx_n_s_name); + Py_VISIT(traverse_module_state->__pyx_n_s_ngaps); + Py_VISIT(traverse_module_state->__pyx_n_s_nmismatch); + Py_VISIT(traverse_module_state->__pyx_kp_s_no_default___reduce___due_to_non); + Py_VISIT(traverse_module_state->__pyx_n_s_orientation); + Py_VISIT(traverse_module_state->__pyx_n_s_pctid); + Py_VISIT(traverse_module_state->__pyx_n_s_pyx_state); + Py_VISIT(traverse_module_state->__pyx_n_s_qi); + Py_VISIT(traverse_module_state->__pyx_n_s_qseqid); + Py_VISIT(traverse_module_state->__pyx_n_s_qstart); + Py_VISIT(traverse_module_state->__pyx_n_s_qstop); + Py_VISIT(traverse_module_state->__pyx_n_s_query); + Py_VISIT(traverse_module_state->__pyx_n_s_reduce); + Py_VISIT(traverse_module_state->__pyx_n_s_reduce_cython); + Py_VISIT(traverse_module_state->__pyx_n_s_reduce_ex); + Py_VISIT(traverse_module_state->__pyx_n_s_richcmp); + Py_VISIT(traverse_module_state->__pyx_n_s_s); + Py_VISIT(traverse_module_state->__pyx_n_s_score); + Py_VISIT(traverse_module_state->__pyx_n_s_self); + Py_VISIT(traverse_module_state->__pyx_n_s_send); + Py_VISIT(traverse_module_state->__pyx_n_s_setstate); + Py_VISIT(traverse_module_state->__pyx_n_s_setstate_cython); + Py_VISIT(traverse_module_state->__pyx_n_s_si); + 
Py_VISIT(traverse_module_state->__pyx_n_s_slots); + Py_VISIT(traverse_module_state->__pyx_n_s_spec); + Py_VISIT(traverse_module_state->__pyx_kp_s_src_jcvi_formats_cblast_pyx); + Py_VISIT(traverse_module_state->__pyx_n_s_sseqid); + Py_VISIT(traverse_module_state->__pyx_n_s_sstart); + Py_VISIT(traverse_module_state->__pyx_n_s_sstop); + Py_VISIT(traverse_module_state->__pyx_kp_s_stringsource); + Py_VISIT(traverse_module_state->__pyx_n_s_subject); + Py_VISIT(traverse_module_state->__pyx_n_s_sys); + Py_VISIT(traverse_module_state->__pyx_n_s_test); + Py_VISIT(traverse_module_state->__pyx_kp_s_that_comparison_not_implemented); + Py_VISIT(traverse_module_state->__pyx_n_s_throw); + Py_VISIT(traverse_module_state->__pyx_n_s_wrap); + Py_VISIT(traverse_module_state->__pyx_int_2); + Py_VISIT(traverse_module_state->__pyx_int_12); + Py_VISIT(traverse_module_state->__pyx_tuple_); + Py_VISIT(traverse_module_state->__pyx_slice__4); + Py_VISIT(traverse_module_state->__pyx_tuple__3); + Py_VISIT(traverse_module_state->__pyx_tuple__7); + Py_VISIT(traverse_module_state->__pyx_tuple__9); + Py_VISIT(traverse_module_state->__pyx_tuple__11); + Py_VISIT(traverse_module_state->__pyx_codeobj__2); + Py_VISIT(traverse_module_state->__pyx_codeobj__8); + Py_VISIT(traverse_module_state->__pyx_codeobj__10); + Py_VISIT(traverse_module_state->__pyx_codeobj__12); + return 0; +} +#endif +/* #### Code section: module_state_defines ### */ +#define __pyx_d __pyx_mstate_global->__pyx_d +#define __pyx_b __pyx_mstate_global->__pyx_b +#define __pyx_cython_runtime __pyx_mstate_global->__pyx_cython_runtime +#define __pyx_empty_tuple __pyx_mstate_global->__pyx_empty_tuple +#define __pyx_empty_bytes __pyx_mstate_global->__pyx_empty_bytes +#define __pyx_empty_unicode __pyx_mstate_global->__pyx_empty_unicode +#ifdef __Pyx_CyFunction_USED +#define __pyx_CyFunctionType __pyx_mstate_global->__pyx_CyFunctionType +#endif +#ifdef __Pyx_FusedFunction_USED +#define __pyx_FusedFunctionType 
__pyx_mstate_global->__pyx_FusedFunctionType +#endif +#ifdef __Pyx_Generator_USED +#define __pyx_GeneratorType __pyx_mstate_global->__pyx_GeneratorType +#endif +#ifdef __Pyx_IterableCoroutine_USED +#define __pyx_IterableCoroutineType __pyx_mstate_global->__pyx_IterableCoroutineType +#endif +#ifdef __Pyx_Coroutine_USED +#define __pyx_CoroutineAwaitType __pyx_mstate_global->__pyx_CoroutineAwaitType +#endif +#ifdef __Pyx_Coroutine_USED +#define __pyx_CoroutineType __pyx_mstate_global->__pyx_CoroutineType +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#define __pyx_type_4jcvi_7formats_6cblast_Blast __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast_Blast +#define __pyx_type_4jcvi_7formats_6cblast_BlastLine __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast_BlastLine +#define __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr +#define __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc __pyx_mstate_global->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc +#endif +#define __pyx_ptype_4jcvi_7formats_6cblast_Blast __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast_Blast +#define __pyx_ptype_4jcvi_7formats_6cblast_BlastLine __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine +#define __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr +#define __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc 
__pyx_mstate_global->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc +#define __pyx_n_s_Blast __pyx_mstate_global->__pyx_n_s_Blast +#define __pyx_n_s_BlastLine __pyx_mstate_global->__pyx_n_s_BlastLine +#define __pyx_n_s_BlastLine___get___locals_genexpr __pyx_mstate_global->__pyx_n_s_BlastLine___get___locals_genexpr +#define __pyx_n_s_BlastLine___reduce __pyx_mstate_global->__pyx_n_s_BlastLine___reduce +#define __pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1 __pyx_mstate_global->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1 +#define __pyx_n_s_Blast___reduce_cython __pyx_mstate_global->__pyx_n_s_Blast___reduce_cython +#define __pyx_n_s_Blast___setstate_cython __pyx_mstate_global->__pyx_n_s_Blast___setstate_cython +#define __pyx_kp_s_Blast_s __pyx_mstate_global->__pyx_kp_s_Blast_s +#define __pyx_n_s_IndexError __pyx_mstate_global->__pyx_n_s_IndexError +#define __pyx_n_s_OverflowError __pyx_mstate_global->__pyx_n_s_OverflowError +#define __pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma __pyx_mstate_global->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma +#define __pyx_n_s_StopIteration __pyx_mstate_global->__pyx_n_s_StopIteration +#define __pyx_n_s_TypeError __pyx_mstate_global->__pyx_n_s_TypeError +#define __pyx_kp_s_UTF_8 __pyx_mstate_global->__pyx_kp_s_UTF_8 +#define __pyx_n_s__13 __pyx_mstate_global->__pyx_n_s__13 +#define __pyx_kp_s__5 __pyx_mstate_global->__pyx_kp_s__5 +#define __pyx_n_s__6 __pyx_mstate_global->__pyx_n_s__6 +#define __pyx_n_s_args __pyx_mstate_global->__pyx_n_s_args +#define __pyx_n_s_asyncio_coroutines __pyx_mstate_global->__pyx_n_s_asyncio_coroutines +#define __pyx_n_s_cfunc_to_py __pyx_mstate_global->__pyx_n_s_cfunc_to_py +#define __pyx_n_s_cline_in_traceback __pyx_mstate_global->__pyx_n_s_cline_in_traceback +#define __pyx_n_s_close __pyx_mstate_global->__pyx_n_s_close +#define __pyx_kp_u_disable __pyx_mstate_global->__pyx_kp_u_disable +#define 
__pyx_kp_u_enable __pyx_mstate_global->__pyx_kp_u_enable +#define __pyx_n_s_encode __pyx_mstate_global->__pyx_n_s_encode +#define __pyx_n_s_enumerate __pyx_mstate_global->__pyx_n_s_enumerate +#define __pyx_n_s_evalue __pyx_mstate_global->__pyx_n_s_evalue +#define __pyx_n_s_filename __pyx_mstate_global->__pyx_n_s_filename +#define __pyx_kp_u_gc __pyx_mstate_global->__pyx_kp_u_gc +#define __pyx_n_s_genexpr __pyx_mstate_global->__pyx_n_s_genexpr +#define __pyx_n_s_getstate __pyx_mstate_global->__pyx_n_s_getstate +#define __pyx_n_s_hitlen __pyx_mstate_global->__pyx_n_s_hitlen +#define __pyx_n_s_id __pyx_mstate_global->__pyx_n_s_id +#define __pyx_n_s_import __pyx_mstate_global->__pyx_n_s_import +#define __pyx_n_s_initializing __pyx_mstate_global->__pyx_n_s_initializing +#define __pyx_n_s_is_coroutine __pyx_mstate_global->__pyx_n_s_is_coroutine +#define __pyx_kp_u_isenabled __pyx_mstate_global->__pyx_kp_u_isenabled +#define __pyx_n_s_jcvi_formats_cblast __pyx_mstate_global->__pyx_n_s_jcvi_formats_cblast +#define __pyx_n_s_join __pyx_mstate_global->__pyx_n_s_join +#define __pyx_n_s_main __pyx_mstate_global->__pyx_n_s_main +#define __pyx_n_s_name __pyx_mstate_global->__pyx_n_s_name +#define __pyx_n_s_ngaps __pyx_mstate_global->__pyx_n_s_ngaps +#define __pyx_n_s_nmismatch __pyx_mstate_global->__pyx_n_s_nmismatch +#define __pyx_kp_s_no_default___reduce___due_to_non __pyx_mstate_global->__pyx_kp_s_no_default___reduce___due_to_non +#define __pyx_n_s_orientation __pyx_mstate_global->__pyx_n_s_orientation +#define __pyx_n_s_pctid __pyx_mstate_global->__pyx_n_s_pctid +#define __pyx_n_s_pyx_state __pyx_mstate_global->__pyx_n_s_pyx_state +#define __pyx_n_s_qi __pyx_mstate_global->__pyx_n_s_qi +#define __pyx_n_s_qseqid __pyx_mstate_global->__pyx_n_s_qseqid +#define __pyx_n_s_qstart __pyx_mstate_global->__pyx_n_s_qstart +#define __pyx_n_s_qstop __pyx_mstate_global->__pyx_n_s_qstop +#define __pyx_n_s_query __pyx_mstate_global->__pyx_n_s_query +#define __pyx_n_s_reduce 
__pyx_mstate_global->__pyx_n_s_reduce +#define __pyx_n_s_reduce_cython __pyx_mstate_global->__pyx_n_s_reduce_cython +#define __pyx_n_s_reduce_ex __pyx_mstate_global->__pyx_n_s_reduce_ex +#define __pyx_n_s_richcmp __pyx_mstate_global->__pyx_n_s_richcmp +#define __pyx_n_s_s __pyx_mstate_global->__pyx_n_s_s +#define __pyx_n_s_score __pyx_mstate_global->__pyx_n_s_score +#define __pyx_n_s_self __pyx_mstate_global->__pyx_n_s_self +#define __pyx_n_s_send __pyx_mstate_global->__pyx_n_s_send +#define __pyx_n_s_setstate __pyx_mstate_global->__pyx_n_s_setstate +#define __pyx_n_s_setstate_cython __pyx_mstate_global->__pyx_n_s_setstate_cython +#define __pyx_n_s_si __pyx_mstate_global->__pyx_n_s_si +#define __pyx_n_s_slots __pyx_mstate_global->__pyx_n_s_slots +#define __pyx_n_s_spec __pyx_mstate_global->__pyx_n_s_spec +#define __pyx_kp_s_src_jcvi_formats_cblast_pyx __pyx_mstate_global->__pyx_kp_s_src_jcvi_formats_cblast_pyx +#define __pyx_n_s_sseqid __pyx_mstate_global->__pyx_n_s_sseqid +#define __pyx_n_s_sstart __pyx_mstate_global->__pyx_n_s_sstart +#define __pyx_n_s_sstop __pyx_mstate_global->__pyx_n_s_sstop +#define __pyx_kp_s_stringsource __pyx_mstate_global->__pyx_kp_s_stringsource +#define __pyx_n_s_subject __pyx_mstate_global->__pyx_n_s_subject +#define __pyx_n_s_sys __pyx_mstate_global->__pyx_n_s_sys +#define __pyx_n_s_test __pyx_mstate_global->__pyx_n_s_test +#define __pyx_kp_s_that_comparison_not_implemented __pyx_mstate_global->__pyx_kp_s_that_comparison_not_implemented +#define __pyx_n_s_throw __pyx_mstate_global->__pyx_n_s_throw +#define __pyx_n_s_wrap __pyx_mstate_global->__pyx_n_s_wrap +#define __pyx_int_2 __pyx_mstate_global->__pyx_int_2 +#define __pyx_int_12 __pyx_mstate_global->__pyx_int_12 +#define __pyx_tuple_ __pyx_mstate_global->__pyx_tuple_ +#define __pyx_slice__4 __pyx_mstate_global->__pyx_slice__4 +#define __pyx_tuple__3 __pyx_mstate_global->__pyx_tuple__3 +#define __pyx_tuple__7 __pyx_mstate_global->__pyx_tuple__7 +#define __pyx_tuple__9 
__pyx_mstate_global->__pyx_tuple__9 +#define __pyx_tuple__11 __pyx_mstate_global->__pyx_tuple__11 +#define __pyx_codeobj__2 __pyx_mstate_global->__pyx_codeobj__2 +#define __pyx_codeobj__8 __pyx_mstate_global->__pyx_codeobj__8 +#define __pyx_codeobj__10 __pyx_mstate_global->__pyx_codeobj__10 +#define __pyx_codeobj__12 __pyx_mstate_global->__pyx_codeobj__12 +/* #### Code section: module_code ### */ + +/* "cfunc.to_py":67 + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +PyDoc_STRVAR(__pyx_doc_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap, "wrap(query: 'char *', subject: 'char *', pctid: 'float', 
hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'"); +static PyMethodDef __pyx_mdef_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap = {"wrap", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap}; +static PyObject *__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + char *__pyx_v_query; + char *__pyx_v_subject; + float __pyx_v_pctid; + int __pyx_v_hitlen; + int __pyx_v_nmismatch; + int __pyx_v_ngaps; + int __pyx_v_qstart; + int __pyx_v_qstop; + int __pyx_v_sstart; + int __pyx_v_sstop; + float __pyx_v_evalue; + float __pyx_v_score; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[12] = {0,0,0,0,0,0,0,0,0,0,0,0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("wrap (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if 
(unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_query,&__pyx_n_s_subject,&__pyx_n_s_pctid,&__pyx_n_s_hitlen,&__pyx_n_s_nmismatch,&__pyx_n_s_ngaps,&__pyx_n_s_qstart,&__pyx_n_s_qstop,&__pyx_n_s_sstart,&__pyx_n_s_sstop,&__pyx_n_s_evalue,&__pyx_n_s_score,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 12: values[11] = __Pyx_Arg_FASTCALL(__pyx_args, 11); + CYTHON_FALLTHROUGH; + case 11: values[10] = __Pyx_Arg_FASTCALL(__pyx_args, 10); + CYTHON_FALLTHROUGH; + case 10: values[9] = __Pyx_Arg_FASTCALL(__pyx_args, 9); + CYTHON_FALLTHROUGH; + case 9: values[8] = __Pyx_Arg_FASTCALL(__pyx_args, 8); + CYTHON_FALLTHROUGH; + case 8: values[7] = __Pyx_Arg_FASTCALL(__pyx_args, 7); + CYTHON_FALLTHROUGH; + case 7: values[6] = __Pyx_Arg_FASTCALL(__pyx_args, 6); + CYTHON_FALLTHROUGH; + case 6: values[5] = __Pyx_Arg_FASTCALL(__pyx_args, 5); + CYTHON_FALLTHROUGH; + case 5: values[4] = __Pyx_Arg_FASTCALL(__pyx_args, 4); + CYTHON_FALLTHROUGH; + case 4: values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3); + CYTHON_FALLTHROUGH; + case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_query)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_subject)) != 0)) { + 
(void)__Pyx_Arg_NewRef_FASTCALL(values[1]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 1); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (likely((values[2] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pctid)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[2]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 2); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 3: + if (likely((values[3] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_hitlen)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[3]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 3); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 4: + if (likely((values[4] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_nmismatch)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[4]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 4); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 5: + if (likely((values[5] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_ngaps)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[5]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 5); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 6: + if (likely((values[6] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_qstart)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[6]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) 
+ else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 6); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 7: + if (likely((values[7] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_qstop)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[7]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 7); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 8: + if (likely((values[8] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_sstart)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[8]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 8); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 9: + if (likely((values[9] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_sstop)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[9]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 9); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 10: + if (likely((values[10] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_evalue)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[10]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 10); __PYX_ERR(1, 67, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 11: + if (likely((values[11] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_score)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[11]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 11); __PYX_ERR(1, 67, __pyx_L3_error) + } + } + if (unlikely(kw_args > 
0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "wrap") < 0)) __PYX_ERR(1, 67, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 12)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); + values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3); + values[4] = __Pyx_Arg_FASTCALL(__pyx_args, 4); + values[5] = __Pyx_Arg_FASTCALL(__pyx_args, 5); + values[6] = __Pyx_Arg_FASTCALL(__pyx_args, 6); + values[7] = __Pyx_Arg_FASTCALL(__pyx_args, 7); + values[8] = __Pyx_Arg_FASTCALL(__pyx_args, 8); + values[9] = __Pyx_Arg_FASTCALL(__pyx_args, 9); + values[10] = __Pyx_Arg_FASTCALL(__pyx_args, 10); + values[11] = __Pyx_Arg_FASTCALL(__pyx_args, 11); + } + __pyx_v_query = __Pyx_PyObject_AsWritableString(values[0]); if (unlikely((!__pyx_v_query) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_subject = __Pyx_PyObject_AsWritableString(values[1]); if (unlikely((!__pyx_v_subject) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_pctid = __pyx_PyFloat_AsFloat(values[2]); if (unlikely((__pyx_v_pctid == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_hitlen = __Pyx_PyInt_As_int(values[3]); if (unlikely((__pyx_v_hitlen == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_nmismatch = __Pyx_PyInt_As_int(values[4]); if (unlikely((__pyx_v_nmismatch == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_ngaps = __Pyx_PyInt_As_int(values[5]); if (unlikely((__pyx_v_ngaps == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_qstart = __Pyx_PyInt_As_int(values[6]); if (unlikely((__pyx_v_qstart == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_qstop = __Pyx_PyInt_As_int(values[7]); if 
(unlikely((__pyx_v_qstop == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_sstart = __Pyx_PyInt_As_int(values[8]); if (unlikely((__pyx_v_sstart == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_sstop = __Pyx_PyInt_As_int(values[9]); if (unlikely((__pyx_v_sstop == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_evalue = __pyx_PyFloat_AsFloat(values[10]); if (unlikely((__pyx_v_evalue == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_v_score = __pyx_PyFloat_AsFloat(values[11]); if (unlikely((__pyx_v_score == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, __pyx_nargs); __PYX_ERR(1, 67, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc.wrap", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(__pyx_self, __pyx_v_query, __pyx_v_subject, __pyx_v_pctid, __pyx_v_hitlen, __pyx_v_nmismatch, __pyx_v_ngaps, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_score); + + /* function exit code */ + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); 
+ } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(PyObject *__pyx_self, char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score) { + struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_cur_scope; + struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_outer_scope; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("wrap", 1); + __pyx_outer_scope = (struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *) __Pyx_CyFunction_GetClosure(__pyx_self); + __pyx_cur_scope = __pyx_outer_scope; + + /* "cfunc.to_py":69 + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) # <<<<<<<<<<<<<< + * return wrap + * + */ + 
__Pyx_XDECREF(__pyx_r); + __pyx_t_1 = ((PyObject *)__pyx_cur_scope->__pyx_v_f(__pyx_v_query, __pyx_v_subject, __pyx_v_pctid, __pyx_v_hitlen, __pyx_v_nmismatch, __pyx_v_ngaps, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_score)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 69, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "cfunc.to_py":67 + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc.wrap", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "cfunc.to_py":66 + * + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object 
__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + */ + +static PyObject *__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*__pyx_v_f)(char *, char *, float, int, int, int, int, int, int, int, float, float)) { + struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_cur_scope; + PyObject *__pyx_v_wrap = 0; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", 0); + __pyx_cur_scope = (struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc 
*)__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, __pyx_empty_tuple, NULL); + if (unlikely(!__pyx_cur_scope)) { + __pyx_cur_scope = ((struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *)Py_None); + __Pyx_INCREF(Py_None); + __PYX_ERR(1, 66, __pyx_L1_error) + } else { + __Pyx_GOTREF((PyObject *)__pyx_cur_scope); + } + __pyx_cur_scope->__pyx_v_f = __pyx_v_f; + + /* "cfunc.to_py":67 + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) + */ + __pyx_t_1 = __Pyx_CyFunction_New(&__pyx_mdef_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap, 0, __pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma, 
((PyObject*)__pyx_cur_scope), __pyx_n_s_cfunc_to_py, __pyx_d, ((PyObject *)__pyx_codeobj__2)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 67, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_wrap = __pyx_t_1; + __pyx_t_1 = 0; + + /* "cfunc.to_py":70 + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) + * return wrap # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_wrap); + __pyx_r = __pyx_v_wrap; + goto __pyx_L0; + + /* "cfunc.to_py":66 + * + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_wrap); + __Pyx_DECREF((PyObject *)__pyx_cur_scope); + __Pyx_XGIVEREF(__pyx_r); + 
__Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "carray.from_py":79 + * + * @cname("__Pyx_carray_from_py_char") + * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: # <<<<<<<<<<<<<< + * cdef Py_ssize_t i = length + * try: + */ + +static int __Pyx_carray_from_py_char(PyObject *__pyx_v_o, char *__pyx_v_v, Py_ssize_t __pyx_v_length) { + Py_ssize_t __pyx_v_i; + PyObject *__pyx_v_item = NULL; + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + Py_ssize_t __pyx_t_4; + int __pyx_t_5; + int __pyx_t_6; + PyObject *__pyx_t_7 = NULL; + Py_ssize_t __pyx_t_8; + PyObject *(*__pyx_t_9)(PyObject *); + PyObject *__pyx_t_10 = NULL; + char __pyx_t_11; + char const *__pyx_t_12; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__Pyx_carray_from_py_char", 1); + + /* "carray.from_py":80 + * @cname("__Pyx_carray_from_py_char") + * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: + * cdef Py_ssize_t i = length # <<<<<<<<<<<<<< + * try: + * i = len(o) + */ + __pyx_v_i = __pyx_v_length; + + /* "carray.from_py":81 + * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: + * cdef Py_ssize_t i = length + * try: # <<<<<<<<<<<<<< + * i = len(o) + * except (TypeError, OverflowError): + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "carray.from_py":82 + * cdef Py_ssize_t i = length + * try: + * i = len(o) # <<<<<<<<<<<<<< + * except (TypeError, OverflowError): + * pass + */ + __pyx_t_4 = PyObject_Length(__pyx_v_o); if (unlikely(__pyx_t_4 == ((Py_ssize_t)-1))) __PYX_ERR(1, 82, __pyx_L3_error) + __pyx_v_i = __pyx_t_4; + + /* "carray.from_py":81 + * 
cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: + * cdef Py_ssize_t i = length + * try: # <<<<<<<<<<<<<< + * i = len(o) + * except (TypeError, OverflowError): + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "carray.from_py":83 + * try: + * i = len(o) + * except (TypeError, OverflowError): # <<<<<<<<<<<<<< + * pass + * if i == length: + */ + __pyx_t_5 = __Pyx_PyErr_ExceptionMatches2(__pyx_builtin_TypeError, __pyx_builtin_OverflowError); + if (__pyx_t_5) { + __Pyx_ErrRestore(0,0,0); + goto __pyx_L4_exception_handled; + } + goto __pyx_L5_except_error; + + /* "carray.from_py":81 + * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: + * cdef Py_ssize_t i = length + * try: # <<<<<<<<<<<<<< + * i = len(o) + * except (TypeError, OverflowError): + */ + __pyx_L5_except_error:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L4_exception_handled:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + __pyx_L8_try_end:; + } + + /* "carray.from_py":85 + * except (TypeError, OverflowError): + * pass + * if i == length: # <<<<<<<<<<<<<< + * for i, item in enumerate(o): + * if i >= length: + */ + __pyx_t_6 = (__pyx_v_i == __pyx_v_length); + if (__pyx_t_6) { + + /* "carray.from_py":86 + * pass + * if i == length: + * for i, item in enumerate(o): # <<<<<<<<<<<<<< + * if i >= length: + * break + */ + __pyx_t_4 = 0; + if (likely(PyList_CheckExact(__pyx_v_o)) || PyTuple_CheckExact(__pyx_v_o)) { + __pyx_t_7 = __pyx_v_o; __Pyx_INCREF(__pyx_t_7); + __pyx_t_8 = 0; + __pyx_t_9 = NULL; + } else { + __pyx_t_8 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_v_o); if 
(unlikely(!__pyx_t_7)) __PYX_ERR(1, 86, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_9 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 86, __pyx_L1_error) + } + for (;;) { + if (likely(!__pyx_t_9)) { + if (likely(PyList_CheckExact(__pyx_t_7))) { + { + Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_7); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(1, 86, __pyx_L1_error) + #endif + if (__pyx_t_8 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_10 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_8); __Pyx_INCREF(__pyx_t_10); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(1, 86, __pyx_L1_error) + #else + __pyx_t_10 = __Pyx_PySequence_ITEM(__pyx_t_7, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 86, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_10); + #endif + } else { + { + Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_7); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(1, 86, __pyx_L1_error) + #endif + if (__pyx_t_8 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_8); __Pyx_INCREF(__pyx_t_10); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(1, 86, __pyx_L1_error) + #else + __pyx_t_10 = __Pyx_PySequence_ITEM(__pyx_t_7, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 86, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_10); + #endif + } + } else { + __pyx_t_10 = __pyx_t_9(__pyx_t_7); + if (unlikely(!__pyx_t_10)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(1, 86, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_10); + } + __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_10); + __pyx_t_10 = 0; + __pyx_v_i = __pyx_t_4; + __pyx_t_4 = (__pyx_t_4 + 1); + + /* "carray.from_py":87 + * if i == 
length: + * for i, item in enumerate(o): + * if i >= length: # <<<<<<<<<<<<<< + * break + * v[i] = item + */ + __pyx_t_6 = (__pyx_v_i >= __pyx_v_length); + if (__pyx_t_6) { + + /* "carray.from_py":88 + * for i, item in enumerate(o): + * if i >= length: + * break # <<<<<<<<<<<<<< + * v[i] = item + * else: + */ + goto __pyx_L11_break; + + /* "carray.from_py":87 + * if i == length: + * for i, item in enumerate(o): + * if i >= length: # <<<<<<<<<<<<<< + * break + * v[i] = item + */ + } + + /* "carray.from_py":89 + * if i >= length: + * break + * v[i] = item # <<<<<<<<<<<<<< + * else: + * i += 1 # convert index to length + */ + __pyx_t_11 = __Pyx_PyInt_As_char(__pyx_v_item); if (unlikely((__pyx_t_11 == (char)-1) && PyErr_Occurred())) __PYX_ERR(1, 89, __pyx_L1_error) + (__pyx_v_v[__pyx_v_i]) = __pyx_t_11; + + /* "carray.from_py":86 + * pass + * if i == length: + * for i, item in enumerate(o): # <<<<<<<<<<<<<< + * if i >= length: + * break + */ + } + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + goto __pyx_L13_for_else; + __pyx_L11_break:; + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + goto __pyx_L14_for_end; + /*else*/ { + __pyx_L13_for_else:; + + /* "carray.from_py":91 + * v[i] = item + * else: + * i += 1 # convert index to length # <<<<<<<<<<<<<< + * if i == length: + * return 0 + */ + __pyx_v_i = (__pyx_v_i + 1); + + /* "carray.from_py":92 + * else: + * i += 1 # convert index to length + * if i == length: # <<<<<<<<<<<<<< + * return 0 + * + */ + __pyx_t_6 = (__pyx_v_i == __pyx_v_length); + if (__pyx_t_6) { + + /* "carray.from_py":93 + * i += 1 # convert index to length + * if i == length: + * return 0 # <<<<<<<<<<<<<< + * + * PyErr_Format( + */ + __pyx_r = 0; + goto __pyx_L0; + + /* "carray.from_py":92 + * else: + * i += 1 # convert index to length + * if i == length: # <<<<<<<<<<<<<< + * return 0 + * + */ + } + } + __pyx_L14_for_end:; + + /* "carray.from_py":85 + * except (TypeError, OverflowError): + * pass + * if i == length: # <<<<<<<<<<<<<< + * for i, item in 
enumerate(o): + * if i >= length: + */ + } + + /* "carray.from_py":98 + * IndexError, + * ("too many values found during array assignment, expected %zd" + * if i >= length else # <<<<<<<<<<<<<< + * "not enough values found during array assignment, expected %zd, got %zd"), + * length, i) + */ + __pyx_t_6 = (__pyx_v_i >= __pyx_v_length); + if (__pyx_t_6) { + __pyx_t_12 = ((char const *)"too many values found during array assignment, expected %zd"); + } else { + __pyx_t_12 = ((char const *)"not enough values found during array assignment, expected %zd, got %zd"); + } + + /* "carray.from_py":95 + * return 0 + * + * PyErr_Format( # <<<<<<<<<<<<<< + * IndexError, + * ("too many values found during array assignment, expected %zd" + */ + __pyx_t_7 = PyErr_Format(__pyx_builtin_IndexError, __pyx_t_12, __pyx_v_length, __pyx_v_i); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 95, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + + /* "carray.from_py":79 + * + * @cname("__Pyx_carray_from_py_char") + * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: # <<<<<<<<<<<<<< + * cdef Py_ssize_t i = length + * try: + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_10); + __Pyx_AddTraceback("carray.from_py.__Pyx_carray_from_py_char", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_item); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":26 + * object filename + * + * def __cinit__(self, char* filename): # <<<<<<<<<<<<<< + * self.fh = fopen(filename, 'r') + * self.filename = filename + */ + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(PyObject *__pyx_v_self, PyObject 
*__pyx_args, PyObject *__pyx_kwds) { + char *__pyx_v_filename; + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[1] = {0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return -1; + #endif + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_filename,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_filename)) != 0)) { + (void)__Pyx_Arg_NewRef_VARARGS(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 26, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__cinit__") < 0)) __PYX_ERR(0, 26, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 1)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); + } + __pyx_v_filename = __Pyx_PyObject_AsWritableString(values[0]); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) __PYX_ERR(0, 26, __pyx_L3_error) + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 26, __pyx_L3_error) + __pyx_L6_skip:; + goto 
__pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return -1; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self), __pyx_v_filename); + + /* function exit code */ + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, char *__pyx_v_filename) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__cinit__", 1); + + /* "jcvi/formats/cblast.pyx":27 + * + * def __cinit__(self, char* filename): + * self.fh = fopen(filename, 'r') # <<<<<<<<<<<<<< + * self.filename = filename + * + */ + __pyx_v_self->fh = fopen(__pyx_v_filename, ((char const *)"r")); + + /* "jcvi/formats/cblast.pyx":28 + * def __cinit__(self, char* filename): + * self.fh = fopen(filename, 'r') + * self.filename = filename # <<<<<<<<<<<<<< + * + * def __iter__(self): + */ + __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 28, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_GIVEREF(__pyx_t_1); + __Pyx_GOTREF(__pyx_v_self->filename); + __Pyx_DECREF(__pyx_v_self->filename); + __pyx_v_self->filename = __pyx_t_1; + __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":26 + * object filename + * + * def 
__cinit__(self, char* filename): # <<<<<<<<<<<<<< + * self.fh = fopen(filename, 'r') + * self.filename = filename + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":30 + * self.filename = filename + * + * def __iter__(self): # <<<<<<<<<<<<<< + * rewind(self.fh) + * return self + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__iter__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__iter__", 1); + + /* "jcvi/formats/cblast.pyx":31 + * + * def __iter__(self): + * rewind(self.fh) # <<<<<<<<<<<<<< + * return self + * + */ + rewind(__pyx_v_self->fh); + + /* "jcvi/formats/cblast.pyx":32 + * def __iter__(self): + * rewind(self.fh) + * return self # <<<<<<<<<<<<<< + * + * def __next__(self): + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_self); + __pyx_r = ((PyObject *)__pyx_v_self); + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":30 + * self.filename = filename + * + * def __iter__(self): # <<<<<<<<<<<<<< + * 
rewind(self.fh) + * return self + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":34 + * return self + * + * def __next__(self): # <<<<<<<<<<<<<< + * cdef: + * float pct = 0.0, evalue = 0.0, bit = 0.0 + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__next__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { + float __pyx_v_pct; + float __pyx_v_evalue; + float __pyx_v_bit; + char __pyx_v_qname[0x80]; + char __pyx_v_sname[0x80]; + int __pyx_v_hlen; + int __pyx_v_nmiss; + int __pyx_v_ngap; + int __pyx_v_qstart; + int __pyx_v_qstop; + int __pyx_v_sstart; + int __pyx_v_sstop; + int __pyx_v_success; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_error_without_exception = 0; /* StopIteration */ + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__next__", 1); + + /* "jcvi/formats/cblast.pyx":36 + * def __next__(self): + * cdef: + * float pct = 0.0, evalue = 0.0, bit = 0.0 # <<<<<<<<<<<<<< + * char qname[128] + * char sname[128] + */ + __pyx_v_pct = 0.0; + __pyx_v_evalue = 0.0; + __pyx_v_bit = 0.0; + + /* "jcvi/formats/cblast.pyx":43 + * int success + * + * 
success = fscanf(self.fh, blast_format_line, qname, sname, \ # <<<<<<<<<<<<<< + * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ + * &sstart, &sstop, &evalue, &bit ) + */ + __pyx_v_success = fscanf(__pyx_v_self->fh, __pyx_v_4jcvi_7formats_6cblast_blast_format_line, __pyx_v_qname, __pyx_v_sname, (&__pyx_v_pct), (&__pyx_v_hlen), (&__pyx_v_nmiss), (&__pyx_v_ngap), (&__pyx_v_qstart), (&__pyx_v_qstop), (&__pyx_v_sstart), (&__pyx_v_sstop), (&__pyx_v_evalue), (&__pyx_v_bit)); + + /* "jcvi/formats/cblast.pyx":46 + * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ + * &sstart, &sstop, &evalue, &bit ) + * if success == EOF: # <<<<<<<<<<<<<< + * raise StopIteration + * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, + */ + __pyx_t_1 = (__pyx_v_success == EOF); + if (unlikely(__pyx_t_1)) { + + /* "jcvi/formats/cblast.pyx":47 + * &sstart, &sstop, &evalue, &bit ) + * if success == EOF: + * raise StopIteration # <<<<<<<<<<<<<< + * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, + * qstart, qstop, sstart, sstop, evalue, bit) + */ + __pyx_error_without_exception = 1; + goto __pyx_L1_error;; + + /* "jcvi/formats/cblast.pyx":46 + * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ + * &sstart, &sstop, &evalue, &bit ) + * if success == EOF: # <<<<<<<<<<<<<< + * raise StopIteration + * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, + */ + } + + /* "jcvi/formats/cblast.pyx":48 + * if success == EOF: + * raise StopIteration + * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, # <<<<<<<<<<<<<< + * qstart, qstop, sstart, sstop, evalue, bit) + * + */ + __Pyx_XDECREF(__pyx_r); + + /* "jcvi/formats/cblast.pyx":49 + * raise StopIteration + * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, + * qstart, qstop, sstart, sstop, evalue, bit) # <<<<<<<<<<<<<< + * + * def __dealloc__(self): + */ + __pyx_t_2 = ((PyObject *)__pyx_f_4jcvi_7formats_6cblast_create_blast_line(__pyx_v_qname, __pyx_v_sname, __pyx_v_pct, __pyx_v_hlen, 
__pyx_v_nmiss, __pyx_v_ngap, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_bit)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 48, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":34 + * return self + * + * def __next__(self): # <<<<<<<<<<<<<< + * cdef: + * float pct = 0.0, evalue = 0.0, bit = 0.0 + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + if (!__pyx_error_without_exception) { + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__next__", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":51 + * qstart, qstop, sstart, sstop, evalue, bit) + * + * def __dealloc__(self): # <<<<<<<<<<<<<< + * fclose(self.fh) + * + */ + +/* Python wrapper */ +static void __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(PyObject *__pyx_v_self); /*proto*/ +static void __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); +} + +static void __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { + + /* "jcvi/formats/cblast.pyx":52 + * + * def __dealloc__(self): + * fclose(self.fh) # <<<<<<<<<<<<<< + * + * def __repr__(self): + */ + (void)(fclose(__pyx_v_self->fh)); + + /* "jcvi/formats/cblast.pyx":51 + * qstart, qstop, sstart, sstop, evalue, bit) + * + * def __dealloc__(self): # <<<<<<<<<<<<<< + * fclose(self.fh) + * + */ + + /* 
function exit code */ +} + +/* "jcvi/formats/cblast.pyx":54 + * fclose(self.fh) + * + * def __repr__(self): # <<<<<<<<<<<<<< + * return "Blast('%s')" % (self.filename, ) + * + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__repr__", 1); + + /* "jcvi/formats/cblast.pyx":55 + * + * def __repr__(self): + * return "Blast('%s')" % (self.filename, ) # <<<<<<<<<<<<<< + * + * # Python 2 and 3 differ in str and unicode handling + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 55, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_INCREF(__pyx_v_self->filename); + __Pyx_GIVEREF(__pyx_v_self->filename); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_self->filename)) __PYX_ERR(0, 55, __pyx_L1_error); + __pyx_t_2 = __Pyx_PyString_Format(__pyx_kp_s_Blast_s, __pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 55, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* 
"jcvi/formats/cblast.pyx":54 + * fclose(self.fh) + * + * def __repr__(self): # <<<<<<<<<<<<<< + * return "Blast('%s')" % (self.filename, ) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "(tree fragment)":1 + * def __reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__ = {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = 
__Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + if (unlikely(__pyx_nargs > 0)) { + __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} + if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__reduce_cython__", 1); + + /* "(tree fragment)":2 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< + * def __setstate_cython__(self, __pyx_state): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); + __PYX_ERR(1, 2, __pyx_L1_error) + + /* "(tree fragment)":1 + * def __reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no 
default __reduce__ due to non-trivial __cinit__" + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__ = {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[1] = {0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = 
__Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 1)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + } + __pyx_v___pyx_state = values[0]; + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self), __pyx_v___pyx_state); + + /* function exit code */ + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { + PyObject *__pyx_r = NULL; + 
__Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__setstate_cython__", 1); + + /* "(tree fragment)":4 + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< + */ + __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); + __PYX_ERR(1, 4, __pyx_L1_error) + + /* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":59 + * # Python 2 and 3 differ in str and unicode handling + * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython + * cdef bytes c_str(str s): # <<<<<<<<<<<<<< + * return s.encode("UTF-8") + * + */ + +static PyObject *__pyx_f_4jcvi_7formats_6cblast_c_str(PyObject *__pyx_v_s) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("c_str", 1); + + /* "jcvi/formats/cblast.pyx":60 + * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython + * cdef bytes c_str(str s): + * return s.encode("UTF-8") # <<<<<<<<<<<<<< + * + * cdef str py_str(bytes s): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PyString_Type_encode, __pyx_v_s, __pyx_kp_s_UTF_8); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(0, 60, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(PyBytes_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("bytes", __pyx_t_1))) __PYX_ERR(0, 60, __pyx_L1_error) + __pyx_r = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":59 + * # Python 2 and 3 differ in str and unicode handling + * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython + * cdef bytes c_str(str s): # <<<<<<<<<<<<<< + * return s.encode("UTF-8") + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.c_str", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":62 + * return s.encode("UTF-8") + * + * cdef str py_str(bytes s): # <<<<<<<<<<<<<< + * return s.decode("UTF-8", "replace") + * + */ + +static PyObject *__pyx_f_4jcvi_7formats_6cblast_py_str(PyObject *__pyx_v_s) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("py_str", 1); + + /* "jcvi/formats/cblast.pyx":63 + * + * cdef str py_str(bytes s): + * return s.decode("UTF-8", "replace") # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + if (unlikely(__pyx_v_s == Py_None)) { + PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "decode"); + __PYX_ERR(0, 63, __pyx_L1_error) + } + __pyx_t_1 = __Pyx_decode_bytes(__pyx_v_s, 0, PY_SSIZE_T_MAX, NULL, ((char const *)"replace"), PyUnicode_DecodeUTF8); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 63, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(PyString_CheckExact(__pyx_t_1)) || __Pyx_RaiseUnexpectedTypeError("str", __pyx_t_1))) __PYX_ERR(0, 63, __pyx_L1_error) + __pyx_r = 
((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":62 + * return s.encode("UTF-8") + * + * cdef str py_str(bytes s): # <<<<<<<<<<<<<< + * return s.decode("UTF-8", "replace") + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.py_str", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":95 + * + * property query: + * def __get__(self): # <<<<<<<<<<<<<< + * return py_str(self._query) + * def __set__(self, val: str): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + + /* "jcvi/formats/cblast.pyx":96 + * property query: + * def __get__(self): + * return py_str(self._query) # <<<<<<<<<<<<<< + * def __set__(self, val: str): + * strcpy(self._query, c_str(val)) + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__Pyx_PyObject_FromString(__pyx_v_self->_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 96, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 96, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":95 + * + * property query: + * def __get__(self): # <<<<<<<<<<<<<< + * return py_str(self._query) + * def __set__(self, val: str): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.query.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":97 + * def __get__(self): + * return py_str(self._query) + * def __set__(self, val: str): # <<<<<<<<<<<<<< + * strcpy(self._query, c_str(val)) + * + */ + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_val), (&PyString_Type), 0, "val", 1))) __PYX_ERR(0, 97, __pyx_L1_error) + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject*)__pyx_v_val)); + + /* function exit code */ + goto __pyx_L0; + 
__pyx_L1_error:; + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + char const *__pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__set__", 1); + + /* "jcvi/formats/cblast.pyx":98 + * return py_str(self._query) + * def __set__(self, val: str): + * strcpy(self._query, c_str(val)) # <<<<<<<<<<<<<< + * + * property subject: + */ + __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(__pyx_v_val); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 98, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (unlikely(__pyx_t_1 == Py_None)) { + PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); + __PYX_ERR(0, 98, __pyx_L1_error) + } + __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_t_1); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 98, __pyx_L1_error) + (void)(strcpy(__pyx_v_self->_query, __pyx_t_2)); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":97 + * def __get__(self): + * return py_str(self._query) + * def __set__(self, val: str): # <<<<<<<<<<<<<< + * strcpy(self._query, c_str(val)) + * + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.query.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":101 + * + * property subject: + * def __get__(self): # <<<<<<<<<<<<<< + * return py_str(self._subject) + * def __set__(self, val: str): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(PyObject *__pyx_v_self); /*proto*/ 
+static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + + /* "jcvi/formats/cblast.pyx":102 + * property subject: + * def __get__(self): + * return py_str(self._subject) # <<<<<<<<<<<<<< + * def __set__(self, val: str): + * strcpy(self._subject, c_str(val)) + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":101 + * + * property subject: + * def __get__(self): # <<<<<<<<<<<<<< + * return py_str(self._subject) + * def __set__(self, val: str): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.subject.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + 
__pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":103 + * def __get__(self): + * return py_str(self._subject) + * def __set__(self, val: str): # <<<<<<<<<<<<<< + * strcpy(self._subject, c_str(val)) + * + */ + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_val), (&PyString_Type), 0, "val", 1))) __PYX_ERR(0, 103, __pyx_L1_error) + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject*)__pyx_v_val)); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + char const *__pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__set__", 1); + + /* "jcvi/formats/cblast.pyx":104 + * return py_str(self._subject) + * def __set__(self, val: str): + * strcpy(self._subject, c_str(val)) # <<<<<<<<<<<<<< + * + * def __init__(self, s): + */ + __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(__pyx_v_val); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(0, 104, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (unlikely(__pyx_t_1 == Py_None)) { + PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); + __PYX_ERR(0, 104, __pyx_L1_error) + } + __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_t_1); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 104, __pyx_L1_error) + (void)(strcpy(__pyx_v_self->_subject, __pyx_t_2)); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":103 + * def __get__(self): + * return py_str(self._subject) + * def __set__(self, val: str): # <<<<<<<<<<<<<< + * strcpy(self._subject, c_str(val)) + * + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.subject.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":106 + * strcpy(self._subject, c_str(val)) + * + * def __init__(self, s): # <<<<<<<<<<<<<< + * sline = c_str(s) + * sscanf(sline, blast_format, self._query, self._subject, + */ + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_s = 0; + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[1] = {0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__init__ (wrapper)", 0); + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return -1; + #endif + __pyx_kwvalues = 
__Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_s,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_s)) != 0)) { + (void)__Pyx_Arg_NewRef_VARARGS(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 106, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__init__") < 0)) __PYX_ERR(0, 106, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 1)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); + } + __pyx_v_s = values[0]; + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 106, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return -1; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), __pyx_v_s); + + /* function exit code */ + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + 
__Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_s) { + PyObject *__pyx_v_sline = NULL; + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + char const *__pyx_t_2; + int __pyx_t_3; + int __pyx_t_4; + int __pyx_t_5; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__init__", 1); + + /* "jcvi/formats/cblast.pyx":107 + * + * def __init__(self, s): + * sline = c_str(s) # <<<<<<<<<<<<<< + * sscanf(sline, blast_format, self._query, self._subject, + * &self.pctid, &self.hitlen, &self.nmismatch, &self.ngaps, + */ + if (!(likely(PyString_CheckExact(__pyx_v_s))||((__pyx_v_s) == Py_None) || __Pyx_RaiseUnexpectedTypeError("str", __pyx_v_s))) __PYX_ERR(0, 107, __pyx_L1_error) + __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(((PyObject*)__pyx_v_s)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 107, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_sline = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":108 + * def __init__(self, s): + * sline = c_str(s) + * sscanf(sline, blast_format, self._query, self._subject, # <<<<<<<<<<<<<< + * &self.pctid, &self.hitlen, &self.nmismatch, &self.ngaps, + * &self.qstart, &self.qstop, + */ + if (unlikely(__pyx_v_sline == Py_None)) { + PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); + __PYX_ERR(0, 108, __pyx_L1_error) + } + __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_v_sline); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 108, __pyx_L1_error) + + /* "jcvi/formats/cblast.pyx":112 + * &self.qstart, &self.qstop, + * &self.sstart, &self.sstop, + * &self.evalue, &self.score) # <<<<<<<<<<<<<< + * + * self.orientation = '+' + */ + (void)(sscanf(__pyx_t_2, __pyx_v_4jcvi_7formats_6cblast_blast_format, 
__pyx_v_self->_query, __pyx_v_self->_subject, (&__pyx_v_self->pctid), (&__pyx_v_self->hitlen), (&__pyx_v_self->nmismatch), (&__pyx_v_self->ngaps), (&__pyx_v_self->qstart), (&__pyx_v_self->qstop), (&__pyx_v_self->sstart), (&__pyx_v_self->sstop), (&__pyx_v_self->evalue), (&__pyx_v_self->score))); + + /* "jcvi/formats/cblast.pyx":114 + * &self.evalue, &self.score) + * + * self.orientation = '+' # <<<<<<<<<<<<<< + * if self.qstart > self.qstop: + * self.qstart, self.qstop = self.qstop, self.qstart + */ + __pyx_v_self->orientation = '+'; + + /* "jcvi/formats/cblast.pyx":115 + * + * self.orientation = '+' + * if self.qstart > self.qstop: # <<<<<<<<<<<<<< + * self.qstart, self.qstop = self.qstop, self.qstart + * self.orientation = '-' + */ + __pyx_t_3 = (__pyx_v_self->qstart > __pyx_v_self->qstop); + if (__pyx_t_3) { + + /* "jcvi/formats/cblast.pyx":116 + * self.orientation = '+' + * if self.qstart > self.qstop: + * self.qstart, self.qstop = self.qstop, self.qstart # <<<<<<<<<<<<<< + * self.orientation = '-' + * if self.sstart > self.sstop: + */ + __pyx_t_4 = __pyx_v_self->qstop; + __pyx_t_5 = __pyx_v_self->qstart; + __pyx_v_self->qstart = __pyx_t_4; + __pyx_v_self->qstop = __pyx_t_5; + + /* "jcvi/formats/cblast.pyx":117 + * if self.qstart > self.qstop: + * self.qstart, self.qstop = self.qstop, self.qstart + * self.orientation = '-' # <<<<<<<<<<<<<< + * if self.sstart > self.sstop: + * self.sstart, self.sstop = self.sstop, self.sstart + */ + __pyx_v_self->orientation = '-'; + + /* "jcvi/formats/cblast.pyx":115 + * + * self.orientation = '+' + * if self.qstart > self.qstop: # <<<<<<<<<<<<<< + * self.qstart, self.qstop = self.qstop, self.qstart + * self.orientation = '-' + */ + } + + /* "jcvi/formats/cblast.pyx":118 + * self.qstart, self.qstop = self.qstop, self.qstart + * self.orientation = '-' + * if self.sstart > self.sstop: # <<<<<<<<<<<<<< + * self.sstart, self.sstop = self.sstop, self.sstart + * self.orientation = '-' + */ + __pyx_t_3 = (__pyx_v_self->sstart > 
__pyx_v_self->sstop); + if (__pyx_t_3) { + + /* "jcvi/formats/cblast.pyx":119 + * self.orientation = '-' + * if self.sstart > self.sstop: + * self.sstart, self.sstop = self.sstop, self.sstart # <<<<<<<<<<<<<< + * self.orientation = '-' + * + */ + __pyx_t_5 = __pyx_v_self->sstop; + __pyx_t_4 = __pyx_v_self->sstart; + __pyx_v_self->sstart = __pyx_t_5; + __pyx_v_self->sstop = __pyx_t_4; + + /* "jcvi/formats/cblast.pyx":120 + * if self.sstart > self.sstop: + * self.sstart, self.sstop = self.sstop, self.sstart + * self.orientation = '-' # <<<<<<<<<<<<<< + * + * def __richcmp__(BlastLine self, BlastLine other, size_t op): + */ + __pyx_v_self->orientation = '-'; + + /* "jcvi/formats/cblast.pyx":118 + * self.qstart, self.qstop = self.qstop, self.qstart + * self.orientation = '-' + * if self.sstart > self.sstop: # <<<<<<<<<<<<<< + * self.sstart, self.sstop = self.sstop, self.sstart + * self.orientation = '-' + */ + } + + /* "jcvi/formats/cblast.pyx":106 + * strcpy(self._subject, c_str(val)) + * + * def __init__(self, s): # <<<<<<<<<<<<<< + * sline = c_str(s) + * sscanf(sline, blast_format, self._query, self._subject, + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_sline); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":122 + * self.orientation = '-' + * + * def __richcmp__(BlastLine self, BlastLine other, size_t op): # <<<<<<<<<<<<<< + * if op == 2: # == + * if self.query != other.query and self.qstart != other.qstart: + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__(PyObject *__pyx_v_self, PyObject *__pyx_v_other, int __pyx_arg_op); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_other, int __pyx_arg_op) { + size_t __pyx_v_op; + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__richcmp__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_v_op = __pyx_arg_op; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_other), __pyx_ptype_4jcvi_7formats_6cblast_BlastLine, 1, "other", 0))) __PYX_ERR(0, 122, __pyx_L1_error) + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_other), ((size_t)__pyx_v_op)); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_other, size_t __pyx_v_op) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + int __pyx_t_5; + PyObject *__pyx_t_6 = NULL; + unsigned int __pyx_t_7; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__richcmp__", 1); + + /* "jcvi/formats/cblast.pyx":123 + * + * def __richcmp__(BlastLine self, BlastLine other, size_t op): + * if op == 2: # == # <<<<<<<<<<<<<< + * if self.query != other.query and self.qstart != other.qstart: + * return False + */ + switch (__pyx_v_op) { + case 2: + + /* "jcvi/formats/cblast.pyx":124 + * def __richcmp__(BlastLine self, BlastLine other, size_t op): + * if op == 2: # == + * if self.query != other.query and self.qstart != other.qstart: # 
<<<<<<<<<<<<<< + * return False + * return self.subject == other.subject and \ + */ + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 124, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_other), __pyx_n_s_query); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 124, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_t_2, __pyx_t_3, Py_NE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 124, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 124, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_5) { + } else { + __pyx_t_1 = __pyx_t_5; + goto __pyx_L4_bool_binop_done; + } + __pyx_t_5 = (__pyx_v_self->qstart != __pyx_v_other->qstart); + __pyx_t_1 = __pyx_t_5; + __pyx_L4_bool_binop_done:; + if (__pyx_t_1) { + + /* "jcvi/formats/cblast.pyx":125 + * if op == 2: # == + * if self.query != other.query and self.qstart != other.qstart: + * return False # <<<<<<<<<<<<<< + * return self.subject == other.subject and \ + * self.qstop == other.qstop and \ + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(Py_False); + __pyx_r = Py_False; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":124 + * def __richcmp__(BlastLine self, BlastLine other, size_t op): + * if op == 2: # == + * if self.query != other.query and self.qstart != other.qstart: # <<<<<<<<<<<<<< + * return False + * return self.subject == other.subject and \ + */ + } + + /* "jcvi/formats/cblast.pyx":126 + * if self.query != other.query and self.qstart != other.qstart: + * return False + * return self.subject == other.subject and \ # <<<<<<<<<<<<<< + * self.qstop == other.qstop and \ + * self.sstop == other.sstop and \ + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_3 = 
__Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 126, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_other), __pyx_n_s_subject); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 126, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_6 = PyObject_RichCompare(__pyx_t_3, __pyx_t_2, Py_EQ); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 126, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 126, __pyx_L1_error) + if (__pyx_t_1) { + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + } else { + __Pyx_INCREF(__pyx_t_6); + __pyx_t_4 = __pyx_t_6; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + goto __pyx_L6_bool_binop_done; + } + + /* "jcvi/formats/cblast.pyx":127 + * return False + * return self.subject == other.subject and \ + * self.qstop == other.qstop and \ # <<<<<<<<<<<<<< + * self.sstop == other.sstop and \ + * self.evalue == other.evalue and \ + */ + __pyx_t_1 = (__pyx_v_self->qstop == __pyx_v_other->qstop); + if (__pyx_t_1) { + } else { + __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 127, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_4 = __pyx_t_6; + __pyx_t_6 = 0; + goto __pyx_L6_bool_binop_done; + } + + /* "jcvi/formats/cblast.pyx":128 + * return self.subject == other.subject and \ + * self.qstop == other.qstop and \ + * self.sstop == other.sstop and \ # <<<<<<<<<<<<<< + * self.evalue == other.evalue and \ + * self.hitlen == other.hitlen + */ + __pyx_t_1 = (__pyx_v_self->sstop == __pyx_v_other->sstop); + if (__pyx_t_1) { + } else { + __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 128, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_4 = __pyx_t_6; + __pyx_t_6 = 0; + goto __pyx_L6_bool_binop_done; + } + + /* 
"jcvi/formats/cblast.pyx":129 + * self.qstop == other.qstop and \ + * self.sstop == other.sstop and \ + * self.evalue == other.evalue and \ # <<<<<<<<<<<<<< + * self.hitlen == other.hitlen + * + */ + __pyx_t_1 = (__pyx_v_self->evalue == __pyx_v_other->evalue); + if (__pyx_t_1) { + } else { + __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 129, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_4 = __pyx_t_6; + __pyx_t_6 = 0; + goto __pyx_L6_bool_binop_done; + } + + /* "jcvi/formats/cblast.pyx":130 + * self.sstop == other.sstop and \ + * self.evalue == other.evalue and \ + * self.hitlen == other.hitlen # <<<<<<<<<<<<<< + * + * elif op == 3: # != + */ + __pyx_t_1 = (__pyx_v_self->hitlen == __pyx_v_other->hitlen); + __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 130, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_4 = __pyx_t_6; + __pyx_t_6 = 0; + __pyx_L6_bool_binop_done:; + __pyx_r = __pyx_t_4; + __pyx_t_4 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":123 + * + * def __richcmp__(BlastLine self, BlastLine other, size_t op): + * if op == 2: # == # <<<<<<<<<<<<<< + * if self.query != other.query and self.qstart != other.qstart: + * return False + */ + break; + case 3: + + /* "jcvi/formats/cblast.pyx":133 + * + * elif op == 3: # != + * return not self.__richcmp__(other, 2) # <<<<<<<<<<<<<< + * else: + * raise Exception("that comparison not implemented") + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_richcmp); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 133, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_2 = NULL; + __pyx_t_7 = 0; + #if CYTHON_UNPACK_METHODS + if (likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_6); + if (likely(__pyx_t_2)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); + __Pyx_INCREF(__pyx_t_2); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_6, function); + 
__pyx_t_7 = 1; + } + } + #endif + { + PyObject *__pyx_callargs[3] = {__pyx_t_2, ((PyObject *)__pyx_v_other), __pyx_int_2}; + __pyx_t_4 = __Pyx_PyObject_FastCall(__pyx_t_6, __pyx_callargs+1-__pyx_t_7, 2+__pyx_t_7); + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 133, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + } + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 133, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_PyBool_FromLong((!__pyx_t_1)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 133, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_r = __pyx_t_4; + __pyx_t_4 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":132 + * self.hitlen == other.hitlen + * + * elif op == 3: # != # <<<<<<<<<<<<<< + * return not self.__richcmp__(other, 2) + * else: + */ + break; + default: + + /* "jcvi/formats/cblast.pyx":135 + * return not self.__richcmp__(other, 2) + * else: + * raise Exception("that comparison not implemented") # <<<<<<<<<<<<<< + * + * def __hash__(self): + */ + __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])), __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 135, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_Raise(__pyx_t_4, 0, 0, 0); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __PYX_ERR(0, 135, __pyx_L1_error) + break; + } + + /* "jcvi/formats/cblast.pyx":122 + * self.orientation = '-' + * + * def __richcmp__(BlastLine self, BlastLine other, size_t op): # <<<<<<<<<<<<<< + * if op == 2: # == + * if self.query != other.query and self.qstart != other.qstart: + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__richcmp__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + 
__Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":137 + * raise Exception("that comparison not implemented") + * + * def __hash__(self): # <<<<<<<<<<<<<< + * return id(self) + * + */ + +/* Python wrapper */ +static Py_hash_t __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__(PyObject *__pyx_v_self); /*proto*/ +static Py_hash_t __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + Py_hash_t __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__hash__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static Py_hash_t __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + Py_hash_t __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + Py_hash_t __pyx_t_2; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__hash__", 1); + + /* "jcvi/formats/cblast.pyx":138 + * + * def __hash__(self): + * return id(self) # <<<<<<<<<<<<<< + * + * def __repr__(self): + */ + __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_builtin_id, ((PyObject *)__pyx_v_self)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 138, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_AsHash_t(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_hash_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 138, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = __pyx_t_2; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":137 + * raise Exception("that comparison not implemented") + * + * def __hash__(self): # <<<<<<<<<<<<<< + * return id(self) + * + */ + + /* 
function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__hash__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + if (unlikely(__pyx_r == -1) && !PyErr_Occurred()) __pyx_r = -2; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":140 + * return id(self) + * + * def __repr__(self): # <<<<<<<<<<<<<< + * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ + * (self.query, self.subject, self.evalue, self.score) + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__repr__", 1); + + /* "jcvi/formats/cblast.pyx":141 + * + * def __repr__(self): + * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ # <<<<<<<<<<<<<< + * (self.query, self.subject, self.evalue, self.score) + * + */ + __Pyx_XDECREF(__pyx_r); + + /* "jcvi/formats/cblast.pyx":142 + * def __repr__(self): 
+ * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ + * (self.query, self.subject, self.evalue, self.score) # <<<<<<<<<<<<<< + * + * def __str__(self): + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = PyTuple_New(4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 142, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_1)) __PYX_ERR(0, 142, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_2)) __PYX_ERR(0, 142, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_3); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_3)) __PYX_ERR(0, 142, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_4); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_4)) __PYX_ERR(0, 142, __pyx_L1_error); + __pyx_t_1 = 0; + __pyx_t_2 = 0; + __pyx_t_3 = 0; + __pyx_t_4 = 0; + + /* "jcvi/formats/cblast.pyx":141 + * + * def __repr__(self): + * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ # <<<<<<<<<<<<<< + * (self.query, self.subject, self.evalue, self.score) + * + */ + __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1, __pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_r = __pyx_t_4; + __pyx_t_4 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":140 + * return 
id(self) + * + * def __repr__(self): # <<<<<<<<<<<<<< + * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ + * (self.query, self.subject, self.evalue, self.score) + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":144 + * (self.query, self.subject, self.evalue, self.score) + * + * def __str__(self): # <<<<<<<<<<<<<< + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * if self.orientation == '-': + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__str__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_v_args = NULL; + char __pyx_v_result[0x200]; + PyObject *__pyx_v_attr = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + Py_ssize_t __pyx_t_4; + PyObject *(*__pyx_t_5)(PyObject *); + int __pyx_t_6; + int __pyx_lineno = 0; + const char 
*__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__str__", 1); + + /* "jcvi/formats/cblast.pyx":145 + * + * def __str__(self): + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + */ + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_n_s_slots); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetSlice(__pyx_t_2, 0, 12, NULL, NULL, &__pyx_slice__4, 0, 1, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + if (likely(PyList_CheckExact(__pyx_t_3)) || PyTuple_CheckExact(__pyx_t_3)) { + __pyx_t_2 = __pyx_t_3; __Pyx_INCREF(__pyx_t_2); + __pyx_t_4 = 0; + __pyx_t_5 = NULL; + } else { + __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_5 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 145, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + for (;;) { + if (likely(!__pyx_t_5)) { + if (likely(PyList_CheckExact(__pyx_t_2))) { + { + Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 145, __pyx_L1_error) + #endif + if (__pyx_t_4 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 145, __pyx_L1_error) + #else + __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, 
__pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } else { + { + Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 145, __pyx_L1_error) + #endif + if (__pyx_t_4 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 145, __pyx_L1_error) + #else + __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } + } else { + __pyx_t_3 = __pyx_t_5(__pyx_t_2); + if (unlikely(!__pyx_t_3)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 145, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_3); + } + __Pyx_XDECREF_SET(__pyx_v_attr, __pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_GetAttr(((PyObject *)__pyx_v_self), __pyx_v_attr); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_3))) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_v_args = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":146 + * def __str__(self): + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * if self.orientation == '-': # <<<<<<<<<<<<<< + * args[8], args[9] = args[9], args[8] + * + */ + __pyx_t_6 = (__pyx_v_self->orientation == '-'); + if (__pyx_t_6) { + + /* "jcvi/formats/cblast.pyx":147 + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] # <<<<<<<<<<<<<< + * + * cdef char result[512] + */ + 
__pyx_t_1 = PyList_GET_ITEM(__pyx_v_args, 9); + __Pyx_INCREF(__pyx_t_1); + __pyx_t_2 = PyList_GET_ITEM(__pyx_v_args, 8); + __Pyx_INCREF(__pyx_t_2); + if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 8, __pyx_t_1, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 147, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 9, __pyx_t_2, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 147, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "jcvi/formats/cblast.pyx":146 + * def __str__(self): + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * if self.orientation == '-': # <<<<<<<<<<<<<< + * args[8], args[9] = args[9], args[8] + * + */ + } + + /* "jcvi/formats/cblast.pyx":150 + * + * cdef char result[512] + * sprintf(result, blast_output, self._query, self._subject, # <<<<<<<<<<<<<< + * self.pctid, self.hitlen, self.nmismatch, self.ngaps, + * self.qstart, self.qstop, + */ + (void)(sprintf(__pyx_v_result, __pyx_v_4jcvi_7formats_6cblast_blast_output, __pyx_v_self->_query, __pyx_v_self->_subject, __pyx_v_self->pctid, __pyx_v_self->hitlen, __pyx_v_self->nmismatch, __pyx_v_self->ngaps, __pyx_v_self->qstart, __pyx_v_self->qstop, __pyx_v_self->sstart, __pyx_v_self->sstop, __pyx_v_self->evalue, __pyx_v_self->score)); + + /* "jcvi/formats/cblast.pyx":156 + * self.evalue, self.score) + * + * return py_str(result) # <<<<<<<<<<<<<< + * + * @property + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_2 = __Pyx_PyObject_FromString(__pyx_v_result); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 156, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_2)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 156, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":144 + * (self.query, self.subject, self.evalue, self.score) + * + 
* def __str__(self): # <<<<<<<<<<<<<< + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * if self.orientation == '-': + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__str__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_args); + __Pyx_XDECREF(__pyx_v_attr); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":158 + * return py_str(result) + * + * @property # <<<<<<<<<<<<<< + * def has_score(self): + * return hasattr(self, "score") + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + + /* "jcvi/formats/cblast.pyx":160 + * @property + * def has_score(self): + * return hasattr(self, "score") # <<<<<<<<<<<<<< + * + * @property + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__Pyx_HasAttr(((PyObject *)__pyx_v_self), __pyx_n_s_score); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(0, 160, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 160, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":158 + * return py_str(result) + * + * @property # <<<<<<<<<<<<<< + * def has_score(self): + * return hasattr(self, "score") + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.has_score.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":162 + * return hasattr(self, "score") + * + * @property # <<<<<<<<<<<<<< + * def swapped(self): + * """ + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} +static PyObject *__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value); /* proto */ + +/* "jcvi/formats/cblast.pyx":172 + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + * b = "\t".join(str(x) for x in args) # 
<<<<<<<<<<<<<< + * return BlastLine(b) + * + */ + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_genexpr_arg_0) { + struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_cur_scope; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("genexpr", 0); + __pyx_cur_scope = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, __pyx_empty_tuple, NULL); + if (unlikely(!__pyx_cur_scope)) { + __pyx_cur_scope = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)Py_None); + __Pyx_INCREF(Py_None); + __PYX_ERR(0, 172, __pyx_L1_error) + } else { + __Pyx_GOTREF((PyObject *)__pyx_cur_scope); + } + __pyx_cur_scope->__pyx_genexpr_arg_0 = __pyx_genexpr_arg_0; + __Pyx_INCREF(__pyx_cur_scope->__pyx_genexpr_arg_0); + __Pyx_GIVEREF(__pyx_cur_scope->__pyx_genexpr_arg_0); + { + __pyx_CoroutineObject *gen = __Pyx_Generator_New((__pyx_coroutine_body_t) __pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator, NULL, (PyObject *) __pyx_cur_scope, __pyx_n_s_genexpr, __pyx_n_s_BlastLine___get___locals_genexpr, __pyx_n_s_jcvi_formats_cblast); if (unlikely(!gen)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_DECREF(__pyx_cur_scope); + __Pyx_RefNannyFinishContext(); + return (PyObject *) gen; + } + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.swapped.__get__.genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __Pyx_DECREF((PyObject *)__pyx_cur_scope); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject 
*__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value) /* generator body */ +{ + struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_cur_scope = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)__pyx_generator->closure); + PyObject *__pyx_r = NULL; + PyObject *__pyx_t_1 = NULL; + Py_ssize_t __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("genexpr", 0); + switch (__pyx_generator->resume_label) { + case 0: goto __pyx_L3_first_run; + case 1: goto __pyx_L6_resume_from_yield; + default: /* CPython raises the right error here */ + __Pyx_RefNannyFinishContext(); + return NULL; + } + __pyx_L3_first_run:; + if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 172, __pyx_L1_error) + if (unlikely(!__pyx_cur_scope->__pyx_genexpr_arg_0)) { __Pyx_RaiseUnboundLocalError(".0"); __PYX_ERR(0, 172, __pyx_L1_error) } + __pyx_t_1 = __pyx_cur_scope->__pyx_genexpr_arg_0; __Pyx_INCREF(__pyx_t_1); + __pyx_t_2 = 0; + for (;;) { + { + Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 172, __pyx_L1_error) + #endif + if (__pyx_t_2 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely((0 < 0))) __PYX_ERR(0, 172, __pyx_L1_error) + #else + __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_x); + __Pyx_XDECREF_SET(__pyx_cur_scope->__pyx_v_x, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = 
__Pyx_PyObject_Str(__pyx_cur_scope->__pyx_v_x); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_r = __pyx_t_3; + __pyx_t_3 = 0; + __Pyx_XGIVEREF(__pyx_t_1); + __pyx_cur_scope->__pyx_t_0 = __pyx_t_1; + __pyx_cur_scope->__pyx_t_1 = __pyx_t_2; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + __Pyx_Coroutine_ResetAndClearException(__pyx_generator); + /* return from generator, yielding value */ + __pyx_generator->resume_label = 1; + return __pyx_r; + __pyx_L6_resume_from_yield:; + __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; + __pyx_cur_scope->__pyx_t_0 = 0; + __Pyx_XGOTREF(__pyx_t_1); + __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; + if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 172, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + CYTHON_MAYBE_UNUSED_VAR(__pyx_cur_scope); + + /* function exit code */ + PyErr_SetNone(PyExc_StopIteration); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_AddTraceback("genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_L0:; + __Pyx_XDECREF(__pyx_r); __pyx_r = 0; + #if !CYTHON_USE_EXC_INFO_STACK + __Pyx_Coroutine_ResetAndClearException(__pyx_generator); + #endif + __pyx_generator->resume_label = -1; + __Pyx_Coroutine_clear((PyObject*)__pyx_generator); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":162 + * return hasattr(self, "score") + * + * @property # <<<<<<<<<<<<<< + * def swapped(self): + * """ + */ + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_v_args = NULL; + PyObject *__pyx_v_b = NULL; + PyObject *__pyx_v_attr = NULL; + PyObject *__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator = 0; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; 
+ Py_ssize_t __pyx_t_4; + PyObject *(*__pyx_t_5)(PyObject *); + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + int __pyx_t_8; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + + /* "jcvi/formats/cblast.pyx":167 + * Swap query and subject. + * """ + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< + * args[0:2] = [self.subject, self.query] + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + */ + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_n_s_slots); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetSlice(__pyx_t_2, 0, 12, NULL, NULL, &__pyx_slice__4, 0, 1, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + if (likely(PyList_CheckExact(__pyx_t_3)) || PyTuple_CheckExact(__pyx_t_3)) { + __pyx_t_2 = __pyx_t_3; __Pyx_INCREF(__pyx_t_2); + __pyx_t_4 = 0; + __pyx_t_5 = NULL; + } else { + __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_5 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 167, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + for (;;) { + if (likely(!__pyx_t_5)) { + if (likely(PyList_CheckExact(__pyx_t_2))) { + { + Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 167, __pyx_L1_error) + #endif + if (__pyx_t_4 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); 
__Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 167, __pyx_L1_error) + #else + __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } else { + { + Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2); + #if !CYTHON_ASSUME_SAFE_MACROS + if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 167, __pyx_L1_error) + #endif + if (__pyx_t_4 >= __pyx_temp) break; + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 167, __pyx_L1_error) + #else + __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } + } else { + __pyx_t_3 = __pyx_t_5(__pyx_t_2); + if (unlikely(!__pyx_t_3)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 167, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_3); + } + __Pyx_XDECREF_SET(__pyx_v_attr, __pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_GetAttr(((PyObject *)__pyx_v_self), __pyx_v_attr); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_3))) __PYX_ERR(0, 167, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_v_args = ((PyObject*)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":168 + * """ + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * args[0:2] = [self.subject, self.query] # <<<<<<<<<<<<<< + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + * if self.orientation == '-': + */ + __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 168, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = PyList_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 168, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 0, __pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 1, __pyx_t_2)) __PYX_ERR(0, 168, __pyx_L1_error); + __pyx_t_1 = 0; + __pyx_t_2 = 0; + if (__Pyx_PyObject_SetSlice(__pyx_v_args, __pyx_t_3, 0, 2, NULL, NULL, NULL, 1, 1, 0) < 0) __PYX_ERR(0, 168, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "jcvi/formats/cblast.pyx":169 + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + * args[0:2] = [self.subject, self.query] + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] # <<<<<<<<<<<<<< + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + */ + __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_7 = PyList_New(4); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __Pyx_GIVEREF(__pyx_t_3); + if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 0, __pyx_t_3)) __PYX_ERR(0, 169, 
__pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 1, __pyx_t_2)) __PYX_ERR(0, 169, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 2, __pyx_t_1)) __PYX_ERR(0, 169, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 3, __pyx_t_6)) __PYX_ERR(0, 169, __pyx_L1_error); + __pyx_t_3 = 0; + __pyx_t_2 = 0; + __pyx_t_1 = 0; + __pyx_t_6 = 0; + if (__Pyx_PyObject_SetSlice(__pyx_v_args, __pyx_t_7, 6, 10, NULL, NULL, NULL, 1, 1, 0) < 0) __PYX_ERR(0, 169, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + + /* "jcvi/formats/cblast.pyx":170 + * args[0:2] = [self.subject, self.query] + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + * if self.orientation == '-': # <<<<<<<<<<<<<< + * args[8], args[9] = args[9], args[8] + * b = "\t".join(str(x) for x in args) + */ + __pyx_t_8 = (__pyx_v_self->orientation == '-'); + if (__pyx_t_8) { + + /* "jcvi/formats/cblast.pyx":171 + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] # <<<<<<<<<<<<<< + * b = "\t".join(str(x) for x in args) + * return BlastLine(b) + */ + __pyx_t_7 = PyList_GET_ITEM(__pyx_v_args, 9); + __Pyx_INCREF(__pyx_t_7); + __pyx_t_6 = PyList_GET_ITEM(__pyx_v_args, 8); + __Pyx_INCREF(__pyx_t_6); + if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 8, __pyx_t_7, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 171, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 9, __pyx_t_6, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 171, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + + /* "jcvi/formats/cblast.pyx":170 + * args[0:2] = [self.subject, self.query] + * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + * if self.orientation == '-': # <<<<<<<<<<<<<< + * args[8], args[9] = args[9], args[8] + * b = 
"\t".join(str(x) for x in args) + */ + } + + /* "jcvi/formats/cblast.pyx":172 + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + * b = "\t".join(str(x) for x in args) # <<<<<<<<<<<<<< + * return BlastLine(b) + * + */ + __pyx_t_6 = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(NULL, __pyx_v_args); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_7 = __Pyx_PyString_Join(__pyx_kp_s__5, __pyx_t_6); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 172, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_v_b = ((PyObject*)__pyx_t_7); + __pyx_t_7 = 0; + + /* "jcvi/formats/cblast.pyx":173 + * args[8], args[9] = args[9], args[8] + * b = "\t".join(str(x) for x in args) + * return BlastLine(b) # <<<<<<<<<<<<<< + * + * @property + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_7 = __Pyx_PyObject_CallOneArg(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_v_b); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 173, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __pyx_r = __pyx_t_7; + __pyx_t_7 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":162 + * return hasattr(self, "score") + * + * @property # <<<<<<<<<<<<<< + * def swapped(self): + * """ + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.swapped.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_args); + __Pyx_XDECREF(__pyx_v_b); + __Pyx_XDECREF(__pyx_v_attr); + __Pyx_XDECREF(__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":175 + * return BlastLine(b) + * + * @property # <<<<<<<<<<<<<< + * def bedline(self): + * 
cdef char result[512] + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + char __pyx_v_result[0x200]; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + + /* "jcvi/formats/cblast.pyx":178 + * def bedline(self): + * cdef char result[512] + * sprintf(result, bed_output, # <<<<<<<<<<<<<< + * self._subject, self.sstart - 1, self.sstop, + * self._query, self.qstart, self.qstop, + */ + (void)(sprintf(__pyx_v_result, __pyx_v_4jcvi_7formats_6cblast_bed_output, __pyx_v_self->_subject, (__pyx_v_self->sstart - 1), __pyx_v_self->sstop, __pyx_v_self->_query, __pyx_v_self->qstart, __pyx_v_self->qstop, __pyx_v_self->score, __pyx_v_self->orientation)); + + /* "jcvi/formats/cblast.pyx":183 + * self.score, self.orientation) + * + * return py_str(result) # <<<<<<<<<<<<<< + * + * def __reduce__(self): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_result); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 183, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = 
__pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 183, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":175 + * return BlastLine(b) + * + * @property # <<<<<<<<<<<<<< + * def bedline(self): + * cdef char result[512] + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.bedline.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":185 + * return py_str(result) + * + * def __reduce__(self): # <<<<<<<<<<<<<< + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_9BlastLine_11__reduce__ = {"__reduce__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__(PyObject *__pyx_v_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__reduce__ (wrapper)", 0); + 
#if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + if (unlikely(__pyx_nargs > 0)) { + __Pyx_RaiseArgtupleInvalid("__reduce__", 1, 0, 0, __pyx_nargs); return NULL;} + if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce__", 0))) return NULL; + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + PyObject *__pyx_t_9 = NULL; + PyObject *__pyx_t_10 = NULL; + PyObject *__pyx_t_11 = NULL; + PyObject *__pyx_t_12 = NULL; + PyObject *__pyx_t_13 = NULL; + PyObject *__pyx_t_14 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__reduce__", 1); + + /* "jcvi/formats/cblast.pyx":186 + * + * def __reduce__(self): + * return create_blast_line, ( # <<<<<<<<<<<<<< + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(__pyx_f_4jcvi_7formats_6cblast_create_blast_line); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 186, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + + /* "jcvi/formats/cblast.pyx":187 + * def __reduce__(self): + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, # <<<<<<<<<<<<<< + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, + * self.evalue, self.score) + */ + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyFloat_FromDouble(__pyx_v_self->pctid); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_self->hitlen); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_self->nmismatch); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + + /* "jcvi/formats/cblast.pyx":188 + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, # <<<<<<<<<<<<<< + * self.evalue, self.score) + * + */ + __pyx_t_7 = __Pyx_PyInt_From_int(__pyx_v_self->ngaps); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_9 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 188, 
__pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_11 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 188, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_11); + + /* "jcvi/formats/cblast.pyx":189 + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, + * self.evalue, self.score) # <<<<<<<<<<<<<< + * + * + */ + __pyx_t_12 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 189, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_12); + __pyx_t_13 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 189, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + + /* "jcvi/formats/cblast.pyx":187 + * def __reduce__(self): + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, # <<<<<<<<<<<<<< + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, + * self.evalue, self.score) + */ + __pyx_t_14 = PyTuple_New(12); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_t_2)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_3); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 1, __pyx_t_3)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_4); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 2, __pyx_t_4)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_5); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 3, __pyx_t_5)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 4, __pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_7); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 5, __pyx_t_7)) __PYX_ERR(0, 187, __pyx_L1_error); + 
__Pyx_GIVEREF(__pyx_t_8); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 6, __pyx_t_8)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_9); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 7, __pyx_t_9)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_10); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 8, __pyx_t_10)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_11); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 9, __pyx_t_11)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_12); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 10, __pyx_t_12)) __PYX_ERR(0, 187, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_13); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 11, __pyx_t_13)) __PYX_ERR(0, 187, __pyx_L1_error); + __pyx_t_2 = 0; + __pyx_t_3 = 0; + __pyx_t_4 = 0; + __pyx_t_5 = 0; + __pyx_t_6 = 0; + __pyx_t_7 = 0; + __pyx_t_8 = 0; + __pyx_t_9 = 0; + __pyx_t_10 = 0; + __pyx_t_11 = 0; + __pyx_t_12 = 0; + __pyx_t_13 = 0; + + /* "jcvi/formats/cblast.pyx":186 + * + * def __reduce__(self): + * return create_blast_line, ( # <<<<<<<<<<<<<< + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, + */ + __pyx_t_13 = PyTuple_New(2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 186, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1)) __PYX_ERR(0, 186, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_14); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_t_14)) __PYX_ERR(0, 186, __pyx_L1_error); + __pyx_t_1 = 0; + __pyx_t_14 = 0; + __pyx_r = __pyx_t_13; + __pyx_t_13 = 0; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":185 + * return py_str(result) + * + * def __reduce__(self): # <<<<<<<<<<<<<< + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + 
__Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_XDECREF(__pyx_t_9); + __Pyx_XDECREF(__pyx_t_10); + __Pyx_XDECREF(__pyx_t_11); + __Pyx_XDECREF(__pyx_t_12); + __Pyx_XDECREF(__pyx_t_13); + __Pyx_XDECREF(__pyx_t_14); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__reduce__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":85 + * + * cdef public: + * char _query[128] # <<<<<<<<<<<<<< + * char _subject[128] + * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 85, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto 
__pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._query.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + char __pyx_t_1[0x80]; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + if (unlikely((__Pyx_carray_from_py_char(__pyx_v_value, __pyx_t_1, 0x80) < 0))) __PYX_ERR(0, 85, __pyx_L1_error) + if (unlikely((0x80) != (0x80))) { + PyErr_Format(PyExc_ValueError, "Assignment to slice of wrong length, expected %" CYTHON_FORMAT_SSIZE_T "d, got %" CYTHON_FORMAT_SSIZE_T "d", (Py_ssize_t)(0x80), (Py_ssize_t)(0x80)); + __PYX_ERR(0, 85, __pyx_L1_error) + } + memcpy(&(__pyx_v_self->_query[0]), __pyx_t_1, sizeof(__pyx_v_self->_query[0]) * (0x80)); + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._query.__set__", __pyx_clineno, __pyx_lineno, 
__pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":86 + * cdef public: + * char _query[128] + * char _subject[128] # <<<<<<<<<<<<<< + * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop + * float pctid, score + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 86, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._subject.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(PyObject *__pyx_v_self, 
PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + char __pyx_t_1[0x80]; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + if (unlikely((__Pyx_carray_from_py_char(__pyx_v_value, __pyx_t_1, 0x80) < 0))) __PYX_ERR(0, 86, __pyx_L1_error) + if (unlikely((0x80) != (0x80))) { + PyErr_Format(PyExc_ValueError, "Assignment to slice of wrong length, expected %" CYTHON_FORMAT_SSIZE_T "d, got %" CYTHON_FORMAT_SSIZE_T "d", (Py_ssize_t)(0x80), (Py_ssize_t)(0x80)); + __PYX_ERR(0, 86, __pyx_L1_error) + } + memcpy(&(__pyx_v_self->_subject[0]), __pyx_t_1, sizeof(__pyx_v_self->_subject[0]) * (0x80)); + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._subject.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":87 + * char _query[128] + * char _subject[128] + * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop # <<<<<<<<<<<<<< + * float pctid, score + * double evalue + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject 
*__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->hitlen); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.hitlen.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = 
__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->hitlen = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.hitlen.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + 
__Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->nmismatch); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.nmismatch.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->nmismatch = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.nmismatch.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); 
+ __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->ngaps); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.ngaps.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + 
__Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->ngaps = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.ngaps.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + 
int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstart.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->qstart = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + 
__Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstart.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstop.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->qstop = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstop.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(struct 
__pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstart.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + 
__pyx_v_self->sstart = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstart.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstop.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) + __pyx_v_self->sstop = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstop.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":88 + * char _subject[128] + * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop + * float pctid, score # <<<<<<<<<<<<<< + * double evalue + * object qseqid, sseqid + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + 
__pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->pctid); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 88, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.pctid.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, 
PyObject *__pyx_v_value) { + int __pyx_r; + float __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __pyx_PyFloat_AsFloat(__pyx_v_value); if (unlikely((__pyx_t_1 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 88, __pyx_L1_error) + __pyx_v_self->pctid = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.pctid.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 88, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.score.__get__", 
__pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + float __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __pyx_PyFloat_AsFloat(__pyx_v_value); if (unlikely((__pyx_t_1 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 88, __pyx_L1_error) + __pyx_v_self->score = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.score.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":89 + * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop + * float pctid, score + * double evalue # <<<<<<<<<<<<<< + * object qseqid, sseqid + * int qi, si + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject 
*__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 89, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.evalue.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = 
__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + double __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __pyx_PyFloat_AsDouble(__pyx_v_value); if (unlikely((__pyx_t_1 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 89, __pyx_L1_error) + __pyx_v_self->evalue = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.evalue.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":90 + * float pctid, score + * double evalue + * object qseqid, sseqid # <<<<<<<<<<<<<< + * int qi, si + * char orientation + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + 
__Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_self->qseqid); + __pyx_r = __pyx_v_self->qseqid; + goto __pyx_L0; + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__", 1); + __Pyx_INCREF(__pyx_v_value); + __Pyx_GIVEREF(__pyx_v_value); + __Pyx_GOTREF(__pyx_v_self->qseqid); + __Pyx_DECREF(__pyx_v_self->qseqid); + __pyx_v_self->qseqid = __pyx_v_value; + + /* function exit code */ + __pyx_r = 0; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(PyObject *__pyx_v_self); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__del__ (wrapper)", 0); + __pyx_kwvalues = 
__Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__del__", 1); + __Pyx_INCREF(Py_None); + __Pyx_GIVEREF(Py_None); + __Pyx_GOTREF(__pyx_v_self->qseqid); + __Pyx_DECREF(__pyx_v_self->qseqid); + __pyx_v_self->qseqid = Py_None; + + /* function exit code */ + __pyx_r = 0; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_self->sseqid); + __pyx_r = __pyx_v_self->sseqid; + goto __pyx_L0; + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__", 1); + __Pyx_INCREF(__pyx_v_value); + __Pyx_GIVEREF(__pyx_v_value); + __Pyx_GOTREF(__pyx_v_self->sseqid); + __Pyx_DECREF(__pyx_v_self->sseqid); + __pyx_v_self->sseqid = __pyx_v_value; + + /* function exit code */ + __pyx_r = 0; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(PyObject *__pyx_v_self); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__del__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(struct 
__pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__del__", 1); + __Pyx_INCREF(Py_None); + __Pyx_GIVEREF(Py_None); + __Pyx_GOTREF(__pyx_v_self->sseqid); + __Pyx_DECREF(__pyx_v_self->sseqid); + __pyx_v_self->sseqid = Py_None; + + /* function exit code */ + __pyx_r = 0; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":91 + * double evalue + * object qseqid, sseqid + * int qi, si # <<<<<<<<<<<<<< + * char orientation + * + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 91, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qi.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + 
__pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 91, __pyx_L1_error) + __pyx_v_self->qi = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qi.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + 
__pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->si); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 91, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.si.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + 
int __pyx_r; + int __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 91, __pyx_L1_error) + __pyx_v_self->si = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.si.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":92 + * object qseqid, sseqid + * int qi, si + * char orientation # <<<<<<<<<<<<<< + * + * property query: + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(PyObject *__pyx_v_self) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 1); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_PyInt_From_char(__pyx_v_self->orientation); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 92, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* function 
exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.orientation.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ +static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); + __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); + __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { + int __pyx_r; + char __pyx_t_1; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __pyx_t_1 = __Pyx_PyInt_As_char(__pyx_v_value); if (unlikely((__pyx_t_1 == (char)-1) && PyErr_Occurred())) __PYX_ERR(0, 92, __pyx_L1_error) + __pyx_v_self->orientation = __pyx_t_1; + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.orientation.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + return __pyx_r; +} + +/* "jcvi/formats/cblast.pyx":192 + * + * + * cdef BlastLine create_blast_line(char *query, char *subject, float pctid, int hitlen, # <<<<<<<<<<<<<< + * int nmismatch, int ngaps, int qstart, int qstop, + * int 
sstart, int sstop, float evalue, float score): + */ + +static struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_f_4jcvi_7formats_6cblast_create_blast_line(char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score) { + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_b = 0; + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("create_blast_line", 1); + + /* "jcvi/formats/cblast.pyx":197 + * """ Factory method. + * """ + * cdef BlastLine b = BlastLine.__new__(BlastLine) # <<<<<<<<<<<<<< + * b.query = query + * b.subject = subject + */ + __pyx_t_1 = ((PyObject *)__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(((PyTypeObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_empty_tuple, NULL)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error) + __Pyx_GOTREF((PyObject *)__pyx_t_1); + __pyx_v_b = ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":198 + * """ + * cdef BlastLine b = BlastLine.__new__(BlastLine) + * b.query = query # <<<<<<<<<<<<<< + * b.subject = subject + * b.pctid = pctid + */ + __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 198, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (__Pyx_PyObject_SetAttrStr(((PyObject *)__pyx_v_b), __pyx_n_s_query, __pyx_t_1) < 0) __PYX_ERR(0, 198, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":199 + * cdef BlastLine b = BlastLine.__new__(BlastLine) + * b.query = query + * b.subject = subject # <<<<<<<<<<<<<< + * b.pctid = pctid + * b.hitlen = hitlen + */ + __pyx_t_1 = 
__Pyx_PyBytes_FromString(__pyx_v_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 199, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (__Pyx_PyObject_SetAttrStr(((PyObject *)__pyx_v_b), __pyx_n_s_subject, __pyx_t_1) < 0) __PYX_ERR(0, 199, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "jcvi/formats/cblast.pyx":200 + * b.query = query + * b.subject = subject + * b.pctid = pctid # <<<<<<<<<<<<<< + * b.hitlen = hitlen + * b.nmismatch = nmismatch + */ + __pyx_v_b->pctid = __pyx_v_pctid; + + /* "jcvi/formats/cblast.pyx":201 + * b.subject = subject + * b.pctid = pctid + * b.hitlen = hitlen # <<<<<<<<<<<<<< + * b.nmismatch = nmismatch + * b.ngaps = ngaps + */ + __pyx_v_b->hitlen = __pyx_v_hitlen; + + /* "jcvi/formats/cblast.pyx":202 + * b.pctid = pctid + * b.hitlen = hitlen + * b.nmismatch = nmismatch # <<<<<<<<<<<<<< + * b.ngaps = ngaps + * b.qstart = qstart + */ + __pyx_v_b->nmismatch = __pyx_v_nmismatch; + + /* "jcvi/formats/cblast.pyx":203 + * b.hitlen = hitlen + * b.nmismatch = nmismatch + * b.ngaps = ngaps # <<<<<<<<<<<<<< + * b.qstart = qstart + * b.qstop = qstop + */ + __pyx_v_b->ngaps = __pyx_v_ngaps; + + /* "jcvi/formats/cblast.pyx":204 + * b.nmismatch = nmismatch + * b.ngaps = ngaps + * b.qstart = qstart # <<<<<<<<<<<<<< + * b.qstop = qstop + * b.sstart = sstart + */ + __pyx_v_b->qstart = __pyx_v_qstart; + + /* "jcvi/formats/cblast.pyx":205 + * b.ngaps = ngaps + * b.qstart = qstart + * b.qstop = qstop # <<<<<<<<<<<<<< + * b.sstart = sstart + * b.sstop = sstop + */ + __pyx_v_b->qstop = __pyx_v_qstop; + + /* "jcvi/formats/cblast.pyx":206 + * b.qstart = qstart + * b.qstop = qstop + * b.sstart = sstart # <<<<<<<<<<<<<< + * b.sstop = sstop + * b.evalue = evalue + */ + __pyx_v_b->sstart = __pyx_v_sstart; + + /* "jcvi/formats/cblast.pyx":207 + * b.qstop = qstop + * b.sstart = sstart + * b.sstop = sstop # <<<<<<<<<<<<<< + * b.evalue = evalue + * b.score = score + */ + __pyx_v_b->sstop = __pyx_v_sstop; + + /* "jcvi/formats/cblast.pyx":208 + * b.sstart 
= sstart + * b.sstop = sstop + * b.evalue = evalue # <<<<<<<<<<<<<< + * b.score = score + * return b + */ + __pyx_v_b->evalue = __pyx_v_evalue; + + /* "jcvi/formats/cblast.pyx":209 + * b.sstop = sstop + * b.evalue = evalue + * b.score = score # <<<<<<<<<<<<<< + * return b + */ + __pyx_v_b->score = __pyx_v_score; + + /* "jcvi/formats/cblast.pyx":210 + * b.evalue = evalue + * b.score = score + * return b # <<<<<<<<<<<<<< + */ + __Pyx_XDECREF((PyObject *)__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_b); + __pyx_r = __pyx_v_b; + goto __pyx_L0; + + /* "jcvi/formats/cblast.pyx":192 + * + * + * cdef BlastLine create_blast_line(char *query, char *subject, float pctid, int hitlen, # <<<<<<<<<<<<<< + * int nmismatch, int ngaps, int qstart, int qstop, + * int sstart, int sstop, float evalue, float score): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("jcvi.formats.cblast.create_blast_line", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XDECREF((PyObject *)__pyx_v_b); + __Pyx_XGIVEREF((PyObject *)__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_Blast(PyTypeObject *t, PyObject *a, PyObject *k) { + struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p; + PyObject *o; + #if CYTHON_COMPILING_IN_LIMITED_API + allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); + o = alloc_func(t, 0); + #else + if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { + o = (*t->tp_alloc)(t, 0); + } else { + o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); + } + if (unlikely(!o)) return 0; + #endif + p = ((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o); + p->filename = Py_None; Py_INCREF(Py_None); + if (unlikely(__pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(o, a, k) < 0)) goto bad; + return o; + bad: + Py_DECREF(o); o = 0; + return NULL; +} + +static void 
__pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast(PyObject *o) { + struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; + #if CYTHON_USE_TP_FINALIZE + if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { + if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast) { + if (PyObject_CallFinalizerFromDealloc(o)) return; + } + } + #endif + PyObject_GC_UnTrack(o); + { + PyObject *etype, *eval, *etb; + PyErr_Fetch(&etype, &eval, &etb); + __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1); + __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(o); + __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1); + PyErr_Restore(etype, eval, etb); + } + Py_CLEAR(p->filename); + #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + (*Py_TYPE(o)->tp_free)(o); + #else + { + freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); + if (tp_free) tp_free(o); + } + #endif +} + +static int __pyx_tp_traverse_4jcvi_7formats_6cblast_Blast(PyObject *o, visitproc v, void *a) { + int e; + struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; + if (p->filename) { + e = (*v)(p->filename, a); if (e) return e; + } + return 0; +} + +static int __pyx_tp_clear_4jcvi_7formats_6cblast_Blast(PyObject *o) { + PyObject* tmp; + struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; + tmp = ((PyObject*)p->filename); + p->filename = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + return 0; +} + +static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *self, CYTHON_UNUSED PyObject *arg) { + PyObject *res = __pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(self); + if (!res && !PyErr_Occurred()) { PyErr_SetNone(PyExc_StopIteration); } + return res; +} 
+static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *self, CYTHON_UNUSED PyObject *arg) { + return __pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(self); +} + +static PyMethodDef __pyx_methods_4jcvi_7formats_6cblast_Blast[] = { + {"__next__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__, METH_NOARGS|METH_COEXIST, 0}, + {"__repr__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__, METH_NOARGS|METH_COEXIST, 0}, + {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, + {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, + {0, 0, 0, 0} +}; +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_type_4jcvi_7formats_6cblast_Blast_slots[] = { + {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast}, + {Py_tp_repr, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__}, + {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast_Blast}, + {Py_tp_clear, (void *)__pyx_tp_clear_4jcvi_7formats_6cblast_Blast}, + {Py_tp_iter, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__}, + {Py_tp_iternext, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__}, + {Py_tp_methods, (void *)__pyx_methods_4jcvi_7formats_6cblast_Blast}, + {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast_Blast}, + {0, 0}, +}; +static PyType_Spec __pyx_type_4jcvi_7formats_6cblast_Blast_spec = { + "jcvi.formats.cblast.Blast", + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_Blast), + 0, + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, + __pyx_type_4jcvi_7formats_6cblast_Blast_slots, +}; +#else + +static PyTypeObject 
__pyx_type_4jcvi_7formats_6cblast_Blast = { + PyVarObject_HEAD_INIT(0, 0) + "jcvi.formats.cblast.""Blast", /*tp_name*/ + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_Blast), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast, /*tp_dealloc*/ + #if PY_VERSION_HEX < 0x030800b4 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030800b4 + 0, /*tp_vectorcall_offset*/ + #endif + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #endif + #if PY_MAJOR_VERSION >= 3 + 0, /*tp_as_async*/ + #endif + __pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + 0, /*tp_doc*/ + __pyx_tp_traverse_4jcvi_7formats_6cblast_Blast, /*tp_traverse*/ + __pyx_tp_clear_4jcvi_7formats_6cblast_Blast, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + __pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__, /*tp_iter*/ + __pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__, /*tp_iternext*/ + __pyx_methods_4jcvi_7formats_6cblast_Blast, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + #if !CYTHON_USE_TYPE_SPECS + 0, /*tp_dictoffset*/ + #endif + 0, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new_4jcvi_7formats_6cblast_Blast, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0, /*tp_version_tag*/ + #if PY_VERSION_HEX >= 0x030400a1 + #if CYTHON_USE_TP_FINALIZE + 0, /*tp_finalize*/ + #else + NULL, /*tp_finalize*/ + #endif + #endif + #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || 
PYPY_VERSION_NUM >= 0x07030800) + 0, /*tp_vectorcall*/ + #endif + #if __PYX_NEED_TP_PRINT_SLOT == 1 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030C0000 + 0, /*tp_watched*/ + #endif + #if PY_VERSION_HEX >= 0x030d00A4 + 0, /*tp_versions_used*/ + #endif + #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, /*tp_pypy_flags*/ + #endif +}; +#endif + +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p; + PyObject *o; + #if CYTHON_COMPILING_IN_LIMITED_API + allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); + o = alloc_func(t, 0); + #else + if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { + o = (*t->tp_alloc)(t, 0); + } else { + o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); + } + if (unlikely(!o)) return 0; + #endif + p = ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o); + p->qseqid = Py_None; Py_INCREF(Py_None); + p->sseqid = Py_None; Py_INCREF(Py_None); + return o; +} + +static void __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine(PyObject *o) { + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; + #if CYTHON_USE_TP_FINALIZE + if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { + if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine) { + if (PyObject_CallFinalizerFromDealloc(o)) return; + } + } + #endif + PyObject_GC_UnTrack(o); + Py_CLEAR(p->qseqid); + Py_CLEAR(p->sseqid); + #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + (*Py_TYPE(o)->tp_free)(o); + #else + { + freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); + if (tp_free) 
tp_free(o); + } + #endif +} + +static int __pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine(PyObject *o, visitproc v, void *a) { + int e; + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; + if (p->qseqid) { + e = (*v)(p->qseqid, a); if (e) return e; + } + if (p->sseqid) { + e = (*v)(p->sseqid, a); if (e) return e; + } + return 0; +} + +static int __pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine(PyObject *o) { + PyObject* tmp; + struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; + tmp = ((PyObject*)p->qseqid); + p->qseqid = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + tmp = ((PyObject*)p->sseqid); + p->sseqid = Py_None; Py_INCREF(Py_None); + Py_XDECREF(tmp); + return 0; +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_query(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_query(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_subject(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_subject(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_has_score(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(o); +} + +static PyObject 
*__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_swapped(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(o); +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_bedline(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(o); +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__query(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__query(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__subject(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__subject(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_hitlen(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_hitlen(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch(PyObject *o, CYTHON_UNUSED void *x) { + return 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_ngaps(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_ngaps(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstart(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstart(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstop(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstop(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstart(PyObject *o, CYTHON_UNUSED void *x) { + return 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstart(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstop(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstop(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_pctid(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_pctid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_score(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_score(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_evalue(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(o); +} 
+ +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_evalue(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qseqid(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qseqid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(o, v); + } + else { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(o); + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sseqid(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sseqid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(o, v); + } + else { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(o); + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qi(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qi(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_si(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_si(PyObject 
*o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_orientation(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(o); +} + +static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_orientation(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { + if (v) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(o, v); + } + else { + PyErr_SetString(PyExc_NotImplementedError, "__del__"); + return -1; + } +} + +static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *self, CYTHON_UNUSED PyObject *arg) { + return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(self); +} + +static PyMethodDef __pyx_methods_4jcvi_7formats_6cblast_BlastLine[] = { + {"__repr__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__, METH_NOARGS|METH_COEXIST, 0}, + {"__reduce__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, + {0, 0, 0, 0} +}; + +static struct PyGetSetDef __pyx_getsets_4jcvi_7formats_6cblast_BlastLine[] = { + {(char *)"query", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_query, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_query, (char *)0, 0}, + {(char *)"subject", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_subject, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_subject, (char *)0, 0}, + {(char *)"has_score", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_has_score, 0, (char *)0, 0}, + {(char *)"swapped", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_swapped, 0, (char *)PyDoc_STR("\n Swap query and subject.\n "), 0}, + {(char *)"bedline", 
__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_bedline, 0, (char *)0, 0}, + {(char *)"_query", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__query, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__query, (char *)0, 0}, + {(char *)"_subject", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__subject, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__subject, (char *)0, 0}, + {(char *)"hitlen", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_hitlen, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_hitlen, (char *)0, 0}, + {(char *)"nmismatch", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch, (char *)0, 0}, + {(char *)"ngaps", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_ngaps, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_ngaps, (char *)0, 0}, + {(char *)"qstart", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstart, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstart, (char *)0, 0}, + {(char *)"qstop", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstop, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstop, (char *)0, 0}, + {(char *)"sstart", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstart, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstart, (char *)0, 0}, + {(char *)"sstop", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstop, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstop, (char *)0, 0}, + {(char *)"pctid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_pctid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_pctid, (char *)0, 0}, + {(char *)"score", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_score, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_score, (char *)0, 0}, + {(char *)"evalue", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_evalue, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_evalue, (char *)0, 0}, + {(char *)"qseqid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qseqid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qseqid, (char *)0, 0}, 
+ {(char *)"sseqid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sseqid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sseqid, (char *)0, 0}, + {(char *)"qi", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qi, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qi, (char *)0, 0}, + {(char *)"si", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_si, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_si, (char *)0, 0}, + {(char *)"orientation", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_orientation, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_orientation, (char *)0, 0}, + {0, 0, 0, 0, 0} +}; +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_type_4jcvi_7formats_6cblast_BlastLine_slots[] = { + {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine}, + {Py_tp_repr, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__}, + {Py_tp_hash, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__}, + {Py_tp_str, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__}, + {Py_tp_doc, (void *)PyDoc_STR("\n Given a string of tab-delimited (-m 8) blast output, parse it and create\n an object with the usual attrs:\n\n >>> b = BlastLine(\"Os09g11510\tOs08g13650\t92.31\t39\t3\t0\t2273\t2311\t3237\t3199\t0.001\t54.0\")\n >>> b.query\n 'Os09g11510'\n >>> attrs = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', ... 
'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score')\n >>> [getattr(b, attr) for attr in attrs] # doctest: +ELLIPSIS\n ['Os09g11510', 'Os08g13650', 92.3..., 39, 3, 0, 2273, 2311, 3237, 3199, 0.001..., 54.0]\n ")}, + {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine}, + {Py_tp_clear, (void *)__pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine}, + {Py_tp_richcompare, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__}, + {Py_tp_methods, (void *)__pyx_methods_4jcvi_7formats_6cblast_BlastLine}, + {Py_tp_getset, (void *)__pyx_getsets_4jcvi_7formats_6cblast_BlastLine}, + {Py_tp_init, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__}, + {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine}, + {0, 0}, +}; +static PyType_Spec __pyx_type_4jcvi_7formats_6cblast_BlastLine_spec = { + "jcvi.formats.cblast.BlastLine", + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine), + 0, + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, + __pyx_type_4jcvi_7formats_6cblast_BlastLine_slots, +}; +#else + +static PyTypeObject __pyx_type_4jcvi_7formats_6cblast_BlastLine = { + PyVarObject_HEAD_INIT(0, 0) + "jcvi.formats.cblast.""BlastLine", /*tp_name*/ + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine, /*tp_dealloc*/ + #if PY_VERSION_HEX < 0x030800b4 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030800b4 + 0, /*tp_vectorcall_offset*/ + #endif + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #endif + #if PY_MAJOR_VERSION >= 3 + 0, /*tp_as_async*/ + #endif + __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__, /*tp_hash*/ + 0, /*tp_call*/ + 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + PyDoc_STR("\n Given a string of tab-delimited (-m 8) blast output, parse it and create\n an object with the usual attrs:\n\n >>> b = BlastLine(\"Os09g11510\tOs08g13650\t92.31\t39\t3\t0\t2273\t2311\t3237\t3199\t0.001\t54.0\")\n >>> b.query\n 'Os09g11510'\n >>> attrs = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', ... 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score')\n >>> [getattr(b, attr) for attr in attrs] # doctest: +ELLIPSIS\n ['Os09g11510', 'Os08g13650', 92.3..., 39, 3, 0, 2273, 2311, 3237, 3199, 0.001..., 54.0]\n "), /*tp_doc*/ + __pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine, /*tp_traverse*/ + __pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine, /*tp_clear*/ + __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + __pyx_methods_4jcvi_7formats_6cblast_BlastLine, /*tp_methods*/ + 0, /*tp_members*/ + __pyx_getsets_4jcvi_7formats_6cblast_BlastLine, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + #if !CYTHON_USE_TYPE_SPECS + 0, /*tp_dictoffset*/ + #endif + __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new_4jcvi_7formats_6cblast_BlastLine, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0, /*tp_version_tag*/ + #if PY_VERSION_HEX >= 0x030400a1 + #if CYTHON_USE_TP_FINALIZE + 0, /*tp_finalize*/ + #else + NULL, /*tp_finalize*/ + #endif + #endif + #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, /*tp_vectorcall*/ + #endif + #if 
__PYX_NEED_TP_PRINT_SLOT == 1 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030C0000 + 0, /*tp_watched*/ + #endif + #if PY_VERSION_HEX >= 0x030d00A4 + 0, /*tp_versions_used*/ + #endif + #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, /*tp_pypy_flags*/ + #endif +}; +#endif + +#if CYTHON_USE_FREELISTS +static struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[8]; +static int __pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = 0; +#endif + +static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { + PyObject *o; + #if CYTHON_COMPILING_IN_LIMITED_API + allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); + o = alloc_func(t, 0); + #else + #if CYTHON_USE_FREELISTS + if (likely((int)(__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr > 0) & (int)(t->tp_basicsize == sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)))) { + o = (PyObject*)__pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[--__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr]; + memset(o, 0, sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)); + (void) PyObject_INIT(o, t); + PyObject_GC_Track(o); + } else + #endif + { + o = (*t->tp_alloc)(t, 0); + if (unlikely(!o)) return 0; + } + #endif + return o; +} + +static void __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyObject *o) { + struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *p = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o; + #if CYTHON_USE_TP_FINALIZE + if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && 
!__Pyx_PyObject_GC_IsFinalized(o)) { + if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) { + if (PyObject_CallFinalizerFromDealloc(o)) return; + } + } + #endif + PyObject_GC_UnTrack(o); + Py_CLEAR(p->__pyx_genexpr_arg_0); + Py_CLEAR(p->__pyx_v_x); + Py_CLEAR(p->__pyx_t_0); + #if CYTHON_USE_FREELISTS + if (((int)(__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)))) { + __pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr++] = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o); + } else + #endif + { + #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + (*Py_TYPE(o)->tp_free)(o); + #else + { + freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); + if (tp_free) tp_free(o); + } + #endif + } +} + +static int __pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyObject *o, visitproc v, void *a) { + int e; + struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *p = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o; + if (p->__pyx_genexpr_arg_0) { + e = (*v)(p->__pyx_genexpr_arg_0, a); if (e) return e; + } + if (p->__pyx_v_x) { + e = (*v)(p->__pyx_v_x, a); if (e) return e; + } + if (p->__pyx_t_0) { + e = (*v)(p->__pyx_t_0, a); if (e) return e; + } + return 0; +} +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_slots[] = { + {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, + {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, + {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, + {0, 0}, +}; +static PyType_Spec 
__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec = { + "jcvi.formats.cblast.__pyx_scope_struct__genexpr", + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr), + 0, + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, + __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_slots, +}; +#else + +static PyTypeObject __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = { + PyVarObject_HEAD_INIT(0, 0) + "jcvi.formats.cblast.""__pyx_scope_struct__genexpr", /*tp_name*/ + sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_dealloc*/ + #if PY_VERSION_HEX < 0x030800b4 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030800b4 + 0, /*tp_vectorcall_offset*/ + #endif + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #endif + #if PY_MAJOR_VERSION >= 3 + 0, /*tp_as_async*/ + #endif + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ + 0, /*tp_doc*/ + __pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + 0, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + #if !CYTHON_USE_TYPE_SPECS + 0, /*tp_dictoffset*/ + #endif + 0, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_new*/ + 0, 
/*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0, /*tp_version_tag*/ + #if PY_VERSION_HEX >= 0x030400a1 + #if CYTHON_USE_TP_FINALIZE + 0, /*tp_finalize*/ + #else + NULL, /*tp_finalize*/ + #endif + #endif + #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, /*tp_vectorcall*/ + #endif + #if __PYX_NEED_TP_PRINT_SLOT == 1 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030C0000 + 0, /*tp_watched*/ + #endif + #if PY_VERSION_HEX >= 0x030d00A4 + 0, /*tp_versions_used*/ + #endif + #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, /*tp_pypy_flags*/ + #endif +}; +#endif + +#if CYTHON_USE_FREELISTS +static struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[8]; +static int __pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = 0; +#endif + +static PyObject *__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { + PyObject *o; + #if CYTHON_COMPILING_IN_LIMITED_API + allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); + o = alloc_func(t, 0); + #else + #if CYTHON_USE_FREELISTS + if (likely((int)(__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc > 0) & 
(int)(t->tp_basicsize == sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)))) { + o = (PyObject*)__pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[--__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc]; + memset(o, 0, sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)); + (void) PyObject_INIT(o, t); + } else + #endif + { + o = (*t->tp_alloc)(t, 0); + if (unlikely(!o)) return 0; + } + #endif + return o; +} + +static void __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyObject *o) { + #if CYTHON_USE_TP_FINALIZE + if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && (!PyType_IS_GC(Py_TYPE(o)) || !__Pyx_PyObject_GC_IsFinalized(o))) { + if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) { + if (PyObject_CallFinalizerFromDealloc(o)) return; + } + } + #endif + #if CYTHON_USE_FREELISTS + if (((int)(__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct 
__pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)))) { + __pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc++] = ((struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *)o); + } else + #endif + { + #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + (*Py_TYPE(o)->tp_free)(o); + #else + { + freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); + if (tp_free) tp_free(o); + } + #endif + } +} +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_slots[] = { + {Py_tp_dealloc, (void *)__pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc}, + {Py_tp_new, (void *)__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc}, + {0, 0}, +}; +static PyType_Spec __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec = { + "jcvi.formats.cblast.__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", + sizeof(struct 
__pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc), + 0, + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_FINALIZE, + __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_slots, +}; +#else + +static PyTypeObject __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = { + PyVarObject_HEAD_INIT(0, 0) + "jcvi.formats.cblast.""__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", /*tp_name*/ + sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, /*tp_dealloc*/ + #if PY_VERSION_HEX < 0x030800b4 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030800b4 + 0, /*tp_vectorcall_offset*/ + #endif + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + #if PY_MAJOR_VERSION < 3 + 0, /*tp_compare*/ + #endif + #if PY_MAJOR_VERSION >= 3 + 0, /*tp_as_async*/ + #endif + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ + 0, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, 
/*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + 0, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + #if !CYTHON_USE_TYPE_SPECS + 0, /*tp_dictoffset*/ + #endif + 0, /*tp_init*/ + 0, /*tp_alloc*/ + __pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ + 0, /*tp_bases*/ + 0, /*tp_mro*/ + 0, /*tp_cache*/ + 0, /*tp_subclasses*/ + 0, /*tp_weaklist*/ + 0, /*tp_del*/ + 0, /*tp_version_tag*/ + #if PY_VERSION_HEX >= 0x030400a1 + #if CYTHON_USE_TP_FINALIZE + 0, /*tp_finalize*/ + #else + NULL, /*tp_finalize*/ + #endif + #endif + #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, /*tp_vectorcall*/ + #endif + #if __PYX_NEED_TP_PRINT_SLOT == 1 + 0, /*tp_print*/ + #endif + #if PY_VERSION_HEX >= 0x030C0000 + 0, /*tp_watched*/ + #endif + #if PY_VERSION_HEX >= 0x030d00A4 + 0, /*tp_versions_used*/ + #endif + #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, /*tp_pypy_flags*/ + #endif +}; +#endif + +static PyMethodDef __pyx_methods[] = { + {0, 0, 0, 0} +}; +#ifndef CYTHON_SMALL_CODE +#if defined(__clang__) + #define CYTHON_SMALL_CODE +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define CYTHON_SMALL_CODE __attribute__((cold)) +#else + #define CYTHON_SMALL_CODE +#endif +#endif +/* #### Code section: pystring_table ### */ + +static int __Pyx_CreateStringTabAndInitStrings(void) { + __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_n_s_Blast, __pyx_k_Blast, sizeof(__pyx_k_Blast), 0, 0, 1, 1}, + {&__pyx_n_s_BlastLine, __pyx_k_BlastLine, sizeof(__pyx_k_BlastLine), 0, 0, 1, 1}, + {&__pyx_n_s_BlastLine___get___locals_genexpr, __pyx_k_BlastLine___get___locals_genexpr, 
sizeof(__pyx_k_BlastLine___get___locals_genexpr), 0, 0, 1, 1}, + {&__pyx_n_s_BlastLine___reduce, __pyx_k_BlastLine___reduce, sizeof(__pyx_k_BlastLine___reduce), 0, 0, 1, 1}, + {&__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1, __pyx_k_BlastLine_s_to_s_eval_3f_score_1, sizeof(__pyx_k_BlastLine_s_to_s_eval_3f_score_1), 0, 0, 1, 0}, + {&__pyx_n_s_Blast___reduce_cython, __pyx_k_Blast___reduce_cython, sizeof(__pyx_k_Blast___reduce_cython), 0, 0, 1, 1}, + {&__pyx_n_s_Blast___setstate_cython, __pyx_k_Blast___setstate_cython, sizeof(__pyx_k_Blast___setstate_cython), 0, 0, 1, 1}, + {&__pyx_kp_s_Blast_s, __pyx_k_Blast_s, sizeof(__pyx_k_Blast_s), 0, 0, 1, 0}, + {&__pyx_n_s_IndexError, __pyx_k_IndexError, sizeof(__pyx_k_IndexError), 0, 0, 1, 1}, + {&__pyx_n_s_OverflowError, __pyx_k_OverflowError, sizeof(__pyx_k_OverflowError), 0, 0, 1, 1}, + {&__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma, __pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma, sizeof(__pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma), 0, 0, 1, 1}, + {&__pyx_n_s_StopIteration, __pyx_k_StopIteration, sizeof(__pyx_k_StopIteration), 0, 0, 1, 1}, + {&__pyx_n_s_TypeError, __pyx_k_TypeError, sizeof(__pyx_k_TypeError), 0, 0, 1, 1}, + {&__pyx_kp_s_UTF_8, __pyx_k_UTF_8, sizeof(__pyx_k_UTF_8), 0, 0, 1, 0}, + {&__pyx_n_s__13, __pyx_k__13, sizeof(__pyx_k__13), 0, 0, 1, 1}, + {&__pyx_kp_s__5, __pyx_k__5, sizeof(__pyx_k__5), 0, 0, 1, 0}, + {&__pyx_n_s__6, __pyx_k__6, sizeof(__pyx_k__6), 0, 0, 1, 1}, + {&__pyx_n_s_args, __pyx_k_args, sizeof(__pyx_k_args), 0, 0, 1, 1}, + {&__pyx_n_s_asyncio_coroutines, __pyx_k_asyncio_coroutines, sizeof(__pyx_k_asyncio_coroutines), 0, 0, 1, 1}, + {&__pyx_n_s_cfunc_to_py, __pyx_k_cfunc_to_py, sizeof(__pyx_k_cfunc_to_py), 0, 0, 1, 1}, + {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, + {&__pyx_n_s_close, __pyx_k_close, sizeof(__pyx_k_close), 0, 0, 1, 1}, + {&__pyx_kp_u_disable, __pyx_k_disable, sizeof(__pyx_k_disable), 0, 1, 0, 0}, + {&__pyx_kp_u_enable, __pyx_k_enable, 
sizeof(__pyx_k_enable), 0, 1, 0, 0}, + {&__pyx_n_s_encode, __pyx_k_encode, sizeof(__pyx_k_encode), 0, 0, 1, 1}, + {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, + {&__pyx_n_s_evalue, __pyx_k_evalue, sizeof(__pyx_k_evalue), 0, 0, 1, 1}, + {&__pyx_n_s_filename, __pyx_k_filename, sizeof(__pyx_k_filename), 0, 0, 1, 1}, + {&__pyx_kp_u_gc, __pyx_k_gc, sizeof(__pyx_k_gc), 0, 1, 0, 0}, + {&__pyx_n_s_genexpr, __pyx_k_genexpr, sizeof(__pyx_k_genexpr), 0, 0, 1, 1}, + {&__pyx_n_s_getstate, __pyx_k_getstate, sizeof(__pyx_k_getstate), 0, 0, 1, 1}, + {&__pyx_n_s_hitlen, __pyx_k_hitlen, sizeof(__pyx_k_hitlen), 0, 0, 1, 1}, + {&__pyx_n_s_id, __pyx_k_id, sizeof(__pyx_k_id), 0, 0, 1, 1}, + {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, + {&__pyx_n_s_initializing, __pyx_k_initializing, sizeof(__pyx_k_initializing), 0, 0, 1, 1}, + {&__pyx_n_s_is_coroutine, __pyx_k_is_coroutine, sizeof(__pyx_k_is_coroutine), 0, 0, 1, 1}, + {&__pyx_kp_u_isenabled, __pyx_k_isenabled, sizeof(__pyx_k_isenabled), 0, 1, 0, 0}, + {&__pyx_n_s_jcvi_formats_cblast, __pyx_k_jcvi_formats_cblast, sizeof(__pyx_k_jcvi_formats_cblast), 0, 0, 1, 1}, + {&__pyx_n_s_join, __pyx_k_join, sizeof(__pyx_k_join), 0, 0, 1, 1}, + {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, + {&__pyx_n_s_ngaps, __pyx_k_ngaps, sizeof(__pyx_k_ngaps), 0, 0, 1, 1}, + {&__pyx_n_s_nmismatch, __pyx_k_nmismatch, sizeof(__pyx_k_nmismatch), 0, 0, 1, 1}, + {&__pyx_kp_s_no_default___reduce___due_to_non, __pyx_k_no_default___reduce___due_to_non, sizeof(__pyx_k_no_default___reduce___due_to_non), 0, 0, 1, 0}, + {&__pyx_n_s_orientation, __pyx_k_orientation, sizeof(__pyx_k_orientation), 0, 0, 1, 1}, + {&__pyx_n_s_pctid, __pyx_k_pctid, sizeof(__pyx_k_pctid), 0, 0, 1, 1}, + {&__pyx_n_s_pyx_state, __pyx_k_pyx_state, sizeof(__pyx_k_pyx_state), 0, 0, 1, 1}, + {&__pyx_n_s_qi, __pyx_k_qi, sizeof(__pyx_k_qi), 0, 0, 1, 1}, 
+ {&__pyx_n_s_qseqid, __pyx_k_qseqid, sizeof(__pyx_k_qseqid), 0, 0, 1, 1}, + {&__pyx_n_s_qstart, __pyx_k_qstart, sizeof(__pyx_k_qstart), 0, 0, 1, 1}, + {&__pyx_n_s_qstop, __pyx_k_qstop, sizeof(__pyx_k_qstop), 0, 0, 1, 1}, + {&__pyx_n_s_query, __pyx_k_query, sizeof(__pyx_k_query), 0, 0, 1, 1}, + {&__pyx_n_s_reduce, __pyx_k_reduce, sizeof(__pyx_k_reduce), 0, 0, 1, 1}, + {&__pyx_n_s_reduce_cython, __pyx_k_reduce_cython, sizeof(__pyx_k_reduce_cython), 0, 0, 1, 1}, + {&__pyx_n_s_reduce_ex, __pyx_k_reduce_ex, sizeof(__pyx_k_reduce_ex), 0, 0, 1, 1}, + {&__pyx_n_s_richcmp, __pyx_k_richcmp, sizeof(__pyx_k_richcmp), 0, 0, 1, 1}, + {&__pyx_n_s_s, __pyx_k_s, sizeof(__pyx_k_s), 0, 0, 1, 1}, + {&__pyx_n_s_score, __pyx_k_score, sizeof(__pyx_k_score), 0, 0, 1, 1}, + {&__pyx_n_s_self, __pyx_k_self, sizeof(__pyx_k_self), 0, 0, 1, 1}, + {&__pyx_n_s_send, __pyx_k_send, sizeof(__pyx_k_send), 0, 0, 1, 1}, + {&__pyx_n_s_setstate, __pyx_k_setstate, sizeof(__pyx_k_setstate), 0, 0, 1, 1}, + {&__pyx_n_s_setstate_cython, __pyx_k_setstate_cython, sizeof(__pyx_k_setstate_cython), 0, 0, 1, 1}, + {&__pyx_n_s_si, __pyx_k_si, sizeof(__pyx_k_si), 0, 0, 1, 1}, + {&__pyx_n_s_slots, __pyx_k_slots, sizeof(__pyx_k_slots), 0, 0, 1, 1}, + {&__pyx_n_s_spec, __pyx_k_spec, sizeof(__pyx_k_spec), 0, 0, 1, 1}, + {&__pyx_kp_s_src_jcvi_formats_cblast_pyx, __pyx_k_src_jcvi_formats_cblast_pyx, sizeof(__pyx_k_src_jcvi_formats_cblast_pyx), 0, 0, 1, 0}, + {&__pyx_n_s_sseqid, __pyx_k_sseqid, sizeof(__pyx_k_sseqid), 0, 0, 1, 1}, + {&__pyx_n_s_sstart, __pyx_k_sstart, sizeof(__pyx_k_sstart), 0, 0, 1, 1}, + {&__pyx_n_s_sstop, __pyx_k_sstop, sizeof(__pyx_k_sstop), 0, 0, 1, 1}, + {&__pyx_kp_s_stringsource, __pyx_k_stringsource, sizeof(__pyx_k_stringsource), 0, 0, 1, 0}, + {&__pyx_n_s_subject, __pyx_k_subject, sizeof(__pyx_k_subject), 0, 0, 1, 1}, + {&__pyx_n_s_sys, __pyx_k_sys, sizeof(__pyx_k_sys), 0, 0, 1, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_kp_s_that_comparison_not_implemented, 
__pyx_k_that_comparison_not_implemented, sizeof(__pyx_k_that_comparison_not_implemented), 0, 0, 1, 0}, + {&__pyx_n_s_throw, __pyx_k_throw, sizeof(__pyx_k_throw), 0, 0, 1, 1}, + {&__pyx_n_s_wrap, __pyx_k_wrap, sizeof(__pyx_k_wrap), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} + }; + return __Pyx_InitStrings(__pyx_string_tab); +} +/* #### Code section: cached_builtins ### */ +static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_StopIteration = __Pyx_GetBuiltinName(__pyx_n_s_StopIteration); if (!__pyx_builtin_StopIteration) __PYX_ERR(0, 47, __pyx_L1_error) + __pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) __PYX_ERR(1, 2, __pyx_L1_error) + __pyx_builtin_id = __Pyx_GetBuiltinName(__pyx_n_s_id); if (!__pyx_builtin_id) __PYX_ERR(0, 138, __pyx_L1_error) + __pyx_builtin_OverflowError = __Pyx_GetBuiltinName(__pyx_n_s_OverflowError); if (!__pyx_builtin_OverflowError) __PYX_ERR(1, 83, __pyx_L1_error) + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(1, 86, __pyx_L1_error) + __pyx_builtin_IndexError = __Pyx_GetBuiltinName(__pyx_n_s_IndexError); if (!__pyx_builtin_IndexError) __PYX_ERR(1, 96, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: cached_constants ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "cfunc.to_py":67 + * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") + * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): + * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int 
ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< + * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" + * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) + */ + __pyx_tuple_ = PyTuple_Pack(12, __pyx_n_s_query, __pyx_n_s_subject, __pyx_n_s_pctid, __pyx_n_s_hitlen, __pyx_n_s_nmismatch, __pyx_n_s_ngaps, __pyx_n_s_qstart, __pyx_n_s_qstop, __pyx_n_s_sstart, __pyx_n_s_sstop, __pyx_n_s_evalue, __pyx_n_s_score); if (unlikely(!__pyx_tuple_)) __PYX_ERR(1, 67, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple_); + __Pyx_GIVEREF(__pyx_tuple_); + __pyx_codeobj__2 = (PyObject*)__Pyx_PyCode_New(12, 0, 0, 12, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple_, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_wrap, 67, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__2)) __PYX_ERR(1, 67, __pyx_L1_error) + + /* "jcvi/formats/cblast.pyx":135 + * return not self.__richcmp__(other, 2) + * else: + * raise Exception("that comparison not implemented") # <<<<<<<<<<<<<< + * + * def __hash__(self): + */ + __pyx_tuple__3 = PyTuple_Pack(1, __pyx_kp_s_that_comparison_not_implemented); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 135, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__3); + __Pyx_GIVEREF(__pyx_tuple__3); + + /* "jcvi/formats/cblast.pyx":145 + * + * def __str__(self): + * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< + * if self.orientation == '-': + * args[8], args[9] = args[9], args[8] + */ + __pyx_slice__4 = PySlice_New(Py_None, __pyx_int_12, Py_None); if (unlikely(!__pyx_slice__4)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_slice__4); + __Pyx_GIVEREF(__pyx_slice__4); + + /* "(tree fragment)":1 + * def 
__reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + __pyx_tuple__7 = PyTuple_Pack(1, __pyx_n_s_self); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(1, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__7); + __Pyx_GIVEREF(__pyx_tuple__7); + __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_reduce_cython, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(1, 1, __pyx_L1_error) + + /* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + __pyx_tuple__9 = PyTuple_Pack(2, __pyx_n_s_self, __pyx_n_s_pyx_state); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(1, 3, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__9); + __Pyx_GIVEREF(__pyx_tuple__9); + __pyx_codeobj__10 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__9, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_setstate_cython, 3, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__10)) __PYX_ERR(1, 3, __pyx_L1_error) + + /* "jcvi/formats/cblast.pyx":80 + * """ + * + * __slots__ = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ # <<<<<<<<<<<<<< + * 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score', \ + * 'qseqid', 'sseqid', 'qi', 'si', 'orientation') + */ + __pyx_tuple__11 = PyTuple_Pack(17, __pyx_n_s_query, __pyx_n_s_subject, __pyx_n_s_pctid, __pyx_n_s_hitlen, __pyx_n_s_nmismatch, __pyx_n_s_ngaps, __pyx_n_s_qstart, __pyx_n_s_qstop, __pyx_n_s_sstart, __pyx_n_s_sstop, 
__pyx_n_s_evalue, __pyx_n_s_score, __pyx_n_s_qseqid, __pyx_n_s_sseqid, __pyx_n_s_qi, __pyx_n_s_si, __pyx_n_s_orientation); if (unlikely(!__pyx_tuple__11)) __PYX_ERR(0, 80, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__11); + __Pyx_GIVEREF(__pyx_tuple__11); + + /* "jcvi/formats/cblast.pyx":185 + * return py_str(result) + * + * def __reduce__(self): # <<<<<<<<<<<<<< + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + */ + __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_jcvi_formats_cblast_pyx, __pyx_n_s_reduce, 185, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) __PYX_ERR(0, 185, __pyx_L1_error) + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} +/* #### Code section: init_constants ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) { + __pyx_umethod_PyString_Type_encode.type = (PyObject*)&PyString_Type; + __pyx_umethod_PyString_Type_encode.method_name = &__pyx_n_s_encode; + if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error); + __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_12 = PyInt_FromLong(12); if (unlikely(!__pyx_int_12)) __PYX_ERR(0, 1, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: init_globals ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { + return 0; +} +/* #### Code section: init_module ### */ + +static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ +static 
CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ + +static int __Pyx_modinit_global_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); + /*--- Global init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_variable_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); + /*--- Variable export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); + /*--- Function export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_init_code(void) { + __Pyx_RefNannyDeclarations + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); + /*--- Type init code ---*/ + #if CYTHON_USE_TYPE_SPECS + __pyx_ptype_4jcvi_7formats_6cblast_Blast = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast_Blast_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast_Blast)) __PYX_ERR(0, 21, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast_Blast_spec, __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) + #else + __pyx_ptype_4jcvi_7formats_6cblast_Blast = &__pyx_type_4jcvi_7formats_6cblast_Blast; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + #endif + #if !CYTHON_USE_TYPE_SPECS + if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) + #endif + #if PY_MAJOR_VERSION < 3 + 
__pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_print = 0; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_getattro == PyObject_GenericGetAttr)) { + __pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_getattro = __Pyx_PyObject_GenericGetAttr; + } + #endif + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_Blast, (PyObject *) __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) + #if !CYTHON_COMPILING_IN_LIMITED_API + if (__Pyx_setup_reduce((PyObject *) __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) + #endif + #if CYTHON_USE_TYPE_SPECS + __pyx_ptype_4jcvi_7formats_6cblast_BlastLine = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast_BlastLine_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast_BlastLine)) __PYX_ERR(0, 66, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast_BlastLine_spec, __pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) + #else + __pyx_ptype_4jcvi_7formats_6cblast_BlastLine = &__pyx_type_4jcvi_7formats_6cblast_BlastLine; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + #endif + #if !CYTHON_USE_TYPE_SPECS + if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) + #endif + #if PY_MAJOR_VERSION < 3 + __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_print = 0; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_getattro == PyObject_GenericGetAttr)) { + __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_getattro = __Pyx_PyObject_GenericGetAttr; + } + #endif + if (PyObject_SetAttr(__pyx_m, 
__pyx_n_s_BlastLine, (PyObject *) __pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) + #if CYTHON_USE_TYPE_SPECS + __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)) __PYX_ERR(0, 172, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec, __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) < 0) __PYX_ERR(0, 172, __pyx_L1_error) + #else + __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = &__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + #endif + #if !CYTHON_USE_TYPE_SPECS + if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) < 0) __PYX_ERR(0, 172, __pyx_L1_error) + #endif + #if PY_MAJOR_VERSION < 3 + __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_print = 0; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_getattro == PyObject_GenericGetAttr)) { + __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; + } + #endif + #if CYTHON_USE_TYPE_SPECS + __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, 
&__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec, NULL); if (unlikely(!__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)) __PYX_ERR(1, 66, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec, __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) < 0) __PYX_ERR(1, 66, __pyx_L1_error) + #else + __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = &__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + #endif + #if !CYTHON_USE_TYPE_SPECS + if (__Pyx_PyType_Ready(__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) < 0) __PYX_ERR(1, 66, __pyx_L1_error) + #endif + #if PY_MAJOR_VERSION < 3 + __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_print = 0; + #endif + #if !CYTHON_COMPILING_IN_LIMITED_API + if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_dictoffset && 
__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_getattro == PyObject_GenericGetAttr)) { + __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; + } + #endif + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_modinit_type_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); + /*--- Type import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_variable_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); + /*--- Variable import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); + /*--- Function import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + + +#if PY_MAJOR_VERSION >= 3 +#if CYTHON_PEP489_MULTI_PHASE_INIT +static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ +static int __pyx_pymod_exec_cblast(PyObject* module); /*proto*/ +static PyModuleDef_Slot __pyx_moduledef_slots[] = { + {Py_mod_create, (void*)__pyx_pymod_create}, + {Py_mod_exec, (void*)__pyx_pymod_exec_cblast}, + {0, NULL} +}; +#endif + +#ifdef __cplusplus +namespace { + struct PyModuleDef __pyx_moduledef = + #else + static struct PyModuleDef __pyx_moduledef = + #endif + { + PyModuleDef_HEAD_INIT, + "cblast", + __pyx_k_Cythonized_fast_version_of_Blas, /* m_doc */ + #if CYTHON_PEP489_MULTI_PHASE_INIT + 0, /* m_size */ + #elif CYTHON_USE_MODULE_STATE + 
sizeof(__pyx_mstate), /* m_size */ + #else + -1, /* m_size */ + #endif + __pyx_methods /* m_methods */, + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_moduledef_slots, /* m_slots */ + #else + NULL, /* m_reload */ + #endif + #if CYTHON_USE_MODULE_STATE + __pyx_m_traverse, /* m_traverse */ + __pyx_m_clear, /* m_clear */ + NULL /* m_free */ + #else + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ + #endif + }; + #ifdef __cplusplus +} /* anonymous namespace */ +#endif +#endif + +#ifndef CYTHON_NO_PYINIT_EXPORT +#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC +#elif PY_MAJOR_VERSION < 3 +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" void +#else +#define __Pyx_PyMODINIT_FUNC void +#endif +#else +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * +#else +#define __Pyx_PyMODINIT_FUNC PyObject * +#endif +#endif + + +#if PY_MAJOR_VERSION < 3 +__Pyx_PyMODINIT_FUNC initcblast(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC initcblast(void) +#else +__Pyx_PyMODINIT_FUNC PyInit_cblast(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC PyInit_cblast(void) +#if CYTHON_PEP489_MULTI_PHASE_INIT +{ + return PyModuleDef_Init(&__pyx_moduledef); +} +static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) { + #if PY_VERSION_HEX >= 0x030700A1 + static PY_INT64_T main_interpreter_id = -1; + PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); + if (main_interpreter_id == -1) { + main_interpreter_id = current_id; + return (unlikely(current_id == -1)) ? 
-1 : 0; + } else if (unlikely(main_interpreter_id != current_id)) + #else + static PyInterpreterState *main_interpreter = NULL; + PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; + if (!main_interpreter) { + main_interpreter = current_interpreter; + } else if (unlikely(main_interpreter != current_interpreter)) + #endif + { + PyErr_SetString( + PyExc_ImportError, + "Interpreter change detected - this module can only be loaded into one interpreter per process."); + return -1; + } + return 0; +} +#if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *module, const char* from_name, const char* to_name, int allow_none) +#else +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) +#endif +{ + PyObject *value = PyObject_GetAttrString(spec, from_name); + int result = 0; + if (likely(value)) { + if (allow_none || value != Py_None) { +#if CYTHON_COMPILING_IN_LIMITED_API + result = PyModule_AddObject(module, to_name, value); +#else + result = PyDict_SetItemString(moddict, to_name, value); +#endif + } + Py_DECREF(value); + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + result = -1; + } + return result; +} +static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def) { + PyObject *module = NULL, *moddict, *modname; + CYTHON_UNUSED_VAR(def); + if (__Pyx_check_single_interpreter()) + return NULL; + if (__pyx_m) + return __Pyx_NewRef(__pyx_m); + modname = PyObject_GetAttrString(spec, "name"); + if (unlikely(!modname)) goto bad; + module = PyModule_NewObject(modname); + Py_DECREF(modname); + if (unlikely(!module)) goto bad; +#if CYTHON_COMPILING_IN_LIMITED_API + moddict = module; +#else + moddict = PyModule_GetDict(module); + if (unlikely(!moddict)) goto bad; +#endif + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", 
"__loader__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; + return module; +bad: + Py_XDECREF(module); + return NULL; +} + + +static CYTHON_SMALL_CODE int __pyx_pymod_exec_cblast(PyObject *__pyx_pyinit_module) +#endif +#endif +{ + int stringtab_initialized = 0; + #if CYTHON_USE_MODULE_STATE + int pystate_addmodule_run = 0; + #endif + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + #if CYTHON_PEP489_MULTI_PHASE_INIT + if (__pyx_m) { + if (__pyx_m == __pyx_pyinit_module) return 0; + PyErr_SetString(PyExc_RuntimeError, "Module 'cblast' has already been imported. Re-initialisation is not supported."); + return -1; + } + #elif PY_MAJOR_VERSION >= 3 + if (__pyx_m) return __Pyx_NewRef(__pyx_m); + #endif + /*--- Module creation code ---*/ + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_m = __pyx_pyinit_module; + Py_INCREF(__pyx_m); + #else + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("cblast", __pyx_methods, __pyx_k_Cythonized_fast_version_of_Blas, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #elif CYTHON_USE_MODULE_STATE + __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) + { + int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef); + __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to "cblast" pseudovariable */ + if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + pystate_addmodule_run = 1; + } + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) 
+ #endif + #endif + CYTHON_UNUSED_VAR(__pyx_t_1); + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_d); + __pyx_b = __Pyx_PyImport_AddModuleRef(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_cython_runtime = __Pyx_PyImport_AddModuleRef((const char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if CYTHON_REFNANNY +__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); +if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); +} +#endif + __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_cblast(void)", 0); + if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, __Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pxy_PyFrame_Initialize_Offsets + __Pxy_PyFrame_Initialize_Offsets(); + #endif + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Coroutine_USED + if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_AsyncGen_USED + if 
(__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + PyEval_InitThreads(); + #endif + /*--- Initialize various global constants etc. ---*/ + if (__Pyx_InitConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + stringtab_initialized = 1; + if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + if (__pyx_module_is_main_jcvi__formats__cblast) { + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "jcvi.formats.cblast")) { + if (unlikely((PyDict_SetItemString(modules, "jcvi.formats.cblast", __pyx_m) < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Global type/function init code ---*/ + (void)__Pyx_modinit_global_init_code(); + (void)__Pyx_modinit_variable_export_code(); + (void)__Pyx_modinit_function_export_code(); + if (unlikely((__Pyx_modinit_type_init_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + (void)__Pyx_modinit_type_import_code(); + (void)__Pyx_modinit_variable_import_code(); + (void)__Pyx_modinit_function_import_code(); + /*--- Execution code ---*/ + 
#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + + /* "jcvi/formats/cblast.pyx":9 + * + * """ + * import sys # <<<<<<<<<<<<<< + * from libc.stdio cimport FILE, EOF, fopen, fscanf, rewind, fclose, sscanf, \ + * fgets, sprintf + */ + __pyx_t_2 = __Pyx_ImportDottedModuleRelFirst(__pyx_n_s_sys, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_sys, __pyx_t_2) < 0) __PYX_ERR(0, 9, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "jcvi/formats/cblast.pyx":15 + * + * + * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" # <<<<<<<<<<<<<< + * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" + * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" + */ + __pyx_v_4jcvi_7formats_6cblast_blast_format = ((char const *)"%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f"); + + /* "jcvi/formats/cblast.pyx":16 + * + * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" + * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" # <<<<<<<<<<<<<< + * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" + * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" + */ + __pyx_v_4jcvi_7formats_6cblast_blast_format_line = ((char const *)"%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n"); + + /* "jcvi/formats/cblast.pyx":17 + * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" + * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" + * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" # <<<<<<<<<<<<<< + * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" + * + */ + 
__pyx_v_4jcvi_7formats_6cblast_blast_output = ((char const *)"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g"); + + /* "jcvi/formats/cblast.pyx":18 + * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" + * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" + * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" # <<<<<<<<<<<<<< + * + * + */ + __pyx_v_4jcvi_7formats_6cblast_bed_output = ((char const *)"%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c"); + + /* "(tree fragment)":1 + * def __reduce_cython__(self): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): + */ + __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Blast___reduce_cython, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__8)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_reduce_cython, __pyx_t_2) < 0) __PYX_ERR(1, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "(tree fragment)":3 + * def __reduce_cython__(self): + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< + * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" + */ + __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Blast___setstate_cython, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__10)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 3, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_setstate_cython, __pyx_t_2) < 0) __PYX_ERR(1, 3, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "jcvi/formats/cblast.pyx":80 + * """ + * + 
* __slots__ = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ # <<<<<<<<<<<<<< + * 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score', \ + * 'qseqid', 'sseqid', 'qi', 'si', 'orientation') + */ + if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine, __pyx_n_s_slots, __pyx_tuple__11) < 0) __PYX_ERR(0, 80, __pyx_L1_error) + PyType_Modified(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); + + /* "jcvi/formats/cblast.pyx":185 + * return py_str(result) + * + * def __reduce__(self): # <<<<<<<<<<<<<< + * return create_blast_line, ( + * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + */ + __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_BlastLine___reduce, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__12)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 185, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine, __pyx_n_s_reduce, __pyx_t_2) < 0) __PYX_ERR(0, 185, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + PyType_Modified(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); + + /* "jcvi/formats/cblast.pyx":1 + * # cython: language_level=2, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True # <<<<<<<<<<<<<< + * + * """ + */ + __pyx_t_2 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_2) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /*--- Wrapped vars code ---*/ + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + if (__pyx_m) { + if (__pyx_d && stringtab_initialized) { + __Pyx_AddTraceback("init jcvi.formats.cblast", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + #if !CYTHON_USE_MODULE_STATE + Py_CLEAR(__pyx_m); + #else + 
Py_DECREF(__pyx_m); + if (pystate_addmodule_run) { + PyObject *tp, *value, *tb; + PyErr_Fetch(&tp, &value, &tb); + PyState_RemoveModule(&__pyx_moduledef); + PyErr_Restore(tp, value, tb); + } + #endif + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init jcvi.formats.cblast"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if CYTHON_PEP489_MULTI_PHASE_INIT + return (__pyx_m != NULL) ? 0 : -1; + #elif PY_MAJOR_VERSION >= 3 + return __pyx_m; + #else + return; + #endif +} +/* #### Code section: cleanup_globals ### */ +/* #### Code section: cleanup_module ### */ +/* #### Code section: main_method ### */ +/* #### Code section: utility_code_pragmas ### */ +#ifdef _MSC_VER +#pragma warning( push ) +/* Warning 4127: conditional expression is constant + * Cython uses constant conditional expressions to allow in inline functions to be optimized at + * compile-time, so this warning is not useful + */ +#pragma warning( disable : 4127 ) +#endif + + + +/* #### Code section: utility_code_def ### */ + +/* --- Runtime support code --- */ +/* Refnanny */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule(modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, "RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif + +/* PyErrExceptionMatches */ +#if CYTHON_FAST_THREAD_STATE +static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030C00A6 + PyObject *current_exception = tstate->current_exception; + if (unlikely(!current_exception)) return 0; + exc_type = (PyObject*) Py_TYPE(current_exception); + if (exc_type == err) return 1; +#else + exc_type = tstate->curexc_type; + if (exc_type == err) return 1; + if 
(unlikely(!exc_type)) return 0; +#endif + #if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(exc_type); + #endif + if (unlikely(PyTuple_Check(err))) { + result = __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); + } else { + result = __Pyx_PyErr_GivenExceptionMatches(exc_type, err); + } + #if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(exc_type); + #endif + return result; +} +#endif + +/* PyErrFetchRestore */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject *tmp_value; + assert(type == NULL || (value != NULL && type == (PyObject*) Py_TYPE(value))); + if (value) { + #if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(((PyBaseExceptionObject*) value)->traceback != tb)) + #endif + PyException_SetTraceback(value, tb); + } + tmp_value = tstate->current_exception; + tstate->current_exception = value; + Py_XDECREF(tmp_value); + Py_XDECREF(type); + Py_XDECREF(tb); +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#endif +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject* exc_value; + exc_value = tstate->current_exception; + tstate->current_exception = 0; + *value = exc_value; + *type = NULL; + *tb = NULL; + if (exc_value) { + *type = (PyObject*) Py_TYPE(exc_value); + Py_INCREF(*type); + #if CYTHON_COMPILING_IN_CPYTHON + *tb = ((PyBaseExceptionObject*) exc_value)->traceback; + Py_XINCREF(*tb); + #else + *tb = PyException_GetTraceback(exc_value); + #endif + } +#else + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb = 
tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +#endif +} +#endif + +/* PyObjectGetAttrStr */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#endif + +/* PyObjectGetAttrStrNoError */ +#if __PYX_LIMITED_VERSION_HEX < 0x030d00A1 +static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError))) + __Pyx_PyErr_Clear(); +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) { + PyObject *result; +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + (void) PyObject_GetOptionalAttr(obj, attr_name, &result); + return result; +#else +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1 + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) { + return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1); + } +#endif + result = __Pyx_PyObject_GetAttrStr(obj, attr_name); + if (unlikely(!result)) { + __Pyx_PyObject_GetAttrStr_ClearAttributeError(); + } + return result; +#endif +} + +/* GetBuiltinName */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStrNoError(__pyx_b, name); + if (unlikely(!result) && !PyErr_Occurred()) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +/* TupleAndListFromArray */ +#if 
CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE void __Pyx_copy_object_array(PyObject *const *CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) { + PyObject *v; + Py_ssize_t i; + for (i = 0; i < length; i++) { + v = dest[i] = src[i]; + Py_INCREF(v); + } +} +static CYTHON_INLINE PyObject * +__Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + Py_INCREF(__pyx_empty_tuple); + return __pyx_empty_tuple; + } + res = PyTuple_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyTupleObject*)res)->ob_item, n); + return res; +} +static CYTHON_INLINE PyObject * +__Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + return PyList_New(0); + } + res = PyList_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyListObject*)res)->ob_item, n); + return res; +} +#endif + +/* BytesEquals */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else + if (s1 == s2) { + return (equals == Py_EQ); + } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { + const char *ps1, *ps2; + Py_ssize_t length = PyBytes_GET_SIZE(s1); + if (length != PyBytes_GET_SIZE(s2)) + return (equals == Py_NE); + ps1 = PyBytes_AS_STRING(s1); + ps2 = PyBytes_AS_STRING(s2); + if (ps1[0] != ps2[0]) { + return (equals == Py_NE); + } else if (length == 1) { + return (equals == Py_EQ); + } else { + int result; +#if CYTHON_USE_UNICODE_INTERNALS && (PY_VERSION_HEX < 0x030B0000) + Py_hash_t hash1, hash2; + hash1 = ((PyBytesObject*)s1)->ob_shash; + hash2 = ((PyBytesObject*)s2)->ob_shash; + if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { + return (equals == Py_NE); + } +#endif + result = memcmp(ps1, ps2, (size_t)length); + return (equals == Py_EQ) ? 
(result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { + return (equals == Py_NE); + } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { + return (equals == Py_NE); + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +#endif +} + +/* UnicodeEquals */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else +#if PY_MAJOR_VERSION < 3 + PyObject* owned_ref = NULL; +#endif + int s1_is_unicode, s2_is_unicode; + if (s1 == s2) { + goto return_eq; + } + s1_is_unicode = PyUnicode_CheckExact(s1); + s2_is_unicode = PyUnicode_CheckExact(s2); +#if PY_MAJOR_VERSION < 3 + if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { + owned_ref = PyUnicode_FromObject(s2); + if (unlikely(!owned_ref)) + return -1; + s2 = owned_ref; + s2_is_unicode = 1; + } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) { + owned_ref = PyUnicode_FromObject(s1); + if (unlikely(!owned_ref)) + return -1; + s1 = owned_ref; + s1_is_unicode = 1; + } else if (((!s2_is_unicode) & (!s1_is_unicode))) { + return __Pyx_PyBytes_Equals(s1, s2, equals); + } +#endif + if (s1_is_unicode & s2_is_unicode) { + Py_ssize_t length; + int kind; + void *data1, *data2; + if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0)) + return -1; + length = __Pyx_PyUnicode_GET_LENGTH(s1); + if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { + goto return_ne; + } +#if CYTHON_USE_UNICODE_INTERNALS + { + Py_hash_t hash1, hash2; + #if CYTHON_PEP393_ENABLED + hash1 = ((PyASCIIObject*)s1)->hash; + hash2 = ((PyASCIIObject*)s2)->hash; + #else + hash1 = ((PyUnicodeObject*)s1)->hash; + hash2 = ((PyUnicodeObject*)s2)->hash; + #endif + if 
(hash1 != hash2 && hash1 != -1 && hash2 != -1) { + goto return_ne; + } + } +#endif + kind = __Pyx_PyUnicode_KIND(s1); + if (kind != __Pyx_PyUnicode_KIND(s2)) { + goto return_ne; + } + data1 = __Pyx_PyUnicode_DATA(s1); + data2 = __Pyx_PyUnicode_DATA(s2); + if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { + goto return_ne; + } else if (length == 1) { + goto return_eq; + } else { + int result = memcmp(data1, data2, (size_t)(length * kind)); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ) ? (result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & s2_is_unicode) { + goto return_ne; + } else if ((s2 == Py_None) & s1_is_unicode) { + goto return_ne; + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +return_eq: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ); +return_ne: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_NE); +#endif +} + +/* fastcall */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s) +{ + Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames); + for (i = 0; i < n; i++) + { + if (s == PyTuple_GET_ITEM(kwnames, i)) return kwvalues[i]; + } + for (i = 0; i < n; i++) + { + int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ); + if (unlikely(eq != 0)) { + if (unlikely(eq < 0)) return NULL; + return kwvalues[i]; + } + } + return NULL; +} +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 +CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues) { + Py_ssize_t i, nkwargs = PyTuple_GET_SIZE(kwnames); + PyObject *dict; + dict = 
PyDict_New(); + if (unlikely(!dict)) + return NULL; + for (i=0; i= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + int kwds_is_tuple = CYTHON_METH_FASTCALL && likely(PyTuple_Check(kwds)); + while (1) { + Py_XDECREF(key); key = NULL; + Py_XDECREF(value); value = NULL; + if (kwds_is_tuple) { + Py_ssize_t size; +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(kwds); +#else + size = PyTuple_Size(kwds); + if (size < 0) goto bad; +#endif + if (pos >= size) break; +#if CYTHON_AVOID_BORROWED_REFS + key = __Pyx_PySequence_ITEM(kwds, pos); + if (!key) goto bad; +#elif CYTHON_ASSUME_SAFE_MACROS + key = PyTuple_GET_ITEM(kwds, pos); +#else + key = PyTuple_GetItem(kwds, pos); + if (!key) goto bad; +#endif + value = kwvalues[pos]; + pos++; + } + else + { + if (!PyDict_Next(kwds, &pos, &key, &value)) break; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + } + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(value); + Py_DECREF(key); +#endif + key = NULL; + value = NULL; + continue; + } +#if !CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + Py_INCREF(value); + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + 
value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = ( + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : + #endif + PyUnicode_Compare(**name, key) + ); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 
1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + Py_XDECREF(key); + Py_XDECREF(value); + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + #if PY_MAJOR_VERSION < 3 + PyErr_Format(PyExc_TypeError, + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + PyErr_Format(PyExc_TypeError, + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + Py_XDECREF(key); + Py_XDECREF(value); + return -1; +} + +/* FixUpExtensionType */ +#if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type) { +#if PY_VERSION_HEX > 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + CYTHON_UNUSED_VAR(spec); + CYTHON_UNUSED_VAR(type); +#else + const PyType_Slot *slot = spec->slots; + while (slot && slot->slot && slot->slot != Py_tp_members) + slot++; + if (slot && slot->slot == Py_tp_members) { + int changed = 0; +#if !(PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON) + const +#endif + PyMemberDef *memb = (PyMemberDef*) slot->pfunc; + while (memb && memb->name) { + if (memb->name[0] == '_' && memb->name[1] == '_') { +#if PY_VERSION_HEX < 0x030900b1 + if (strcmp(memb->name, "__weaklistoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_weaklistoffset = memb->offset; + changed = 1; + } + else if (strcmp(memb->name, "__dictoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_dictoffset = memb->offset; + 
changed = 1; + } +#if CYTHON_METH_FASTCALL + else if (strcmp(memb->name, "__vectorcalloffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); +#if PY_VERSION_HEX >= 0x030800b4 + type->tp_vectorcall_offset = memb->offset; +#else + type->tp_print = (printfunc) memb->offset; +#endif + changed = 1; + } +#endif +#else + if ((0)); +#endif +#if PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON + else if (strcmp(memb->name, "__module__") == 0) { + PyObject *descr; + assert(memb->type == T_OBJECT); + assert(memb->flags == 0 || memb->flags == READONLY); + descr = PyDescr_NewMember(type, memb); + if (unlikely(!descr)) + return -1; + if (unlikely(PyDict_SetItem(type->tp_dict, PyDescr_NAME(descr), descr) < 0)) { + Py_DECREF(descr); + return -1; + } + Py_DECREF(descr); + changed = 1; + } +#endif + } + memb++; + } + if (changed) + PyType_Modified(type); + } +#endif + return 0; +} +#endif + +/* FetchSharedCythonModule */ +static PyObject *__Pyx_FetchSharedCythonABIModule(void) { + return __Pyx_PyImport_AddModuleRef((char*) __PYX_ABI_MODULE_NAME); +} + +/* FetchCommonType */ +static int __Pyx_VerifyCachedType(PyObject *cached_type, + const char *name, + Py_ssize_t basicsize, + Py_ssize_t expected_basicsize) { + if (!PyType_Check(cached_type)) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s is not a type object", name); + return -1; + } + if (basicsize != expected_basicsize) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s has the wrong size, try recompiling", + name); + return -1; + } + return 0; +} +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) { + PyObject* abi_module; + const char* object_name; + PyTypeObject *cached_type = NULL; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + object_name = strrchr(type->tp_name, '.'); + object_name = object_name ? 
object_name+1 : type->tp_name; + cached_type = (PyTypeObject*) PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + if (__Pyx_VerifyCachedType( + (PyObject *)cached_type, + object_name, + cached_type->tp_basicsize, + type->tp_basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + if (PyType_Ready(type) < 0) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, (PyObject *)type) < 0) + goto bad; + Py_INCREF(type); + cached_type = type; +done: + Py_DECREF(abi_module); + return cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#else +static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) { + PyObject *abi_module, *cached_type = NULL; + const char* object_name = strrchr(spec->name, '.'); + object_name = object_name ? object_name+1 : spec->name; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + cached_type = PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + Py_ssize_t basicsize; +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *py_basicsize; + py_basicsize = PyObject_GetAttrString(cached_type, "__basicsize__"); + if (unlikely(!py_basicsize)) goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (unlikely(basicsize == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; +#else + basicsize = likely(PyType_Check(cached_type)) ? 
((PyTypeObject*) cached_type)->tp_basicsize : -1; +#endif + if (__Pyx_VerifyCachedType( + cached_type, + object_name, + basicsize, + spec->basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + CYTHON_UNUSED_VAR(module); + cached_type = __Pyx_PyType_FromModuleAndSpec(abi_module, spec, bases); + if (unlikely(!cached_type)) goto bad; + if (unlikely(__Pyx_fix_up_extension_type_from_spec(spec, (PyTypeObject *) cached_type) < 0)) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, cached_type) < 0) goto bad; +done: + Py_DECREF(abi_module); + assert(cached_type == NULL || PyType_Check(cached_type)); + return (PyTypeObject *) cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#endif + +/* PyVectorcallFastCallDict */ +#if CYTHON_METH_FASTCALL +static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + PyObject *res = NULL; + PyObject *kwnames; + PyObject **newargs; + PyObject **kwvalues; + Py_ssize_t i, pos; + size_t j; + PyObject *key, *value; + unsigned long keys_are_strings; + Py_ssize_t nkw = PyDict_GET_SIZE(kw); + newargs = (PyObject **)PyMem_Malloc((nargs + (size_t)nkw) * sizeof(args[0])); + if (unlikely(newargs == NULL)) { + PyErr_NoMemory(); + return NULL; + } + for (j = 0; j < nargs; j++) newargs[j] = args[j]; + kwnames = PyTuple_New(nkw); + if (unlikely(kwnames == NULL)) { + PyMem_Free(newargs); + return NULL; + } + kwvalues = newargs + nargs; + pos = i = 0; + keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; + while (PyDict_Next(kw, &pos, &key, &value)) { + keys_are_strings &= Py_TYPE(key)->tp_flags; + Py_INCREF(key); + Py_INCREF(value); + PyTuple_SET_ITEM(kwnames, i, key); + kwvalues[i] = value; + i++; + } + if (unlikely(!keys_are_strings)) { + PyErr_SetString(PyExc_TypeError, "keywords must be strings"); + goto cleanup; + } + res = vc(func, newargs, 
nargs, kwnames); +cleanup: + Py_DECREF(kwnames); + for (i = 0; i < nkw; i++) + Py_DECREF(kwvalues[i]); + PyMem_Free(newargs); + return res; +} +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + if (likely(kw == NULL) || PyDict_GET_SIZE(kw) == 0) { + return vc(func, args, nargs, NULL); + } + return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw); +} +#endif + +/* CythonFunctionShared */ +#if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + if (__Pyx_CyFunction_Check(func)) { + return PyCFunction_GetFunction(((__pyx_CyFunctionObject*)func)->func) == (PyCFunction) cfunc; + } else if (PyCFunction_Check(func)) { + return PyCFunction_GetFunction(func) == (PyCFunction) cfunc; + } + return 0; +} +#else +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + return __Pyx_CyOrPyCFunction_Check(func) && __Pyx_CyOrPyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +} +#endif +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj) { +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + __Pyx_Py_XDECREF_SET( + __Pyx_CyFunction_GetClassObj(f), + ((classobj) ? __Pyx_NewRef(classobj) : NULL)); +#else + __Pyx_Py_XDECREF_SET( + ((PyCMethodObject *) (f))->mm_class, + (PyTypeObject*)((classobj) ? 
__Pyx_NewRef(classobj) : NULL)); +#endif +} +static PyObject * +__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, void *closure) +{ + CYTHON_UNUSED_VAR(closure); + if (unlikely(op->func_doc == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_doc = PyObject_GetAttrString(op->func, "__doc__"); + if (unlikely(!op->func_doc)) return NULL; +#else + if (((PyCFunctionObject*)op)->m_ml->ml_doc) { +#if PY_MAJOR_VERSION >= 3 + op->func_doc = PyUnicode_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#else + op->func_doc = PyString_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#endif + if (unlikely(op->func_doc == NULL)) + return NULL; + } else { + Py_INCREF(Py_None); + return Py_None; + } +#endif + } + Py_INCREF(op->func_doc); + return op->func_doc; +} +static int +__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (value == NULL) { + value = Py_None; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_doc, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_name == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_name = PyObject_GetAttrString(op->func, "__name__"); +#elif PY_MAJOR_VERSION >= 3 + op->func_name = PyUnicode_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#else + op->func_name = PyString_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#endif + if (unlikely(op->func_name == NULL)) + return NULL; + } + Py_INCREF(op->func_name); + return op->func_name; +} +static int +__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__name__ must be set to a string object"); 
+ return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_name, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_qualname); + return op->func_qualname; +} +static int +__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__qualname__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_qualname, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_dict == NULL)) { + op->func_dict = PyDict_New(); + if (unlikely(op->func_dict == NULL)) + return NULL; + } + Py_INCREF(op->func_dict); + return op->func_dict; +} +static int +__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(value == NULL)) { + PyErr_SetString(PyExc_TypeError, + "function's dictionary may not be deleted"); + return -1; + } + if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "setting function's dictionary to a non-dict"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_dict, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_globals); + return op->func_globals; +} +static PyObject * +__Pyx_CyFunction_get_closure(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(op); + CYTHON_UNUSED_VAR(context); + Py_INCREF(Py_None); + return Py_None; +} +static PyObject * 
+__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, void *context) +{ + PyObject* result = (op->func_code) ? op->func_code : Py_None; + CYTHON_UNUSED_VAR(context); + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) { + int result = 0; + PyObject *res = op->defaults_getter((PyObject *) op); + if (unlikely(!res)) + return -1; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + op->defaults_tuple = PyTuple_GET_ITEM(res, 0); + Py_INCREF(op->defaults_tuple); + op->defaults_kwdict = PyTuple_GET_ITEM(res, 1); + Py_INCREF(op->defaults_kwdict); + #else + op->defaults_tuple = __Pyx_PySequence_ITEM(res, 0); + if (unlikely(!op->defaults_tuple)) result = -1; + else { + op->defaults_kwdict = __Pyx_PySequence_ITEM(res, 1); + if (unlikely(!op->defaults_kwdict)) result = -1; + } + #endif + Py_DECREF(res); + return result; +} +static int +__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyTuple_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__defaults__ must be set to a tuple object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__defaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_tuple, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_tuple; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_tuple; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + 
CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__kwdefaults__ must be set to a dict object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__kwdefaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_kwdict, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_kwdict; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_kwdict; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value || value == Py_None) { + value = NULL; + } else if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__annotations__ must be set to a dict object"); + return -1; + } + Py_XINCREF(value); + __Pyx_Py_XDECREF_SET(op->func_annotations, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->func_annotations; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + result = PyDict_New(); + if (unlikely(!result)) return NULL; + op->func_annotations = result; + } + Py_INCREF(result); + return result; +} +static PyObject * +__Pyx_CyFunction_get_is_coroutine(__pyx_CyFunctionObject *op, void *context) { + int is_coroutine; + CYTHON_UNUSED_VAR(context); + if (op->func_is_coroutine) { + return __Pyx_NewRef(op->func_is_coroutine); + } + is_coroutine = op->flags & __Pyx_CYFUNCTION_COROUTINE; +#if PY_VERSION_HEX >= 0x03050000 + if (is_coroutine) { + PyObject 
*module, *fromlist, *marker = __pyx_n_s_is_coroutine; + fromlist = PyList_New(1); + if (unlikely(!fromlist)) return NULL; + Py_INCREF(marker); +#if CYTHON_ASSUME_SAFE_MACROS + PyList_SET_ITEM(fromlist, 0, marker); +#else + if (unlikely(PyList_SetItem(fromlist, 0, marker) < 0)) { + Py_DECREF(marker); + Py_DECREF(fromlist); + return NULL; + } +#endif + module = PyImport_ImportModuleLevelObject(__pyx_n_s_asyncio_coroutines, NULL, NULL, fromlist, 0); + Py_DECREF(fromlist); + if (unlikely(!module)) goto ignore; + op->func_is_coroutine = __Pyx_PyObject_GetAttrStr(module, marker); + Py_DECREF(module); + if (likely(op->func_is_coroutine)) { + return __Pyx_NewRef(op->func_is_coroutine); + } +ignore: + PyErr_Clear(); + } +#endif + op->func_is_coroutine = __Pyx_PyBool_FromLong(is_coroutine); + return __Pyx_NewRef(op->func_is_coroutine); +} +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject * +__Pyx_CyFunction_get_module(__pyx_CyFunctionObject *op, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_GetAttrString(op->func, "__module__"); +} +static int +__Pyx_CyFunction_set_module(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_SetAttrString(op->func, "__module__", value); +} +#endif +static PyGetSetDef __pyx_CyFunction_getsets[] = { + {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0}, + {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, 
(setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0}, + {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0}, + {(char *) "_is_coroutine", (getter)__Pyx_CyFunction_get_is_coroutine, 0, 0, 0}, +#if CYTHON_COMPILING_IN_LIMITED_API + {"__module__", (getter)__Pyx_CyFunction_get_module, (setter)__Pyx_CyFunction_set_module, 0, 0}, +#endif + {0, 0, 0, 0, 0} +}; +static PyMemberDef __pyx_CyFunction_members[] = { +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0}, +#endif +#if CYTHON_USE_TYPE_SPECS + {(char *) "__dictoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_dict), READONLY, 0}, +#if CYTHON_METH_FASTCALL +#if CYTHON_BACKPORT_VECTORCALL + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_vectorcall), READONLY, 0}, +#else +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0}, +#endif +#endif +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, 
func_weakreflist), READONLY, 0}, +#else + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0}, +#endif +#endif + {0, 0, 0, 0, 0} +}; +static PyObject * +__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, PyObject *args) +{ + CYTHON_UNUSED_VAR(args); +#if PY_MAJOR_VERSION >= 3 + Py_INCREF(m->func_qualname); + return m->func_qualname; +#else + return PyString_FromString(((PyCFunctionObject*)m)->m_ml->ml_name); +#endif +} +static PyMethodDef __pyx_CyFunction_methods[] = { + {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0}, + {0, 0, 0, 0} +}; +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist) +#else +#define __Pyx_CyFunction_weakreflist(cyfunc) (((PyCFunctionObject*)cyfunc)->m_weakreflist) +#endif +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { +#if !CYTHON_COMPILING_IN_LIMITED_API + PyCFunctionObject *cf = (PyCFunctionObject*) op; +#endif + if (unlikely(op == NULL)) + return NULL; +#if CYTHON_COMPILING_IN_LIMITED_API + op->func = PyCFunction_NewEx(ml, (PyObject*)op, module); + if (unlikely(!op->func)) return NULL; +#endif + op->flags = flags; + __Pyx_CyFunction_weakreflist(op) = NULL; +#if !CYTHON_COMPILING_IN_LIMITED_API + cf->m_ml = ml; + cf->m_self = (PyObject *) op; +#endif + Py_XINCREF(closure); + op->func_closure = closure; +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_XINCREF(module); + cf->m_module = module; +#endif + op->func_dict = NULL; + op->func_name = NULL; + Py_INCREF(qualname); + op->func_qualname = qualname; + op->func_doc = NULL; +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + op->func_classobj = NULL; +#else + ((PyCMethodObject*)op)->mm_class = NULL; +#endif + op->func_globals = globals; + Py_INCREF(op->func_globals); + 
Py_XINCREF(code); + op->func_code = code; + op->defaults_pyobjects = 0; + op->defaults_size = 0; + op->defaults = NULL; + op->defaults_tuple = NULL; + op->defaults_kwdict = NULL; + op->defaults_getter = NULL; + op->func_annotations = NULL; + op->func_is_coroutine = NULL; +#if CYTHON_METH_FASTCALL + switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { + case METH_NOARGS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS; + break; + case METH_O: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O; + break; + case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD; + break; + case METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS; + break; + case METH_VARARGS | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = NULL; + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + Py_DECREF(op); + return NULL; + } +#endif + return (PyObject *) op; +} +static int +__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m) +{ + Py_CLEAR(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_CLEAR(m->func); +#else + Py_CLEAR(((PyCFunctionObject*)m)->m_module); +#endif + Py_CLEAR(m->func_dict); + Py_CLEAR(m->func_name); + Py_CLEAR(m->func_qualname); + Py_CLEAR(m->func_doc); + Py_CLEAR(m->func_globals); + Py_CLEAR(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API +#if PY_VERSION_HEX < 0x030900B1 + Py_CLEAR(__Pyx_CyFunction_GetClassObj(m)); +#else + { + PyObject *cls = (PyObject*) ((PyCMethodObject *) (m))->mm_class; + ((PyCMethodObject *) (m))->mm_class = NULL; + Py_XDECREF(cls); + } +#endif +#endif + Py_CLEAR(m->defaults_tuple); + Py_CLEAR(m->defaults_kwdict); + Py_CLEAR(m->func_annotations); + Py_CLEAR(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = 
__Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_XDECREF(pydefaults[i]); + PyObject_Free(m->defaults); + m->defaults = NULL; + } + return 0; +} +static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + if (__Pyx_CyFunction_weakreflist(m) != NULL) + PyObject_ClearWeakRefs((PyObject *) m); + __Pyx_CyFunction_clear(m); + __Pyx_PyHeapTypeObject_GC_Del(m); +} +static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + PyObject_GC_UnTrack(m); + __Pyx__CyFunction_dealloc(m); +} +static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg) +{ + Py_VISIT(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(m->func); +#else + Py_VISIT(((PyCFunctionObject*)m)->m_module); +#endif + Py_VISIT(m->func_dict); + Py_VISIT(m->func_name); + Py_VISIT(m->func_qualname); + Py_VISIT(m->func_doc); + Py_VISIT(m->func_globals); + Py_VISIT(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(__Pyx_CyFunction_GetClassObj(m)); +#endif + Py_VISIT(m->defaults_tuple); + Py_VISIT(m->defaults_kwdict); + Py_VISIT(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_VISIT(pydefaults[i]); + } + return 0; +} +static PyObject* +__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op) +{ +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_FromFormat("", + op->func_qualname, (void *)op); +#else + return PyString_FromFormat("", + PyString_AsString(op->func_qualname), (void *)op); +#endif +} +static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *f = ((__pyx_CyFunctionObject*)func)->func; + PyObject *py_name = NULL; + PyCFunction meth; + int flags; + meth = PyCFunction_GetFunction(f); + if (unlikely(!meth)) return NULL; + flags = PyCFunction_GetFlags(f); + if (unlikely(flags 
< 0)) return NULL; +#else + PyCFunctionObject* f = (PyCFunctionObject*)func; + PyCFunction meth = f->m_ml->ml_meth; + int flags = f->m_ml->ml_flags; +#endif + Py_ssize_t size; + switch (flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) { + case METH_VARARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) + return (*meth)(self, arg); + break; + case METH_VARARGS | METH_KEYWORDS: + return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw); + case METH_NOARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 0)) + return (*meth)(self, NULL); +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + case METH_O: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 1)) { + PyObject *result, *arg0; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + arg0 = PyTuple_GET_ITEM(arg, 0); + #else + arg0 = __Pyx_PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL; + #endif + result = (*meth)(self, arg0); + #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(arg0); + #endif + return result; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes exactly one 
argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + return NULL; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, "%.200S() takes no keyword arguments", + py_name); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", + f->m_ml->ml_name); +#endif + return NULL; +} +static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *self, *result; +#if CYTHON_COMPILING_IN_LIMITED_API + self = PyCFunction_GetSelf(((__pyx_CyFunctionObject*)func)->func); + if (unlikely(!self) && PyErr_Occurred()) return NULL; +#else + self = ((PyCFunctionObject*)func)->m_self; +#endif + result = __Pyx_CyFunction_CallMethod(func, self, arg, kw); + return result; +} +static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) { + PyObject *result; + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func; +#if CYTHON_METH_FASTCALL + __pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc); + if (vc) { +#if CYTHON_ASSUME_SAFE_MACROS + return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), (size_t)PyTuple_GET_SIZE(args), kw); +#else + (void) &__Pyx_PyVectorcall_FastCallDict; + return PyVectorcall_Call(func, args, kw); +#endif + } +#endif + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + Py_ssize_t argc; + PyObject *new_args; + PyObject *self; +#if CYTHON_ASSUME_SAFE_MACROS + argc = PyTuple_GET_SIZE(args); +#else + argc = PyTuple_Size(args); + if 
(unlikely(!argc) < 0) return NULL; +#endif + new_args = PyTuple_GetSlice(args, 1, argc); + if (unlikely(!new_args)) + return NULL; + self = PyTuple_GetItem(args, 0); + if (unlikely(!self)) { + Py_DECREF(new_args); +#if PY_MAJOR_VERSION > 2 + PyErr_Format(PyExc_TypeError, + "unbound method %.200S() needs an argument", + cyfunc->func_qualname); +#else + PyErr_SetString(PyExc_TypeError, + "unbound method needs an argument"); +#endif + return NULL; + } + result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw); + Py_DECREF(new_args); + } else { + result = __Pyx_CyFunction_Call(func, args, kw); + } + return result; +} +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames) +{ + int ret = 0; + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + if (unlikely(nargs < 1)) { + PyErr_Format(PyExc_TypeError, "%.200s() needs an argument", + ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + ret = 1; + } + if (unlikely(kwnames) && unlikely(PyTuple_GET_SIZE(kwnames))) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes no keyword arguments", ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + return ret; +} +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 0)) { + 
PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, nargs); + return NULL; + } + return def->ml_meth(self, NULL); +} +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 1)) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, nargs); + return NULL; + } + return def->ml_meth(self, args[0]); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = 
(__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; + PyTypeObject *cls = (PyTypeObject *) __Pyx_CyFunction_GetClassObj(cyfunc); +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCMethod)(void(*)(void))def->ml_meth)(self, cls, args, (size_t)nargs, kwnames); +} +#endif +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_CyFunctionType_slots[] = { + {Py_tp_dealloc, (void *)__Pyx_CyFunction_dealloc}, + {Py_tp_repr, (void *)__Pyx_CyFunction_repr}, + {Py_tp_call, (void *)__Pyx_CyFunction_CallAsMethod}, + {Py_tp_traverse, (void *)__Pyx_CyFunction_traverse}, + {Py_tp_clear, (void *)__Pyx_CyFunction_clear}, + {Py_tp_methods, (void *)__pyx_CyFunction_methods}, + {Py_tp_members, (void *)__pyx_CyFunction_members}, + {Py_tp_getset, (void *)__pyx_CyFunction_getsets}, + {Py_tp_descr_get, (void *)__Pyx_PyMethod_New}, + {0, 0}, +}; +static PyType_Spec __pyx_CyFunctionType_spec = { + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if (defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL) + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + __pyx_CyFunctionType_slots +}; +#else +static PyTypeObject __pyx_CyFunctionType_type = { + PyVarObject_HEAD_INIT(0, 0) + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, + (destructor) __Pyx_CyFunction_dealloc, +#if !CYTHON_METH_FASTCALL + 0, +#elif CYTHON_BACKPORT_VECTORCALL + (printfunc)offsetof(__pyx_CyFunctionObject, 
func_vectorcall), +#else + offsetof(PyCFunctionObject, vectorcall), +#endif + 0, + 0, +#if PY_MAJOR_VERSION < 3 + 0, +#else + 0, +#endif + (reprfunc) __Pyx_CyFunction_repr, + 0, + 0, + 0, + 0, + __Pyx_CyFunction_CallAsMethod, + 0, + 0, + 0, + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + 0, + (traverseproc) __Pyx_CyFunction_traverse, + (inquiry) __Pyx_CyFunction_clear, + 0, +#if PY_VERSION_HEX < 0x030500A0 + offsetof(__pyx_CyFunctionObject, func_weakreflist), +#else + offsetof(PyCFunctionObject, m_weakreflist), +#endif + 0, + 0, + __pyx_CyFunction_methods, + __pyx_CyFunction_members, + __pyx_CyFunction_getsets, + 0, + 0, + __Pyx_PyMethod_New, + 0, + offsetof(__pyx_CyFunctionObject, func_dict), + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +#if PY_VERSION_HEX >= 0x030400a1 + 0, +#endif +#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, +#endif +#if __PYX_NEED_TP_PRINT_SLOT + 0, +#endif +#if PY_VERSION_HEX >= 0x030C0000 + 0, +#endif +#if PY_VERSION_HEX >= 0x030d00A4 + 0, +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, +#endif +}; +#endif +static int __pyx_CyFunction_init(PyObject *module) { +#if CYTHON_USE_TYPE_SPECS + __pyx_CyFunctionType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_CyFunctionType_spec, NULL); +#else + CYTHON_UNUSED_VAR(module); + __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type); +#endif + if (unlikely(__pyx_CyFunctionType == NULL)) { + return -1; + } + return 0; +} +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults = PyObject_Malloc(size); + if (unlikely(!m->defaults)) 
+ return PyErr_NoMemory(); + memset(m->defaults, 0, size); + m->defaults_pyobjects = pyobjects; + m->defaults_size = size; + return m->defaults; +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_tuple = tuple; + Py_INCREF(tuple); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_kwdict = dict; + Py_INCREF(dict); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->func_annotations = dict; + Py_INCREF(dict); +} + +/* CythonFunction */ +static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { + PyObject *op = __Pyx_CyFunction_Init( + PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType), + ml, flags, qualname, closure, module, globals, code + ); + if (likely(op)) { + PyObject_GC_Track(op); + } + return op; +} + +/* GetTopmostException */ +#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE +static _PyErr_StackItem * +__Pyx_PyErr_GetTopmostException(PyThreadState *tstate) +{ + _PyErr_StackItem *exc_info = tstate->exc_info; + while ((exc_info->exc_value == NULL || exc_info->exc_value == Py_None) && + exc_info->previous_item != NULL) + { + exc_info = exc_info->previous_item; + } + return exc_info; +} +#endif + +/* SaveResetException */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + PyObject *exc_value = exc_info->exc_value; + if (exc_value == NULL || 
exc_value == Py_None) { + *value = NULL; + *type = NULL; + *tb = NULL; + } else { + *value = exc_value; + Py_INCREF(*value); + *type = (PyObject*) Py_TYPE(exc_value); + Py_INCREF(*type); + *tb = PyException_GetTraceback(exc_value); + } + #elif CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + *type = exc_info->exc_type; + *value = exc_info->exc_value; + *tb = exc_info->exc_traceback; + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); + #else + *type = tstate->exc_type; + *value = tstate->exc_value; + *tb = tstate->exc_traceback; + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); + #endif +} +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = tstate->exc_info; + PyObject *tmp_value = exc_info->exc_value; + exc_info->exc_value = value; + Py_XDECREF(tmp_value); + Py_XDECREF(type); + Py_XDECREF(tb); + #else + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = type; + exc_info->exc_value = value; + exc_info->exc_traceback = tb; + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = type; + tstate->exc_value = value; + tstate->exc_traceback = tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); + #endif +} +#endif + +/* FastTypeChecks */ +#if CYTHON_COMPILING_IN_CPYTHON +static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { + while (a) { + a = __Pyx_PyType_GetSlot(a, tp_base, PyTypeObject*); + if (a == b) + return 1; + } + return b == &PyBaseObject_Type; +} +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { + 
PyObject *mro; + if (a == b) return 1; + mro = a->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(a, b); +} +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (cls == a || cls == b) return 1; + mro = cls->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + PyObject *base = PyTuple_GET_ITEM(mro, i); + if (base == (PyObject *)a || base == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(cls, a) || __Pyx_InBases(cls, b); +} +#if PY_MAJOR_VERSION == 2 +static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { + PyObject *exception, *value, *tb; + int res; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&exception, &value, &tb); + res = exc_type1 ? 
PyObject_IsSubclass(err, exc_type1) : 0; + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + if (!res) { + res = PyObject_IsSubclass(err, exc_type2); + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + } + __Pyx_ErrRestore(exception, value, tb); + return res; +} +#else +static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { + if (exc_type1) { + return __Pyx_IsAnySubtype2((PyTypeObject*)err, (PyTypeObject*)exc_type1, (PyTypeObject*)exc_type2); + } else { + return __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); + } +} +#endif +static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + assert(PyExceptionClass_Check(exc_type)); + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030C00A6 + PyException_SetTraceback(value, tb); + #elif CYTHON_FAST_THREAD_STATE + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject* tmp_tb = tstate->curexc_traceback; + if (tb != tmp_tb) { + Py_INCREF(tb); + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_tb); + } +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#endif + } +bad: + Py_XDECREF(owned_instance); + return; +} +#endif + +/* PyObjectCall */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = Py_TYPE(func)->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = (*call)(func, arg, kw); + Py_LeaveRecursiveCall(); + if 
(unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* UnpackUnboundCMethod */ +static PyObject *__Pyx_SelflessCall(PyObject *method, PyObject *args, PyObject *kwargs) { + PyObject *result; + PyObject *selfless_args = PyTuple_GetSlice(args, 1, PyTuple_Size(args)); + if (unlikely(!selfless_args)) return NULL; + result = PyObject_Call(method, selfless_args, kwargs); + Py_DECREF(selfless_args); + return result; +} +static PyMethodDef __Pyx_UnboundCMethod_Def = { + "CythonUnboundCMethod", + __PYX_REINTERPRET_FUNCION(PyCFunction, __Pyx_SelflessCall), + METH_VARARGS | METH_KEYWORDS, + NULL +}; +static int __Pyx_TryUnpackUnboundCMethod(__Pyx_CachedCFunction* target) { + PyObject *method; + method = __Pyx_PyObject_GetAttrStr(target->type, *target->method_name); + if (unlikely(!method)) + return -1; + target->method = method; +#if CYTHON_COMPILING_IN_CPYTHON + #if PY_MAJOR_VERSION >= 3 + if (likely(__Pyx_TypeCheck(method, &PyMethodDescr_Type))) + #else + if (likely(!__Pyx_CyOrPyCFunction_Check(method))) + #endif + { + PyMethodDescrObject *descr = (PyMethodDescrObject*) method; + target->func = descr->d_method->ml_meth; + target->flag = descr->d_method->ml_flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_STACKLESS); + } else +#endif +#if CYTHON_COMPILING_IN_PYPY +#else + if (PyCFunction_Check(method)) +#endif + { + PyObject *self; + int self_found; +#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY + self = PyObject_GetAttrString(method, "__self__"); + if (!self) { + PyErr_Clear(); + } +#else + self = PyCFunction_GET_SELF(method); +#endif + self_found = (self && self != Py_None); +#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY + Py_XDECREF(self); +#endif + if (self_found) { + PyObject *unbound_method = PyCFunction_New(&__Pyx_UnboundCMethod_Def, method); + if (unlikely(!unbound_method)) return -1; + 
Py_DECREF(method); + target->method = unbound_method; + } + } + return 0; +} + +/* CallUnboundCMethod1 */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg) { + if (likely(cfunc->func)) { + int flag = cfunc->flag; + if (flag == METH_O) { + return (*(cfunc->func))(self, arg); + } else if ((PY_VERSION_HEX >= 0x030600B1) && flag == METH_FASTCALL) { + #if PY_VERSION_HEX >= 0x030700A0 + return (*(__Pyx_PyCFunctionFast)(void*)(PyCFunction)cfunc->func)(self, &arg, 1); + #else + return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL); + #endif + } else if ((PY_VERSION_HEX >= 0x030700A0) && flag == (METH_FASTCALL | METH_KEYWORDS)) { + return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL); + } + } + return __Pyx__CallUnboundCMethod1(cfunc, self, arg); +} +#endif +static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg){ + PyObject *args, *result = NULL; + if (unlikely(!cfunc->func && !cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL; +#if CYTHON_COMPILING_IN_CPYTHON + if (cfunc->func && (cfunc->flag & METH_VARARGS)) { + args = PyTuple_New(1); + if (unlikely(!args)) goto bad; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + if (cfunc->flag & METH_KEYWORDS) + result = (*(PyCFunctionWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, NULL); + else + result = (*cfunc->func)(self, args); + } else { + args = PyTuple_New(2); + if (unlikely(!args)) goto bad; + Py_INCREF(self); + PyTuple_SET_ITEM(args, 0, self); + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 1, arg); + result = __Pyx_PyObject_Call(cfunc->method, args, NULL); + } +#else + args = PyTuple_Pack(2, self, arg); + if (unlikely(!args)) goto bad; + result = __Pyx_PyObject_Call(cfunc->method, args, NULL); +#endif +bad: + Py_XDECREF(args); + return result; 
+} + +/* RaiseUnexpectedTypeError */ +static int +__Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj) +{ + __Pyx_TypeName obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, "Expected %s, got " __Pyx_FMT_TYPENAME, + expected, obj_type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return 0; +} + +/* decode_c_bytes */ +static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes( + const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop, + const char* encoding, const char* errors, + PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) { + if (unlikely((start < 0) | (stop < 0))) { + if (start < 0) { + start += length; + if (start < 0) + start = 0; + } + if (stop < 0) + stop += length; + } + if (stop > length) + stop = length; + if (unlikely(stop <= start)) + return __Pyx_NewRef(__pyx_empty_unicode); + length = stop - start; + cstring += start; + if (decode_func) { + return decode_func(cstring, length, errors); + } else { + return PyUnicode_Decode(cstring, length, encoding, errors); + } +} + +/* ArgTypeTest */ +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) +{ + __Pyx_TypeName type_name; + __Pyx_TypeName obj_type_name; + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + else if (exact) { + #if PY_MAJOR_VERSION == 2 + if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(__Pyx_TypeCheck(obj, type))) return 1; + } + type_name = __Pyx_PyType_GetName(type); + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected " __Pyx_FMT_TYPENAME + ", got " __Pyx_FMT_TYPENAME ")", name, type_name, obj_type_name); + __Pyx_DECREF_TypeName(type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return 0; +} + +/* PyFunctionFastCall */ +#if CYTHON_FAST_PYCALL && 
!CYTHON_VECTORCALL +static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, + PyObject *globals) { + PyFrameObject *f; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject **fastlocals; + Py_ssize_t i; + PyObject *result; + assert(globals != NULL); + /* XXX Perhaps we should create a specialized + PyFrame_New() that doesn't take locals, but does + take builtins without sanity checking them. + */ + assert(tstate != NULL); + f = PyFrame_New(tstate, co, globals, NULL); + if (f == NULL) { + return NULL; + } + fastlocals = __Pyx_PyFrame_GetLocalsplus(f); + for (i = 0; i < na; i++) { + Py_INCREF(*args); + fastlocals[i] = *args++; + } + result = PyEval_EvalFrameEx(f,0); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return result; +} +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + PyObject *closure; +#if PY_MAJOR_VERSION >= 3 + PyObject *kwdefs; +#endif + PyObject *kwtuple, **k; + PyObject **d; + Py_ssize_t nd; + Py_ssize_t nk; + PyObject *result; + assert(kwargs == NULL || PyDict_Check(kwargs)); + nk = kwargs ? 
PyDict_Size(kwargs) : 0; + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) { + return NULL; + } + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) { + return NULL; + } + #endif + if ( +#if PY_MAJOR_VERSION >= 3 + co->co_kwonlyargcount == 0 && +#endif + likely(kwargs == NULL || nk == 0) && + co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { + if (argdefs == NULL && co->co_argcount == nargs) { + result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); + goto done; + } + else if (nargs == 0 && argdefs != NULL + && co->co_argcount == Py_SIZE(argdefs)) { + /* function called with no arguments, but all parameters have + a default value: use default values as arguments .*/ + args = &PyTuple_GET_ITEM(argdefs, 0); + result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); + goto done; + } + } + if (kwargs != NULL) { + Py_ssize_t pos, i; + kwtuple = PyTuple_New(2 * nk); + if (kwtuple == NULL) { + result = NULL; + goto done; + } + k = &PyTuple_GET_ITEM(kwtuple, 0); + pos = i = 0; + while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { + Py_INCREF(k[i]); + Py_INCREF(k[i+1]); + i += 2; + } + nk = i / 2; + } + else { + kwtuple = NULL; + k = NULL; + } + closure = PyFunction_GET_CLOSURE(func); +#if PY_MAJOR_VERSION >= 3 + kwdefs = PyFunction_GET_KW_DEFAULTS(func); +#endif + if (argdefs != NULL) { + d = &PyTuple_GET_ITEM(argdefs, 0); + nd = Py_SIZE(argdefs); + } + else { + d = NULL; + nd = 0; + } +#if PY_MAJOR_VERSION >= 3 + result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, kwdefs, closure); +#else + result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, closure); +#endif + Py_XDECREF(kwtuple); +done: + Py_LeaveRecursiveCall(); + return result; +} +#endif + +/* PyObjectCallMethO */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE 
PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = __Pyx_CyOrPyCFunction_GET_FUNCTION(func); + self = __Pyx_CyOrPyCFunction_GET_SELF(func); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectFastCall */ +#if PY_VERSION_HEX < 0x03090000 || CYTHON_COMPILING_IN_LIMITED_API +static PyObject* __Pyx_PyObject_FastCall_fallback(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs) { + PyObject *argstuple; + PyObject *result = 0; + size_t i; + argstuple = PyTuple_New((Py_ssize_t)nargs); + if (unlikely(!argstuple)) return NULL; + for (i = 0; i < nargs; i++) { + Py_INCREF(args[i]); + if (__Pyx_PyTuple_SET_ITEM(argstuple, (Py_ssize_t)i, args[i]) < 0) goto bad; + } + result = __Pyx_PyObject_Call(func, argstuple, kwargs); + bad: + Py_DECREF(argstuple); + return result; +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t _nargs, PyObject *kwargs) { + Py_ssize_t nargs = __Pyx_PyVectorcall_NARGS(_nargs); +#if CYTHON_COMPILING_IN_CPYTHON + if (nargs == 0 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_NOARGS)) + return __Pyx_PyObject_CallMethO(func, NULL); + } + else if (nargs == 1 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_O)) + return __Pyx_PyObject_CallMethO(func, args[0]); + } +#endif + #if PY_VERSION_HEX < 0x030800B1 + #if CYTHON_FAST_PYCCALL + if (PyCFunction_Check(func)) { + 
if (kwargs) { + return _PyCFunction_FastCallDict(func, args, nargs, kwargs); + } else { + return _PyCFunction_FastCallKeywords(func, args, nargs, NULL); + } + } + #if PY_VERSION_HEX >= 0x030700A1 + if (!kwargs && __Pyx_IS_TYPE(func, &PyMethodDescr_Type)) { + return _PyMethodDescr_FastCallKeywords(func, args, nargs, NULL); + } + #endif + #endif + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs); + } + #endif + #endif + if (kwargs == NULL) { + #if CYTHON_VECTORCALL + #if PY_VERSION_HEX < 0x03090000 + vectorcallfunc f = _PyVectorcall_Function(func); + #else + vectorcallfunc f = PyVectorcall_Function(func); + #endif + if (f) { + return f(func, args, (size_t)nargs, NULL); + } + #elif defined(__Pyx_CyFunction_USED) && CYTHON_BACKPORT_VECTORCALL + if (__Pyx_CyFunction_CheckExact(func)) { + __pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func); + if (f) return f(func, args, (size_t)nargs, NULL); + } + #endif + } + if (nargs == 0) { + return __Pyx_PyObject_Call(func, __pyx_empty_tuple, kwargs); + } + #if PY_VERSION_HEX >= 0x03090000 && !CYTHON_COMPILING_IN_LIMITED_API + return PyObject_VectorcallDict(func, args, (size_t)nargs, kwargs); + #else + return __Pyx_PyObject_FastCall_fallback(func, args, (size_t)nargs, kwargs); + #endif +} + +/* PyObjectCallOneArg */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *args[2] = {NULL, arg}; + return __Pyx_PyObject_FastCall(func, args+1, 1 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); +} + +/* SliceObject */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice(PyObject* obj, + Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, + int has_cstart, int has_cstop, int wraparound) { + __Pyx_TypeName obj_type_name; +#if CYTHON_USE_TYPE_SLOTS + PyMappingMethods* mp; +#if PY_MAJOR_VERSION < 3 + PySequenceMethods* ms = Py_TYPE(obj)->tp_as_sequence; + if 
(likely(ms && ms->sq_slice)) { + if (!has_cstart) { + if (_py_start && (*_py_start != Py_None)) { + cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); + if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstart = 0; + } + if (!has_cstop) { + if (_py_stop && (*_py_stop != Py_None)) { + cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); + if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstop = PY_SSIZE_T_MAX; + } + if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { + Py_ssize_t l = ms->sq_length(obj); + if (likely(l >= 0)) { + if (cstop < 0) { + cstop += l; + if (cstop < 0) cstop = 0; + } + if (cstart < 0) { + cstart += l; + if (cstart < 0) cstart = 0; + } + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + goto bad; + PyErr_Clear(); + } + } + return ms->sq_slice(obj, cstart, cstop); + } +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + mp = Py_TYPE(obj)->tp_as_mapping; + if (likely(mp && mp->mp_subscript)) +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + { + PyObject* result; + PyObject *py_slice, *py_start, *py_stop; + if (_py_slice) { + py_slice = *_py_slice; + } else { + PyObject* owned_start = NULL; + PyObject* owned_stop = NULL; + if (_py_start) { + py_start = *_py_start; + } else { + if (has_cstart) { + owned_start = py_start = PyInt_FromSsize_t(cstart); + if (unlikely(!py_start)) goto bad; + } else + py_start = Py_None; + } + if (_py_stop) { + py_stop = *_py_stop; + } else { + if (has_cstop) { + owned_stop = py_stop = PyInt_FromSsize_t(cstop); + if (unlikely(!py_stop)) { + Py_XDECREF(owned_start); + goto bad; + } + } else + py_stop = Py_None; + } + py_slice = PySlice_New(py_start, py_stop, Py_None); + Py_XDECREF(owned_start); + Py_XDECREF(owned_stop); + if (unlikely(!py_slice)) goto bad; + } +#if CYTHON_USE_TYPE_SLOTS + result = mp->mp_subscript(obj, py_slice); +#else + result = PyObject_GetItem(obj, py_slice); +#endif + if (!_py_slice) { + Py_DECREF(py_slice); + } + return 
result; + } + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' object is unsliceable", obj_type_name); + __Pyx_DECREF_TypeName(obj_type_name); +bad: + return NULL; +} + +/* GetAttr */ +static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *o, PyObject *n) { +#if CYTHON_USE_TYPE_SLOTS +#if PY_MAJOR_VERSION >= 3 + if (likely(PyUnicode_Check(n))) +#else + if (likely(PyString_Check(n))) +#endif + return __Pyx_PyObject_GetAttrStr(o, n); +#endif + return PyObject_GetAttr(o, n); +} + +/* SetItemInt */ +static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) { + int r; + if (unlikely(!j)) return -1; + r = PyObject_SetItem(o, j, v); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list, + CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? 
i : i + PyList_GET_SIZE(o)); + if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o)))) { + PyObject* old = PyList_GET_ITEM(o, n); + Py_INCREF(v); + PyList_SET_ITEM(o, n, v); + Py_DECREF(old); + return 1; + } + } else { + PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; + PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; + if (mm && mm->mp_ass_subscript) { + int r; + PyObject *key = PyInt_FromSsize_t(i); + if (unlikely(!key)) return -1; + r = mm->mp_ass_subscript(o, key, v); + Py_DECREF(key); + return r; + } + if (likely(sm && sm->sq_ass_item)) { + if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { + Py_ssize_t l = sm->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + return -1; + PyErr_Clear(); + } + } + return sm->sq_ass_item(o, i, v); + } + } +#else + if (is_list || !PyMapping_Check(o)) + { + return PySequence_SetItem(o, i, v); + } +#endif + return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v); +} + +/* HasAttr */ +static CYTHON_INLINE int __Pyx_HasAttr(PyObject *o, PyObject *n) { + PyObject *r; + if (unlikely(!__Pyx_PyBaseString_Check(n))) { + PyErr_SetString(PyExc_TypeError, + "hasattr(): attribute name must be string"); + return -1; + } + r = __Pyx_GetAttr(o, n); + if (!r) { + PyErr_Clear(); + return 0; + } else { + Py_DECREF(r); + return 1; + } +} + +/* RaiseUnboundLocalError */ +static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname) { + PyErr_Format(PyExc_UnboundLocalError, "local variable '%s' referenced before assignment", varname); +} + +/* SliceObject */ +static CYTHON_INLINE int __Pyx_PyObject_SetSlice(PyObject* obj, PyObject* value, + Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, + int has_cstart, int has_cstop, int wraparound) { + __Pyx_TypeName obj_type_name; +#if CYTHON_USE_TYPE_SLOTS + PyMappingMethods* mp; +#if PY_MAJOR_VERSION < 3 + PySequenceMethods* ms = 
Py_TYPE(obj)->tp_as_sequence; + if (likely(ms && ms->sq_ass_slice)) { + if (!has_cstart) { + if (_py_start && (*_py_start != Py_None)) { + cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); + if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstart = 0; + } + if (!has_cstop) { + if (_py_stop && (*_py_stop != Py_None)) { + cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); + if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstop = PY_SSIZE_T_MAX; + } + if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { + Py_ssize_t l = ms->sq_length(obj); + if (likely(l >= 0)) { + if (cstop < 0) { + cstop += l; + if (cstop < 0) cstop = 0; + } + if (cstart < 0) { + cstart += l; + if (cstart < 0) cstart = 0; + } + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + goto bad; + PyErr_Clear(); + } + } + return ms->sq_ass_slice(obj, cstart, cstop, value); + } +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + mp = Py_TYPE(obj)->tp_as_mapping; + if (likely(mp && mp->mp_ass_subscript)) +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + { + int result; + PyObject *py_slice, *py_start, *py_stop; + if (_py_slice) { + py_slice = *_py_slice; + } else { + PyObject* owned_start = NULL; + PyObject* owned_stop = NULL; + if (_py_start) { + py_start = *_py_start; + } else { + if (has_cstart) { + owned_start = py_start = PyInt_FromSsize_t(cstart); + if (unlikely(!py_start)) goto bad; + } else + py_start = Py_None; + } + if (_py_stop) { + py_stop = *_py_stop; + } else { + if (has_cstop) { + owned_stop = py_stop = PyInt_FromSsize_t(cstop); + if (unlikely(!py_stop)) { + Py_XDECREF(owned_start); + goto bad; + } + } else + py_stop = Py_None; + } + py_slice = PySlice_New(py_start, py_stop, Py_None); + Py_XDECREF(owned_start); + Py_XDECREF(owned_stop); + if (unlikely(!py_slice)) goto bad; + } +#if CYTHON_USE_TYPE_SLOTS + result = mp->mp_ass_subscript(obj, py_slice, value); +#else + result = value ? 
PyObject_SetItem(obj, py_slice, value) : PyObject_DelItem(obj, py_slice); +#endif + if (!_py_slice) { + Py_DECREF(py_slice); + } + return result; + } + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' object does not support slice %.10s", + obj_type_name, value ? "assignment" : "deletion"); + __Pyx_DECREF_TypeName(obj_type_name); +bad: + return -1; +} + +/* PyObjectCall2Args */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2) { + PyObject *args[3] = {NULL, arg1, arg2}; + return __Pyx_PyObject_FastCall(function, args+1, 2 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); +} + +/* PyObjectGetMethod */ +static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) { + PyObject *attr; +#if CYTHON_UNPACK_METHODS && CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_PYTYPE_LOOKUP + __Pyx_TypeName type_name; + PyTypeObject *tp = Py_TYPE(obj); + PyObject *descr; + descrgetfunc f = NULL; + PyObject **dictptr, *dict; + int meth_found = 0; + assert (*method == NULL); + if (unlikely(tp->tp_getattro != PyObject_GenericGetAttr)) { + attr = __Pyx_PyObject_GetAttrStr(obj, name); + goto try_unpack; + } + if (unlikely(tp->tp_dict == NULL) && unlikely(PyType_Ready(tp) < 0)) { + return 0; + } + descr = _PyType_Lookup(tp, name); + if (likely(descr != NULL)) { + Py_INCREF(descr); +#if defined(Py_TPFLAGS_METHOD_DESCRIPTOR) && Py_TPFLAGS_METHOD_DESCRIPTOR + if (__Pyx_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR)) +#elif PY_MAJOR_VERSION >= 3 + #ifdef __Pyx_CyFunction_USED + if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type) || __Pyx_CyFunction_Check(descr))) + #else + if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type))) + #endif +#else + #ifdef __Pyx_CyFunction_USED + if (likely(PyFunction_Check(descr) || __Pyx_CyFunction_Check(descr))) + #else + if (likely(PyFunction_Check(descr))) + 
#endif +#endif + { + meth_found = 1; + } else { + f = Py_TYPE(descr)->tp_descr_get; + if (f != NULL && PyDescr_IsData(descr)) { + attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); + Py_DECREF(descr); + goto try_unpack; + } + } + } + dictptr = _PyObject_GetDictPtr(obj); + if (dictptr != NULL && (dict = *dictptr) != NULL) { + Py_INCREF(dict); + attr = __Pyx_PyDict_GetItemStr(dict, name); + if (attr != NULL) { + Py_INCREF(attr); + Py_DECREF(dict); + Py_XDECREF(descr); + goto try_unpack; + } + Py_DECREF(dict); + } + if (meth_found) { + *method = descr; + return 1; + } + if (f != NULL) { + attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); + Py_DECREF(descr); + goto try_unpack; + } + if (likely(descr != NULL)) { + *method = descr; + return 0; + } + type_name = __Pyx_PyType_GetName(tp); + PyErr_Format(PyExc_AttributeError, +#if PY_MAJOR_VERSION >= 3 + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", + type_name, name); +#else + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", + type_name, PyString_AS_STRING(name)); +#endif + __Pyx_DECREF_TypeName(type_name); + return 0; +#else + attr = __Pyx_PyObject_GetAttrStr(obj, name); + goto try_unpack; +#endif +try_unpack: +#if CYTHON_UNPACK_METHODS + if (likely(attr) && PyMethod_Check(attr) && likely(PyMethod_GET_SELF(attr) == obj)) { + PyObject *function = PyMethod_GET_FUNCTION(attr); + Py_INCREF(function); + Py_DECREF(attr); + *method = function; + return 1; + } +#endif + *method = attr; + return 0; +} + +/* PyObjectCallMethod1 */ +#if !(CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C00A2) +static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) { + PyObject *result = __Pyx_PyObject_CallOneArg(method, arg); + Py_DECREF(method); + return result; +} +#endif +static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) { +#if CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C00A2 + PyObject *args[2] = {obj, arg}; + (void) 
__Pyx_PyObject_GetMethod; + (void) __Pyx_PyObject_CallOneArg; + (void) __Pyx_PyObject_Call2Args; + return PyObject_VectorcallMethod(method_name, args, 2 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); +#else + PyObject *method = NULL, *result; + int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); + if (likely(is_method)) { + result = __Pyx_PyObject_Call2Args(method, obj, arg); + Py_DECREF(method); + return result; + } + if (unlikely(!method)) return NULL; + return __Pyx__PyObject_CallMethod1(method, arg); +#endif +} + +/* StringJoin */ +static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values) { + (void) __Pyx_PyObject_CallMethod1; +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION < 3 + return _PyString_Join(sep, values); +#elif CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 + return _PyBytes_Join(sep, values); +#else + return __Pyx_PyObject_CallMethod1(sep, __pyx_n_s_join, values); +#endif +} + +/* PyObjectSetAttrStr */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_setattro)) + return tp->tp_setattro(obj, attr_name, value); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_setattr)) + return tp->tp_setattr(obj, PyString_AS_STRING(attr_name), value); +#endif + return PyObject_SetAttr(obj, attr_name, value); +} +#endif + +/* PyObjectCallNoArg */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { + PyObject *arg[2] = {NULL, NULL}; + return __Pyx_PyObject_FastCall(func, arg + 1, 0 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); +} + +/* PyObjectCallMethod0 */ +static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) { + PyObject *method = NULL, *result = NULL; + int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); + if (likely(is_method)) { + result = __Pyx_PyObject_CallOneArg(method, obj); + Py_DECREF(method); + return 
result; + } + if (unlikely(!method)) goto bad; + result = __Pyx_PyObject_CallNoArg(method); + Py_DECREF(method); +bad: + return result; +} + +/* ValidateBasesTuple */ +#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS +static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases) { + Py_ssize_t i, n; +#if CYTHON_ASSUME_SAFE_MACROS + n = PyTuple_GET_SIZE(bases); +#else + n = PyTuple_Size(bases); + if (n < 0) return -1; +#endif + for (i = 1; i < n; i++) + { +#if CYTHON_AVOID_BORROWED_REFS + PyObject *b0 = PySequence_GetItem(bases, i); + if (!b0) return -1; +#elif CYTHON_ASSUME_SAFE_MACROS + PyObject *b0 = PyTuple_GET_ITEM(bases, i); +#else + PyObject *b0 = PyTuple_GetItem(bases, i); + if (!b0) return -1; +#endif + PyTypeObject *b; +#if PY_MAJOR_VERSION < 3 + if (PyClass_Check(b0)) + { + PyErr_Format(PyExc_TypeError, "base class '%.200s' is an old-style class", + PyString_AS_STRING(((PyClassObject*)b0)->cl_name)); +#if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + return -1; + } +#endif + b = (PyTypeObject*) b0; + if (!__Pyx_PyType_HasFeature(b, Py_TPFLAGS_HEAPTYPE)) + { + __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); + PyErr_Format(PyExc_TypeError, + "base class '" __Pyx_FMT_TYPENAME "' is not a heap type", b_name); + __Pyx_DECREF_TypeName(b_name); +#if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + return -1; + } + if (dictoffset == 0) + { + Py_ssize_t b_dictoffset = 0; +#if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY + b_dictoffset = b->tp_dictoffset; +#else + PyObject *py_b_dictoffset = PyObject_GetAttrString((PyObject*)b, "__dictoffset__"); + if (!py_b_dictoffset) goto dictoffset_return; + b_dictoffset = PyLong_AsSsize_t(py_b_dictoffset); + Py_DECREF(py_b_dictoffset); + if (b_dictoffset == -1 && PyErr_Occurred()) goto dictoffset_return; +#endif + if (b_dictoffset) { + { + __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); + PyErr_Format(PyExc_TypeError, + 
"extension type '%.200s' has no __dict__ slot, " + "but base type '" __Pyx_FMT_TYPENAME "' has: " + "either add 'cdef dict __dict__' to the extension type " + "or add '__slots__ = [...]' to the base type", + type_name, b_name); + __Pyx_DECREF_TypeName(b_name); + } +#if !(CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY) + dictoffset_return: +#endif +#if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + return -1; + } + } +#if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(b0); +#endif + } + return 0; +} +#endif + +/* PyType_Ready */ +static int __Pyx_PyType_Ready(PyTypeObject *t) { +#if CYTHON_USE_TYPE_SPECS || !(CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API) || defined(PYSTON_MAJOR_VERSION) + (void)__Pyx_PyObject_CallMethod0; +#if CYTHON_USE_TYPE_SPECS + (void)__Pyx_validate_bases_tuple; +#endif + return PyType_Ready(t); +#else + int r; + PyObject *bases = __Pyx_PyType_GetSlot(t, tp_bases, PyObject*); + if (bases && unlikely(__Pyx_validate_bases_tuple(t->tp_name, t->tp_dictoffset, bases) == -1)) + return -1; +#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) + { + int gc_was_enabled; + #if PY_VERSION_HEX >= 0x030A00b1 + gc_was_enabled = PyGC_Disable(); + (void)__Pyx_PyObject_CallMethod0; + #else + PyObject *ret, *py_status; + PyObject *gc = NULL; + #if PY_VERSION_HEX >= 0x030700a1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM+0 >= 0x07030400) + gc = PyImport_GetModule(__pyx_kp_u_gc); + #endif + if (unlikely(!gc)) gc = PyImport_Import(__pyx_kp_u_gc); + if (unlikely(!gc)) return -1; + py_status = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_isenabled); + if (unlikely(!py_status)) { + Py_DECREF(gc); + return -1; + } + gc_was_enabled = __Pyx_PyObject_IsTrue(py_status); + Py_DECREF(py_status); + if (gc_was_enabled > 0) { + ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_disable); + if (unlikely(!ret)) { + Py_DECREF(gc); + return -1; + } + Py_DECREF(ret); + } else if (unlikely(gc_was_enabled == -1)) { + Py_DECREF(gc); + return -1; + } + 
#endif + t->tp_flags |= Py_TPFLAGS_HEAPTYPE; +#if PY_VERSION_HEX >= 0x030A0000 + t->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; +#endif +#else + (void)__Pyx_PyObject_CallMethod0; +#endif + r = PyType_Ready(t); +#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) + t->tp_flags &= ~Py_TPFLAGS_HEAPTYPE; + #if PY_VERSION_HEX >= 0x030A00b1 + if (gc_was_enabled) + PyGC_Enable(); + #else + if (gc_was_enabled) { + PyObject *tp, *v, *tb; + PyErr_Fetch(&tp, &v, &tb); + ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_enable); + if (likely(ret || r == -1)) { + Py_XDECREF(ret); + PyErr_Restore(tp, v, tb); + } else { + Py_XDECREF(tp); + Py_XDECREF(v); + Py_XDECREF(tb); + r = -1; + } + } + Py_DECREF(gc); + #endif + } +#endif + return r; +#endif +} + +/* PyObject_GenericGetAttrNoDict */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static PyObject *__Pyx_RaiseGenericGetAttributeError(PyTypeObject *tp, PyObject *attr_name) { + __Pyx_TypeName type_name = __Pyx_PyType_GetName(tp); + PyErr_Format(PyExc_AttributeError, +#if PY_MAJOR_VERSION >= 3 + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", + type_name, attr_name); +#else + "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", + type_name, PyString_AS_STRING(attr_name)); +#endif + __Pyx_DECREF_TypeName(type_name); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name) { + PyObject *descr; + PyTypeObject *tp = Py_TYPE(obj); + if (unlikely(!PyString_Check(attr_name))) { + return PyObject_GenericGetAttr(obj, attr_name); + } + assert(!tp->tp_dictoffset); + descr = _PyType_Lookup(tp, attr_name); + if (unlikely(!descr)) { + return __Pyx_RaiseGenericGetAttributeError(tp, attr_name); + } + Py_INCREF(descr); + #if PY_MAJOR_VERSION < 3 + if (likely(PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_HAVE_CLASS))) + #endif + { + descrgetfunc f = Py_TYPE(descr)->tp_descr_get; + if (unlikely(f)) { + PyObject *res = 
f(descr, obj, (PyObject *)tp); + Py_DECREF(descr); + return res; + } + } + return descr; +} +#endif + +/* PyObject_GenericGetAttr */ +#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 +static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name) { + if (unlikely(Py_TYPE(obj)->tp_dictoffset)) { + return PyObject_GenericGetAttr(obj, attr_name); + } + return __Pyx_PyObject_GenericGetAttrNoDict(obj, attr_name); +} +#endif + +/* SetupReduce */ +#if !CYTHON_COMPILING_IN_LIMITED_API +static int __Pyx_setup_reduce_is_named(PyObject* meth, PyObject* name) { + int ret; + PyObject *name_attr; + name_attr = __Pyx_PyObject_GetAttrStrNoError(meth, __pyx_n_s_name); + if (likely(name_attr)) { + ret = PyObject_RichCompareBool(name_attr, name, Py_EQ); + } else { + ret = -1; + } + if (unlikely(ret < 0)) { + PyErr_Clear(); + ret = 0; + } + Py_XDECREF(name_attr); + return ret; +} +static int __Pyx_setup_reduce(PyObject* type_obj) { + int ret = 0; + PyObject *object_reduce = NULL; + PyObject *object_getstate = NULL; + PyObject *object_reduce_ex = NULL; + PyObject *reduce = NULL; + PyObject *reduce_ex = NULL; + PyObject *reduce_cython = NULL; + PyObject *setstate = NULL; + PyObject *setstate_cython = NULL; + PyObject *getstate = NULL; +#if CYTHON_USE_PYTYPE_LOOKUP + getstate = _PyType_Lookup((PyTypeObject*)type_obj, __pyx_n_s_getstate); +#else + getstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_getstate); + if (!getstate && PyErr_Occurred()) { + goto __PYX_BAD; + } +#endif + if (getstate) { +#if CYTHON_USE_PYTYPE_LOOKUP + object_getstate = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_getstate); +#else + object_getstate = __Pyx_PyObject_GetAttrStrNoError((PyObject*)&PyBaseObject_Type, __pyx_n_s_getstate); + if (!object_getstate && PyErr_Occurred()) { + goto __PYX_BAD; + } +#endif + if (object_getstate != getstate) { + goto __PYX_GOOD; + } + } +#if CYTHON_USE_PYTYPE_LOOKUP + object_reduce_ex = 
_PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; +#else + object_reduce_ex = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; +#endif + reduce_ex = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce_ex); if (unlikely(!reduce_ex)) goto __PYX_BAD; + if (reduce_ex == object_reduce_ex) { +#if CYTHON_USE_PYTYPE_LOOKUP + object_reduce = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; +#else + object_reduce = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; +#endif + reduce = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce); if (unlikely(!reduce)) goto __PYX_BAD; + if (reduce == object_reduce || __Pyx_setup_reduce_is_named(reduce, __pyx_n_s_reduce_cython)) { + reduce_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_reduce_cython); + if (likely(reduce_cython)) { + ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce, reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; + ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; + } else if (reduce == object_reduce || PyErr_Occurred()) { + goto __PYX_BAD; + } + setstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate); + if (!setstate) PyErr_Clear(); + if (!setstate || __Pyx_setup_reduce_is_named(setstate, __pyx_n_s_setstate_cython)) { + setstate_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate_cython); + if (likely(setstate_cython)) { + ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate, setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD; + ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD; + } else if (!setstate || PyErr_Occurred()) { + goto __PYX_BAD; + } + } + 
PyType_Modified((PyTypeObject*)type_obj); + } + } + goto __PYX_GOOD; +__PYX_BAD: + if (!PyErr_Occurred()) { + __Pyx_TypeName type_obj_name = + __Pyx_PyType_GetName((PyTypeObject*)type_obj); + PyErr_Format(PyExc_RuntimeError, + "Unable to initialize pickling for " __Pyx_FMT_TYPENAME, type_obj_name); + __Pyx_DECREF_TypeName(type_obj_name); + } + ret = -1; +__PYX_GOOD: +#if !CYTHON_USE_PYTYPE_LOOKUP + Py_XDECREF(object_reduce); + Py_XDECREF(object_reduce_ex); + Py_XDECREF(object_getstate); + Py_XDECREF(getstate); +#endif + Py_XDECREF(reduce); + Py_XDECREF(reduce_ex); + Py_XDECREF(reduce_cython); + Py_XDECREF(setstate); + Py_XDECREF(setstate_cython); + return ret; +} +#endif + +/* Import */ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *module = 0; + PyObject *empty_dict = 0; + PyObject *empty_list = 0; + #if PY_MAJOR_VERSION < 3 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (unlikely(!py_import)) + goto bad; + if (!from_list) { + empty_list = PyList_New(0); + if (unlikely(!empty_list)) + goto bad; + from_list = empty_list; + } + #endif + empty_dict = PyDict_New(); + if (unlikely(!empty_dict)) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if (strchr(__Pyx_MODULE_NAME, '.') != NULL) { + module = PyImport_ImportModuleLevelObject( + name, __pyx_d, empty_dict, from_list, 1); + if (unlikely(!module)) { + if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_MAJOR_VERSION < 3 + PyObject *py_level = PyInt_FromLong(level); + if (unlikely(!py_level)) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, __pyx_d, empty_dict, from_list, py_level, (PyObject *)NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, __pyx_d, empty_dict, from_list, level); + #endif + } + } +bad: + Py_XDECREF(empty_dict); + 
Py_XDECREF(empty_list); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_import); + #endif + return module; +} + +/* ImportDottedModule */ +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Error(PyObject *name, PyObject *parts_tuple, Py_ssize_t count) { + PyObject *partial_name = NULL, *slice = NULL, *sep = NULL; + if (unlikely(PyErr_Occurred())) { + PyErr_Clear(); + } + if (likely(PyTuple_GET_SIZE(parts_tuple) == count)) { + partial_name = name; + } else { + slice = PySequence_GetSlice(parts_tuple, 0, count); + if (unlikely(!slice)) + goto bad; + sep = PyUnicode_FromStringAndSize(".", 1); + if (unlikely(!sep)) + goto bad; + partial_name = PyUnicode_Join(sep, slice); + } + PyErr_Format( +#if PY_MAJOR_VERSION < 3 + PyExc_ImportError, + "No module named '%s'", PyString_AS_STRING(partial_name)); +#else +#if PY_VERSION_HEX >= 0x030600B1 + PyExc_ModuleNotFoundError, +#else + PyExc_ImportError, +#endif + "No module named '%U'", partial_name); +#endif +bad: + Py_XDECREF(sep); + Py_XDECREF(slice); + Py_XDECREF(partial_name); + return NULL; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Lookup(PyObject *name) { + PyObject *imported_module; +#if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) + PyObject *modules = PyImport_GetModuleDict(); + if (unlikely(!modules)) + return NULL; + imported_module = __Pyx_PyDict_GetItemStr(modules, name); + Py_XINCREF(imported_module); +#else + imported_module = PyImport_GetModule(name); +#endif + return imported_module; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple) { + Py_ssize_t i, nparts; + nparts = PyTuple_GET_SIZE(parts_tuple); + for (i=1; i < nparts && module; i++) { + PyObject *part, *submodule; +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + part = PyTuple_GET_ITEM(parts_tuple, i); +#else + part = PySequence_ITEM(parts_tuple, 
i); +#endif + submodule = __Pyx_PyObject_GetAttrStrNoError(module, part); +#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(part); +#endif + Py_DECREF(module); + module = submodule; + } + if (unlikely(!module)) { + return __Pyx__ImportDottedModule_Error(name, parts_tuple, i); + } + return module; +} +#endif +static PyObject *__Pyx__ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if PY_MAJOR_VERSION < 3 + PyObject *module, *from_list, *star = __pyx_n_s__6; + CYTHON_UNUSED_VAR(parts_tuple); + from_list = PyList_New(1); + if (unlikely(!from_list)) + return NULL; + Py_INCREF(star); + PyList_SET_ITEM(from_list, 0, star); + module = __Pyx_Import(name, from_list, 0); + Py_DECREF(from_list); + return module; +#else + PyObject *imported_module; + PyObject *module = __Pyx_Import(name, NULL, 0); + if (!parts_tuple || unlikely(!module)) + return module; + imported_module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(imported_module)) { + Py_DECREF(module); + return imported_module; + } + PyErr_Clear(); + return __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); +#endif +} +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030400B1 + PyObject *module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(module)) { + PyObject *spec = __Pyx_PyObject_GetAttrStrNoError(module, __pyx_n_s_spec); + if (likely(spec)) { + PyObject *unsafe = __Pyx_PyObject_GetAttrStrNoError(spec, __pyx_n_s_initializing); + if (likely(!unsafe || !__Pyx_PyObject_IsTrue(unsafe))) { + Py_DECREF(spec); + spec = NULL; + } + Py_XDECREF(unsafe); + } + if (likely(!spec)) { + PyErr_Clear(); + return module; + } + Py_DECREF(spec); + Py_DECREF(module); + } else if (PyErr_Occurred()) { + PyErr_Clear(); + } +#endif + return __Pyx__ImportDottedModule(name, parts_tuple); +} + +/* ImportDottedModuleRelFirst */ +static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject 
*name, PyObject *parts_tuple) { + PyObject *module; + PyObject *from_list = NULL; +#if PY_MAJOR_VERSION < 3 + PyObject *star = __pyx_n_s__6; + from_list = PyList_New(1); + if (unlikely(!from_list)) + return NULL; + Py_INCREF(star); + PyList_SET_ITEM(from_list, 0, star); +#endif + module = __Pyx_Import(name, from_list, -1); + Py_XDECREF(from_list); + if (module) { + #if PY_MAJOR_VERSION >= 3 + if (parts_tuple) { + module = __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); + } + #endif + return module; + } + if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) + return NULL; + PyErr_Clear(); + return __Pyx_ImportDottedModule(name, parts_tuple); +} + +/* PyDictVersioning */ +#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0; +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { + PyObject **dictptr = NULL; + Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; + if (offset) { +#if CYTHON_COMPILING_IN_CPYTHON + dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); +#else + dictptr = _PyObject_GetDictPtr(obj); +#endif + } + return (dictptr && *dictptr) ? 
__PYX_GET_DICT_VERSION(*dictptr) : 0; +} +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) + return 0; + return obj_dict_version == __Pyx_get_object_dict_version(obj); +} +#endif + +/* CLineInTraceback */ +#ifndef CYTHON_CLINE_IN_TRACEBACK +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) { + PyObject *use_cline; + PyObject *ptype, *pvalue, *ptraceback; +#if CYTHON_COMPILING_IN_CPYTHON + PyObject **cython_runtime_dict; +#endif + CYTHON_MAYBE_UNUSED_VAR(tstate); + if (unlikely(!__pyx_cython_runtime)) { + return c_line; + } + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); +#if CYTHON_COMPILING_IN_CPYTHON + cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); + if (likely(cython_runtime_dict)) { + __PYX_PY_DICT_LOOKUP_IF_MODIFIED( + use_cline, *cython_runtime_dict, + __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) + } else +#endif + { + PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStrNoError(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); + if (use_cline_obj) { + use_cline = PyObject_Not(use_cline_obj) ? 
Py_False : Py_True; + Py_DECREF(use_cline_obj); + } else { + PyErr_Clear(); + use_cline = NULL; + } + } + if (!use_cline) { + c_line = 0; + (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); + } + else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { + c_line = 0; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + return c_line; +} +#endif + +/* CodeObjectCache */ +#if !CYTHON_COMPILING_IN_LIMITED_API +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + 
__pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} +#endif + +/* AddTraceback */ +#include "compile.h" +#include "frameobject.h" +#include "traceback.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyCode_Replace_For_AddTraceback(PyObject *code, PyObject *scratch_dict, + PyObject *firstlineno, PyObject *name) { + PyObject *replace = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_firstlineno", firstlineno))) return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_name", name))) return NULL; + replace = PyObject_GetAttrString(code, "replace"); + if (likely(replace)) { + PyObject *result; + result = PyObject_Call(replace, __pyx_empty_tuple, scratch_dict); + Py_DECREF(replace); + return result; + } + PyErr_Clear(); + #if __PYX_LIMITED_VERSION_HEX < 0x030780000 + { 
+ PyObject *compiled = NULL, *result = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "code", code))) return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "type", (PyObject*)(&PyType_Type)))) return NULL; + compiled = Py_CompileString( + "out = type(code)(\n" + " code.co_argcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize,\n" + " code.co_flags, code.co_code, code.co_consts, code.co_names,\n" + " code.co_varnames, code.co_filename, co_name, co_firstlineno,\n" + " code.co_lnotab)\n", "", Py_file_input); + if (!compiled) return NULL; + result = PyEval_EvalCode(compiled, scratch_dict, scratch_dict); + Py_DECREF(compiled); + if (!result) PyErr_Print(); + Py_DECREF(result); + result = PyDict_GetItemString(scratch_dict, "out"); + if (result) Py_INCREF(result); + return result; + } + #else + return NULL; + #endif +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyObject *code_object = NULL, *py_py_line = NULL, *py_funcname = NULL, *dict = NULL; + PyObject *replace = NULL, *getframe = NULL, *frame = NULL; + PyObject *exc_type, *exc_value, *exc_traceback; + int success = 0; + if (c_line) { + (void) __pyx_cfilenm; + (void) __Pyx_CLineForTraceback(__Pyx_PyThreadState_Current, c_line); + } + PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); + code_object = Py_CompileString("_getframe()", filename, Py_eval_input); + if (unlikely(!code_object)) goto bad; + py_py_line = PyLong_FromLong(py_line); + if (unlikely(!py_py_line)) goto bad; + py_funcname = PyUnicode_FromString(funcname); + if (unlikely(!py_funcname)) goto bad; + dict = PyDict_New(); + if (unlikely(!dict)) goto bad; + { + PyObject *old_code_object = code_object; + code_object = __Pyx_PyCode_Replace_For_AddTraceback(code_object, dict, py_py_line, py_funcname); + Py_DECREF(old_code_object); + } + if (unlikely(!code_object)) goto bad; + getframe = PySys_GetObject("_getframe"); + if (unlikely(!getframe)) goto bad; + if 
(unlikely(PyDict_SetItemString(dict, "_getframe", getframe))) goto bad; + frame = PyEval_EvalCode(code_object, dict, dict); + if (unlikely(!frame) || frame == Py_None) goto bad; + success = 1; + bad: + PyErr_Restore(exc_type, exc_value, exc_traceback); + Py_XDECREF(code_object); + Py_XDECREF(py_py_line); + Py_XDECREF(py_funcname); + Py_XDECREF(dict); + Py_XDECREF(replace); + if (success) { + PyTraceBack_Here( + (struct _frame*)frame); + } + Py_XDECREF(frame); +} +#else +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = NULL; + PyObject *py_funcname = NULL; + #if PY_MAJOR_VERSION < 3 + PyObject *py_srcfile = NULL; + py_srcfile = PyString_FromString(filename); + if (!py_srcfile) goto bad; + #endif + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + funcname = PyUnicode_AsUTF8(py_funcname); + if (!funcname) goto bad; + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + if (!py_funcname) goto bad; + #endif + } + #if PY_MAJOR_VERSION < 3 + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + #else + py_code = PyCode_NewEmpty(filename, funcname, py_line); + #endif + Py_XDECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_funcname); + #if PY_MAJOR_VERSION < 3 + 
Py_XDECREF(py_srcfile); + #endif + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject *ptype, *pvalue, *ptraceback; + if (c_line) { + c_line = __Pyx_CLineForTraceback(tstate, c_line); + } + py_code = __pyx_find_code_object(c_line ? -c_line : py_line); + if (!py_code) { + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) { + /* If the code object creation fails, then we should clear the + fetched exception references and propagate the new exception */ + Py_XDECREF(ptype); + Py_XDECREF(pvalue); + Py_XDECREF(ptraceback); + goto bad; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + __pyx_insert_code_object(c_line ? -c_line : py_line, py_code); + } + py_frame = PyFrame_New( + tstate, /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + __Pyx_PyFrame_SetLineNumber(py_frame, py_line); + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} +#endif + +/* CIntFromPyVerify */ +#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < 
zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + +/* CIntFromPy */ +static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const size_t neg_one = (size_t) -1, const_zero = (size_t) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(size_t) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(size_t, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (size_t) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + size_t val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (size_t) -1; + val = __Pyx_PyInt_As_size_t(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(size_t, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(size_t) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) >= 2 * PyLong_SHIFT)) { + return (size_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(size_t) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((unsigned long)digits[2]) 
<< PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) >= 3 * PyLong_SHIFT)) { + return (size_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(size_t) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) >= 4 * PyLong_SHIFT)) { + return (size_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (size_t) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(size_t) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(size_t, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(size_t) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(size_t, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(size_t, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(size_t) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, long, -(long) 
(((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { + return (size_t) (((size_t)-1)*(((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(size_t) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { + return (size_t) ((((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { + return (size_t) (((size_t)-1)*(((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(size_t) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { + return (size_t) ((((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 
* sizeof(size_t) - 1 > 4 * PyLong_SHIFT)) { + return (size_t) (((size_t)-1)*(((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(size_t) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(size_t) - 1 > 4 * PyLong_SHIFT)) { + return (size_t) ((((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(size_t) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(size_t, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(size_t) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(size_t, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + size_t val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (size_t) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (size_t) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (size_t) -1; + } else { + stepval = v; + } + v = NULL; + val = (size_t) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(size_t) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((size_t) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + 
Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(size_t) * 8) - bits - (is_unsigned ? 0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((size_t) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((size_t) 1) << (sizeof(size_t) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (size_t) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to size_t"); + return (size_t) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to size_t"); + return (size_t) -1; +} + +/* CIntToPy */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return 
PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(long)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? "little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* CIntToPy */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(int) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + return 
PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(int) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(int), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(int)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? 
"little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* CIntFromPy */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(int) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | 
(unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 2 * PyLong_SHIFT)) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 3 * PyLong_SHIFT)) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 4 * PyLong_SHIFT)) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(int) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + 
const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(int) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + 
__PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(int) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + int val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (int) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (int) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (int) -1; + } else { + stepval = v; + } + v = NULL; + val = (int) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(int) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((int) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; 
+ { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(int) * 8) - bits - (is_unsigned ? 0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((int) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((int) 1) << (sizeof(int) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (int) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +/* CIntFromPy */ +static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const char neg_one = (char) -1, const_zero = (char) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(char) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(char, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (char) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + char val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (char) -1; + val = __Pyx_PyInt_As_char(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(char, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + 
assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(char) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) >= 2 * PyLong_SHIFT)) { + return (char) (((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(char) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) >= 3 * PyLong_SHIFT)) { + return (char) (((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(char) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) >= 4 * PyLong_SHIFT)) { + return (char) (((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (char) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(char) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(char, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(char) <= 
sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(char, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(char, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(char) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { + return (char) (((char)-1)*(((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(char) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { + return (char) ((((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { + return (char) (((char)-1)*(((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(char) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned 
long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { + return (char) ((((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 4 * PyLong_SHIFT)) { + return (char) (((char)-1)*(((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(char) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(char) - 1 > 4 * PyLong_SHIFT)) { + return (char) ((((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(char) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(char, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(char) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(char, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + char val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (char) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (char) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (char) -1; + } else { + stepval = v; + } + v = NULL; + val = (char) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(char) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((char) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = 
NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(char) * 8) - bits - (is_unsigned ? 0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((char) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((char) 1) << (sizeof(char) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (char) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to char"); + return (char) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to char"); + return (char) -1; +} + +/* CIntToPy */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_char(char value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const char neg_one = (char) -1, const_zero = (char) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(char) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(char) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(char) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(char) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(char) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return PyLong_FromUnsignedNativeBytes(bytes, 
sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(char), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(char)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? "little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* FormatTypeName */ +#if CYTHON_COMPILING_IN_LIMITED_API +static __Pyx_TypeName +__Pyx_PyType_GetName(PyTypeObject* tp) +{ + PyObject *name = __Pyx_PyObject_GetAttrStr((PyObject *)tp, + __pyx_n_s_name); + if (unlikely(name == NULL) || unlikely(!PyUnicode_Check(name))) { + PyErr_Clear(); + Py_XDECREF(name); + name = __Pyx_NewRef(__pyx_n_s__13); + } + return name; +} +#endif + +/* CIntFromPy */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned 
= neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(long) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + long val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 2 * PyLong_SHIFT)) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 3 * PyLong_SHIFT)) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned 
long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 4 * PyLong_SHIFT)) { + return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(long) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(long) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return 
(long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * 
PyLong_SHIFT)) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(long) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + long val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 
30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (long) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (long) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (long) -1; + } else { + stepval = v; + } + v = NULL; + val = (long) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(long) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((long) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(long) * 8) - bits - (is_unsigned ? 
0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((long) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((long) 1) << (sizeof(long) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (long) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +/* SwapException */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_value = exc_info->exc_value; + exc_info->exc_value = *value; + if (tmp_value == NULL || tmp_value == Py_None) { + Py_XDECREF(tmp_value); + tmp_value = NULL; + tmp_type = NULL; + tmp_tb = NULL; + } else { + tmp_type = (PyObject*) Py_TYPE(tmp_value); + Py_INCREF(tmp_type); + #if CYTHON_COMPILING_IN_CPYTHON + tmp_tb = ((PyBaseExceptionObject*) tmp_value)->traceback; + Py_XINCREF(tmp_tb); + #else + tmp_tb = PyException_GetTraceback(tmp_value); + #endif + } + #elif CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = *type; + exc_info->exc_value = *value; + exc_info->exc_traceback = *tb; + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = *type; + tstate->exc_value = *value; + tstate->exc_traceback = *tb; + #endif + *type = tmp_type; + *value = tmp_value; + *tb = tmp_tb; +} 
+#else +static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_GetExcInfo(&tmp_type, &tmp_value, &tmp_tb); + PyErr_SetExcInfo(*type, *value, *tb); + *type = tmp_type; + *value = tmp_value; + *tb = tmp_tb; +} +#endif + +/* CoroutineBase */ +#include +#if PY_VERSION_HEX >= 0x030b00a6 + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif +#define __Pyx_Coroutine_Undelegate(gen) Py_CLEAR((gen)->yieldfrom) +static int __Pyx_PyGen__FetchStopIterationValue(PyThreadState *__pyx_tstate, PyObject **pvalue) { + PyObject *et, *ev, *tb; + PyObject *value = NULL; + CYTHON_UNUSED_VAR(__pyx_tstate); + __Pyx_ErrFetch(&et, &ev, &tb); + if (!et) { + Py_XDECREF(tb); + Py_XDECREF(ev); + Py_INCREF(Py_None); + *pvalue = Py_None; + return 0; + } + if (likely(et == PyExc_StopIteration)) { + if (!ev) { + Py_INCREF(Py_None); + value = Py_None; + } +#if PY_VERSION_HEX >= 0x030300A0 + else if (likely(__Pyx_IS_TYPE(ev, (PyTypeObject*)PyExc_StopIteration))) { + value = ((PyStopIterationObject *)ev)->value; + Py_INCREF(value); + Py_DECREF(ev); + } +#endif + else if (unlikely(PyTuple_Check(ev))) { + if (PyTuple_GET_SIZE(ev) >= 1) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + value = PyTuple_GET_ITEM(ev, 0); + Py_INCREF(value); +#else + value = PySequence_ITEM(ev, 0); +#endif + } else { + Py_INCREF(Py_None); + value = Py_None; + } + Py_DECREF(ev); + } + else if (!__Pyx_TypeCheck(ev, (PyTypeObject*)PyExc_StopIteration)) { + value = ev; + } + if (likely(value)) { + Py_XDECREF(tb); + Py_DECREF(et); + *pvalue = value; + return 0; + } + } else if (!__Pyx_PyErr_GivenExceptionMatches(et, PyExc_StopIteration)) { + __Pyx_ErrRestore(et, ev, tb); + return -1; + } + PyErr_NormalizeException(&et, &ev, &tb); + if (unlikely(!PyObject_TypeCheck(ev, (PyTypeObject*)PyExc_StopIteration))) { + __Pyx_ErrRestore(et, ev, tb); + return -1; + } + Py_XDECREF(tb); 
+ Py_DECREF(et); +#if PY_VERSION_HEX >= 0x030300A0 + value = ((PyStopIterationObject *)ev)->value; + Py_INCREF(value); + Py_DECREF(ev); +#else + { + PyObject* args = __Pyx_PyObject_GetAttrStr(ev, __pyx_n_s_args); + Py_DECREF(ev); + if (likely(args)) { + value = PySequence_GetItem(args, 0); + Py_DECREF(args); + } + if (unlikely(!value)) { + __Pyx_ErrRestore(NULL, NULL, NULL); + Py_INCREF(Py_None); + value = Py_None; + } + } +#endif + *pvalue = value; + return 0; +} +static CYTHON_INLINE +void __Pyx_Coroutine_ExceptionClear(__Pyx_ExcInfoStruct *exc_state) { +#if PY_VERSION_HEX >= 0x030B00a4 + Py_CLEAR(exc_state->exc_value); +#else + PyObject *t, *v, *tb; + t = exc_state->exc_type; + v = exc_state->exc_value; + tb = exc_state->exc_traceback; + exc_state->exc_type = NULL; + exc_state->exc_value = NULL; + exc_state->exc_traceback = NULL; + Py_XDECREF(t); + Py_XDECREF(v); + Py_XDECREF(tb); +#endif +} +#define __Pyx_Coroutine_AlreadyRunningError(gen) (__Pyx__Coroutine_AlreadyRunningError(gen), (PyObject*)NULL) +static void __Pyx__Coroutine_AlreadyRunningError(__pyx_CoroutineObject *gen) { + const char *msg; + CYTHON_MAYBE_UNUSED_VAR(gen); + if ((0)) { + #ifdef __Pyx_Coroutine_USED + } else if (__Pyx_Coroutine_Check((PyObject*)gen)) { + msg = "coroutine already executing"; + #endif + #ifdef __Pyx_AsyncGen_USED + } else if (__Pyx_AsyncGen_CheckExact((PyObject*)gen)) { + msg = "async generator already executing"; + #endif + } else { + msg = "generator already executing"; + } + PyErr_SetString(PyExc_ValueError, msg); +} +#define __Pyx_Coroutine_NotStartedError(gen) (__Pyx__Coroutine_NotStartedError(gen), (PyObject*)NULL) +static void __Pyx__Coroutine_NotStartedError(PyObject *gen) { + const char *msg; + CYTHON_MAYBE_UNUSED_VAR(gen); + if ((0)) { + #ifdef __Pyx_Coroutine_USED + } else if (__Pyx_Coroutine_Check(gen)) { + msg = "can't send non-None value to a just-started coroutine"; + #endif + #ifdef __Pyx_AsyncGen_USED + } else if (__Pyx_AsyncGen_CheckExact(gen)) { + msg = 
"can't send non-None value to a just-started async generator"; + #endif + } else { + msg = "can't send non-None value to a just-started generator"; + } + PyErr_SetString(PyExc_TypeError, msg); +} +#define __Pyx_Coroutine_AlreadyTerminatedError(gen, value, closing) (__Pyx__Coroutine_AlreadyTerminatedError(gen, value, closing), (PyObject*)NULL) +static void __Pyx__Coroutine_AlreadyTerminatedError(PyObject *gen, PyObject *value, int closing) { + CYTHON_MAYBE_UNUSED_VAR(gen); + CYTHON_MAYBE_UNUSED_VAR(closing); + #ifdef __Pyx_Coroutine_USED + if (!closing && __Pyx_Coroutine_Check(gen)) { + PyErr_SetString(PyExc_RuntimeError, "cannot reuse already awaited coroutine"); + } else + #endif + if (value) { + #ifdef __Pyx_AsyncGen_USED + if (__Pyx_AsyncGen_CheckExact(gen)) + PyErr_SetNone(__Pyx_PyExc_StopAsyncIteration); + else + #endif + PyErr_SetNone(PyExc_StopIteration); + } +} +static +PyObject *__Pyx_Coroutine_SendEx(__pyx_CoroutineObject *self, PyObject *value, int closing) { + __Pyx_PyThreadState_declare + PyThreadState *tstate; + __Pyx_ExcInfoStruct *exc_state; + PyObject *retval; + assert(!self->is_running); + if (unlikely(self->resume_label == 0)) { + if (unlikely(value && value != Py_None)) { + return __Pyx_Coroutine_NotStartedError((PyObject*)self); + } + } + if (unlikely(self->resume_label == -1)) { + return __Pyx_Coroutine_AlreadyTerminatedError((PyObject*)self, value, closing); + } +#if CYTHON_FAST_THREAD_STATE + __Pyx_PyThreadState_assign + tstate = __pyx_tstate; +#else + tstate = __Pyx_PyThreadState_Current; +#endif + exc_state = &self->gi_exc_state; + if (exc_state->exc_value) { + #if CYTHON_COMPILING_IN_PYPY + #else + PyObject *exc_tb; + #if PY_VERSION_HEX >= 0x030B00a4 && !CYTHON_COMPILING_IN_CPYTHON + exc_tb = PyException_GetTraceback(exc_state->exc_value); + #elif PY_VERSION_HEX >= 0x030B00a4 + exc_tb = ((PyBaseExceptionObject*) exc_state->exc_value)->traceback; + #else + exc_tb = exc_state->exc_traceback; + #endif + if (exc_tb) { + PyTracebackObject *tb 
= (PyTracebackObject *) exc_tb; + PyFrameObject *f = tb->tb_frame; + assert(f->f_back == NULL); + #if PY_VERSION_HEX >= 0x030B00A1 + f->f_back = PyThreadState_GetFrame(tstate); + #else + Py_XINCREF(tstate->frame); + f->f_back = tstate->frame; + #endif + #if PY_VERSION_HEX >= 0x030B00a4 && !CYTHON_COMPILING_IN_CPYTHON + Py_DECREF(exc_tb); + #endif + } + #endif + } +#if CYTHON_USE_EXC_INFO_STACK + exc_state->previous_item = tstate->exc_info; + tstate->exc_info = exc_state; +#else + if (exc_state->exc_type) { + __Pyx_ExceptionSwap(&exc_state->exc_type, &exc_state->exc_value, &exc_state->exc_traceback); + } else { + __Pyx_Coroutine_ExceptionClear(exc_state); + __Pyx_ExceptionSave(&exc_state->exc_type, &exc_state->exc_value, &exc_state->exc_traceback); + } +#endif + self->is_running = 1; + retval = self->body(self, tstate, value); + self->is_running = 0; +#if CYTHON_USE_EXC_INFO_STACK + exc_state = &self->gi_exc_state; + tstate->exc_info = exc_state->previous_item; + exc_state->previous_item = NULL; + __Pyx_Coroutine_ResetFrameBackpointer(exc_state); +#endif + return retval; +} +static CYTHON_INLINE void __Pyx_Coroutine_ResetFrameBackpointer(__Pyx_ExcInfoStruct *exc_state) { +#if CYTHON_COMPILING_IN_PYPY + CYTHON_UNUSED_VAR(exc_state); +#else + PyObject *exc_tb; + #if PY_VERSION_HEX >= 0x030B00a4 + if (!exc_state->exc_value) return; + exc_tb = PyException_GetTraceback(exc_state->exc_value); + #else + exc_tb = exc_state->exc_traceback; + #endif + if (likely(exc_tb)) { + PyTracebackObject *tb = (PyTracebackObject *) exc_tb; + PyFrameObject *f = tb->tb_frame; + Py_CLEAR(f->f_back); + #if PY_VERSION_HEX >= 0x030B00a4 + Py_DECREF(exc_tb); + #endif + } +#endif +} +static CYTHON_INLINE +PyObject *__Pyx_Coroutine_MethodReturn(PyObject* gen, PyObject *retval) { + CYTHON_MAYBE_UNUSED_VAR(gen); + if (unlikely(!retval)) { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + if (!__Pyx_PyErr_Occurred()) { + PyObject *exc = PyExc_StopIteration; + #ifdef __Pyx_AsyncGen_USED + 
if (__Pyx_AsyncGen_CheckExact(gen)) + exc = __Pyx_PyExc_StopAsyncIteration; + #endif + __Pyx_PyErr_SetNone(exc); + } + } + return retval; +} +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) +static CYTHON_INLINE +PyObject *__Pyx_PyGen_Send(PyGenObject *gen, PyObject *arg) { +#if PY_VERSION_HEX <= 0x030A00A1 + return _PyGen_Send(gen, arg); +#else + PyObject *result; + if (PyIter_Send((PyObject*)gen, arg ? arg : Py_None, &result) == PYGEN_RETURN) { + if (PyAsyncGen_CheckExact(gen)) { + assert(result == Py_None); + PyErr_SetNone(PyExc_StopAsyncIteration); + } + else if (result == Py_None) { + PyErr_SetNone(PyExc_StopIteration); + } + else { +#if PY_VERSION_HEX < 0x030d00A1 + _PyGen_SetStopIterationValue(result); +#else + if (!PyTuple_Check(result) && !PyExceptionInstance_Check(result)) { + PyErr_SetObject(PyExc_StopIteration, result); + } else { + PyObject *exc = __Pyx_PyObject_CallOneArg(PyExc_StopIteration, result); + if (likely(exc != NULL)) { + PyErr_SetObject(PyExc_StopIteration, exc); + Py_DECREF(exc); + } + } +#endif + } + Py_DECREF(result); + result = NULL; + } + return result; +#endif +} +#endif +static CYTHON_INLINE +PyObject *__Pyx_Coroutine_FinishDelegation(__pyx_CoroutineObject *gen) { + PyObject *ret; + PyObject *val = NULL; + __Pyx_Coroutine_Undelegate(gen); + __Pyx_PyGen__FetchStopIterationValue(__Pyx_PyThreadState_Current, &val); + ret = __Pyx_Coroutine_SendEx(gen, val, 0); + Py_XDECREF(val); + return ret; +} +static PyObject *__Pyx_Coroutine_Send(PyObject *self, PyObject *value) { + PyObject *retval; + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject*) self; + PyObject *yf = gen->yieldfrom; + if (unlikely(gen->is_running)) + return __Pyx_Coroutine_AlreadyRunningError(gen); + if (yf) { + PyObject *ret; + gen->is_running = 1; + #ifdef __Pyx_Generator_USED + if (__Pyx_Generator_CheckExact(yf)) { + ret = __Pyx_Coroutine_Send(yf, value); + } else + #endif + #ifdef 
__Pyx_Coroutine_USED + if (__Pyx_Coroutine_Check(yf)) { + ret = __Pyx_Coroutine_Send(yf, value); + } else + #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_PyAsyncGenASend_CheckExact(yf)) { + ret = __Pyx_async_gen_asend_send(yf, value); + } else + #endif + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) + if (PyGen_CheckExact(yf)) { + ret = __Pyx_PyGen_Send((PyGenObject*)yf, value == Py_None ? NULL : value); + } else + #endif + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03050000 && defined(PyCoro_CheckExact) && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) + if (PyCoro_CheckExact(yf)) { + ret = __Pyx_PyGen_Send((PyGenObject*)yf, value == Py_None ? NULL : value); + } else + #endif + { + if (value == Py_None) + ret = __Pyx_PyObject_GetIterNextFunc(yf)(yf); + else + ret = __Pyx_PyObject_CallMethod1(yf, __pyx_n_s_send, value); + } + gen->is_running = 0; + if (likely(ret)) { + return ret; + } + retval = __Pyx_Coroutine_FinishDelegation(gen); + } else { + retval = __Pyx_Coroutine_SendEx(gen, value, 0); + } + return __Pyx_Coroutine_MethodReturn(self, retval); +} +static int __Pyx_Coroutine_CloseIter(__pyx_CoroutineObject *gen, PyObject *yf) { + PyObject *retval = NULL; + int err = 0; + #ifdef __Pyx_Generator_USED + if (__Pyx_Generator_CheckExact(yf)) { + retval = __Pyx_Coroutine_Close(yf); + if (!retval) + return -1; + } else + #endif + #ifdef __Pyx_Coroutine_USED + if (__Pyx_Coroutine_Check(yf)) { + retval = __Pyx_Coroutine_Close(yf); + if (!retval) + return -1; + } else + if (__Pyx_CoroutineAwait_CheckExact(yf)) { + retval = __Pyx_CoroutineAwait_Close((__pyx_CoroutineAwaitObject*)yf, NULL); + if (!retval) + return -1; + } else + #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_PyAsyncGenASend_CheckExact(yf)) { + retval = __Pyx_async_gen_asend_close(yf, NULL); + } else + if (__pyx_PyAsyncGenAThrow_CheckExact(yf)) { + retval = __Pyx_async_gen_athrow_close(yf, NULL); + } else + #endif 
+ { + PyObject *meth; + gen->is_running = 1; + meth = __Pyx_PyObject_GetAttrStrNoError(yf, __pyx_n_s_close); + if (unlikely(!meth)) { + if (unlikely(PyErr_Occurred())) { + PyErr_WriteUnraisable(yf); + } + } else { + retval = __Pyx_PyObject_CallNoArg(meth); + Py_DECREF(meth); + if (unlikely(!retval)) + err = -1; + } + gen->is_running = 0; + } + Py_XDECREF(retval); + return err; +} +static PyObject *__Pyx_Generator_Next(PyObject *self) { + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject*) self; + PyObject *yf = gen->yieldfrom; + if (unlikely(gen->is_running)) + return __Pyx_Coroutine_AlreadyRunningError(gen); + if (yf) { + PyObject *ret; + gen->is_running = 1; + #ifdef __Pyx_Generator_USED + if (__Pyx_Generator_CheckExact(yf)) { + ret = __Pyx_Generator_Next(yf); + } else + #endif + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) + if (PyGen_CheckExact(yf)) { + ret = __Pyx_PyGen_Send((PyGenObject*)yf, NULL); + } else + #endif + #ifdef __Pyx_Coroutine_USED + if (__Pyx_Coroutine_Check(yf)) { + ret = __Pyx_Coroutine_Send(yf, Py_None); + } else + #endif + ret = __Pyx_PyObject_GetIterNextFunc(yf)(yf); + gen->is_running = 0; + if (likely(ret)) { + return ret; + } + return __Pyx_Coroutine_FinishDelegation(gen); + } + return __Pyx_Coroutine_SendEx(gen, Py_None, 0); +} +static PyObject *__Pyx_Coroutine_Close_Method(PyObject *self, PyObject *arg) { + CYTHON_UNUSED_VAR(arg); + return __Pyx_Coroutine_Close(self); +} +static PyObject *__Pyx_Coroutine_Close(PyObject *self) { + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; + PyObject *retval, *raised_exception; + PyObject *yf = gen->yieldfrom; + int err = 0; + if (unlikely(gen->is_running)) + return __Pyx_Coroutine_AlreadyRunningError(gen); + if (yf) { + Py_INCREF(yf); + err = __Pyx_Coroutine_CloseIter(gen, yf); + __Pyx_Coroutine_Undelegate(gen); + Py_DECREF(yf); + } + if (err == 0) + PyErr_SetNone(PyExc_GeneratorExit); + retval = 
__Pyx_Coroutine_SendEx(gen, NULL, 1); + if (unlikely(retval)) { + const char *msg; + Py_DECREF(retval); + if ((0)) { + #ifdef __Pyx_Coroutine_USED + } else if (__Pyx_Coroutine_Check(self)) { + msg = "coroutine ignored GeneratorExit"; + #endif + #ifdef __Pyx_AsyncGen_USED + } else if (__Pyx_AsyncGen_CheckExact(self)) { +#if PY_VERSION_HEX < 0x03060000 + msg = "async generator ignored GeneratorExit - might require Python 3.6+ finalisation (PEP 525)"; +#else + msg = "async generator ignored GeneratorExit"; +#endif + #endif + } else { + msg = "generator ignored GeneratorExit"; + } + PyErr_SetString(PyExc_RuntimeError, msg); + return NULL; + } + raised_exception = PyErr_Occurred(); + if (likely(!raised_exception || __Pyx_PyErr_GivenExceptionMatches2(raised_exception, PyExc_GeneratorExit, PyExc_StopIteration))) { + if (raised_exception) PyErr_Clear(); + Py_INCREF(Py_None); + return Py_None; + } + return NULL; +} +static PyObject *__Pyx__Coroutine_Throw(PyObject *self, PyObject *typ, PyObject *val, PyObject *tb, + PyObject *args, int close_on_genexit) { + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; + PyObject *yf = gen->yieldfrom; + if (unlikely(gen->is_running)) + return __Pyx_Coroutine_AlreadyRunningError(gen); + if (yf) { + PyObject *ret; + Py_INCREF(yf); + if (__Pyx_PyErr_GivenExceptionMatches(typ, PyExc_GeneratorExit) && close_on_genexit) { + int err = __Pyx_Coroutine_CloseIter(gen, yf); + Py_DECREF(yf); + __Pyx_Coroutine_Undelegate(gen); + if (err < 0) + return __Pyx_Coroutine_MethodReturn(self, __Pyx_Coroutine_SendEx(gen, NULL, 0)); + goto throw_here; + } + gen->is_running = 1; + if (0 + #ifdef __Pyx_Generator_USED + || __Pyx_Generator_CheckExact(yf) + #endif + #ifdef __Pyx_Coroutine_USED + || __Pyx_Coroutine_Check(yf) + #endif + ) { + ret = __Pyx__Coroutine_Throw(yf, typ, val, tb, args, close_on_genexit); + #ifdef __Pyx_Coroutine_USED + } else if (__Pyx_CoroutineAwait_CheckExact(yf)) { + ret = 
__Pyx__Coroutine_Throw(((__pyx_CoroutineAwaitObject*)yf)->coroutine, typ, val, tb, args, close_on_genexit); + #endif + } else { + PyObject *meth = __Pyx_PyObject_GetAttrStrNoError(yf, __pyx_n_s_throw); + if (unlikely(!meth)) { + Py_DECREF(yf); + if (unlikely(PyErr_Occurred())) { + gen->is_running = 0; + return NULL; + } + __Pyx_Coroutine_Undelegate(gen); + gen->is_running = 0; + goto throw_here; + } + if (likely(args)) { + ret = __Pyx_PyObject_Call(meth, args, NULL); + } else { + PyObject *cargs[4] = {NULL, typ, val, tb}; + ret = __Pyx_PyObject_FastCall(meth, cargs+1, 3 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); + } + Py_DECREF(meth); + } + gen->is_running = 0; + Py_DECREF(yf); + if (!ret) { + ret = __Pyx_Coroutine_FinishDelegation(gen); + } + return __Pyx_Coroutine_MethodReturn(self, ret); + } +throw_here: + __Pyx_Raise(typ, val, tb, NULL); + return __Pyx_Coroutine_MethodReturn(self, __Pyx_Coroutine_SendEx(gen, NULL, 0)); +} +static PyObject *__Pyx_Coroutine_Throw(PyObject *self, PyObject *args) { + PyObject *typ; + PyObject *val = NULL; + PyObject *tb = NULL; + if (unlikely(!PyArg_UnpackTuple(args, (char *)"throw", 1, 3, &typ, &val, &tb))) + return NULL; + return __Pyx__Coroutine_Throw(self, typ, val, tb, args, 1); +} +static CYTHON_INLINE int __Pyx_Coroutine_traverse_excstate(__Pyx_ExcInfoStruct *exc_state, visitproc visit, void *arg) { +#if PY_VERSION_HEX >= 0x030B00a4 + Py_VISIT(exc_state->exc_value); +#else + Py_VISIT(exc_state->exc_type); + Py_VISIT(exc_state->exc_value); + Py_VISIT(exc_state->exc_traceback); +#endif + return 0; +} +static int __Pyx_Coroutine_traverse(__pyx_CoroutineObject *gen, visitproc visit, void *arg) { + Py_VISIT(gen->closure); + Py_VISIT(gen->classobj); + Py_VISIT(gen->yieldfrom); + return __Pyx_Coroutine_traverse_excstate(&gen->gi_exc_state, visit, arg); +} +static int __Pyx_Coroutine_clear(PyObject *self) { + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; + Py_CLEAR(gen->closure); + Py_CLEAR(gen->classobj); + 
Py_CLEAR(gen->yieldfrom); + __Pyx_Coroutine_ExceptionClear(&gen->gi_exc_state); +#ifdef __Pyx_AsyncGen_USED + if (__Pyx_AsyncGen_CheckExact(self)) { + Py_CLEAR(((__pyx_PyAsyncGenObject*)gen)->ag_finalizer); + } +#endif + Py_CLEAR(gen->gi_code); + Py_CLEAR(gen->gi_frame); + Py_CLEAR(gen->gi_name); + Py_CLEAR(gen->gi_qualname); + Py_CLEAR(gen->gi_modulename); + return 0; +} +static void __Pyx_Coroutine_dealloc(PyObject *self) { + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; + PyObject_GC_UnTrack(gen); + if (gen->gi_weakreflist != NULL) + PyObject_ClearWeakRefs(self); + if (gen->resume_label >= 0) { + PyObject_GC_Track(self); +#if PY_VERSION_HEX >= 0x030400a1 && CYTHON_USE_TP_FINALIZE + if (unlikely(PyObject_CallFinalizerFromDealloc(self))) +#else + Py_TYPE(gen)->tp_del(self); + if (unlikely(Py_REFCNT(self) > 0)) +#endif + { + return; + } + PyObject_GC_UnTrack(self); + } +#ifdef __Pyx_AsyncGen_USED + if (__Pyx_AsyncGen_CheckExact(self)) { + /* We have to handle this case for asynchronous generators + right here, because this code has to be between UNTRACK + and GC_Del. 
*/ + Py_CLEAR(((__pyx_PyAsyncGenObject*)self)->ag_finalizer); + } +#endif + __Pyx_Coroutine_clear(self); + __Pyx_PyHeapTypeObject_GC_Del(gen); +} +static void __Pyx_Coroutine_del(PyObject *self) { + PyObject *error_type, *error_value, *error_traceback; + __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; + __Pyx_PyThreadState_declare + if (gen->resume_label < 0) { + return; + } +#if !CYTHON_USE_TP_FINALIZE + assert(self->ob_refcnt == 0); + __Pyx_SET_REFCNT(self, 1); +#endif + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&error_type, &error_value, &error_traceback); +#ifdef __Pyx_AsyncGen_USED + if (__Pyx_AsyncGen_CheckExact(self)) { + __pyx_PyAsyncGenObject *agen = (__pyx_PyAsyncGenObject*)self; + PyObject *finalizer = agen->ag_finalizer; + if (finalizer && !agen->ag_closed) { + PyObject *res = __Pyx_PyObject_CallOneArg(finalizer, self); + if (unlikely(!res)) { + PyErr_WriteUnraisable(self); + } else { + Py_DECREF(res); + } + __Pyx_ErrRestore(error_type, error_value, error_traceback); + return; + } + } +#endif + if (unlikely(gen->resume_label == 0 && !error_value)) { +#ifdef __Pyx_Coroutine_USED +#ifdef __Pyx_Generator_USED + if (!__Pyx_Generator_CheckExact(self)) +#endif + { + PyObject_GC_UnTrack(self); +#if PY_MAJOR_VERSION >= 3 || defined(PyErr_WarnFormat) + if (unlikely(PyErr_WarnFormat(PyExc_RuntimeWarning, 1, "coroutine '%.50S' was never awaited", gen->gi_qualname) < 0)) + PyErr_WriteUnraisable(self); +#else + {PyObject *msg; + char *cmsg; + #if CYTHON_COMPILING_IN_PYPY + msg = NULL; + cmsg = (char*) "coroutine was never awaited"; + #else + char *cname; + PyObject *qualname; + qualname = gen->gi_qualname; + cname = PyString_AS_STRING(qualname); + msg = PyString_FromFormat("coroutine '%.50s' was never awaited", cname); + if (unlikely(!msg)) { + PyErr_Clear(); + cmsg = (char*) "coroutine was never awaited"; + } else { + cmsg = PyString_AS_STRING(msg); + } + #endif + if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, cmsg, 1) < 0)) + 
PyErr_WriteUnraisable(self); + Py_XDECREF(msg);} +#endif + PyObject_GC_Track(self); + } +#endif + } else { + PyObject *res = __Pyx_Coroutine_Close(self); + if (unlikely(!res)) { + if (PyErr_Occurred()) + PyErr_WriteUnraisable(self); + } else { + Py_DECREF(res); + } + } + __Pyx_ErrRestore(error_type, error_value, error_traceback); +#if !CYTHON_USE_TP_FINALIZE + assert(Py_REFCNT(self) > 0); + if (likely(--self->ob_refcnt == 0)) { + return; + } + { + Py_ssize_t refcnt = Py_REFCNT(self); + _Py_NewReference(self); + __Pyx_SET_REFCNT(self, refcnt); + } +#if CYTHON_COMPILING_IN_CPYTHON + assert(PyType_IS_GC(Py_TYPE(self)) && + _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED); + _Py_DEC_REFTOTAL; +#endif +#ifdef COUNT_ALLOCS + --Py_TYPE(self)->tp_frees; + --Py_TYPE(self)->tp_allocs; +#endif +#endif +} +static PyObject * +__Pyx_Coroutine_get_name(__pyx_CoroutineObject *self, void *context) +{ + PyObject *name = self->gi_name; + CYTHON_UNUSED_VAR(context); + if (unlikely(!name)) name = Py_None; + Py_INCREF(name); + return name; +} +static int +__Pyx_Coroutine_set_name(__pyx_CoroutineObject *self, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__name__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(self->gi_name, value); + return 0; +} +static PyObject * +__Pyx_Coroutine_get_qualname(__pyx_CoroutineObject *self, void *context) +{ + PyObject *name = self->gi_qualname; + CYTHON_UNUSED_VAR(context); + if (unlikely(!name)) name = Py_None; + Py_INCREF(name); + return name; +} +static int +__Pyx_Coroutine_set_qualname(__pyx_CoroutineObject *self, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if 
(unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__qualname__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(self->gi_qualname, value); + return 0; +} +static PyObject * +__Pyx_Coroutine_get_frame(__pyx_CoroutineObject *self, void *context) +{ + PyObject *frame = self->gi_frame; + CYTHON_UNUSED_VAR(context); + if (!frame) { + if (unlikely(!self->gi_code)) { + Py_RETURN_NONE; + } + frame = (PyObject *) PyFrame_New( + PyThreadState_Get(), /*PyThreadState *tstate,*/ + (PyCodeObject*) self->gi_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (unlikely(!frame)) + return NULL; + self->gi_frame = frame; + } + Py_INCREF(frame); + return frame; +} +static __pyx_CoroutineObject *__Pyx__Coroutine_New( + PyTypeObject* type, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, + PyObject *name, PyObject *qualname, PyObject *module_name) { + __pyx_CoroutineObject *gen = PyObject_GC_New(__pyx_CoroutineObject, type); + if (unlikely(!gen)) + return NULL; + return __Pyx__Coroutine_NewInit(gen, body, code, closure, name, qualname, module_name); +} +static __pyx_CoroutineObject *__Pyx__Coroutine_NewInit( + __pyx_CoroutineObject *gen, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, + PyObject *name, PyObject *qualname, PyObject *module_name) { + gen->body = body; + gen->closure = closure; + Py_XINCREF(closure); + gen->is_running = 0; + gen->resume_label = 0; + gen->classobj = NULL; + gen->yieldfrom = NULL; + #if PY_VERSION_HEX >= 0x030B00a4 + gen->gi_exc_state.exc_value = NULL; + #else + gen->gi_exc_state.exc_type = NULL; + gen->gi_exc_state.exc_value = NULL; + gen->gi_exc_state.exc_traceback = NULL; + #endif +#if CYTHON_USE_EXC_INFO_STACK + gen->gi_exc_state.previous_item = NULL; +#endif + gen->gi_weakreflist = NULL; + Py_XINCREF(qualname); + gen->gi_qualname = qualname; + Py_XINCREF(name); + gen->gi_name = 
name; + Py_XINCREF(module_name); + gen->gi_modulename = module_name; + Py_XINCREF(code); + gen->gi_code = code; + gen->gi_frame = NULL; + PyObject_GC_Track(gen); + return gen; +} + +/* PatchModuleWithCoroutine */ +static PyObject* __Pyx_Coroutine_patch_module(PyObject* module, const char* py_code) { +#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + int result; + PyObject *globals, *result_obj; + globals = PyDict_New(); if (unlikely(!globals)) goto ignore; + result = PyDict_SetItemString(globals, "_cython_coroutine_type", + #ifdef __Pyx_Coroutine_USED + (PyObject*)__pyx_CoroutineType); + #else + Py_None); + #endif + if (unlikely(result < 0)) goto ignore; + result = PyDict_SetItemString(globals, "_cython_generator_type", + #ifdef __Pyx_Generator_USED + (PyObject*)__pyx_GeneratorType); + #else + Py_None); + #endif + if (unlikely(result < 0)) goto ignore; + if (unlikely(PyDict_SetItemString(globals, "_module", module) < 0)) goto ignore; + if (unlikely(PyDict_SetItemString(globals, "__builtins__", __pyx_b) < 0)) goto ignore; + result_obj = PyRun_String(py_code, Py_file_input, globals, globals); + if (unlikely(!result_obj)) goto ignore; + Py_DECREF(result_obj); + Py_DECREF(globals); + return module; +ignore: + Py_XDECREF(globals); + PyErr_WriteUnraisable(module); + if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, "Cython module failed to patch module with custom type", 1) < 0)) { + Py_DECREF(module); + module = NULL; + } +#else + py_code++; +#endif + return module; +} + +/* PatchGeneratorABC */ +#ifndef CYTHON_REGISTER_ABCS +#define CYTHON_REGISTER_ABCS 1 +#endif +#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) +static PyObject* __Pyx_patch_abc_module(PyObject *module); +static PyObject* __Pyx_patch_abc_module(PyObject *module) { + module = __Pyx_Coroutine_patch_module( + module, "" +"if _cython_generator_type is not None:\n" +" try: Generator = _module.Generator\n" +" except AttributeError: pass\n" +" else: 
Generator.register(_cython_generator_type)\n" +"if _cython_coroutine_type is not None:\n" +" try: Coroutine = _module.Coroutine\n" +" except AttributeError: pass\n" +" else: Coroutine.register(_cython_coroutine_type)\n" + ); + return module; +} +#endif +static int __Pyx_patch_abc(void) { +#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + static int abc_patched = 0; + if (CYTHON_REGISTER_ABCS && !abc_patched) { + PyObject *module; + module = PyImport_ImportModule((PY_MAJOR_VERSION >= 3) ? "collections.abc" : "collections"); + if (unlikely(!module)) { + PyErr_WriteUnraisable(NULL); + if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, + ((PY_MAJOR_VERSION >= 3) ? + "Cython module failed to register with collections.abc module" : + "Cython module failed to register with collections module"), 1) < 0)) { + return -1; + } + } else { + module = __Pyx_patch_abc_module(module); + abc_patched = 1; + if (unlikely(!module)) + return -1; + Py_DECREF(module); + } + module = PyImport_ImportModule("backports_abc"); + if (module) { + module = __Pyx_patch_abc_module(module); + Py_XDECREF(module); + } + if (!module) { + PyErr_Clear(); + } + } +#else + if ((0)) __Pyx_Coroutine_patch_module(NULL, NULL); +#endif + return 0; +} + +/* Generator */ +static PyMethodDef __pyx_Generator_methods[] = { + {"send", (PyCFunction) __Pyx_Coroutine_Send, METH_O, + (char*) PyDoc_STR("send(arg) -> send 'arg' into generator,\nreturn next yielded value or raise StopIteration.")}, + {"throw", (PyCFunction) __Pyx_Coroutine_Throw, METH_VARARGS, + (char*) PyDoc_STR("throw(typ[,val[,tb]]) -> raise exception in generator,\nreturn next yielded value or raise StopIteration.")}, + {"close", (PyCFunction) __Pyx_Coroutine_Close_Method, METH_NOARGS, + (char*) PyDoc_STR("close() -> raise GeneratorExit inside generator.")}, + {0, 0, 0, 0} +}; +static PyMemberDef __pyx_Generator_memberlist[] = { + {(char *) "gi_running", T_BOOL, offsetof(__pyx_CoroutineObject, is_running), READONLY, NULL}, + {(char*) 
"gi_yieldfrom", T_OBJECT, offsetof(__pyx_CoroutineObject, yieldfrom), READONLY, + (char*) PyDoc_STR("object being iterated by 'yield from', or None")}, + {(char*) "gi_code", T_OBJECT, offsetof(__pyx_CoroutineObject, gi_code), READONLY, NULL}, + {(char *) "__module__", T_OBJECT, offsetof(__pyx_CoroutineObject, gi_modulename), 0, 0}, +#if CYTHON_USE_TYPE_SPECS + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CoroutineObject, gi_weakreflist), READONLY, 0}, +#endif + {0, 0, 0, 0, 0} +}; +static PyGetSetDef __pyx_Generator_getsets[] = { + {(char *) "__name__", (getter)__Pyx_Coroutine_get_name, (setter)__Pyx_Coroutine_set_name, + (char*) PyDoc_STR("name of the generator"), 0}, + {(char *) "__qualname__", (getter)__Pyx_Coroutine_get_qualname, (setter)__Pyx_Coroutine_set_qualname, + (char*) PyDoc_STR("qualified name of the generator"), 0}, + {(char *) "gi_frame", (getter)__Pyx_Coroutine_get_frame, NULL, + (char*) PyDoc_STR("Frame of the generator"), 0}, + {0, 0, 0, 0, 0} +}; +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_GeneratorType_slots[] = { + {Py_tp_dealloc, (void *)__Pyx_Coroutine_dealloc}, + {Py_tp_traverse, (void *)__Pyx_Coroutine_traverse}, + {Py_tp_iter, (void *)PyObject_SelfIter}, + {Py_tp_iternext, (void *)__Pyx_Generator_Next}, + {Py_tp_methods, (void *)__pyx_Generator_methods}, + {Py_tp_members, (void *)__pyx_Generator_memberlist}, + {Py_tp_getset, (void *)__pyx_Generator_getsets}, + {Py_tp_getattro, (void *) __Pyx_PyObject_GenericGetAttrNoDict}, +#if CYTHON_USE_TP_FINALIZE + {Py_tp_finalize, (void *)__Pyx_Coroutine_del}, +#endif + {0, 0}, +}; +static PyType_Spec __pyx_GeneratorType_spec = { + __PYX_TYPE_MODULE_PREFIX "generator", + sizeof(__pyx_CoroutineObject), + 0, + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, + __pyx_GeneratorType_slots +}; +#else +static PyTypeObject __pyx_GeneratorType_type = { + PyVarObject_HEAD_INIT(0, 0) + __PYX_TYPE_MODULE_PREFIX "generator", + sizeof(__pyx_CoroutineObject), + 0, + 
(destructor) __Pyx_Coroutine_dealloc, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, + 0, + (traverseproc) __Pyx_Coroutine_traverse, + 0, + 0, + offsetof(__pyx_CoroutineObject, gi_weakreflist), + 0, + (iternextfunc) __Pyx_Generator_Next, + __pyx_Generator_methods, + __pyx_Generator_memberlist, + __pyx_Generator_getsets, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +#if CYTHON_USE_TP_FINALIZE + 0, +#else + __Pyx_Coroutine_del, +#endif + 0, +#if CYTHON_USE_TP_FINALIZE + __Pyx_Coroutine_del, +#elif PY_VERSION_HEX >= 0x030400a1 + 0, +#endif +#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, +#endif +#if __PYX_NEED_TP_PRINT_SLOT + 0, +#endif +#if PY_VERSION_HEX >= 0x030C0000 + 0, +#endif +#if PY_VERSION_HEX >= 0x030d00A4 + 0, +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, +#endif +}; +#endif +static int __pyx_Generator_init(PyObject *module) { +#if CYTHON_USE_TYPE_SPECS + __pyx_GeneratorType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_GeneratorType_spec, NULL); +#else + CYTHON_UNUSED_VAR(module); + __pyx_GeneratorType_type.tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; + __pyx_GeneratorType_type.tp_iter = PyObject_SelfIter; + __pyx_GeneratorType = __Pyx_FetchCommonType(&__pyx_GeneratorType_type); +#endif + if (unlikely(!__pyx_GeneratorType)) { + return -1; + } + return 0; +} + +/* CheckBinaryVersion */ +static unsigned long __Pyx_get_runtime_version(void) { +#if __PYX_LIMITED_VERSION_HEX >= 0x030B00A4 + return Py_Version & ~0xFFUL; +#else + const char* rt_version = Py_GetVersion(); + unsigned long version = 0; + unsigned long factor = 0x01000000UL; + unsigned int digit = 0; + int i = 0; + while (factor) { + while ('0' <= rt_version[i] && rt_version[i] <= '9') { + digit = digit * 10 + (unsigned int) (rt_version[i] - '0'); + ++i; + } 
+ version += factor * digit; + if (rt_version[i] != '.') + break; + digit = 0; + factor >>= 8; + ++i; + } + return version; +#endif +} +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer) { + const unsigned long MAJOR_MINOR = 0xFFFF0000UL; + if ((rt_version & MAJOR_MINOR) == (ct_version & MAJOR_MINOR)) + return 0; + if (likely(allow_newer && (rt_version & MAJOR_MINOR) > (ct_version & MAJOR_MINOR))) + return 1; + { + char message[200]; + PyOS_snprintf(message, sizeof(message), + "compile time Python version %d.%d " + "of module '%.100s' " + "%s " + "runtime version %d.%d", + (int) (ct_version >> 24), (int) ((ct_version >> 16) & 0xFF), + __Pyx_MODULE_NAME, + (allow_newer) ? "was newer than" : "does not match", + (int) (rt_version >> 24), (int) ((rt_version >> 16) & 0xFF) + ); + return PyErr_WarnEx(NULL, message, 1); + } +} + +/* InitStrings */ +#if PY_MAJOR_VERSION >= 3 +static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) { + if (t.is_unicode | t.is_str) { + if (t.intern) { + *str = PyUnicode_InternFromString(t.s); + } else if (t.encoding) { + *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL); + } else { + *str = PyUnicode_FromStringAndSize(t.s, t.n - 1); + } + } else { + *str = PyBytes_FromStringAndSize(t.s, t.n - 1); + } + if (!*str) + return -1; + if (PyObject_Hash(*str) == -1) + return -1; + return 0; +} +#endif +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION >= 3 + __Pyx_InitString(*t, t->p); + #else + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + if (!*t->p) + return -1; + if (PyObject_Hash(*t->p) == -1) + return -1; + #endif + ++t; + } + return 0; +} + +#include +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) { + size_t len = strlen(s); + if 
(unlikely(len > (size_t) PY_SSIZE_T_MAX)) { + PyErr_SetString(PyExc_OverflowError, "byte string is too long"); + return -1; + } + return (Py_ssize_t) len; +} +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return __Pyx_PyUnicode_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return PyByteArray_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#if !CYTHON_PEP393_ENABLED +static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +} +#else +static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (likely(PyUnicode_IS_ASCII(o))) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +} +#endif +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if 
__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { + return __Pyx_PyUnicode_AsStringAndSize(o, length); + } else +#endif +#if (!CYTHON_COMPILING_IN_PYPY && !CYTHON_COMPILING_IN_LIMITED_API) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r = PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { + int retval; + if (unlikely(!x)) return -1; + retval = __Pyx_PyObject_IsTrue(x); + Py_DECREF(x); + return retval; +} +static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { + __Pyx_TypeName result_type_name = __Pyx_PyType_GetName(Py_TYPE(result)); +#if PY_MAJOR_VERSION >= 3 + if (PyLong_Check(result)) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "__int__ returned non-int (type " __Pyx_FMT_TYPENAME "). 
" + "The ability to return an instance of a strict subclass of int is deprecated, " + "and may be removed in a future version of Python.", + result_type_name)) { + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; + } + __Pyx_DECREF_TypeName(result_type_name); + return result; + } +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type " __Pyx_FMT_TYPENAME ")", + type_name, type_name, result_type_name); + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { +#if CYTHON_USE_TYPE_SLOTS + PyNumberMethods *m; +#endif + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x) || PyLong_Check(x))) +#else + if (likely(PyLong_Check(x))) +#endif + return __Pyx_NewRef(x); +#if CYTHON_USE_TYPE_SLOTS + m = Py_TYPE(x)->tp_as_number; + #if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = m->nb_int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = m->nb_long(x); + } + #else + if (likely(m && m->nb_int)) { + name = "int"; + res = m->nb_int(x); + } + #endif +#else + if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { + res = PyNumber_Int(x); + } +#endif + if (likely(res)) { +#if PY_MAJOR_VERSION < 3 + if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { +#else + if (unlikely(!PyLong_CheckExact(res))) { +#endif + return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) { + if (sizeof(Py_ssize_t) >= sizeof(long)) + return PyInt_AS_LONG(b); + else + return PyInt_AsSsize_t(b); + } +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_USE_PYLONG_INTERNALS + if 
(likely(__Pyx_PyLong_IsCompact(b))) { + return __Pyx_PyLong_CompactValue(b); + } else { + const digit* digits = __Pyx_PyLong_Digits(b); + const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(b); + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { + if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { + return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o); +#if PY_MAJOR_VERSION < 3 + } else if (likely(PyInt_CheckExact(o))) { + return PyInt_AS_LONG(o); +#endif + } else { + Py_ssize_t ival; + PyObject *x; + x = PyNumber_Index(o); + if (!x) return -1; + ival = PyInt_AsLong(x); + Py_DECREF(x); + return ival; + } 
+} +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { + return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +/* #### Code section: utility_code_pragmas_end ### */ +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + + + +/* #### Code section: end ### */ +#endif /* Py_PYTHON_H */ diff --git a/jcvi/formats/cblast.pyx b/jcvi/formats/cblast.pyx new file mode 100644 index 00000000..15f89a47 --- /dev/null +++ b/jcvi/formats/cblast.pyx @@ -0,0 +1,210 @@ +# cython: language_level=2, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True + +""" +Cythonized (fast) version of BlastLine + +Stolen from brentp's biostuff (thanks): + +""" +import sys +from libc.stdio cimport FILE, EOF, fopen, fscanf, rewind, fclose, sscanf, \ + fgets, sprintf +from libc.string cimport strcpy + + +cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" +cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" +cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" +cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" + + +cdef class Blast: + cdef: + FILE* fh + object filename + + def __cinit__(self, char* filename): + self.fh = fopen(filename, 'r') + self.filename = filename + + def __iter__(self): + rewind(self.fh) + return self + + def __next__(self): + cdef: + float pct = 0.0, evalue = 0.0, bit = 0.0 + char qname[128] + char sname[128] + int hlen, nmiss, ngap, qstart, qstop, sstart, sstop + char *tmp + int success + + success = fscanf(self.fh, blast_format_line, qname, sname, \ + &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ + &sstart, &sstop, &evalue, &bit ) + if success == EOF: + raise StopIteration + return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, + qstart, qstop, sstart, sstop, evalue, bit) + + def __dealloc__(self): + 
fclose(self.fh) + + def __repr__(self): + return "Blast('%s')" % (self.filename, ) + +# Python 2 and 3 differ in str and unicode handling +# https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython +cdef bytes c_str(str s): + return s.encode("UTF-8") + +cdef str py_str(bytes s): + return s.decode("UTF-8", "replace") + + +cdef class BlastLine: + """ + Given a string of tab-delimited (-m 8) blast output, parse it and create + an object with the usual attrs: + + >>> b = BlastLine("Os09g11510 Os08g13650 92.31 39 3 0 2273 2311 3237 3199 0.001 54.0") + >>> b.query + 'Os09g11510' + >>> attrs = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ + ... 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score') + >>> [getattr(b, attr) for attr in attrs] # doctest: +ELLIPSIS + ['Os09g11510', 'Os08g13650', 92.3..., 39, 3, 0, 2273, 2311, 3237, 3199, 0.001..., 54.0] + """ + + __slots__ = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ + 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score', \ + 'qseqid', 'sseqid', 'qi', 'si', 'orientation') + + cdef public: + char _query[128] + char _subject[128] + int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop + float pctid, score + double evalue + object qseqid, sseqid + int qi, si + char orientation + + property query: + def __get__(self): + return py_str(self._query) + def __set__(self, val: str): + strcpy(self._query, c_str(val)) + + property subject: + def __get__(self): + return py_str(self._subject) + def __set__(self, val: str): + strcpy(self._subject, c_str(val)) + + def __init__(self, s): + sline = c_str(s) + sscanf(sline, blast_format, self._query, self._subject, + &self.pctid, &self.hitlen, &self.nmismatch, &self.ngaps, + &self.qstart, &self.qstop, + &self.sstart, &self.sstop, + &self.evalue, &self.score) + + self.orientation = '+' + if self.qstart > self.qstop: + self.qstart, self.qstop = self.qstop, self.qstart + self.orientation = '-' + if self.sstart > self.sstop: + self.sstart, 
self.sstop = self.sstop, self.sstart + self.orientation = '-' + + def __richcmp__(BlastLine self, BlastLine other, size_t op): + if op == 2: # == + if self.query != other.query and self.qstart != other.qstart: + return False + return self.subject == other.subject and \ + self.qstop == other.qstop and \ + self.sstop == other.sstop and \ + self.evalue == other.evalue and \ + self.hitlen == other.hitlen + + elif op == 3: # != + return not self.__richcmp__(other, 2) + else: + raise Exception("that comparison not implemented") + + def __hash__(self): + return id(self) + + def __repr__(self): + return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ + (self.query, self.subject, self.evalue, self.score) + + def __str__(self): + args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + if self.orientation == '-': + args[8], args[9] = args[9], args[8] + + cdef char result[512] + sprintf(result, blast_output, self._query, self._subject, + self.pctid, self.hitlen, self.nmismatch, self.ngaps, + self.qstart, self.qstop, + self.sstart, self.sstop, + self.evalue, self.score) + + return py_str(result) + + @property + def has_score(self): + return hasattr(self, "score") + + @property + def swapped(self): + """ + Swap query and subject. 
+ """ + args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + args[0:2] = [self.subject, self.query] + args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + if self.orientation == '-': + args[8], args[9] = args[9], args[8] + b = "\t".join(str(x) for x in args) + return BlastLine(b) + + @property + def bedline(self): + cdef char result[512] + sprintf(result, bed_output, + self._subject, self.sstart - 1, self.sstop, + self._query, self.qstart, self.qstop, + self.score, self.orientation) + + return py_str(result) + + def __reduce__(self): + return create_blast_line, ( + self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, + self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, + self.evalue, self.score) + + +cdef BlastLine create_blast_line(char *query, char *subject, float pctid, int hitlen, + int nmismatch, int ngaps, int qstart, int qstop, + int sstart, int sstop, float evalue, float score): + """ Factory method. + """ + cdef BlastLine b = BlastLine.__new__(BlastLine) + b.query = query + b.subject = subject + b.pctid = pctid + b.hitlen = hitlen + b.nmismatch = nmismatch + b.ngaps = ngaps + b.qstart = qstart + b.qstop = qstop + b.sstart = sstart + b.sstop = sstop + b.evalue = evalue + b.score = score + return b diff --git a/jcvi/formats/cdt.py b/jcvi/formats/cdt.py new file mode 100644 index 00000000..077b34f4 --- /dev/null +++ b/jcvi/formats/cdt.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +%prog data.cdt data.nwk + +Convert the result from Eisen's CLUSTER program: data.gtr and data.cdt into NEWICK format +""" + +import csv +import sys + +from collections import namedtuple +from itertools import groupby + +from ..apps.base import OptionParser, logger + +from .base import BaseFile + + +GTRLine = namedtuple("GTRLine", "parent left_child right_child dist") + + +class CDT(BaseFile): + def __init__(self, filename): + super().__init__(filename) + + pf = filename.rsplit(".", 1)[0] + 
self.gtrfile = pf + ".gtr" + self.atrfile = pf + ".atr" + self.get_names() + + def get_names(self): + cdt_file = self.filename + reader = csv.reader(open(cdt_file), delimiter="\t") + + gid = next(reader) + assert gid[0] == "GID" + aid = next(reader) + if aid[0] == "AID": + eweight = next(reader) + else: + eweight = aid + assert eweight[0] == "EWEIGHT" + + self.gnames = [x[:2] for x in reader] + self.anames = list(zip(aid, gid))[4:] + + def get_gtr_tree(self): + + from ete3 import Tree + + fp = open(self.gtrfile) + reader = csv.reader(fp, delimiter="\t") + nodes = {} + gnames = dict(self.gnames) + for g in map(GTRLine._make, reader): + node = Tree() + parent_name, parent_dist = g.parent, float(g.dist) + for child in (g.left_child, g.right_child): + if child in gnames: + node.add_child(name=gnames[child], dist=1 - parent_dist) + else: + assert child in nodes, child + child_node, child_dist = nodes[child] + node.add_child(child_node, dist=child_dist - parent_dist) + + nodes[parent_name] = (node, parent_dist) + + self.gtr_tree = node + + def print_newick(self, nwk_file): + + self.gtr_tree.write(format=5, outfile=nwk_file) + logger.debug("Newick tree written to `%s`", nwk_file) + + def iter_partitions(self, cutoff=0.3, gtr=True): + from jcvi.utils.grouper import Grouper + + if gtr: + names = self.gnames + fp = open(self.gtrfile) + else: + names = self.anames + fp = open(self.atrfile) + + reader = csv.reader(fp, delimiter="\t") + grouper = Grouper() + for g in map(GTRLine._make, reader): + d = float(g.dist) + if d < cutoff: + continue + + grouper.join(g.parent, g.left_child, g.right_child) + + parents = {} + for i, group in enumerate(grouper): + for g in group: + parents[g] = i + + partitions = [[parents.get(a, x), x] for a, x in names] + for key, parts in groupby(partitions, key=lambda x: x[0]): + yield list(x[1] for x in parts) + + +def main(args): + + cdt_file, nwk_file = args + cdt = CDT(cdt_file) + cdt.get_gtr_tree() + cdt.print_newick(nwk_file) + + +if __name__ == 
class ChainLine(object):
    """
    One chain record: its header line plus the alignment data lines.

    Each data line is ``size dt dq``; the last line of a chain carries only
    ``size`` and is padded with ``dt = dq = 0``.

    Attributes:
        chain: the header line, stored as given.
        blocks: list of ``[size, dt, dq]`` integer triples.
        ungapped, dt, dq: column sums over ``blocks`` (all 0 when the chain
            has no data lines).
    """

    def __init__(self, chain, lines):
        self.chain = chain
        self.blocks = []
        for line in lines:
            atoms = line.split()
            # Skip blank lines before looking at the column count.
            if not atoms:
                continue
            if len(atoms) == 1:
                # Final block of a chain: only the ungapped size is present.
                atoms += [0, 0]

            self.blocks.append([int(x) for x in atoms])

        if self.blocks:
            ungapped, dt, dq = zip(*self.blocks)
            self.ungapped = sum(ungapped)
            self.dt = sum(dt)
            self.dq = sum(dq)
        else:
            # zip(*[]) yields nothing to unpack; an empty chain sums to zero.
            self.ungapped = self.dt = self.dq = 0
def fromagp(args):
    """
    %prog fromagp agpfile componentfasta objectfasta

    Generate chain file from AGP format. The components represent the old
    genome (target) and the objects represent new genome (query).
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    from jcvi.formats.agp import AGP

    p = OptionParser(fromagp.__doc__)
    p.add_argument(
        "--novalidate", default=False, action="store_true", help="Do not validate AGP"
    )
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    agpfile, componentfasta, objectfasta = args
    chainfile = agpfile.rsplit(".", 1)[0] + ".chain"
    agp = AGP(agpfile, validate=(not opts.novalidate))
    componentsizes = Sizes(componentfasta).mapping
    objectsizes = Sizes(objectfasta).mapping
    chain = "chain"
    score = 1000
    tStrand = "+"
    chain_id = 0  # running chain identifier (avoids shadowing builtin `id`)

    # Inputs are loaded before the output is opened, and the context manager
    # closes the chain file even if a component/object lookup fails.
    with open(chainfile, "w") as fw:
        for a in agp:
            if a.is_gap:
                continue

            # Target (component) coordinates, start converted to 0-based.
            tName = a.component_id
            tSize = componentsizes[tName]
            tStart = a.component_beg - 1
            tEnd = a.component_end

            # Query (object) coordinates; minus-strand intervals are
            # expressed relative to the reverse strand.
            qName = a.object
            qSize = objectsizes[qName]
            qStrand = "-" if a.orientation == "-" else "+"
            qStart = a.object_beg
            qEnd = a.object_end
            if qStrand == "-":
                qStart, qEnd = qSize - qEnd + 1, qSize - qStart + 1
            qStart -= 1

            chain_id += 1
            size = a.object_span
            headerline = "\t".join(
                str(x)
                for x in (
                    chain,
                    score,
                    tName,
                    tSize,
                    tStrand,
                    tStart,
                    tEnd,
                    qName,
                    qSize,
                    qStrand,
                    qStart,
                    qEnd,
                    chain_id,
                )
            )
            # One ungapped block per AGP component line.
            print(headerline, file=fw)
            print(size, file=fw)
            print(file=fw)

    logger.debug("File written to `%s`.", chainfile)
+ """ + p = OptionParser(blat.__doc__) + p.add_argument( + "--minscore", + default=100, + type=int, + help="Matches minus mismatches gap penalty", + ) + p.add_argument( + "--minid", + default=98, + type=int, + help="Minimum sequence identity", + ) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + oldfasta, newfasta = args + twobitfiles = [] + for fastafile in args: + tbfile = faToTwoBit(fastafile) + twobitfiles.append(tbfile) + + oldtwobit, newtwobit = twobitfiles + cmd = "pblat -threads={0}".format(opts.cpus) if which("pblat") else "blat" + cmd += " {0} {1}".format(oldtwobit, newfasta) + cmd += " -tileSize=12 -minScore={0} -minIdentity={1} ".format( + opts.minscore, opts.minid + ) + pslfile = "{0}.{1}.psl".format( + *(op.basename(x).split(".")[0] for x in (newfasta, oldfasta)) + ) + cmd += pslfile + sh(cmd) + + +def frompsl(args): + """ + %prog frompsl old.new.psl old.fasta new.fasta + + Generate chain file from psl file. The pipeline is describe in: + + """ + p = OptionParser(frompsl.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + pslfile, oldfasta, newfasta = args + pf = oldfasta.split(".")[0] + + # Chain together alignments from using axtChain + chainfile = pf + ".chain" + twobitfiles = [] + for fastafile in (oldfasta, newfasta): + tbfile = faToTwoBit(fastafile) + twobitfiles.append(tbfile) + oldtwobit, newtwobit = twobitfiles + + if need_update(pslfile, chainfile): + cmd = "axtChain -linearGap=medium -psl {0}".format(pslfile) + cmd += " {0} {1} {2}".format(oldtwobit, newtwobit, chainfile) + sh(cmd) + + # Sort chain files + sortedchain = chainfile.rsplit(".", 1)[0] + ".sorted.chain" + if need_update(chainfile, sortedchain): + cmd = "chainSort {0} {1}".format(chainfile, sortedchain) + sh(cmd) + + # Make alignment nets from chains + netfile = pf + ".net" + oldsizes = Sizes(oldfasta).filename + newsizes = Sizes(newfasta).filename + if 
class ReadLine(object):
    """
    A read placement line of a TIGR contig file, e.g.

    '#16(0) [RC] 3046 bases, 00000000 checksum. {3046 1} <1 3046>'

    The read id is the text before the first "(", the contig interval is
    taken from between "<" and ">", and "[RC]" marks the minus strand.
    """

    def __init__(self, row, contig):
        assert row[0] == "#"
        read_id = row.strip("#").split("(")[0]
        inside_angle = row.split("<")[1].split(">")[0]
        first, second = inside_angle.split()
        first, second = int(first), int(second)

        self.id = read_id
        self.contig = contig
        # Store the interval with start <= end regardless of input order.
        self.start = min(first, second)
        self.end = max(first, second)

        if "[RC]" in row:
            self.orientation = "-"
        else:
            self.orientation = "+"

    def __str__(self):
        return self.id

    @property
    def bedline(self):
        """Tab-separated BED row with a 0-based start and a fixed score of 0."""
        fields = (
            self.contig,
            self.start - 1,
            self.end,
            self.id,
            "0",
            self.orientation,
        )
        return "\t".join(map(str, fields))

    __repr__ = __str__
class ContigFile(BaseFile):
    """Reader for a TIGR contig file; keeps an open handle in ``self.fp``."""

    def __init__(self, filename):
        super().__init__(filename)
        self.fp = open(filename)

    def iter_records(self):
        """
        Yield one ContigLine per "##" header, with the ReadLine objects for
        the "#" headers that follow it collected in its ``reads`` list.
        """
        current = None
        for header, _body in read_block(self.fp, "#"):
            if header[:2] != "##":
                # A read header: attach it to the contig being built.
                current.reads.append(ReadLine(header, current.id))
                continue

            # A new contig header: emit the previous contig first.
            if current:
                yield current
            current = ContigLine(header)

        # Emit the final contig, if any was seen.
        if current:
            yield current
def bed(args):
    """
    %prog bed contigfile

    Prints out the contigs and their associated reads.
    """
    # Build the parser from this command's own docstring; `main` has no
    # docstring, so the previous `main.__doc__` produced no usage text.
    p = OptionParser(bed.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (contigfile,) = args
    bedfile = contigfile.rsplit(".", 1)[0] + ".bed"
    c = ContigFile(contigfile)

    # Close (and therefore flush) the output before reporting it as written.
    with open(bedfile, "w") as fw:
        for rec in c.iter_records():
            for r in rec.reads:
                print(r.bedline, file=fw)

    logger.debug("File written to `%s`.", bedfile)

    return bedfile
class CoordsLine(object):
    """
    The coords line looks like (in one line):
    2953 4450 | 525 2023 | 1498 1499 | 98.07 |
    8046 2023 | 18.62 74.10 | AC182814.30 contig_100476

    the coords file needs to be generated by `show-coords -rcl`

    Raises AssertionError when the row does not split into 13 or 17 columns;
    callers in this module rely on that to skip non-data lines.
    """

    def __init__(self, row):
        # Replace each " | " separator with a space rather than deleting it.
        # Deleting it fuses the neighbouring numbers when the columns are
        # single-spaced (as in the example above), which made such rows fail
        # the column-count check. IDs containing "|" are left untouched.
        row = row.replace(" | ", " ")
        atoms = row.split()
        assert len(atoms) in (13, 17), "expecting 13 or 17 columns"

        self.start1 = int(atoms[0])
        self.end1 = int(atoms[1])

        self.start2 = int(atoms[2])
        self.end2 = int(atoms[3])

        # Query coordinates are stored ascending; a descending input pair
        # is recorded as minus orientation.
        if self.start2 > self.end2:
            self.start2, self.end2 = self.end2, self.start2
            self.orientation = "-"
        else:
            self.orientation = "+"

        self.len1 = int(atoms[4])
        self.len2 = int(atoms[5])

        self.identity = float(atoms[6])

        self.reflen = int(atoms[7])
        self.querylen = int(atoms[8])

        # Coverage columns are percentages; store them as fractions.
        self.refcov = float(atoms[9]) / 100.0
        self.querycov = float(atoms[10]) / 100.0

        self.ref = atoms[11]
        self.query = atoms[12]

        # this is taken from CoGeBlast:
        # the coverage of the hit muliplied by percent seq identity
        # range from 0-100
        self.quality = self.identity * self.querycov
        self.score = int(self.identity * self.len1 / 100)

    def __str__(self):
        slots = "ref start1 end1 reflen " + "query start2 end2 querylen orientation"
        return "\t".join(str(getattr(self, attr)) for attr in slots.split())

    def bedline(self, pctid=False):
        """BED row on the reference; score is %identity when `pctid` is set."""
        score = self.identity if pctid else self.score
        return "\t".join(
            str(x)
            for x in (
                self.ref,
                self.start1 - 1,
                self.end1,
                self.query,
                score,
                self.orientation,
            )
        )

    def qbedline(self, pctid=False):
        """BED row on the query; score is %identity when `pctid` is set."""
        score = self.identity if pctid else self.score
        return "\t".join(
            str(x)
            for x in (
                self.query,
                self.start2 - 1,
                self.end2,
                self.ref,
                score,
                self.orientation,
            )
        )

    @property
    def blastline(self):
        """Tab-separated row laid out like BLAST tabular output."""
        hitlen = max(self.len1, self.len2)
        score = self.score
        mismatch = int(self.len1 * (1 - self.identity / 100))
        # The e-value is derived from the score alone: 3e9 * 2 ** -score.
        log_prob = -score * 0.693147181
        evalue = 3.0e9 * exp(log_prob)
        evalue = "{0:.1g}".format(evalue)
        return "\t".join(
            str(x)
            for x in (
                self.query,
                self.ref,
                self.identity,
                hitlen,
                mismatch,
                0,
                self.start2,
                self.end2,
                self.start1,
                self.end1,
                evalue,
                score,
            )
        )

    def overlap(self, max_hang=100):
        r"""
        Determine the type of overlap given query, ref alignment coordinates
        Consider the following alignment between sequence a and b:

        aLhang \              / aRhang
                \------------/
                /------------\
        bLhang /              \ bRhang

        Terminal overlap: a before b, b before a
        Contain overlap: a in b, b in a

        Returns an index into `Overlap_types` (0 means no overlap called).
        """
        aL, aR = 1, self.reflen
        bL, bR = 1, self.querylen
        aLhang, aRhang = self.start1 - aL, aR - self.end1
        bLhang, bRhang = self.start2 - bL, bR - self.end2
        if self.orientation == "-":
            bLhang, bRhang = bRhang, bLhang

        s1 = aLhang + bRhang
        s2 = aRhang + bLhang
        s3 = aLhang + aRhang
        s4 = bLhang + bRhang

        # Dovetail (terminal) overlap
        if s1 < max_hang:
            ov_type = 2  # b ~ a
        elif s2 < max_hang:
            ov_type = 1  # a ~ b
        # Containment overlap
        elif s3 < max_hang:
            ov_type = 3  # a in b
        elif s4 < max_hang:
            ov_type = 4  # b in a
        else:
            ov_type = 0

        return ov_type
def get_stats(coordsfile):
    """
    Summarize the alignments in `coordsfile` as an AlignStats object.

    Collects the query and reference intervals of every alignment, passes
    each set to range_union, weights reference-span lengths by percent
    identity, and takes the middle value returned by calculate_A50 over the
    span lengths.
    """
    from jcvi.utils.range import range_union

    logger.debug("Report stats on `%s`", coordsfile)

    query_intervals = []
    reference_intervals = []
    span_lengths = []
    identicals = 0

    for hit in Coords(coordsfile):
        q_lo, q_hi = sorted((hit.start2, hit.end2))
        query_intervals.append((hit.query, q_lo, q_hi))

        r_lo, r_hi = sorted((hit.start1, hit.end1))
        reference_intervals.append((hit.ref, r_lo, r_hi))

        span = r_hi - r_lo
        span_lengths.append(span)
        identicals += hit.identity / 100.0 * span

    qrycovered = range_union(query_intervals)
    refcovered = range_union(reference_intervals)
    _, AL50, _ = calculate_A50(span_lengths)

    return AlignStats(
        op.basename(coordsfile), qrycovered, refcovered, None, None, identicals, AL50
    )
def blast(args):
    """
    %prog blast

    Covert delta or coordsfile to BLAST tabular output.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    p = OptionParser(blast.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (deltafile,) = args
    blastfile = deltafile.rsplit(".", 1)[0] + ".blast"

    # Regenerate only when need_update() reports the output as stale.
    if need_update(deltafile, blastfile):
        coords = Coords(deltafile)
        # Context manager so the output is flushed and closed on return.
        with open(blastfile, "w") as fw:
            for c in coords:
                print(c.blastline, file=fw)
def coverage(args):
    """
    %prog coverage coordsfile

    Report the coverage per query record, useful to see which query matches
    reference. The coords file MUST be filtered with supermap::

        jcvi.algorithms.supermap --filter query
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    p = OptionParser(coverage.__doc__)
    p.add_argument(
        "-c",
        dest="cutoff",
        default=0.5,
        type=float,
        help="only report query with coverage greater than",
    )

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (coordsfile,) = args

    coords = []
    # Context manager so the input handle is closed once parsing is done.
    with open(coordsfile) as fp:
        for row in fp:
            try:
                c = CoordsLine(row)
            except AssertionError:
                # Not a data row (wrong column count); skip it.
                continue
            coords.append(c)

    # groupby only merges adjacent items, so sort by the grouping key first.
    coords.sort(key=lambda x: x.query)

    coverages = [
        (query, sum(x.querycov for x in lines))
        for query, lines in groupby(coords, key=lambda x: x.query)
    ]

    # Highest cumulative coverage first, ties broken by query name.
    coverages.sort(key=lambda x: (-x[1], x[0]))
    for query, cumulative_cutoff in coverages:
        if cumulative_cutoff < opts.cutoff:
            # Sorted descending, so everything after this is below cutoff too.
            break
        print("{0}\t{1:.2f}".format(query, cumulative_cutoff))
def filter(args):
    """
    %prog filter

    Produce a new delta/coords file and filter based on id% or cov%.
    Use `delta-filter` for .delta file.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    p = OptionParser(filter.__doc__)
    p.set_align(pctid=0, hitlen=0)
    p.add_argument(
        "--overlap",
        default=False,
        action="store_true",
        help="Print overlap status (e.g. terminal, contained)",
    )

    opts, args = p.parse_args(args)
    if len(args) != 1:
        sys.exit(not p.print_help())

    pctid = opts.pctid
    hitlen = opts.hitlen

    (filename,) = args
    # Nothing to filter on: hand back the input unchanged.
    if pctid == 0 and hitlen == 0:
        return filename

    pf, suffix = filename.rsplit(".", 1)
    outfile = "".join((pf, ".P{0}L{1}.".format(int(pctid), int(hitlen)), suffix))
    if not need_update(filename, outfile):
        return outfile

    # Delta files are delegated to the external `delta-filter` tool.
    if suffix == "delta":
        cmd = "delta-filter -i {0} -l {1} {2}".format(pctid, hitlen, filename)
        sh(cmd, outfile=outfile)
        return outfile

    with open(filename) as fp:
        fw = must_open(outfile, "w")
        try:
            for row in fp:
                try:
                    c = CoordsLine(row)
                except AssertionError:
                    # Not a data row (wrong column count); skip it.
                    continue

                if c.identity < pctid:
                    continue
                if c.len2 < hitlen:
                    continue

                outrow = row.rstrip()
                if opts.overlap:
                    # `overlap` is a method and must be called. The bare
                    # attribute was always truthy and is not a valid index
                    # into Overlap_types.
                    ov = c.overlap()
                    if not ov:
                        # Type 0 ("none"): drop rows with no overlap call.
                        continue
                    outrow += "\t" + Overlap_types[ov]
                print(outrow, file=fw)
        finally:
            fw.close()

    return outfile
class ColorMatcher(object):
    """
    Map RGB colors onto indices of the xlwt palette and hand out palette
    colors that have not been used yet.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Mark every palette color except black as unused."""
        self.unused_colors = set(self.xlwt_colors)
        # Never use black.
        self.unused_colors.discard((0, 0, 0))

    # Culled from a table at http://www.mvps.org/dmcritchie/excel/colors.htm
    xlwt_colors = [
        (0, 0, 0),
        (255, 255, 255),
        (255, 0, 0),
        (0, 255, 0),
        (0, 0, 255),
        (255, 255, 0),
        (255, 0, 255),
        (0, 255, 255),
        (0, 0, 0),
        (255, 255, 255),
        (255, 0, 0),
        (0, 255, 0),
        (0, 0, 255),
        (255, 255, 0),
        (255, 0, 255),
        (0, 255, 255),
        (128, 0, 0),
        (0, 128, 0),
        (0, 0, 128),
        (128, 128, 0),
        (128, 0, 128),
        (0, 128, 128),
        (192, 192, 192),
        (128, 128, 128),
        (153, 153, 255),
        (153, 51, 102),
        (255, 255, 204),
        (204, 255, 255),
        (102, 0, 102),
        (255, 128, 128),
        (0, 102, 204),
        (204, 204, 255),
        (0, 0, 128),
        (255, 0, 255),
        (255, 255, 0),
        (0, 255, 255),
        (128, 0, 128),
        (128, 0, 0),
        (0, 128, 128),
        (0, 0, 255),
        (0, 204, 255),
        (204, 255, 255),
        (204, 255, 204),
        (255, 255, 153),
        (153, 204, 255),
        (255, 153, 204),
        (204, 153, 255),
        (255, 204, 153),
        (51, 102, 255),
        (51, 204, 204),
        (153, 204, 0),
        (255, 204, 0),
        (255, 153, 0),
        (255, 102, 0),
        (102, 102, 153),
        (150, 150, 150),
        (0, 51, 102),
        (51, 153, 102),
        (0, 51, 0),
        (51, 51, 0),
        (153, 51, 0),
        (153, 51, 102),
        (51, 51, 153),
        (51, 51, 51),
    ]

    @staticmethod
    def color_distance(rgb1, rgb2):
        """Weighted squared distance between two (r, g, b) triples."""
        # Adapted from Colour metric by Thiadmer Riemersma,
        # http://www.compuphase.com/cmetric.htm
        rmean = (rgb1[0] + rgb2[0]) / 2
        r = rgb1[0] - rgb2[0]
        g = rgb1[1] - rgb2[1]
        b = rgb1[2] - rgb2[2]
        return (
            (((512 + rmean) * r * r) / 256)
            + 4 * g * g
            + (((767 - rmean) * b * b) / 256)
        )

    def match_color_index(self, color):
        """Takes an "R,G,B" string or wx.Color and returns a matching xlwt
        color.

        An int is returned unchanged; a falsy `color` returns None. The
        matched palette color is removed from the unused set.
        """
        if isinstance(color, int):
            return color
        if not color:
            return None

        # Imported only on the path that needs it, so int/empty inputs do
        # not depend on the project color module.
        from jcvi.utils.webcolors import color_diff

        if isinstance(color, str):
            # Materialize the triple: a bare map() iterator would be
            # exhausted by the first color_diff call below, leaving every
            # later palette comparison with an empty color.
            rgb = tuple(int(x) for x in color.split(","))
        else:
            rgb = color.Get()
        # NOTE(review): these two calls look like the stdlib
        # `logging.disable(level)` pattern applied to the imported `logger`
        # object; if `logger` is a `logging.Logger` they would raise
        # AttributeError. Confirm against `apps.base.logger`.
        logger.disable(logger.DEBUG)
        distances = [color_diff(rgb, x) for x in self.xlwt_colors]
        logger.disable(logger.NOTSET)
        result = distances.index(min(distances))
        self.unused_colors.discard(self.xlwt_colors[result])
        return result

    def get_unused_color(self):
        """Returns an xlwt color index that has not been previously returned by
        this instance. Attempts to maximize the distance between the color and
        all previously used colors.
        """
        if not self.unused_colors:
            # If we somehow run out of colors, reset the color matcher.
            self.reset()
        used_colors = [c for c in self.xlwt_colors if c not in self.unused_colors]
        # Pick the unused color whose nearest used color is farthest away.
        result_color = max(
            self.unused_colors,
            key=lambda c: min(self.color_distance(c, c2) for c2 in used_colors),
        )
        result_index = self.xlwt_colors.index(result_color)
        self.unused_colors.discard(result_color)
        return result_index
def csv(args):
    """
    %prog csv excelfile

    Convert EXCEL to csv file.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    from xlrd import open_workbook

    p = OptionParser(csv.__doc__)
    p.set_sep(sep=",")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (excelfile,) = args
    sep = opts.sep
    csvfile = excelfile.rsplit(".", 1)[0] + ".csv"
    wb = open_workbook(excelfile)

    # Context manager so the output is flushed and closed on return. Rows
    # from every sheet are written one after another into the same file.
    with open(csvfile, "w") as fw:
        for s in wb.sheets():
            print("Sheet:", s.name, file=sys.stderr)
            for row in range(s.nrows):
                values = [s.cell(row, col).value for col in range(s.ncols)]
                print(sep.join(str(x) for x in values), file=fw)
def tostring(self):
    """Return a dict mapping each key to its record's sequence as a str."""
    return {name: str(record.seq) for name, record in self.iteritems()}
Reset to {2}.".format( + stop, fasta.id, len(fasta) + ) + logger.error(msg) + stop = len(fasta) + + seq = fasta.seq[start:stop] + + if strand in (-1, "-1", "-"): + seq = seq.reverse_complement() + + return seq + + def sequence(self, f, asstring=True): + """ + Emulate brentp's pyfasta/fasta.py sequence() methods + + take a feature and use the start/stop or exon_keys to return + the sequence from the assocatied fasta file: + + f: a feature + asstring: if true, return the sequence as a string + : if false, return as a biopython Seq + + >>> f = Fasta('tests/data/three_chrs.fasta') + >>> f.sequence({'start':1, 'stop':2, 'strand':1, 'chr': 'chr1'}) + 'AC' + >>> f.sequence({'start':1, 'stop':2, 'strand': -1, 'chr': 'chr1'}) + 'GT' + """ + + assert "chr" in f, "`chr` field required" + name = f["chr"] + + assert name in self, "feature: %s not in `%s`" % (f, self.filename) + + fasta = self[f["chr"]] + + seq = Fasta.subseq(fasta, f.get("start"), f.get("stop"), f.get("strand")) + + if asstring: + return str(seq) + + return seq + + +class ORFFinder(object): + """ + Class derived from https://gist.github.com/933737 + Original code written by David Winter (https://github.com/dwinter) + + Code writted to answer this challenge at Biostar: + http://biostar.stackexchange.com/questions/5902/ + + (Code includes improvements from Brad Chapman) + + Find the longest ORF in a given sequence + "seq" is a string, if "start" is not provided any codon can be the start of + and ORF. 
class ORFFinder(object):
    """
    Find the longest ORF in a sequence, scanning all six reading frames.

    Derived from https://gist.github.com/933737, originally written by
    David Winter (https://github.com/dwinter) for the Biostar challenge at
    http://biostar.stackexchange.com/questions/5902/ (with improvements from
    Brad Chapman).

    ``seq`` is converted to an upper-case string. ``start`` is the list of
    accepted start codons; when it is empty any non-stop codon may open an
    ORF. ``stop`` is the list of stop codons (default TAG, TAA, TGA). If
    several ORFs share the longest length, the first one encountered wins.
    """

    def __init__(self, seq, start=None, stop=None):
        self.seq = str(seq).upper()
        # Copy into fresh lists: a list literal used as a default argument is
        # shared by every instance, so mutating one would leak into the others.
        self.start = list(start) if start is not None else []
        self.stop = list(stop) if stop is not None else ["TAG", "TAA", "TGA"]
        # strand, frame, start, end, length; coordinates are 1-based
        self.result = ["+", 0, 0, 0, 0]
        self.longest = 0
        # Measure the normalized string that is actually scanned.
        self.size = len(self.seq)

    def __str__(self):
        # Format similar to getorf
        strand, frame, start, end, length = self.result
        start += 1  # 1-based coordinates
        if strand == "-":
            start, end = end, start
        return "[{0} - {1}]".format(start, end)

    @property
    def info(self):
        """Tab-separated strand, frame, start and end of the current result."""
        strand, frame, start, end, length = self.result
        return "\t".join(str(x) for x in (strand, frame, start, end))

    def codons(self, frame):
        """
        Yield ``(triplet, index)`` for each complete codon of ``self.sequence``.

        ``frame`` is the 0-based offset of the first codon; ``index`` is the
        offset of the triplet within ``self.sequence``.
        """
        start = frame
        while start + 3 <= self.size:
            yield self.sequence[start : start + 3], start
            start += 3

    def scan_sequence(self, frame, direction):
        """Search one reading frame of ``self.sequence`` for ORFs."""
        orf_start = None
        for c, index in self.codons(frame):
            if (
                c not in self.stop
                and (c in self.start or not self.start)
                and orf_start is None
            ):
                orf_start = index
            elif c in self.stop and orf_start is not None:
                self._update_longest(orf_start, index + 3, direction, frame)
                orf_start = None

        # An ORF still open at the end of the frame runs through the last
        # complete codon. `index` is bound here because orf_start can only be
        # set inside the loop.
        if orf_start is not None:
            self._update_longest(orf_start, index + 3, direction, frame)

    def _update_longest(self, orf_start, index, direction, frame):
        """Record the ORF ``orf_start:index`` if it beats the current longest."""
        orf_end = index
        L = orf_end - orf_start
        if L > self.longest:
            self.longest = L
            self.result = [direction, frame, orf_start, orf_end, L]

    def get_longest_orf(self):
        """
        Scan both strands and three frames each; return the longest ORF.

        Returns the ORF as a string (reverse-complemented for the minus
        strand), or ``"N"`` when no ORF was found. Coordinates stored in
        ``self.result`` are converted to the forward strand.
        """
        dirs = ("+", "-")
        for direction in dirs:
            self.sequence = self.seq
            if direction == "-":
                self.sequence = rc(self.sequence)
            for frame in range(3):
                self.scan_sequence(frame, direction)

        strand, frame, start, end, length = self.result
        size = self.size
        if strand == "-":
            # Minus-strand hits were found on the reverse complement.
            start, end = size - end, size - start
            self.result[2:4] = start, end

        assert start <= end, self.result
        if start == end:
            return "N"

        orf = self.seq[start:end]
        if strand == "-":
            orf = rc(orf)

        assert len(orf) % 3 == 0

        return orf
class SequenceInfo(object):
    """
    Emulate output from `sequence_info`:

    File                           SUBAC32.contigs.fasta

    Number of sequences                    80

    Residue counts:
      Number of A's                     66266  31.36 %
      Number of C's                     40032  18.95 %
      Number of G's                     39145  18.53 %
      Number of T's                     65799  31.14 %
      Number of N's                        58   0.03 %
      Total                            211300

    Sequence lengths:
      Minimum                             242
      Maximum                            8398
      Average                            2641.25
      N50                                4791
    """

    def __init__(self, filename, gapstats=False):
        """
        Compute the summary row for ``filename``.

        ``self.header`` and ``self.data`` are parallel lists. A trailing
        ``Gaps`` column (number of N runs reported by ``iter_gap_len``) is
        added when ``gapstats`` is true.
        """
        from jcvi.utils.cbook import SummaryStats
        from jcvi.assembly.base import calculate_A50

        f = Fasta(filename)
        self.filename = filename
        self.header = "File|#_seqs|#_reals|#_Ns|Total|Min|Max|N50".split("|")
        if gapstats:
            self.header += ["Gaps"]
        self.nseqs = len(f)
        sizes = []
        gaps = []
        na = nc = ng = nt = 0
        for _, rec in f.iteritems():
            residues = str(rec.seq).upper()
            sizes.append(len(residues))
            na += residues.count("A")
            nc += residues.count("C")
            ng += residues.count("G")
            nt += residues.count("T")
            if gapstats:
                gaps += list(self.iter_gap_len(residues))
        self.real = real = na + nc + ng + nt
        stats = SummaryStats(sizes)
        self.sum = stats.sum
        if gapstats:
            self.gaps = len(gaps)
        # Everything that is not A/C/G/T is counted in the N column.
        self.nn = self.sum - real
        a50, l50, nn50 = calculate_A50(sizes)
        self.min = stats.min
        self.max = stats.max
        self.mean = int(stats.mean)
        self.n50 = l50
        self.data = [
            self.filename,
            self.nseqs,
            self.real,
            self.nn,
            self.sum,
            self.min,
            self.max,
            self.n50,
        ]
        if gapstats:
            self.data += [self.gaps]
        assert len(self.header) == len(self.data)

    def iter_gap_len(self, seq, mingap=10):
        """
        Yield the length of every run of "N" in ``seq`` that is at least
        ``mingap`` long.
        """
        for is_gap, run in groupby(seq, lambda x: x == "N"):
            if not is_gap:
                continue
            # A groupby run can be consumed only once, so measure it a single
            # time and yield that value. Re-listing the exhausted run gave 0.
            gap_len = sum(1 for _ in run)
            if gap_len >= mingap:
                yield gap_len
def rc(s):
    """
    Return the reverse complement of the string ``s``.

    A/T and C/G are swapped in either case; N, n, X and x map to themselves;
    any other character is left unchanged.
    """
    table = str.maketrans("ATCGatcgNnXx", "TAGCtagcNnXx")
    # Per-character translation commutes with reversal.
    return s[::-1].translate(table)
def gc(args):
    """
    %prog gc fastafile

    Plot G+C content distribution.
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # Each sequence is cut into consecutive, non-overlapping windows of
    # --binsize bases (a trailing partial window is ignored). The G+C
    # percentage of every window that has at least one A/C/G/T is collected,
    # and the distribution of those percentages is drawn as an ASCII plot.
    p = OptionParser(gc.__doc__)
    p.add_argument("--binsize", default=500, type=int, help="Bin size to use")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastafile,) = args
    binsize = opts.binsize
    allbins = []
    for name, seq in parse_fasta(fastafile):
        # Floor division: `/` yields a float on Python 3, which range() rejects.
        for i in range(len(seq) // binsize):
            window = seq[i * binsize : (i + 1) * binsize].upper()
            atcnt = window.count("A") + window.count("T")
            gccnt = window.count("G") + window.count("C")
            totalcnt = atcnt + gccnt
            if totalcnt == 0:
                continue
            gcpct = gccnt * 100 / totalcnt
            allbins.append(gcpct)

    if not allbins:
        # Nothing to plot; unpacking zip(*[]) below would raise ValueError.
        logger.error(
            "No bins of size {} with A/C/G/T found in `{}`".format(binsize, fastafile)
        )
        return

    from jcvi.graphics.base import asciiplot
    from collections import Counter

    title = "Total number of bins={}".format(len(allbins))
    c = Counter(allbins)
    x, y = zip(*sorted(c.items()))
    asciiplot(x, y, title=title)
def trimsplit(args):
    """
    %prog trimsplit fastafile

    Split sequences at lower-cased letters and stretch of Ns. This is useful
    at cleaning up the low quality bases for the QUIVER output.
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # Output goes to `<input stem>.split.fasta`. Kept pieces are named
    # `<first |-field of the header>_<i>`, with i counting pieces per record.
    from jcvi.utils.cbook import SummaryStats

    p = OptionParser(trimsplit.__doc__)
    p.add_argument(
        "--minlength", default=1000, type=int, help="Min length of contigs to keep"
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastafile,) = args
    minlength = opts.minlength

    splitfile = fastafile.rsplit(".", 1)[0] + ".split.fasta"
    fw = must_open(splitfile, "w")
    ntotal = 0
    removed = []  # lengths of lower-case or too-short stretches that were dropped
    Ns = []  # lengths of N runs or too-short pieces dropped inside kept stretches
    for name, residues in parse_fasta(fastafile):
        ntotal += len(residues)
        kept = []
        for is_lower, chars in groupby(residues, key=lambda x: x.islower()):
            stretch = "".join(chars)
            if is_lower or len(stretch) < minlength:
                removed.append(len(stretch))
                continue
            for is_n, sub in groupby(stretch, key=lambda x: x in "Nn"):
                piece = "".join(sub)
                if is_n or len(piece) < minlength:
                    Ns.append(len(piece))
                else:
                    kept.append(piece)

        base = name.split("|")[0]
        for i, piece in enumerate(kept):
            rec = SeqRecord(Seq(piece), id="{0}_{1}".format(base, i), description="")
            SeqIO.write([rec], fw, "fasta")
    fw.close()

    # Reporting
    if removed:
        logger.debug(
            "Total bases removed: {0}".format(percentage(sum(removed), ntotal))
        )
        print(SummaryStats(removed), file=sys.stderr)
    if Ns:
        logger.debug("Total Ns removed: {0}".format(percentage(sum(Ns), ntotal)))
        print(SummaryStats(Ns), file=sys.stderr)
def info(args):
    """
    %prog info *.fasta

    Run `sequence_info` on FASTA files. Generate a report per file.
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # One SequenceInfo row is produced per input file. The header of the last
    # report is used for the table; all reports share the same gapstats flag.
    p = OptionParser(info.__doc__)
    p.add_argument(
        "--gaps", default=False, action="store_true", help="Count number of gaps"
    )
    p.set_table()
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())

    reports = [SequenceInfo(fastafile, gapstats=opts.gaps) for fastafile in args]
    rows = [report.data for report in reports]
    write_csv(
        reports[-1].header,
        rows,
        sep=opts.sep,
        filename=opts.outfile,
        align=opts.align,
    )
def longestorf(args):
    """
    %prog longestorf fastafile

    Find longest ORF for each sequence in fastafile.
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # Writes `<stem>.orf.fasta` and, with --ids, `<stem>.orf.ids` holding the
    # ORFFinder.info columns per record. Returns the ORF FASTA file name.
    p = OptionParser(longestorf.__doc__)
    p.add_argument("--ids", action="store_true", help="Generate table with ORF info")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastafile,) = args
    stem = fastafile.rsplit(".", 1)[0]
    orffile = stem + ".orf.fasta"
    idsfile = stem + ".orf.ids" if opts.ids else None
    fwids = open(idsfile, "w") if idsfile else None

    records = Fasta(fastafile, lazy=True)
    fw = must_open(orffile, "w")
    before = after = 0
    for name, rec in records.iteritems_ordered():
        before += len(rec.seq)
        # ORFFinder tries all six frames
        finder = ORFFinder(rec.seq)
        newcds = Seq(finder.get_longest_orf())
        after += len(newcds)
        SeqIO.write(
            [SeqRecord(newcds, id=name, description=rec.description)], fw, "fasta"
        )
        if idsfile:
            print("\t".join((name, finder.info)), file=fwids)

    fw.close()
    if idsfile:
        fwids.close()

    logger.debug(
        "Longest ORFs written to `{0}` ({1}).".format(
            orffile, percentage(after, before)
        )
    )

    return orffile
def ispcr(args):
    """
    %prog ispcr fastafile

    Reformat paired primers into isPcr query format, which is three column
    format: name, forward, reverse
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # Records are read two at a time. Both ids, with the last -r characters
    # removed, must agree; the shared id and the two sequences are written as
    # one tab-separated row to `<fastafile>.isPcr`.
    p = OptionParser(ispcr.__doc__)
    p.add_argument(
        "-r",
        dest="rclip",
        default=1,
        type=int,
        help="pair ID is derived from rstrip N chars",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastafile,) = args
    ispcrfile = fastafile + ".isPcr"
    fw = open(ispcrfile, "w")

    N = opts.rclip

    def strip_name(x):
        # With -r 0 the name is used as is. The previous lambda returned the
        # `str` type here, which broke the join below.
        return x[:-N] if N else x

    npairs = 0
    fastaiter = SeqIO.parse(fastafile, "fasta")
    for a, b in grouper(fastaiter, 2):

        aid, bid = [strip_name(x) for x in (a.id, b.id)]
        assert aid == bid, "Name mismatch {0}".format((aid, bid))

        print("\t".join((aid, str(a.seq), str(b.seq))), file=fw)
        npairs += 1

    fw.close()
    logger.debug("A total of {0} pairs written to `{1}`.".format(npairs, ispcrfile))
def parse_fasta(infile, upper=False):
    """
    Parse FASTA-formatted input and yield ``(header, sequence)`` pairs.

    ``infile`` is handed to ``must_open``; if that fails, ``infile`` itself is
    iterated as lines. The header is returned without the leading ">", the
    sequence lines are stripped and joined, and the sequence is upper-cased
    when ``upper`` is true. A header with no sequence lines after it yields
    an empty sequence.
    """
    try:
        fp = must_open(infile)
    except Exception:
        # Deliberate best effort: whatever must_open cannot handle is assumed
        # to already be an iterable of lines. Unlike a bare `except:`, this
        # lets KeyboardInterrupt and SystemExit propagate.
        fp = infile
    # Alternate between runs of header lines and runs of sequence lines.
    # Slicing (row[:1]) keeps an empty line from raising IndexError.
    fa_iter = (x[1] for x in groupby(fp, lambda row: row[:1] == ">"))
    for header in fa_iter:
        header = next(header)
        if header[:1] != ">":
            continue
        # drop '>'
        header = header.strip()[1:]
        # Stitch the sequence lines together. The default avoids a
        # StopIteration (a RuntimeError inside a generator) when the input
        # ends on a header.
        seq = "".join(s.strip() for s in next(fa_iter, ()))
        if upper:
            seq = seq.upper()
        yield header, seq


def iter_clean_fasta(fastafile):
    """Yield ``(header, seq)`` keeping only ASCII letters and "*" in seq."""
    for header, seq in parse_fasta(fastafile):
        seq = "".join(x for x in seq if x in string.ascii_letters or x == "*")
        yield header, seq


def iter_canonical_fasta(fastafile):
    """
    Yield ``(header, seq)`` with every character outside "ACGTN" replaced by
    "N". The number of replaced characters is logged once at the end.
    """
    # NOTE(review): lower-case bases are not in this set and therefore become
    # "N"; confirm that is intended given the --canonical help text in clean().
    canonical = "ACGTN"
    totalbad = 0
    for header, seq in parse_fasta(fastafile):
        badcounts = sum(1 for x in seq if x not in canonical)
        seq = "".join((x if x in canonical else "N") for x in seq)
        totalbad += badcounts
        yield header, seq

    logger.debug("Total bad char: {0}".format(totalbad))


def fancyprint(fw, seq, width=60, chunk=10):
    """
    Pretty-print ``seq`` to ``fw``: ``width`` residues per row, split into
    space-separated groups of ``chunk``. Each row is prefixed with the 1-based
    position of its first residue, right-aligned to the width of the sequence
    length.

    ``width`` must be a multiple of ``chunk``.
    """
    assert width % chunk == 0
    seq = str(seq)
    seqlen = len(seq)
    maxchar = len(str(seqlen))

    # Stepping over 0-based offsets covers the final row even when it holds a
    # single residue. Slicing needs only integers, so no float chunk count.
    for offset in range(0, seqlen, width):
        row = seq[offset : offset + width]
        groups = " ".join(row[i : i + chunk] for i in range(0, len(row), chunk))
        position = str(offset + 1).rjust(maxchar, " ")
        print(" ".join((position, groups)), file=fw)
def clean(args):
    """
    %prog clean fastafile

    Remove irregular chars in FASTA seqs.
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # --fancy pretty-prints the cleaned sequences and returns 0. Otherwise the
    # cleaned (or, with --canonical, canonicalized) records are written as
    # plain FASTA with an empty description, and nothing is returned.
    p = OptionParser(clean.__doc__)
    p.add_argument(
        "--fancy", default=False, action="store_true", help="Pretty print the sequence"
    )
    p.add_argument(
        "--canonical", default=False, action="store_true", help="Use only acgtnACGTN"
    )
    p.set_outfile()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastafile,) = args
    fw = must_open(opts.outfile, "w")

    if not opts.fancy:
        cleaner = iter_clean_fasta
        if opts.canonical:
            cleaner = iter_canonical_fasta

        for header, residues in cleaner(fastafile):
            rec = SeqRecord(Seq(residues), id=header, description="")
            SeqIO.write([rec], fw, "fasta")
        return None

    for header, residues in iter_clean_fasta(fastafile):
        print(">" + header, file=fw)
        fancyprint(fw, residues)

    return 0
def translate(args):
    """
    %prog translate cdsfasta

    Translate CDS to proteins. The tricky thing is that sometimes the CDS
    represents a partial gene, therefore disrupting the frame of the protein.
    Check all three frames to get a valid translation.
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # Returns the tuple (cdsfasta, outfile). With --longest, cdsfasta is the
    # ORF file produced by longestorf() rather than the original input.
    from jcvi.utils.cbook import gene_name

    # Accepted values for --table, as strings "1" through "24".
    transl_tables = [str(x) for x in range(1, 25)]
    p = OptionParser(translate.__doc__)
    p.add_argument(
        "--ids",
        default=False,
        action="store_true",
        help="Create .ids file with the complete/partial/gaps label",
    )
    p.add_argument(
        "--longest",
        default=False,
        action="store_true",
        help="Find the longest ORF from each input CDS",
    )
    p.add_argument(
        "--table",
        default=1,
        choices=transl_tables,
        help="Specify translation table to use",
    )
    p.add_argument(
        "--strip_names",
        default=False,
        action="store_true",
        help="Strip alternative splicing (e.g. At5g06540.1 -> At5g06540)",
    )
    p.add_argument(
        "--unique",
        default=False,
        action="store_true",
        help="Ensure the output FASTA contains unique identifiers",
    )
    p.set_outfile()

    opts, args = p.parse_args(args)
    strip_names = opts.strip_names
    unique = opts.unique

    if len(args) != 1:
        sys.exit(not p.print_help())

    (cdsfasta,) = args
    if opts.longest:
        # Translate the longest ORF of each CDS instead of the CDS itself.
        cdsfasta = longestorf([cdsfasta])

    f = Fasta(cdsfasta, lazy=True)
    outfile = opts.outfile
    fw = must_open(outfile, "w")

    if opts.ids:
        # The label table is written next to the (possibly ORF) input file.
        idsfile = cdsfasta.rsplit(".", 1)[0] + ".ids"
        ids = open(idsfile, "w")
    else:
        ids = None

    # Per-category counters reported on stderr at the end.
    five_prime_missing = three_prime_missing = 0
    contain_ns = complete = cannot_translate = total = 0

    seen = set()  # output names already written, used by --unique
    grand_total = 0  # every input record, including skipped duplicates
    for name, rec in f.iteritems_ordered():
        grand_total += 1

        if strip_names:
            name = gene_name(name)

        # With --unique, only the first record for a given name is kept.
        if unique and name in seen:
            continue

        cds = rec.seq
        cdslen = len(cds)
        peplen = cdslen // 3
        total += 1

        # Try all three frames
        # Keep the frame whose translation runs longest before its first "*".
        pep = ""
        for i in range(3):
            newcds = cds[i : i + peplen * 3]
            newpep = newcds.translate(table=opts.table)
            if len(newpep.split("*")[0]) > len(pep.split("*")[0]):
                pep = newpep

        labels = []
        # A stop codon anywhere but the very end means no frame was clean.
        if "*" in pep.rstrip("*"):
            logger.error("{0} cannot translate".format(name))
            cannot_translate += 1
            labels.append("cannot_translate")

        contains_start = pep.startswith("M")
        contains_stop = pep.endswith("*")
        contains_ns = "X" in pep
        start_ns = pep.startswith("X")
        end_ns = pep.endswith("X")

        if not contains_start:
            five_prime_missing += 1
            labels.append("five_prime_missing")
        if not contains_stop:
            three_prime_missing += 1
            labels.append("three_prime_missing")
        if contains_ns:
            contain_ns += 1
            labels.append("contain_ns")
        if contains_start and contains_stop:
            complete += 1
            labels.append("complete")
        if start_ns:
            labels.append("start_ns")
        if end_ns:
            labels.append("end_ns")

        if ids:
            print("\t".join((name, ",".join(labels))), file=ids)

        # NOTE(review): pep is still the plain string "" when no frame yields
        # a residue before a stop (e.g. a CDS shorter than 3 bases); confirm
        # SeqRecord/SeqIO.write accept that.
        peprec = SeqRecord(pep, id=name, description=rec.description)
        SeqIO.write([peprec], fw, "fasta")
        fw.flush()
        seen.add(name)

    print(
        "Complete gene models: {0}".format(percentage(complete, total)), file=sys.stderr
    )
    print(
        "Missing 5`-end: {0}".format(percentage(five_prime_missing, total)),
        file=sys.stderr,
    )
    print(
        "Missing 3`-end: {0}".format(percentage(three_prime_missing, total)),
        file=sys.stderr,
    )
    print("Contain Ns: {0}".format(percentage(contain_ns, total)), file=sys.stderr)

    if cannot_translate:
        print(
            "Cannot translate: {0}".format(percentage(cannot_translate, total)),
            file=sys.stderr,
        )

    fw.close()

    logger.debug(
        "Total records: {}, Unique records (strip_names={}): {}".format(
            grand_total, strip_names, len(seen)
        )
    )

    return cdsfasta, outfile
def filter(args):
    """
    %prog filter fastafile 100

    Filter the FASTA file to contain records with size >= or <= certain cutoff.
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # Records of length >= cutoff are kept by default; with --less, records
    # of length < cutoff are kept instead. Returns the output handle's name.
    p = OptionParser(filter.__doc__)
    p.add_argument(
        "--less",
        default=False,
        action="store_true",
        help="filter the sizes < certain cutoff [default: >=]",
    )
    p.set_outfile()

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, cutoff = args
    try:
        cutoff = int(cutoff)
    except ValueError:
        sys.exit(not p.print_help())

    records = Fasta(fastafile, lazy=True)

    fw = must_open(opts.outfile, "w")
    for _, rec in records.iteritems_ordered():
        size = len(rec)
        keep = size < cutoff if opts.less else size >= cutoff
        if keep:
            SeqIO.write([rec], fw, "fasta")
            fw.flush()

    return fw.name
def ids(args):
    """
    %prog ids fastafiles

    Generate the FASTA headers without the '>'.
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # For every header line: drop ">", optionally cut at the first --until
    # match, then print either the first word or, with --description, the
    # first word and the remainder joined by a tab.
    p = OptionParser(ids.__doc__)
    p.add_argument(
        "--until", default=None, help="Truncate the name and description at words"
    )
    p.add_argument(
        "--description",
        default=False,
        action="store_true",
        help="Generate a second column with description",
    )
    p.set_outfile()

    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    until = opts.until
    fw = must_open(opts.outfile, "w")
    for row in must_open(args):
        if row[0] != ">":
            continue

        title = row[1:].rstrip()
        if until:
            title = title.split(until)[0]

        atoms = title.split(None, 1)
        print("\t".join(atoms) if opts.description else atoms[0], file=fw)

    fw.close()
def sort(args):
    """
    %prog sort fastafile

    Sort a list of sequences and output with sorted IDs, etc.
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # Writes `<stem>.sorted.fasta` and returns its name. Records are ordered
    # by key, or with --sizes by decreasing length and then by key.
    p = OptionParser(sort.__doc__)
    p.add_argument(
        "--sizes", default=False, action="store_true", help="Sort by decreasing size"
    )

    opts, args = p.parse_args(args)

    if len(args) != 1:
        # `not`, like every other command here, so a usage error exits
        # non-zero instead of 0.
        sys.exit(not p.print_help())

    (fastafile,) = args
    sortedfastafile = fastafile.rsplit(".", 1)[0] + ".sorted.fasta"

    f = Fasta(fastafile, index=False)
    fw = must_open(sortedfastafile, "w")
    if opts.sizes:
        # Sort by decreasing size
        sortlist = sorted(f.itersizes(), key=lambda x: (-x[1], x[0]))
        if sortlist:  # an empty FASTA has no max/min to report
            logger.debug(
                "Sort by size: max: {0}, min: {1}".format(sortlist[0], sortlist[-1])
            )
        sortlist = [x for x, s in sortlist]
    else:
        sortlist = sorted(f.iterkeys())

    for key in sortlist:
        rec = f[key]
        SeqIO.write([rec], fw, "fasta")

    logger.debug("Sorted file written to `{0}`.".format(sortedfastafile))
    fw.close()

    return sortedfastafile
def join(args):
    """
    %prog join fastafile [phasefile]

    Make AGP file for a bunch of sequences, and add gaps between, and then build
    the joined fastafile. This is useful by itself, but with --oo option this
    can convert the .oo (BAMBUS output) into AGP and a joined fasta.

    Phasefile is optional, but must contain two columns - BAC and phase (0, 1, 2, 3).
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # Writes `<stem>.agp`, builds `<stem>.joined.fasta` from it and returns
    # the joined file name.
    from jcvi.formats.agp import OO, Phases, build
    from jcvi.formats.sizes import Sizes

    p = OptionParser(join.__doc__)
    p.add_argument("--newid", default=None, help="New sequence ID")
    p.add_argument(
        "--gapsize",
        default=100,
        type=int,
        help="Number of N's in between the sequences",
    )
    p.add_argument(
        "--gaptype", default="contig", help="Gap type to use in the AGP file"
    )
    p.add_argument(
        "--evidence", default="", help="Linkage evidence to report in the AGP file"
    )
    p.add_argument("--oo", help="Use .oo file generated by bambus")
    opts, args = p.parse_args(args)

    nargs = len(args)
    if nargs not in (1, 2):
        sys.exit(not p.print_help())

    if nargs == 2:
        fastafile, phasefile = args
        phases = DictFile(phasefile)
        # Translate each numeric phase through the Phases lookup.
        phases = {bac: Phases[int(phase)] for bac, phase in phases.items()}
    else:
        (fastafile,) = args
        phases = {}

    sizes = Sizes(fastafile)
    prefix = fastafile.rsplit(".", 1)[0]
    agpfile = prefix + ".agp"
    newid = opts.newid
    oo = opts.oo

    o = OO(oo, sizes.mapping)

    if oo:
        seen = o.contigs
        # The leftover contigs not in the oo file
        logger.debug(
            "A total of {0} contigs ({1} in `{2}`)".format(len(sizes), len(seen), oo)
        )
        for ctg, size in sizes.iter_sizes():
            if ctg not in seen:
                o.add(ctg, ctg, size)
    elif newid:
        # Everything goes into one object named by --newid.
        for ctg, size in sizes.iter_sizes():
            o.add(newid, ctg, size)
    else:
        # One object per contig: scaffold001, scaffold002, ...
        for number, (ctg, size) in enumerate(sizes.iter_sizes(), start=1):
            o.add("scaffold{0:03d}".format(number), ctg, size)

    fw = open(agpfile, "w")
    o.write_AGP(
        fw,
        gapsize=opts.gapsize,
        gaptype=opts.gaptype,
        evidence=opts.evidence,
        phases=phases,
    )
    fw.close()

    joinedfastafile = prefix + ".joined.fasta"
    build([agpfile, fastafile, joinedfastafile])

    return joinedfastafile
def summary(args):
    """
    %prog summary *.fasta

    Report real bases and N's in fastafiles in a tabular report
    """
    # The docstring above is the usage text handed to OptionParser.
    #
    # One row per record (id, real bases, N's, length, % real) plus a final
    # "Total" row. Returns the totals as (reals, nns, seqlen). With --ids,
    # the ids of records that are less than 50% real bases go to that file.
    from natsort import natsort_key

    p = OptionParser(summary.__doc__)
    p.add_argument(
        "--suffix", default="Mb", help="make the base pair counts human readable"
    )
    p.add_argument("--ids", help="write the ids that have >= 50%% N's")
    p.set_outfile()

    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())

    idsfile = opts.ids
    header = "Seqid Real N's Total %_real".split()
    nids = 0
    if idsfile:
        idsfile = open(idsfile, "w")

    def percent_real(real_bases, length):
        # A zero-length record (or empty input) has no real bases. Report 0%
        # rather than dividing by zero.
        return real_bases * 100.0 / length if length else 0.0

    data = []
    for fastafile in args:
        for rec in SeqIO.parse(must_open(fastafile), "fasta"):
            seqlen = len(rec)
            nns = rec.seq.count("n") + rec.seq.count("N")
            reals = seqlen - nns
            pct = percent_real(reals, seqlen)
            pctreal = "{0:.1f}%".format(pct)
            if idsfile and pct < 50:
                nids += 1
                print(rec.id, file=idsfile)

            data.append((rec.id, reals, nns, seqlen, pctreal))

    data.sort(key=natsort_key)
    # Column sums; unlike unpacking zip(*data), these also work with no rows.
    reals = sum(row[1] for row in data)
    nns = sum(row[2] for row in data)
    seqlen = sum(row[3] for row in data)
    pctreal = "{0:.1f}%".format(percent_real(reals, seqlen))
    data.append(("Total", reals, nns, seqlen, pctreal))

    write_csv(header, data, sep=" ", filename=opts.outfile, thousands=True)
    if idsfile:
        logger.debug(
            "A total of {0} ids >= 50% N's written to {1}.".format(nids, idsfile.name)
        )
        idsfile.close()

    return reals, nns, seqlen
+ """ + sequential_choices = ("replace", "prefix", "suffix") + p = OptionParser(format.__doc__) + p.add_argument( + "--pairs", + default=False, + action="store_true", + help="Add trailing /1 and /2 for interleaved pairs", + ) + p.add_argument( + "--sequential", + default=None, + choices=sequential_choices, + help="Add sequential IDs", + ) + p.add_argument( + "--sequentialoffset", default=0, type=int, help="Sequential IDs start at" + ) + p.add_argument( + "--pad0", default=0, type=int, help="Pad a few zeros in front of sequential" + ) + p.add_argument( + "--gb", + default=False, + action="store_true", + help="For Genbank ID, get the accession", + ) + p.add_argument("--sep", default=None, help="Split description by certain symbol") + p.add_argument( + "--index", + default=0, + type=int, + help="Extract i-th field after split with --sep", + ) + p.add_argument( + "--noversion", + default=False, + action="store_true", + help="Remove the gb trailing version", + ) + p.add_argument("--prefix", help="Prepend prefix to sequence ID") + p.add_argument("--suffix", help="Append suffix to sequence ID") + p.add_argument( + "--template", + default=False, + action="store_true", + help="Extract `template=aaa dir=x library=m` to `m-aaa/x`", + ) + p.add_argument("--switch", help="Switch ID from two-column file") + p.add_argument( + "--annotation", + help="Add functional annotation from two-column file ('ID <--> Annotation')", + ) + p.add_argument("--ids", help="Generate ID conversion table") + p.add_argument( + "--upper", + default=False, + action="store_true", + help="Convert sequence to upper case", + ) + p.add_argument( + "--nodesc", + default=False, + action="store_true", + help="Remove description after identifier", + ) + p.add_argument( + "--minlength", default=0, type=int, help="Minimum sequence length to keep" + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + infasta, outfasta = args + gb = opts.gb + pairs = opts.pairs + prefix = 
opts.prefix + suffix = opts.suffix + noversion = opts.noversion + sequential = opts.sequential + sequentialoffset = opts.sequentialoffset + sep = opts.sep + idx = opts.index + mapfile = opts.switch + annotfile = opts.annotation + desc = not opts.nodesc + idsfile = opts.ids + idsfile = open(idsfile, "w") if idsfile else None + upper = opts.upper + minlength = opts.minlength + + if mapfile: + mapping = DictFile(mapfile, delimiter="\t") + if annotfile: + annotation = DictFile(annotfile, delimiter="\t") + + fp = SeqIO.parse(must_open(infasta), "fasta") + fw = must_open(outfasta, "w") + nremoved = 0 + for i, rec in enumerate(fp): + if len(rec) < minlength: + nremoved += 1 + continue + origid = rec.id + description = rec.description.replace(origid, "").strip() + if sep: + rec.id = rec.description.split(sep)[idx].strip() + if gb: + # gi|262233616|gb|GU123895.1| Coffea arabica clone BAC + atoms = rec.id.split("|") + if len(atoms) >= 3: + rec.id = atoms[3] + elif len(atoms) == 2: + rec.id = atoms[1] + if pairs: + id = "/1" if (i % 2 == 0) else "/2" + rec.id += id + if noversion: + rec.id = rec.id.rsplit(".", 1)[0] + if sequential: + rec.id = "{0:0{1}d}".format(sequentialoffset, opts.pad0) + if sequential == "prefix": + rec.id = "{0}-{1}".format(rec.id, origid) + elif sequential == "suffix": + rec.id = "{0}-{1}".format(origid, rec.id) + sequentialoffset += 1 + if opts.template: + template, dir, lib = [ + x.split("=")[-1] for x in rec.description.split()[1:4] + ] + rec.id = "{0}-{1}/{2}".format(lib, template, dir) + if mapfile: + if origid in mapping: + rec.id = mapping[origid] + else: + logger.error( + "{0} not found in `{1}`. 
def print_first_difference(
    arec, brec, ignore_case=False, ignore_N=False, rc=False, report_match=True
):
    """
    Compare two records and report where they first diverge.

    The forward orientation of `brec` is compared first. When that fails and
    `rc` is set, `brec.seq` is replaced IN PLACE by its reverse complement and
    the comparison is repeated. The value returned is the outcome of the last
    comparison that was run.
    """
    compare_opts = dict(
        ignore_case=ignore_case,
        ignore_N=ignore_N,
        report_match=report_match,
    )

    forward = _print_first_difference(arec, brec, **compare_opts)
    if forward or not rc:
        return forward

    # Forward strand failed and the caller asked for the minus strand as well.
    logger.debug("trying reverse complement of %s" % brec.id)
    brec.seq = brec.seq.reverse_complement()
    return _print_first_difference(arec, brec, **compare_opts)
def diff(args):
    """
    %prog diff afasta bfasta

    print out whether the records in two fasta files are the same
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is left untouched; implementation notes live in comments instead.
    from jcvi.utils.table import banner

    p = OptionParser(diff.__doc__)
    p.add_argument(
        "--ignore_case",
        default=False,
        action="store_true",
        help="ignore case when comparing sequences",
    )
    p.add_argument(
        "--ignore_N",
        default=False,
        action="store_true",
        help="ignore N and X's when comparing sequences",
    )
    p.add_argument(
        "--ignore_stop",
        default=False,
        action="store_true",
        help="ignore stop codon when comparing sequences",
    )
    p.add_argument(
        "--rc",
        default=False,
        action="store_true",
        help="also consider reverse complement",
    )
    p.add_argument(
        "--quiet",
        default=False,
        action="store_true",
        help="don't output comparison details",
    )

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    afasta, bfasta = args

    afastan = len(Fasta(afasta))
    bfastan = len(Fasta(bfasta))

    if afastan == bfastan:
        printf(
            "[green]Two sets contain the same number of sequences ({}, {})".format(
                afastan, bfastan
            )
        )
    else:
        printf(
            "[red]Two sets contain different number of sequences ({}, {})".format(
                afastan, bfastan
            )
        )

    ah = SeqIO.parse(afasta, "fasta")
    bh = SeqIO.parse(bfasta, "fasta")

    # Records are compared positionally; zip() stops at the shorter file.
    problem_ids = []
    for arec, brec in zip(ah, bh):

        if opts.ignore_stop:
            arec.seq = arec.seq.rstrip("*")
            brec.seq = brec.seq.rstrip("*")

        asize, bsize = len(arec), len(brec)

        if not opts.quiet:
            print(banner(str(arec), [str(brec)]))
            if asize == bsize:
                printf("[green]Two sequence size match ({})".format(asize))
            else:
                # The original template ended in "{}})", whose stray "}" made
                # str.format raise ValueError on every size mismatch.
                printf(
                    "[red]Two sequence size do not match ({}, {})".format(asize, bsize)
                )

        # print out the first place the two sequences diff
        fd = print_first_difference(
            arec,
            brec,
            ignore_case=opts.ignore_case,
            ignore_N=opts.ignore_N,
            rc=opts.rc,
            report_match=not opts.quiet,
        )
        if not fd:
            logger.error("Two sets of sequences differ at `{0}`".format(arec.id))
            problem_ids.append(
                "\t".join(str(x) for x in (arec.id, asize, bsize, abs(asize - bsize)))
            )

    if problem_ids:
        print("A total of {0} records mismatch.".format(len(problem_ids)))
        fw = must_open("Problems.ids", "w")
        print("\n".join(problem_ids), file=fw)
        # Close explicitly so the report is flushed even when diff() is used
        # as a library call inside a longer-running process.
        fw.close()


def hash_fasta(
    seq, ignore_case=False, ignore_N=False, ignore_stop=False, checksum="MD5"
):
    """
    Generates checksum of input sequence element

    `seq` is a plain string. The optional flags normalise it before hashing:
    `ignore_stop` drops trailing `*`, `ignore_case` upper-cases, and
    `ignore_N` removes `N` from nucleotide-looking input (only A/T/G/C/N
    present) or `X` otherwise.

    Returns a string usable as a dictionary key: the SEGUID when
    `checksum == "GCG"`, else the SHA-256 hex digest (the `"MD5"` option name
    is kept only for interface compatibility).
    """
    if ignore_stop:
        seq = seq.rstrip("*")
    if ignore_case:
        seq = seq.upper()
    if ignore_N:
        if not all(c.upper() in "ATGCN" for c in seq):
            seq = re.sub("X", "", seq)
        else:
            seq = re.sub("N", "", seq)

    if checksum == "GCG":
        return seguid(seq)
    # hashlib needs bytes, and the hash *object* is hashed by identity, so two
    # equal sequences would never collide as dict keys; return the hex digest.
    return hashlib.sha256(seq.encode("utf-8")).hexdigest()
+ + Example output: + --------------------------- + tta1.fsa tta2.fsa + t0 2131 na + t1 3420 na + t2 3836,3847 852 + t3 148 890 + t4 584 614 + t5 623 684 + t6 1281 470 + t7 3367 na + """ + from jcvi.utils.cbook import AutoVivification + + allowed_checksum = ["MD5", "GCG"] + + p = OptionParser(identical.__doc__) + p.add_argument( + "--ignore_case", + default=False, + action="store_true", + help="ignore case when comparing sequences", + ) + p.add_argument( + "--ignore_N", + default=False, + action="store_true", + help="ignore N and X's when comparing sequences", + ) + p.add_argument( + "--ignore_stop", + default=False, + action="store_true", + help="ignore stop codon when comparing sequences", + ) + p.add_argument( + "--output_uniq", + default=False, + action="store_true", + help="output uniq sequences in FASTA format", + ) + p.add_argument( + "--checksum", + default="MD5", + choices=allowed_checksum, + help="specify checksum method", + ) + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) == 0: + sys.exit(not p.print_help()) + + d = AutoVivification() + files = [] + for fastafile in args: + f = Fasta(fastafile) + pf = fastafile.rsplit(".", 1)[0] + files.append(pf) + + logger.debug("Hashing individual elements of {0}".format(fastafile)) + for name, rec in f.iteritems_ordered(): + seq = re.sub(" ", "", str(rec.seq)) + hashed = hash_fasta( + seq, + ignore_case=opts.ignore_case, + ignore_N=opts.ignore_N, + ignore_stop=opts.ignore_stop, + checksum=opts.checksum, + ) + if not d[hashed]: + d[hashed]["seq"] = seq + d[hashed]["count"] = 0 + if not d[hashed]["names"][pf]: + d[hashed]["names"][pf] = set() + d[hashed]["names"][pf].add(name) + + fw = must_open(opts.outfile, "w") + if opts.output_uniq: + uniqfile = "_".join(files) + ".uniq.fasta" + uniqfw = must_open(uniqfile, "w") + + header = "\t".join(str(x) for x in args) + print("\t".join(str(x) for x in ("", header)), file=fw) + for idx, hashed in enumerate(d.keys()): + line = [] + 
def some(args):
    """
    %prog some fastafile listfile outfastafile

    generate a subset of fastafile, based on a list
    """
    # NOTE: the docstring above is the command's usage text (passed to
    # OptionParser); implementation notes are kept in comments.
    from jcvi.utils.cbook import gene_name

    p = OptionParser(some.__doc__)
    p.add_argument(
        "--exclude",
        default=False,
        action="store_true",
        help="Output sequences not in the list file",
    )
    p.add_argument(
        "--no_strip_names",
        default=False,
        action="store_true",
        help="Do not strip alternative splicing (e.g. At5g06540.1 -> At5g06540)",
    )
    p.add_argument(
        "--uniprot", default=False, action="store_true", help="Header is from uniprot"
    )

    opts, args = p.parse_args(args)

    if len(args) != 3:
        # `not None` -> True -> exit status 1, matching the sibling commands;
        # the bare `sys.exit(p.print_help())` reported success on bad usage.
        sys.exit(not p.print_help())

    strip_names = not opts.no_strip_names
    fastafile, listfile, outfastafile = args
    outfastahandle = must_open(outfastafile, "w")
    qualfile = get_qual(fastafile)

    # Whitespace-separated identifiers; a set gives O(1) membership tests.
    with open(listfile) as listhandle:
        names = set(listhandle.read().split())

    outqualhandle = None
    if qualfile:
        outqualfile = outfastafile + ".qual"
        outqualhandle = open(outqualfile, "w")
        parser = iter_fasta_qual(fastafile, qualfile)
    else:
        parser = SeqIO.parse(fastafile, "fasta")

    recs = []
    seen = set()
    for rec in parser:
        name = rec.id
        if strip_names:
            name = gene_name(name)

        # Normalise the name fully BEFORE the duplicate check: `seen` stores
        # the normalised form, so checking the raw uniprot id never matched.
        if opts.uniprot:
            name = name.split("|")[-1]

        if name in seen:  # Only report one instance
            continue

        # Keep the record when list membership disagrees with --exclude.
        if (name in names) == bool(opts.exclude):
            continue

        recs.append(rec)
        seen.add(name)

    for rec in recs:
        SeqIO.write([rec], outfastahandle, "fasta")
        if qualfile:
            SeqIO.write([rec], outqualhandle, "qual")

    if outqualhandle is not None:
        outqualhandle.close()

    logger.debug("A total of %d records written to `%s`" % (len(recs), outfastafile))
+ """ + from jcvi.formats.fastq import FastqLite + + p = OptionParser(fastq.__doc__) + p.add_argument("--qv", type=int, help="Use generic qv value") + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + fastqfile = fastafile.rsplit(".", 1)[0] + ".fastq" + fastqhandle = open(fastqfile, "w") + num_records = 0 + + if opts.qv is not None: + qv = chr(ord("!") + opts.qv) + logger.debug("QV char '{0}' ({1})".format(qv, opts.qv)) + else: + qv = None + + if qv: + f = Fasta(fastafile, lazy=True) + for name, rec in f.iteritems_ordered(): + r = FastqLite("@" + name, str(rec.seq).upper(), qv * len(rec.seq)) + print(r, file=fastqhandle) + num_records += 1 + + else: + qualfile = get_qual(fastafile) + for rec in iter_fasta_qual(fastafile, qualfile): + SeqIO.write([rec], fastqhandle, "fastq") + num_records += 1 + + fastqhandle.close() + logger.debug("A total of %d records written to `%s`" % (num_records, fastqfile)) + + +def pair(args): + """ + %prog pair fastafile + + Generate .pairs.fasta and .fragments.fasta by matching records + into the pairs and the rest go to fragments. + """ + p = OptionParser(pair.__doc__) + p.set_sep( + sep=None, + help="Separator in name to reduce to clone id" + + "e.g. 
def pairinplace(args):
    """
    %prog pairinplace bulk.fasta

    Pair up the records in bulk.fasta by comparing the names for adjacent
    records. If they match, print to bulk.pairs.fasta, else print to
    bulk.frags.fasta.
    """
    # NOTE: the docstring above is the command's usage text (passed to
    # OptionParser); implementation notes are kept in comments.
    p = OptionParser(pairinplace.__doc__)
    p.add_argument(
        "-r",
        dest="rclip",
        default=1,
        type=int,
        help="pair ID is derived from rstrip N chars",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastafile,) = args
    base = op.basename(fastafile).split(".")[0]

    frags = base + ".frags.fasta"
    pairs = base + ".pairs.fasta"
    if fastafile.endswith(".gz"):
        frags += ".gz"
        pairs += ".gz"

    fragsfw = must_open(frags, "w")
    pairsfw = must_open(pairs, "w")

    N = opts.rclip

    def strip_name(x):
        # With `-r 0` the id is used unchanged. The original lambda returned
        # the `str` type itself here, so every adjacent pair compared equal.
        return x[:-N] if N else x

    # Hold back one record at a time: emit it as half of a pair if the next
    # record has the same clipped id, otherwise as a fragment. This handles
    # the final record without relying on an end-of-stream sentinel.
    # must_open() is used for reading so a `.gz` input works, consistent with
    # the `.gz` handling of the outputs (assumes must_open decompresses
    # transparently, as it is relied on to for the `.gz` outputs).
    pending = None
    for rec in SeqIO.parse(must_open(fastafile), "fasta"):
        if pending is None:
            pending = rec
        elif strip_name(pending.id) == strip_name(rec.id):
            SeqIO.write([pending, rec], pairsfw, "fasta")
            pending = None
        else:
            SeqIO.write([pending], fragsfw, "fasta")
            pending = rec

    # don't forget the last one, when it has no partner
    if pending is not None:
        SeqIO.write([pending], fragsfw, "fasta")

    # Close explicitly: compressed outputs are only complete once closed.
    fragsfw.close()
    pairsfw.close()

    logger.debug("Reads paired into `%s` and `%s`" % (pairs, frags))
matching seqname", + ) + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) == 2: + fastafile, query = args + elif len(args) == 1 and opts.bed: + (fastafile,) = args + bedaccns = Bed(opts.bed).accns + else: + sys.exit(p.print_help()) + + if opts.bed: + fw = must_open(opts.outfile, "w") + f = Fasta(fastafile) + for accn in bedaccns: + try: + rec = f[accn] + except: + logger.error("{0} not found in {1}".format(accn, fastafile)) + continue + SeqIO.write([rec], fw, "fasta") + return fw.name + + atoms = query.split(":") + key = atoms[0] + + assert len(atoms) <= 3, "cannot have more than two ':' in your query" + + pos = "" + if len(atoms) in (2, 3): + pos = atoms[1] + + strand = "+" + if len(atoms) == 3: + strand = atoms[2] + + assert strand in ("+", "-"), "strand must be either '+' or '-'" + + feature = dict(chr=key) + + if "-" in pos: + start, stop = pos.split("-") + try: + start, stop = int(start), int(stop) + except ValueError as e: + logger.error(e) + sys.exit(p.print_help()) + + feature["start"] = start + feature["stop"] = stop + else: + start, stop = None, None + + assert ( + None + in ( + start, + stop, + ) + or start < stop + ), "start must be < stop, you have ({0}, {1})".format(start, stop) + feature["strand"] = strand + + include, exclude = opts.include, opts.exclude + # conflicting options, cannot be true at the same time + assert not ( + include and exclude + ), "--include and --exclude cannot be on at the same time" + fw = must_open(opts.outfile, "w") + + if include or exclude: + f = Fasta(fastafile, lazy=True) + fi = f.iteritems_ordered if opts.idonly else f.iterdescriptions_ordered + for k, rec in fi(): + if include and key not in k: + continue + if exclude and key in k: + continue + + seq = Fasta.subseq(rec, start, stop, strand) + newid = rec.id + if start is not None: + newid += ":{0}-{1}:{2}".format(start, stop, strand) + + rec = SeqRecord(seq, id=newid, description=k) + SeqIO.write([rec], fw, "fasta") + else: + f = Fasta(fastafile) + 
XQUAL = -1000  # default quality for X
NQUAL = 5  # default quality value for N
QUAL = 10  # default quality value
OKQUAL = 15


def modify_qual(rec):
    """
    Overwrite the phred quality of masked/ambiguous bases in `rec`.

    Every `X`/`x` position gets XQUAL and every `N`/`n` position gets NQUAL.
    The quality list stored under `rec.letter_annotations["phred_quality"]`
    is modified in place and the same `rec` object is returned.
    """
    qv = rec.letter_annotations["phred_quality"]
    # zip() bounds the walk by the shorter of sequence and quality list.
    for i, (s, _) in enumerate(zip(rec.seq, qv)):
        if s in ("X", "x"):
            qv[i] = XQUAL
        # The original tested `s == "x"` here, which clobbered the XQUAL just
        # assigned to lowercase x and never matched lowercase n.
        elif s in ("N", "n"):
            qv[i] = NQUAL
    return rec
The trimming algorithm is based on + finding the subarray that maximize the sum + """ + + from jcvi.algorithms.maxsum import max_sum + + p = OptionParser(trim.__doc__) + p.add_argument( + "-c", + dest="min_length", + type=int, + default=64, + help="minimum sequence length after trimming", + ) + p.add_argument("-s", dest="score", default=QUAL, help="quality trimming cutoff") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(p.print_help()) + + fastafile, newfastafile = args + qualfile = get_qual(fastafile) + newqualfile = get_qual(newfastafile, check=False) + + logger.debug( + "Trim bad sequence from fasta file `%s` to `%s`" % (fastafile, newfastafile) + ) + + fw = must_open(newfastafile, "w") + fw_qual = open(newqualfile, "w") + + dropped = trimmed = 0 + + for rec in iter_fasta_qual(fastafile, qualfile, modify=True): + qv = [x - opts.score for x in rec.letter_annotations["phred_quality"]] + msum, trim_start, trim_end = max_sum(qv) + score = trim_end - trim_start + 1 + + if score < opts.min_length: + dropped += 1 + continue + + if score < len(rec): + trimmed += 1 + rec = rec[trim_start : trim_end + 1] + + write_fasta_qual(rec, fw, fw_qual) + + print("A total of %d sequences modified." % trimmed, file=sys.stderr) + print( + "A total of %d sequences dropped (length < %d)." % (dropped, opts.min_length), + file=sys.stderr, + ) + + fw.close() + fw_qual.close() + + +def sequin(args): + """ + %prog sequin inputfasta + + Generate a gapped fasta format with known gap sizes embedded. suitable for + Sequin submission. + + A gapped sequence represents a newer method for describing non-contiguous + sequences, but only requires a single sequence identifier. A gap is + represented by a line that starts with >? and is immediately followed by + either a length (for gaps of known length) or "unk100" for gaps of unknown + length. For example, ">?200". The next sequence segment continues on the + next line, with no separate definition line or identifier. 
The difference + between a gapped sequence and a segmented sequence is that the gapped + sequence uses a single identifier and can specify known length gaps. + Gapped sequences are preferred over segmented sequences. A sample gapped + sequence file is shown here: + + >m_gagei [organism=Mansonia gagei] Mansonia gagei NADH dehydrogenase ... + ATGGAGCATACATATCAATATTCATGGATCATACCGTTTGTGCCACTTCCAATTCCTATTTTAATAGGAA + TTGGACTCCTACTTTTTCCGACGGCAACAAAAAATCTTCGTCGTATGTGGGCTCTTCCCAATATTTTATT + >?200 + GGTATAATAACAGTATTATTAGGGGCTACTTTAGCTCTTGC + TCAAAAAGATATTAAGAGGGGTTTAGCCTATTCTACAATGTCCCAACTGGGTTATATGATGTTAGCTCTA + >?unk100 + TCAATAAAACTATGGGGTAAAGAAGAACAAAAAATAATTAACAGAAATTTTCGTTTATCTCCTTTATTAA + TATTAACGATGAATAATAATGAGAAGCCATATAGAATTGGTGATAATGTAAAAAAAGGGGCTCTTATTAC + """ + p = OptionParser(sequin.__doc__) + p.add_argument("--unk", default=100, type=int, help="The size for unknown gaps") + p.add_argument("--newid", default=None, help="Use this identifier instead") + p.add_argument( + "--chromosome", default=None, help="Add [chromosome= ] to FASTA header" + ) + p.add_argument("--clone", default=None, help="Add [clone= ] to FASTA header") + p.set_mingap(default=100) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (inputfasta,) = args + unk = opts.unk + + outputfasta = inputfasta.rsplit(".", 1)[0] + ".split" + rec = next(SeqIO.parse(must_open(inputfasta), "fasta")) + seq = "" + unknowns, knowns = 0, 0 + for gap, gap_group in groupby(rec.seq, lambda x: x.upper() == "N"): + subseq = "".join(gap_group) + if gap: + gap_length = len(subseq) + if gap_length == unk: + subseq = "\n>?unk{0}\n".format(unk) + unknowns += 1 + elif gap_length >= opts.mingap: + subseq = "\n>?{0}\n".format(gap_length) + knowns += 1 + seq += subseq + + fw = must_open(outputfasta, "w") + id = opts.newid or rec.id + fastaheader = ">{0}".format(id) + if opts.chromosome: + fastaheader += " [chromosome={0}]".format(opts.chromosome) + if opts.clone: + fastaheader += " 
def trim_terminal_Ns(rec):
    """
    Strip leading and trailing gap characters from `rec.seq` in place.

    Both `N` and `n` are removed so that soft-masked input is treated the
    same way as by the sibling gap helpers, which test `x.upper() == "N"`.
    """
    rec.seq = rec.seq.strip("Nn")
def write_gaps_bed(inputfasta, prefix, mingap, cpus):
    """
    Write the gap (runs of N) locations of `inputfasta` to `<prefix>.gaps.bed`.

    All gaps are first collected in parallel by `write_gaps_worker` and
    sorted; the file is then rewritten keeping only gaps whose span is at
    least `mingap`, each labelled `gap.NNNNN` together with its span.
    """
    from jcvi.apps.grid import WriteJobs
    from jcvi.formats.bed import sort

    bedfile = prefix + ".gaps.bed"
    f = Fasta(inputfasta)
    recs = [rec for _, rec in f.iteritems()]
    pool = WriteJobs(write_gaps_worker, recs, bedfile, cpus=cpus)
    pool.run()

    sort([bedfile, "-i"])

    bed = Bed(bedfile)
    nbedfile = prefix + ".{0}N.bed".format(mingap)

    gapnum = 0
    # The handle must be closed (and therefore flushed) before the file is
    # moved over `bedfile`; the original left it open across shutil.move.
    with open(nbedfile, "w") as fw:
        for b in bed:
            if b.span < mingap:
                continue
            gapnum += 1
            gapname = "gap.{0:05d}".format(gapnum)
            print("\t".join(str(x) for x in (b, gapname, b.span)), file=fw)

    shutil.move(nbedfile, bedfile)
    logger.debug("Write gap (>={0}bp) locations to `{1}`.".format(mingap, bedfile))
class FastqHeader(object):
    """
    Parsed representation of a FASTQ read header line.

    Three layouts ("dialects") are recognised:

    * ``sra``   - ``@<readId> <instrument:...> length=<n>``
    * ``>=1.8`` - Illumina Casava 1.8+: seven colon-separated fields, a
      space, then ``readNum:isFiltered:controlNum:barcode``
    * ``<1.8``  - older Illumina: five colon-separated fields, optionally
      ending in ``#<multiplexId>/<readNum>``

    ``str(header)`` renders the header in the layout named by
    ``self.dialect``.
    """

    def __init__(self, row):
        header = row.strip().split(" ")
        self.readId, self.readLen, self.readNum = None, None, None
        self.multiplexId = 0
        self.paired = False
        if len(header) == 3 and "length" in header[2]:
            self.dialect = "sra"
            self.readId = header[0].lstrip("@")
            m = re.search(r"length=(\d+)", header[2])
            if m:
                self.readLen = m.group(1)
            h = header[1].split(":")

            self.instrument = h[0]
            if len(h) == 7:
                self.runId, self.flowcellId = int(h[1]), h[2]
                self.laneNum, self.tileNum = int(h[3]), int(h[4])
                self.xPos, self.yPos = h[5], h[6]
            else:
                self.runId, self.flowcellId = None, None
                self.laneNum, self.tileNum = int(h[1]), int(h[2])
                self.xPos, self.yPos = h[3], h[4]
        else:
            h = header[0].split(":")
            self.instrument = h[0].lstrip("@")
            # `str.find` returns -1 (truthy) when ":" is absent, so the
            # original test sent old-style headers that merely carried a
            # trailing comment down the Casava 1.8 path, where they crashed.
            if len(header) == 2 and ":" in header[1]:
                self.dialect = ">=1.8"  # Illumina Casava 1.8+ format

                self.runId = int(h[1])
                self.flowcellId = h[2]
                self.laneNum = int(h[3])
                self.tileNum = int(h[4])
                self.xPos = int(h[5])
                self.yPos = h[6]
                if re.search("/", self.yPos):
                    self.paired = True
                    self.yPos, self.readNum = self.yPos.split("/")

                a = header[1].split(":")
                self.readNum = int(a[0])
                self.isFiltered = a[1]
                self.controlNum = int(a[2])
                self.barcode = a[3]
            else:
                self.dialect = "<1.8"  # Old Illumina Casava format (< 1.8)
                self.laneNum = int(h[1])
                self.tileNum = int(h[2])
                self.xPos = int(h[3])
                self.yPos = h[4]
                m = re.search(r"(\d+)(#\S+)/(\d+)", self.yPos)
                if m:
                    self.paired = True
                    # multiplexId keeps its leading "#" (e.g. "#0"), as before.
                    self.yPos, self.multiplexId, self.readNum = (
                        m.group(1),
                        m.group(2),
                        m.group(3),
                    )

    def __str__(self):
        if self.dialect == "sra":
            h0 = self.readId
            if self.readNum:
                h0 += "/{0}".format(self.readNum)

            h1elems = [
                self.instrument,
                self.laneNum,
                self.tileNum,
                self.xPos,
                self.yPos,
            ]
            if self.runId and self.flowcellId:
                h1elems[1:1] = [self.runId, self.flowcellId]
            h1 = ":".join(str(x) for x in h1elems)
            h2 = "length={0}".format(self.readLen)

            return "@{0} {1} {2}".format(h0, h1, h2)
        elif self.dialect == ">=1.8":
            yPos = (
                "{0}/{1}".format(self.yPos, self.readNum) if self.paired else self.yPos
            )

            h0 = ":".join(
                str(x)
                for x in (
                    self.instrument,
                    self.runId,
                    self.flowcellId,
                    self.laneNum,
                    self.tileNum,
                    self.xPos,
                    yPos,
                )
            )
            h1 = ":".join(
                str(x)
                for x in (self.readNum, self.isFiltered, self.controlNum, self.barcode)
            )

            return "@{0} {1}".format(h0, h1)
        else:
            # A parsed multiplexId already starts with "#", while the default
            # is the bare integer 0; drop any leading "#" so the template
            # does not render "##" when round-tripping an old-style header.
            multiplex = str(self.multiplexId).lstrip("#")
            yPos = (
                "{0}#{1}/{2}".format(self.yPos, multiplex, self.readNum)
                if self.paired
                else self.yPos
            )
            h0 = ":".join(
                str(x)
                for x in (self.instrument, self.laneNum, self.tileNum, self.xPos, yPos)
            )

            return "@{0}".format(h0)

    def format_header(self, dialect=None, tag=None):
        """
        Return the header as a string, optionally converted.

        `dialect` switches the output layout when the conversion is listed in
        `allowed_dialect_conversions`; otherwise an error is logged and the
        process exits. `tag` (of the form ``<anything>/<readNum>``) sets the
        read number and marks the read as paired.
        """
        if dialect:
            if self.dialect == dialect:
                logger.error("Input and output dialect are the same")
            # .get(): a source dialect with no entry (e.g. "<1.8") must reach
            # the error report below rather than die with a KeyError.
            elif dialect not in allowed_dialect_conversions.get(self.dialect, ""):
                logger.error(
                    "Error: Cannot convert from `{0}` to `{1}` dialect".format(
                        self.dialect, dialect
                    )
                )
                logger.error(
                    "Allowed conversions: {0}".format(
                        json.dumps(allowed_dialect_conversions, indent=4)
                    )
                )
                # Non-zero status: this is a failure, not a clean exit.
                sys.exit(1)
            else:
                self.dialect = dialect

        if tag:
            readNum = tag.split("/")[1]
            self.readNum = readNum
            self.paired = True

        return str(self)
def uniq(args):
    """
    %prog uniq fastqfile

    Retain only first instance of duplicate reads. Duplicate is defined as
    having the same read name.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    p = OptionParser(uniq.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastqfile,) = args
    fw = must_open(opts.outfile, "w")
    nduplicates = nreads = 0
    seen = set()
    for rec in iter_fastq(fastqfile):
        # iter_fastq() finishes with a None sentinel; test for it before
        # counting so the sentinel is not included in the read total.
        if rec is None:
            break
        nreads += 1
        name = rec.name
        if name in seen:
            nduplicates += 1
            continue
        seen.add(name)
        print(rec, file=fw)
    logger.debug("Removed duplicate reads: {}".format(percentage(nduplicates, nreads)))
def is_fastq(f):
    """
    Tell whether file name `f` carries a `.fastq` or `.fq` suffix, looking
    past a trailing `.gz` when there is one.
    """
    stem = f
    if stem.endswith(".gz"):
        # str.replace drops every ".gz" occurrence, not only the trailing one
        stem = stem.replace(".gz", "")
    return any(stem.endswith(ext) for ext in (".fastq", ".fq"))
+ """ + p = OptionParser(fasta.__doc__) + p.add_argument( + "--seqtk", default=False, action="store_true", help="Use seqtk to convert" + ) + p.set_outdir() + p.set_outfile(outfile=None) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + fastqfiles = args + outdir = opts.outdir + if outdir and outdir != ".": + mkdir(outdir) + + fastqfile = fastqfiles[0] + pf = op.basename(fastqfile) + gzinput = pf.endswith(".gz") + if gzinput: + pf = pf.rsplit(".", 1)[0] + + pf, sf = pf.rsplit(".", 1) + if sf not in ("fq", "fastq"): + logger.debug("Assumed FASTA: suffix not `fq` or `fastq`") + return fastqfile, None + + fastafile, qualfile = pf + ".fasta", pf + ".qual" + outfile = opts.outfile or fastafile + outfile = op.join(outdir, outfile) + if opts.seqtk: + if need_update(fastqfiles, outfile): + for i, fastqfile in enumerate(fastqfiles): + cmd = "seqtk seq -A {0} -L 30 -l 70".format(fastqfile) + # First one creates file, following ones append to it + sh(cmd, outfile=outfile, append=i) + else: + logger.debug("Outfile `{0}` already exists.".format(outfile)) + return outfile, None + + for fastqfile in fastqfiles: + SeqIO.convert(fastqfile, "fastq", fastafile, "fasta") + SeqIO.convert(fastqfile, "fastq", qualfile, "qual") + + return fastafile, qualfile + + +def first(args): + """ + %prog first N fastqfile(s) + + Get first N reads from file. + """ + from jcvi.apps.base import need_update + + p = OptionParser(first.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + N = int(args[0]) + nlines = N * 4 + fastqfiles = args[1:] + fastqfile = fastqfiles[0] + outfile = opts.outfile + if not need_update(fastqfiles, outfile): + logger.debug("File `{0}` exists. 
def isHighQv(qs, qvchar, pct=90):
    """
    Return True when at least `pct` percent of the items in `qs` compare
    greater than or equal to `qvchar`.
    """
    required = len(qs) * pct / 100
    passing = len([q for q in qs if q >= qvchar])
    return passing >= required
def shuffle(args):
    """
    %prog shuffle p1.fastq p2.fastq

    Shuffle pairs into interleaved format.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    p = OptionParser(shuffle.__doc__)
    p.set_tag()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    p1, p2 = args
    pairsfastq = pairspf((p1, p2)) + ".fastq"
    tag = opts.tag

    p1fp = must_open(p1)
    p2fp = must_open(p2)
    pairsfw = must_open(pairsfastq, "w")
    nreads = 0
    try:
        while True:
            # one FASTQ record = 4 lines
            a = list(islice(p1fp, 4))
            if not a:
                break

            b = list(islice(p2fp, 4))
            if tag:
                # both mates get the name of read 1, suffixed /1 and /2
                name = a[0].rstrip()
                a[0] = name + "/1\n"
                b[0] = name + "/2\n"

            pairsfw.writelines(a)
            pairsfw.writelines(b)
            nreads += 2
    finally:
        # Release every handle, also when a malformed pair raises mid-way;
        # the output must be closed (flushed) before its size is checked.
        pairsfw.close()
        p1fp.close()
        p2fp.close()

    # each tagged read grew by the two characters of "/1" or "/2"
    extra = nreads * 2 if tag else 0
    checkShuffleSizes(p1, p2, pairsfastq, extra=extra)

    logger.debug(
        "File `{0}` verified after writing {1} reads.".format(pairsfastq, nreads)
    )
    return pairsfastq
+ + + """ + from jcvi.apps.grid import Jobs + + p = OptionParser(split.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (pairsfastq,) = args + gz = pairsfastq.endswith(".gz") + pf = pairsfastq.replace(".gz", "").rsplit(".", 1)[0] + p1 = pf + ".1.fastq" + p2 = pf + ".2.fastq" + + cmd = "zcat" if gz else "cat" + p1cmd = cmd + " {0} | sed -ne '1~8{{N;N;N;p}}'".format(pairsfastq) + p2cmd = cmd + " {0} | sed -ne '5~8{{N;N;N;p}}'".format(pairsfastq) + + if gz: + p1cmd += " | gzip" + p2cmd += " | gzip" + p1 += ".gz" + p2 += ".gz" + + p1cmd += " > " + p1 + p2cmd += " > " + p2 + + args = [(p1cmd,), (p2cmd,)] + m = Jobs(target=sh, args=args) + m.run() + + checkShuffleSizes(p1, p2, pairsfastq) + + +def guessoffset(args): + r""" + %prog guessoffset fastqfile + + Guess the quality offset of the fastqfile, whether 33 or 64. + See encoding schemes: + + SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS............................... + ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII + .................................JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ + LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL............................... 
+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh + | | | | | + 33 59 64 73 104 + + S - Sanger Phred+33, raw reads typically (0, 40) + X - Solexa Solexa+64, raw reads typically (-5, 40) + I - Illumina 1.3+ Phred+64, raw reads typically (0, 40) + J - Illumina 1.5+ Phred+64, raw reads typically (3, 40) + L - Illumina 1.8+ Phred+33, raw reads typically (0, 40) + with 0=unused, 1=unused, 2=Read Segment Quality Control Indicator (bold) + """ + p = OptionParser(guessoffset.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastqfile,) = args + ai = iter_fastq(fastqfile) + rec = next(ai) + offset = 64 + while rec: + quality = rec.quality + lowcounts = len([x for x in quality if x < 59]) + highcounts = len([x for x in quality if x > 74]) + diff = highcounts - lowcounts + if diff > 10: + break + elif diff < -10: + offset = 33 + break + rec = next(ai) + + if offset == 33: + print("Sanger encoding (offset=33)", file=sys.stderr) + elif offset == 64: + print("Illumina encoding (offset=64)", file=sys.stderr) + + return offset + + +def format(args): + """ + %prog format fastqfile + + Format FASTQ file. Currently provides option to convert FASTQ header from + one dialect to another. 
def some(args):
    """
    %prog some idsfile afastq [bfastq]

    Select a subset of the reads with ids present in the idsfile.
    `bfastq` is optional (only if reads are paired)
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    p = OptionParser(some.__doc__)
    opts, args = p.parse_args(args)

    if len(args) not in (2, 3):
        sys.exit(not p.print_help())

    (
        idsfile,
        afastq,
    ) = args[:2]
    bfastq = args[2] if len(args) == 3 else None

    ids = DictFile(idsfile, valuepos=None)

    afh = open(afastq)
    bfh = open(bfastq) if bfastq else None
    try:
        bi = iter_fastq(bfh) if bfh is not None else None
        for arec in iter_fastq(afh):
            # Compare against the None sentinel explicitly: FastqRecord
            # defines __len__, so a zero-length read is falsy and a plain
            # truth test would stop the scan early.
            if arec is None:
                break
            # advance the mate file in lockstep; None once it is exhausted
            brec = next(bi, None) if bi is not None else None

            # drop the leading '@' before looking the name up
            if arec.name[1:] in ids:
                print(arec)
                if brec is not None:
                    print(brec)
    finally:
        afh.close()
        if bfh is not None:
            bfh.close()
def catread(args):
    """
    %prog catread fastqfile1 fastqfile2

    Concatenate paired end reads into one. Useful for example to do single-end
    mapping and perform filtering on the whole read pair level.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    p = OptionParser(catread.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    r1, r2 = args
    p1fp, p2fp = FastqPairedIterator(r1, r2)
    outfile = pairspf((r1, r2)) + ".cat.fastq"
    fw = must_open(outfile, "w")
    try:
        while True:
            # one FASTQ record = 4 lines
            a = list(islice(p1fp, 4))
            if not a:
                break
            atitle, aseq, _, aqual = a
            btitle, bseq, _, bqual = list(islice(p2fp, 4))
            # keep the title of read 1; sequence and quality are joined end to end
            print(
                "\n".join(
                    (
                        atitle.strip(),
                        aseq.strip() + bseq.strip(),
                        "+",
                        aqual.strip() + bqual.strip(),
                    )
                ),
                file=fw,
            )
    finally:
        # The handles were previously left open; close them so the output is
        # flushed deterministically. p1fp and p2fp may be the same object, and
        # closing a file twice is harmless.
        fw.close()
        p1fp.close()
        p2fp.close()
def size(args):
    """
    %prog size fastqfile

    Find the total base pairs in a list of fastq files
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    p = OptionParser(size.__doc__)
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    total_size = total_numrecords = 0
    for f in args:
        cur_size = cur_numrecords = 0
        for rec in iter_fastq(f):
            # Only the None sentinel ends the file. `not rec` would also be
            # true for a zero-length read (FastqRecord defines __len__) and
            # silently truncate the count.
            if rec is None:
                break
            cur_numrecords += 1
            cur_size += len(rec)

        # per-file line: name, number of records, number of bases
        print(" ".join(str(x) for x in (op.basename(f), cur_numrecords, cur_size)))
        total_numrecords += cur_numrecords
        total_size += cur_size

    if len(args) > 1:
        print(" ".join(str(x) for x in ("Total", total_numrecords, total_size)))
def pairinplace(args):
    """
    %prog pairinplace bulk.fastq

    Pair up the records in bulk.fastq by comparing the names for adjancent
    records. If they match, print to bulk.pairs.fastq, else print to
    bulk.frags.fastq.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    from more_itertools import pairwise

    p = OptionParser(pairinplace.__doc__)
    p.set_rclip()
    p.set_tag()
    p.add_argument("--base", help="Base name for the output files")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastqfile,) = args
    base = opts.base or op.basename(fastqfile).split(".")[0]

    frags = base + ".frags.fastq"
    pairs = base + ".pairs.fastq"
    if fastqfile.endswith(".gz"):
        frags += ".gz"
        pairs += ".gz"

    fragsfw = must_open(frags, "w")
    pairsfw = must_open(pairs, "w")

    N = opts.rclip
    tag = opts.tag
    # with --rclip, names are compared after dropping their last N characters
    strip_name = (lambda x: x[:-N]) if N else None

    fh_iter = iter_fastq(fastqfile, key=strip_name)
    skipflag = False  # controls the iterator skip
    a = None  # stays None when the input holds no record at all
    try:
        for a, b in pairwise(fh_iter):
            if b is None:  # hit the eof
                break

            if skipflag:
                # `a` was already written as the second mate of a pair
                skipflag = False
                continue

            if a.name == b.name:
                if tag:
                    a.name += "/1"
                    b.name += "/2"
                print(a, file=pairsfw)
                print(b, file=pairsfw)
                skipflag = True
            else:
                print(a, file=fragsfw)

        # don't forget the last one, when b is None
        # (guarded so an empty input no longer hits an unbound `a`)
        if a is not None and not skipflag:
            print(a, file=fragsfw)
    finally:
        # close explicitly so the outputs (possibly .gz) are finalized
        fragsfw.close()
        pairsfw.close()

    logger.debug("Reads paired into `%s` and `%s`" % (pairs, frags))
    return pairs
cmd.append("--{0}".format(compress)) + if paired: + cmd.append("--split-files") + if outdir: + cmd.append("--outdir {0}".format(outdir)) + cmd.append(srafile) + + outcmd = " ".join(cmd) + sh(outcmd, grid=opts.grid) + + +if __name__ == "__main__": + main() diff --git a/jcvi/formats/genbank.py b/jcvi/formats/genbank.py new file mode 100644 index 00000000..28a85e22 --- /dev/null +++ b/jcvi/formats/genbank.py @@ -0,0 +1,522 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Genbank record operations based on biopython Bio.SeqIO +https://github.com/biopython/biopython/blob/master/Bio/SeqIO/InsdcIO.py +""" +import os.path as op +import sys + +from collections import defaultdict + +from Bio import SeqIO + +from ..apps.fetch import entrez +from ..apps.base import ActionDispatcher, OptionParser, cleanup, glob, logger, mkdir, sh + +from .base import BaseFile, get_number, must_open +from .gff import GffLine + + +MT = "mol_type" +LT = "locus_tag" + + +class MultiGenBank(BaseFile): + """ + Wrapper for parsing concatenated GenBank records. + """ + + def __init__(self, filename, source="JCVI"): + super().__init__(filename) + assert op.exists(filename) + + pf = filename.rsplit(".", 1)[0] + fastafile, gfffile = pf + ".fasta", pf + ".gff" + fasta_fw = must_open(fastafile, "w") + gff_fw = must_open(gfffile, "w") + + self.source = source + self.counter = defaultdict(list) + + nrecs, nfeats = 0, 0 + for rec in SeqIO.parse(filename, "gb"): + seqid = rec.name + rec.id = seqid + SeqIO.write([rec], fasta_fw, "fasta") + rf = rec.features + for f in rf: + self.print_gffline(gff_fw, f, seqid) + nfeats += 1 + nrecs += 1 + + logger.debug( + "A total of {0} records written to `{1}`.".format(nrecs, fastafile) + ) + fasta_fw.close() + + logger.debug( + "A total of {0} features written to `{1}`.".format(nfeats, gfffile) + ) + gff_fw.close() + + def print_gffline(self, fw, f, seqid, parent=None): + + score = phase = "." 
class GenBank(dict):
    """
    Wrapper of the GenBank record object in biopython SeqIO.

    The instance is a dict mapping an accession (record key up to the first
    '.') to the first record parsed from each GenBank file.
    """

    def __init__(self, filenames=None, accessions=None, idfile=None):
        """
        Load records either from `filenames` (list of .gb paths) or, when only
        `idfile` is given, from the `*.gb` files found in the directory
        returned by `_get_records()`. Exits when neither is provided.
        """
        super(GenBank, self).__init__()
        self.accessions = accessions
        self.idfile = idfile

        if filenames is not None:
            self.accessions = [op.basename(f).split(".")[0] for f in filenames]
            # only the first record of each file is kept
            d = dict(
                next(iter(SeqIO.to_dict(SeqIO.parse(f, "gb")).items()))
                for f in filenames
            )
            for k, v in d.items():
                self[k.split(".")[0]] = v

        elif idfile is not None:
            gbdir = self._get_records()
            d = dict(
                next(iter(SeqIO.to_dict(SeqIO.parse(f, "gb")).items()))
                for f in glob(gbdir + "/*.gb")
            )
            for k, v in d.items():
                self[k.split(".")[0]] = v

        else:
            sys.exit(
                "GenBank object is initiated from either gb files or " "accession IDs."
            )

    def __getitem__(self, accession):
        """Return the record stored under `accession`."""
        # Delegate to dict: `self[accession]` here would call this very
        # method again and recurse until RecursionError.
        return super(GenBank, self).__getitem__(accession)

    def __repr__(self):
        """Return the repr of the list of [accession, record] pairs."""
        # __repr__ must return a str; returning the list itself raises
        # TypeError as soon as repr() is called on the object.
        recs = [[accession, rec] for accession, rec in self.items()]
        return repr(recs)

    def _get_records(self):
        """
        Prepare a fresh `gb` directory (moving an existing one to `gb_old`),
        invoke `entrez` on `self.idfile` with that directory as --outdir, and
        return the directory name.
        """
        gbdir = "gb"
        dirmade = mkdir(gbdir)
        if not dirmade:
            sh(
                "rm -rf {0}_old; mv -f {0} {0}_old".format(
                    gbdir,
                )
            )
            # Call mkdir() outside the assert so the directory is still
            # created when Python runs with -O (asserts stripped).
            dirmade = mkdir(gbdir)
            assert dirmade

        entrez(
            [
                self.idfile,
                "--format=gb",
                "--database=nuccore",
                "--outdir={0}".format(gbdir),
            ]
        )

        logger.debug("GenBank records written to {0}.".format(gbdir))
        return gbdir

    @classmethod
    def write_genes_bed(cls, gbrec, outfile):
        """
        Write one 6-column BED line (seqid, start, stop, name, score, strand)
        to `outfile` for every CDS feature of `gbrec`.
        """
        seqid = gbrec.id.split(".")[0]
        if not seqid:
            seqid = gbrec.name.split(".")[0]

        genecount = 0
        consecutivecds = 0
        for feature in gbrec.features:
            if feature.type == "gene":
                genecount += 1
                consecutivecds = 0
                continue

            if feature.type == "CDS":
                # a CDS that follows another CDS without a gene feature in
                # between gets its own counter value
                if consecutivecds:
                    genecount += 1
                consecutivecds = 1
                start = feature.location.start
                stop = feature.location.end
                if start > stop:
                    start, stop = stop, start
                if feature.strand < 0:
                    strand = "-"
                else:
                    strand = "+"
                score = "."
                # fall back to a generated name when there is no locus tag
                accn = (
                    feature.qualifiers[LT][0]
                    if LT in feature.qualifiers
                    else "{}_{}".format(seqid, genecount)
                )

                # drop the '<' / '>' markers from the printed positions
                start = str(start).lstrip("><")
                stop = str(stop).lstrip("><")
                bedline = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format(
                    seqid, start, stop, accn, score, strand
                )
                outfile.write(bedline)

    @classmethod
    def write_genes_fasta(cls, gbrec, fwcds, fwpep):
        """
        For every CDS feature of `gbrec`, write its extracted sequence to
        `fwcds` and the translation of that sequence to `fwpep` (FASTA).
        """
        seqid = gbrec.id.split(".")[0]
        if not seqid:
            seqid = gbrec.name.split(".")[0]

        genecount = 0
        consecutivecds = 0
        for feature in gbrec.features:
            if feature.type == "gene":
                genecount += 1
                consecutivecds = 0
                continue

            if feature.type == "CDS":
                if consecutivecds:
                    genecount += 1
                consecutivecds = 1
                accn = (
                    feature.qualifiers[LT][0]
                    if LT in feature.qualifiers
                    else "{}_{}".format(seqid, genecount)
                )

                seq = feature.extract(gbrec.seq)

                fwcds.write(">{0}\n{1}\n".format(accn, seq))
                fwpep.write(">{0}\n{1}\n".format(accn, seq.translate()))

    def write_genes(self, output="gbout", individual=False, pep=True):
        """
        Write .bed/.cds/.pep for all records: into `output`.<ext> files, or,
        with `individual`, into per-record files under directory `output`.
        With pep=False, `cleanup()` is called on the .pep file name.
        """
        if not individual:
            fwbed = must_open(output + ".bed", "w")
            fwcds = must_open(output + ".cds", "w")
            fwpep = must_open(output + ".pep", "w")

        for recid, rec in self.items():
            if individual:
                mkdir(output)
                fwbed = must_open(op.join(output, recid + ".bed"), "w")
                fwcds = must_open(op.join(output, recid + ".cds"), "w")
                fwpep = must_open(op.join(output, recid + ".pep"), "w")

            GenBank.write_genes_bed(rec, fwbed)
            GenBank.write_genes_fasta(rec, fwcds, fwpep)

        if not pep:
            cleanup(fwpep.name)

    def write_fasta(self, output="gbfasta", individual=False):
        """
        Write the full sequence of every record as FASTA: into
        `output`.fasta, or, with `individual`, one file per record under
        directory `output`.
        """
        if not individual:
            fw = must_open(output + ".fasta", "w")

        for recid, rec in self.items():
            if individual:
                mkdir(output)
                fw = must_open(op.join(output, recid + ".fasta"), "w")

            seqid = rec.id.split(".")[0]
            if not seqid:
                seqid = rec.name.split(".")[0]
            seq = rec.seq
            fw.write(">{0}\n{1}\n".format(seqid, seq))
def preparegb(p, args):
    """
    Add the GenBank input options shared by the sub-commands to parser `p`,
    parse `args`, and resolve the inputs.

    Returns (filenames, accessions, idfile, opts, args): `filenames` is the
    list of `*.gb` files under --gb_dir (or None); `accessions` is the list
    given via --id / --simple (or None); `idfile` is the name of the file the
    accessions were written to (or None).
    """
    p.add_argument(
        "--gb_dir", default=None, help="path to dir containing GanBank files (.gb)"
    )
    p.add_argument(
        "--id",
        default=None,
        help="GenBank accession IDs in a file. One ID per row, or all IDs"
        " in one row comma separated.",
    )
    p.add_argument(
        "--simple",
        default=None,
        type=str,
        help="GenBank accession IDs comma separated "
        "(for lots of IDs please use --id instead).",
    )
    p.add_argument(
        "--individual",
        default=False,
        action="store_true",
        help="parse gb accessions individually",
    )
    opts, args = p.parse_args(args)
    accessions = opts.id
    filenames = opts.gb_dir

    if not (opts.gb_dir or opts.id or opts.simple):
        sys.exit(not p.print_help())

    if opts.gb_dir:
        filenames = glob(opts.gb_dir + "/*.gb")

    if opts.id:
        accessions = []
        # context manager: the id file used to be left open
        with open(opts.id) as fp:
            for row in fp:
                # skip empty fields so blank rows or stray commas do not
                # produce empty accession IDs
                accessions += [
                    atom.strip() for atom in row.strip().split(",") if atom.strip()
                ]

    if opts.simple:
        # --simple takes precedence over --id when both are given
        accessions = opts.simple.split(",")

    if opts.id or opts.simple:
        fw = must_open("GenBank_accession_IDs.txt", "w")
        for atom in accessions:
            print(atom, file=fw)
        fw.close()
        idfile = fw.name
    else:
        idfile = None

    return filenames, accessions, idfile, opts, args
def print_locus_quals(locus_tag, locus, quals_ftypes):
    """
    Given a locus_tag and dict of features, print out 4-column output:
        locus_tag, qualifier, value, feature type

    Replace locus_tag with protein_id if processing an "mRNA" or "CDS"
    and a protein_id is recorded at the same index; "misc_RNA" is reported
    as "ncRNA".
    """
    entry = locus[locus_tag]
    # The "protein_id" list may be missing (it is only created by the caller
    # when "CDS" is among the requested types) or shorter than the feature
    # list; in both cases fall back to the locus tag instead of raising.
    protein_ids = entry.get("protein_id", [])
    for ftype in quals_ftypes:
        label = "ncRNA" if ftype == "misc_RNA" else ftype
        use_protein_id = ftype in ("mRNA", "CDS")
        for i, quals in enumerate(entry[ftype]):
            for elem in quals:
                elem_id = elem[0]
                if use_protein_id and i < len(protein_ids):
                    elem_id = protein_ids[i]
                print("\t".join(str(x) for x in (elem_id, elem[1], elem[2], label)))
class GffLine(object):
    """
    One parsed record (row) of a GFF/GTF file.

    Specification here (http://www.sequenceontology.org/gff3.shtml)

    The nine columns are exposed as `seqid`, `source`, `type`, `start`,
    `end`, `score`, `strand`, `phase` and `attributes_text`; the parsed
    attribute column is available as the mapping `attributes`
    (tag -> list of values).
    """

    def __init__(
        self,
        sline,
        key="ID",
        parent_key="Parent",
        gff3=True,
        line_index=None,
        strict=True,
        append_source=False,
        append_ftype=False,
        append_attrib=None,
        score_attrib=False,
        keep_attr_order=True,
        compute_signature=False,
    ):
        """
        Parse the text line `sline`.

        Raises AssertionError when `strict` is set and the line does not have
        nine columns, or when strand/phase hold an unexpected value.
        """
        sline = sline.strip()
        args = sline.split("\t")
        if len(args) != 9:
            # not tab-delimited: fall back to splitting on any whitespace
            args = sline.split()
        if strict:
            assert len(args) == 9, "Malformed line ({0} columns != 9): {1}".format(
                len(args), args
            )
        self.seqid = args[0]
        self.source = args[1]
        self.type = args[2]
        self.start = int(args[3])
        self.end = int(args[4])
        self.score = args[5]
        self.strand = args[6]
        assert self.strand in Valid_strands, "strand must be one of {0}".format(
            Valid_strands
        )
        self.phase = args[7]
        assert self.phase in Valid_phases, "phase must be one of {0}".format(
            Valid_phases
        )
        self.attributes_text = "" if len(args) <= 8 else args[8].strip()
        self.attributes = make_attributes(
            self.attributes_text, gff3=gff3, keep_attr_order=keep_attr_order
        )
        # key is not in the gff3 field, this indicates the conversion to accn
        self.key = key  # usually it's `ID=xxxxx;`
        self.parent_key = parent_key  # usually it's `Parent=xxxxx;`
        self.gff3 = gff3

        has_key = self.key in self.attributes

        if append_ftype and has_key:
            # if `append_ftype` is True, append the gff `self.type`
            # to `self.key`. use this option to enhance the `self.accn`
            # column in bed file
            self.attributes[self.key][0] = ":".join(
                (self.type, self.attributes[self.key][0])
            )

        if append_source and has_key:
            # if `append_source` is True, append the gff `self.source`
            # to `self.key`. use this option to enhance the `self.accn`
            # column in bed file
            self.attributes[self.key][0] = ":".join(
                (self.source, self.attributes[self.key][0])
            )

        # Same guard as the two options above: only decorate the key value
        # when the record actually carries the key attribute.
        if append_attrib and append_attrib in self.attributes and has_key:
            self.attributes[self.key][0] = ":".join(
                (self.attributes[self.key][0], self.attributes[append_attrib][0])
            )

        if (
            score_attrib
            and score_attrib in self.attributes
            and is_number(self.attributes[score_attrib][0])
        ):
            # if `score_attrib` is specified, check if it is indeed an
            # attribute or not. If yes, check if the value of attribute
            # is numeric or not. If not, keep original GFF score value
            self.score = self.attributes[score_attrib][0]

        if line_index is not None and is_number(line_index):
            # if `line_index` is provided, initialize an idx variable
            # used to autocompute the ID for a feature
            self.idx = line_index

        if compute_signature:
            # if `compute_signature` is specified, compute a signature for
            # the gff line and store in variable `sign`
            self.sign = self.signature

    def __getitem__(self, key):
        """Allow `line["seqid"]`-style access to instance attributes."""
        return getattr(self, key)

    def __str__(self):
        """Return the record as a tab-delimited 9-column line.

        The last column is `attributes_text`; call `update_attributes()`
        first if `attributes` was modified.
        """
        return "\t".join(
            str(x)
            for x in (
                self.seqid,
                self.source,
                self.type,
                self.start,
                self.end,
                self.score,
                self.strand,
                self.phase,
                self.attributes_text,
            )
        )

    def get_attr(self, key, first=True):
        """Return the first value (or the whole list when `first` is False)
        of attribute `key`, or None when the attribute is absent."""
        if key in self.attributes:
            if first:
                return self.attributes[key][0]
            return self.attributes[key]
        return None

    def set_attr(
        self, key, value, update=False, append=False, dbtag=None, urlquote=False
    ):
        """
        Set, append to, or (when `value` is None) remove attribute `key`.

        For `Dbxref` with a `dbtag`, a comma-separated `value` is split and
        every item prefixed with `dbtag:`. With `update=True` the text form
        of the attribute column is regenerated afterwards.
        """
        if value is None:
            self.attributes.pop(key, None)
        else:
            if key == "Dbxref" and dbtag:
                value = ["{0}:{1}".format(dbtag, x) for x in value.split(",")]
            if not isinstance(value, list):
                value = [value]
            if key not in self.attributes or not append:
                self.attributes[key] = []
            self.attributes[key].extend(value)
        if update:
            self.update_attributes(gff3=self.gff3, urlquote=urlquote)

    def update_attributes(self, skipEmpty=True, gff3=True, gtf=None, urlquote=True):
        """
        Rebuild `attributes_text` from the `attributes` mapping.

        `gtf` selects GTF style (`tag "value";`), otherwise `gff3` selects
        between `tag=value;` and GFF2 `tag value; ` style; passing
        `gff3=None` uses the flavour the line was parsed with.
        """
        if gtf:
            gff3 = None
        elif gff3 is None:
            gff3 = self.gff3

        sep = ";" if gff3 else "; "
        equal = "=" if gff3 else " "
        attributes = []
        for tag, val in self.attributes.items():
            if not val and skipEmpty:
                continue
            # str() so that numeric values stored via set_attr() serialize too
            val = ",".join(str(v) for v in (val or ()))
            val = '"{0}"'.format(val) if (" " in val and (not gff3)) or gtf else val
            if urlquote:
                sc = safechars
                if tag in multiple_gff_attributes:
                    # commas separate multiple values and must stay unescaped
                    sc += ","
                val = quote(val, safe=sc)
            attributes.append(equal.join((tag, val)))

        self.attributes_text = sep.join(attributes)
        if gtf:
            self.attributes_text += ";"

    def update_tag(self, old_tag, new_tag):
        """Rename attribute `old_tag` to `new_tag`; no-op when absent."""
        if old_tag not in self.attributes:
            return
        self.attributes[new_tag] = self.attributes[old_tag]
        del self.attributes[old_tag]

    @property
    def accn(self):
        """URL-quoted accession of the feature.

        Uses the values of the `key` attribute; when that attribute is
        missing, an ID is derived from the feature type and `self.idx`
        (only set when a numeric `line_index` was passed to the constructor).
        """
        if self.key:  # GFF3 format
            if self.key not in self.attributes:
                a = ["{0}_{1}".format(str(self.type).lower(), self.idx)]
            else:
                a = self.attributes[self.key]
        else:  # GFF2 format
            a = self.attributes_text.split()
        return quote(",".join(a), safe=safechars)

    id = accn

    @property
    def name(self):
        """First value of the `Name` attribute, or None."""
        return self.attributes["Name"][0] if "Name" in self.attributes else None

    @property
    def parent(self):
        """First value of the parent-key attribute, or None."""
        return (
            self.attributes[self.parent_key][0]
            if self.parent_key in self.attributes
            else None
        )

    @property
    def span(self):
        """Feature length in bases (coordinates are inclusive)."""
        return self.end - self.start + 1

    @property
    def bedline(self):
        """The feature as a 6-column BedLine (0-based start, `.` score -> 0)."""
        score = "0" if self.score == "." else self.score
        row = "\t".join(
            (
                self.seqid,
                str(self.start - 1),
                str(self.end),
                self.accn,
                score,
                self.strand,
            )
        )
        return BedLine(row)

    @property
    def signature(self):
        """
        create a unique signature for any GFF line based on joining
        columns 1,2,3,4,5,7,8 (into a comma separated string)

        exon/CDS/UTR rows additionally get the locus derived from their
        Parent (when available); all other rows get their accession.
        """
        sig_elems = [
            self.seqid,
            self.source,
            self.type,
            self.start,
            self.end,
            self.strand,
            self.phase,
        ]
        if re.search("exon|CDS|UTR", self.type):
            parent = self.get_attr("Parent")
            if parent:
                locus, _iso = atg_name(parent, retval="locus,iso", trimpad0=False)
                if locus:
                    sig_elems.append(locus)
        else:
            sig_elems.append(self.accn)

        return ",".join(str(elem) for elem in sig_elems)
class GffFeatureTracker(object):
    """
    Keep track of the child features (exon/CDS/UTR/fragment) seen for each
    parent, so that the positional index of a child among its siblings can
    be looked up, plus a small accession -> symbol store.
    """

    def __init__(self):
        # regex of feature types that are tracked
        self.ftype = "exon|CDS|UTR|fragment"
        # parent -> feature type -> set (or, once indexed, sorted list)
        # of (start, end, sign) tuples
        self.tracker = {}
        # accession -> symbol (or ID)
        self.symbolstore = {}

    def track(self, parent, g):
        """Record feature `g` under `parent` if its type is a tracked one."""
        if not re.search(self.ftype, g.type):
            return
        by_type = self.tracker.setdefault(parent, {})
        bucket = by_type.get(g.type)
        if not isinstance(bucket, set):
            # Either first sighting (None) or a list produced by an earlier
            # feat_index() call; go back to a set so that adding keeps working
            # and the bucket is re-sorted on the next lookup.
            bucket = set(bucket or ())
            by_type[g.type] = bucket
        bucket.add((g.start, g.end, g.sign))

    def _sort(self, parent, ftype, reverse=False):
        """Replace the bucket with a list sorted by (start, end), once."""
        bucket = self.tracker[parent][ftype]
        if not isinstance(bucket, list):
            self.tracker[parent][ftype] = sorted(
                bucket,
                key=lambda x: (x[0], x[1]),
                reverse=reverse,
            )

    def feat_index(self, parent, ftype, strand, feat_tuple):
        """
        Return the 0-based position of `feat_tuple` among the tracked
        `ftype` children of `parent`, counted in transcript order
        (descending coordinates on the minus strand).

        Raises ValueError when the tuple was never tracked.
        """
        self._sort(parent, ftype, reverse=(strand == "-"))
        return self.tracker[parent][ftype].index(feat_tuple)

    def store_symbol(self, g):
        """Remember the `symbol` attribute of `g` (else its `ID`) by accession."""
        for symbol_attr in ("symbol", "ID"):
            if symbol_attr in g.attributes:
                break
        self.symbolstore[g.accn] = g.get_attr(symbol_attr)

    def get_symbol(self, parent):
        """Return the symbol stored for `parent`; KeyError if unknown."""
        return self.symbolstore[parent]
score=None, id=None, strand=None): + """ + Given a gffutils object, convert it to a range object + """ + if score or id: + _score = score if score else obj.score + _id = id if id else obj.id + return Range( + seqid=obj.seqid, start=obj.start, end=obj.end, score=_score, id=_id + ) + elif strand: + return obj.seqid, obj.start, obj.end, obj.strand + + return obj.seqid, obj.start, obj.end + + +def main(): + actions = ( + ("addparent", "merge sister features and infer their parent"), + ("bed", "parse gff and produce bed file for particular feature type"), + ("bed12", "produce bed12 file for coding features"), + ("chain", "fill in parent features by chaining children"), + ("children", "find all children that belongs to the same parent"), + ("cluster", "cluster transcripts based on shared splicing structure"), + ("extract", "extract contig or features from gff file"), + ("filter", "filter the gff file based on Identity and Coverage"), + ( + "fixboundaries", + "fix boundaries of parent features by range chaining child features", + ), + ( + "fixpartials", + "fix 5/3 prime partial transcripts, locate nearest in-frame start/stop", + ), + ("format", "format the gff file, change seqid, etc."), + ("frombed", "convert from bed format to gff3"), + ("fromgtf", "convert gtf to gff3 format"), + ("fromsoap", "convert from soap format to gff3"), + ("gapsplit", "split alignment GFF3 at gaps based on CIGAR string"), + ("gb", "convert gff3 to genbank format"), + ("gtf", "convert gff3 to gtf format"), + ("liftover", "adjust gff coordinates based on tile number"), + ("load", "extract the feature (e.g. 
CDS) sequences and concatenate"), + ("merge", "merge several gff files into one"), + ("note", "extract certain attribute field for each feature"), + ("orient", "orient the coding features based on translation"), + ("parents", "find the parents given a list of IDs"), + ("rename", "change the IDs within the gff3"), + ("sizes", "calculate sizes of features in gff file"), + ("sort", "sort the gff file"), + ("splicecov", "tag gff introns with coverage info from junctions.bed"), + ("split", "split the gff into one contig per file"), + ("summary", "print summary stats for features of different types"), + ("uniq", "remove the redundant gene models"), + ) + + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def addparent(args): + """ + %prog addparent file.gff + + Merge sister features and infer parents. + """ + p = OptionParser(addparent.__doc__) + p.add_argument("--childfeat", default="CDS", help="Type of children feature") + p.add_argument("--parentfeat", default="mRNA", help="Type of merged feature") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gff_file,) = args + gff = Gff(gff_file) + data = defaultdict(list) + for g in gff: + if g.type != opts.childfeat: + continue + data[g.parent].append(g) + + logger.debug("A total of %d %s features clustered", len(data), opts.childfeat) + + parents = [] + for parent, dd in data.items(): + d = dd[0] + start, end = min(x.start for x in dd), max(x.end for x in dd) + gffline = "\t".join( + str(x) + for x in ( + d.seqid, + d.source, + opts.parentfeat, + start, + end, + ".", + d.strand, + ".", + "ID={0};Name={0}".format(parent), + ) + ) + parents.append(GffLine(gffline)) + parents.sort(key=lambda x: (x.seqid, x.start)) + logger.debug("Merged feature sorted") + + fw = must_open(opts.outfile, "w") + for parent in parents: + print(parent, file=fw) + parent_id = parent.id + for d in data[parent_id]: + if d.accn == parent_id: + new_id = "{0}.{1}1".format(parent_id, 
opts.childfeat) + d.set_attr("ID", new_id) + d.set_attr("Name", new_id, update=True) + print(d, file=fw) + fw.close() + + +def _fasta_slice(fasta, seqid, start, stop, strand): + """ + Return slice of fasta, given (seqid, start, stop, strand) + """ + _strand = 1 if strand == "+" else -1 + return fasta.sequence( + {"chr": seqid, "start": start, "stop": stop, "strand": _strand} + ) + + +def is_valid_codon(codon, type="start"): + """ + Given a codon sequence, check if it is a valid start/stop codon + """ + if len(codon) != 3: + return False + + if type == "start": + if codon != "ATG": + return False + elif type == "stop": + if not any(_codon == codon for _codon in ("TGA", "TAG", "TAA")): + return False + else: + logger.error( + "`%s` is not a valid codon type. Should be one of (`start` or `stop`)", type + ) + sys.exit() + + return True + + +def scan_for_valid_codon(codon_span, strand, seqid, genome, type="start"): + """ + Given a codon span, strand and reference seqid, scan upstream/downstream + to find a valid in-frame start/stop codon + """ + s, e = codon_span[0], codon_span[1] + while True: + if (type == "start" and strand == "+") or (type == "stop" and strand == "-"): + s, e = s - 3, e - 3 + else: + s, e = s + 3, e + 3 + + codon = _fasta_slice(genome, seqid, s, e, strand) + is_valid = is_valid_codon(codon, type=type) + if not is_valid: + if type == "start": + # if we are scanning upstream for a valid start codon, + # stop scanning when we encounter a stop + if is_valid_codon(codon, type="stop"): + return None, None + elif type == "stop": + # if we are scanning downstream for a valid stop codon, + # stop scanning when we encounter a start + if is_valid_codon(codon, type="start"): + return None, None + continue + break + + return s, e + + +def fixpartials(args): + """ + %prog fixpartials genes.gff genome.fasta partials.ids + + Given a gff file of features, fix partial (5'/3' incomplete) transcripts + by trying to locate nearest in-frame start/stop codon + """ + p = 
OptionParser(fixpartials.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + ( + gffile, + gfasta, + partials, + ) = args + + gff = make_index(gffile) + genome = Fasta(gfasta, index=True) + partials = LineFile(partials, load=True).lines + + # all_transcripts = [f.id for f in gff.features_of_type("mRNA", \ + # order_by=("seqid", "start"))] + seen = set() + fw = must_open(opts.outfile, "w") + for gene in gff.features_of_type("gene", order_by=("seqid", "start")): + children = AutoVivification() + cflag = False + transcripts = list(gff.children(gene, level=1, order_by="start")) + for transcript in transcripts: + trid, seqid, strand = transcript.id, transcript.seqid, transcript.strand + + for child in gff.children(transcript, order_by="start"): + ftype = child.featuretype + if ftype not in children[trid]: + children[trid][ftype] = [] + children[trid][ftype].append(child) + + five_prime, three_prime = True, True + nstart, nstop = (None, None), (None, None) + cds_span = [children[trid]["CDS"][0].start, children[trid]["CDS"][-1].stop] + new_cds_span = [x for x in cds_span] + + start_codon = (cds_span[0], cds_span[0] + 2) + stop_codon = (cds_span[1] - 2, cds_span[1]) + if strand == "-": + start_codon, stop_codon = stop_codon, start_codon + + if trid in partials: + seen.add(trid) + start_codon_fasta = _fasta_slice( + genome, seqid, start_codon[0], start_codon[1], strand + ) + stop_codon_fasta = _fasta_slice( + genome, seqid, stop_codon[0], stop_codon[1], strand + ) + + if not is_valid_codon(start_codon_fasta, type="start"): + five_prime = False + nstart = scan_for_valid_codon( + start_codon, strand, seqid, genome, type="start" + ) + + if not is_valid_codon(stop_codon_fasta, type="stop"): + three_prime = False + nstop = scan_for_valid_codon( + stop_codon, strand, seqid, genome, type="stop" + ) + + logger.debug( + "feature=%s (%s), 5'=%s, 3'=%s, %d <== %d ==> %d", + trid, + strand, + five_prime, + three_prime, 
def sizes(args):
    """
    %prog sizes gffile

    Given a gff file of features, calculate the sizes of chosen parent feature
    based on summation of sizes of child features.

    For example, for parent 'mRNA' and child 'CDS' feature types, calculate sizes of
    mRNA by summing the sizes of the disjoint CDS parts.
    """
    p = OptionParser(sizes.__doc__)
    p.set_outfile()
    p.add_argument(
        "--parents",
        dest="parents",
        default="mRNA",
        help="parent feature(s) for which size is to be calculated",
    )
    p.add_argument(
        "--child",
        dest="child",
        default="CDS",
        help="child feature to use for size calculations",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (gffile,) = args
    # De-duplicate the requested parent types but keep the order given on the
    # command line, so the output order is reproducible between runs.
    parent_types = list(dict.fromkeys(opts.parents.split(",")))
    cftype = opts.child

    gff = make_index(gffile)

    fw = must_open(opts.outfile, "w")
    try:
        for parent in parent_types:
            for feat in gff.features_of_type(parent, order_by=("seqid", "start")):
                # When parent and child type coincide, the size is simply the
                # span of the feature itself.
                fsize = (
                    feat.end - feat.start + 1
                    if cftype == parent
                    else gff.children_bp(feat, child_featuretype=cftype)
                )
                print("\t".join(str(x) for x in (feat.id, fsize)), file=fw)
    finally:
        fw.close()
+ """ + from jcvi.utils.grouper import Grouper + from itertools import combinations + + p = OptionParser(cluster.__doc__) + p.add_argument( + "--slop", + default=False, + action="store_true", + help="allow minor variation in terminal 5'/3' UTR" + " start/stop position", + ) + p.add_argument( + "--inferUTR", + default=False, + action="store_true", + help="infer presence of UTRs from exon coordinates", + ) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gffile,) = args + slop = opts.slop + inferUTR = opts.inferUTR + + gff = make_index(gffile) + + fw = must_open(opts.outfile, "w") + print("##gff-version 3", file=fw) + seen = {} + for gene in gff.features_of_type("gene", order_by=("seqid", "start")): + g = Grouper() + mrnas = list( + combinations( + [ + mrna + for mrna in gff.children(gene, featuretype="mRNA", order_by="start") + ], + 2, + ) + ) + if len(mrnas) > 0: + for mrna1, mrna2 in mrnas: + mrna1s, mrna2s = ( + gff.children_bp(mrna1, child_featuretype="exon"), + gff.children_bp(mrna2, child_featuretype="exon"), + ) + g.join((mrna1.id, mrna1s)) + g.join((mrna2.id, mrna2s)) + + if match_subfeats(mrna1, mrna2, gff, gff, featuretype="CDS"): + res = [] + ftypes = ( + ["exon"] if inferUTR else ["five_prime_UTR", "three_prime_UTR"] + ) + for ftype in ftypes: + res.append( + match_subfeats( + mrna1, mrna2, gff, gff, featuretype=ftype, slop=slop + ) + ) + + if all(res): + g.join((mrna1.id, mrna1s), (mrna2.id, mrna2s)) + else: + for mrna1 in gff.children(gene, featuretype="mRNA", order_by="start"): + mrna1s = gff.children_bp(mrna1, child_featuretype="exon") + g.join((mrna1.id, mrna1s)) + + print(gene, file=fw) + for group in sorted(g): + group.sort(key=lambda x: x[1], reverse=True) + mrnas = [el[0] for el in group] + m = mrnas[0] + + _mrnaid = [] + for x in mrnas: + if x not in _mrnaid: + _mrnaid.append(x) + mrnaid = "{0}".format("-".join(_mrnaid)) + if mrnaid not in seen: + seen[mrnaid] = 0 + else: + 
seen[mrnaid] += 1 + mrnaid = "{0}-{1}".format(mrnaid, seen[mrnaid]) + + _mrna = gff[m] + _mrna.attributes["ID"] = [mrnaid] + _mrna.attributes["Parent"] = [gene.id] + children = gff.children(m, order_by="start") + print(_mrna, file=fw) + for child in children: + child.attributes["ID"] = ["{0}".format(child.id)] + child.attributes["Parent"] = [mrnaid] + print(child, file=fw) + + fw.close() + + +def summary(args): + """ + %prog summary gffile + + Print summary stats for features of different types. + """ + from jcvi.formats.base import SetFile + from jcvi.formats.bed import BedSummary + from jcvi.utils.table import tabulate + + p = OptionParser(summary.__doc__) + p.add_argument( + "--isoform", + default=False, + action="store_true", + help="Find longest isoform of each id", + ) + p.add_argument("--ids", help="Only include features from certain IDs") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gff_file,) = args + ids = opts.ids + + if ids: + ids = SetFile(ids) + logger.debug("Total ids loaded: %d", len(ids)) + + if opts.isoform: + pids = set() + gff = Gff(gff_file) + for g in gff: + if g.type != "mRNA": + continue + if g.parent not in ids: + continue + if "longest" not in g.attributes: + pids = set(x + ".1" for x in ids) + break + if g.attributes["longest"][0] == "0": + continue + pids.add(g.id) + ids = pids + logger.debug("After checking longest: %d", len(ids)) + + # Collects aliases + gff = Gff(gff_file) + for g in gff: + if g.name in ids: + ids.add(g.id) + logger.debug("Total ids including aliases: %d", len(ids)) + + gff = Gff(gff_file) + beds = defaultdict(list) + for g in gff: + if ids and not (g.id in ids or g.name in ids or g.parent in ids): + continue + + beds[g.type].append(g.bedline) + + table = {} + for type, bb in sorted(beds.items()): + bs = BedSummary(bb) + table[(type, "Features")] = bs.nfeats + table[(type, "Unique bases")] = bs.unique_bases + table[(type, "Total bases")] = bs.total_bases + + 
def gb(args):
    """
    %prog gb gffile fastafile

    Convert GFF3 to Genbank format.
    """
    try:
        from BCBio import GFF
    except ImportError:
        print(
            "You need to install dep first: $ pip install bcbio-gff", file=sys.stderr
        )
        # Nothing below can work without the parser; stop here instead of
        # failing later with a NameError on `GFF`.
        sys.exit(1)

    p = OptionParser(gb.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    gff_file, fasta_file = args
    # output is written next to the input as <gff basename>.gb
    pf = op.splitext(gff_file)[0]
    out_file = pf + ".gb"
    fasta_input = SeqIO.to_dict(SeqIO.parse(fasta_file, "fasta"))
    gff_iter = GFF.parse(gff_file, fasta_input)
    SeqIO.write(gff_iter, out_file, "genbank")
def parents(args):
    """
    %prog parents gffile models.ids

    Find the parents given a list of IDs in "models.ids".
    """
    p = OptionParser(parents.__doc__)

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    gff_file, idsfile = args
    g = make_index(gff_file)
    with open(idsfile) as fp:
        for row in fp:
            cid = row.strip()
            if not cid:
                # ignore blank lines in the ids file
                continue
            # level-1 parents only; take the first one reported
            b = next(g.parents(cid, 1), None)
            if b is None:
                logger.warning("No parent found for `%s`", cid)
                continue
            print("\t".join((cid, b.id)))
+ + (2) Total bp length of child features + """ + p = OptionParser(filter.__doc__) + p.add_argument( + "--type", default="mRNA", help="The feature to scan for the attributes" + ) + g1 = p.add_argument_group("Filter by identity/coverage attribute values") + g1.add_argument("--id", default=95, type=float, help="Minimum identity") + g1.add_argument("--coverage", default=90, type=float, help="Minimum coverage") + g1.add_argument( + "--nocase", + default=False, + action="store_true", + help="Case insensitive lookup of attribute names", + ) + g2 = p.add_argument_group("Filter by child feature bp length") + g2.add_argument( + "--child_ftype", default=None, type=str, help="Child featuretype to consider" + ) + g2.add_argument( + "--child_bp", + default=None, + type=int, + help="Filter by total bp of children of chosen ftype", + ) + p.set_outfile() + + opts, args = p.parse_args(args) + otype, oid, ocov = opts.type, opts.id, opts.coverage + cftype, clenbp = opts.child_ftype, opts.child_bp + + id_attr, cov_attr = "Identity", "Coverage" + if opts.nocase: + id_attr, cov_attr = id_attr.lower(), cov_attr.lower() + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gffile,) = args + + gffdb = make_index(gffile) + bad = set() + ptype = None + for g in gffdb.features_of_type(otype, order_by=("seqid", "start")): + if not ptype: + parent = list(gffdb.parents(g)) + ptype = parent[0].featuretype if len(parent) > 0 else otype + if cftype and clenbp: + if gffdb.children_bp(g, child_featuretype=cftype) < clenbp: + bad.add(g.id) + elif oid and ocov: + identity = float(g.attributes[id_attr][0]) + coverage = float(g.attributes[cov_attr][0]) + if identity < oid or coverage < ocov: + bad.add(g.id) + + logger.debug("%d bad accns marked.", len(bad)) + + fw = must_open(opts.outfile, "w") + for g in gffdb.features_of_type(ptype, order_by=("seqid", "start")): + if ptype != otype: + feats = list(gffdb.children(g, featuretype=otype, order_by="start")) + ok_feats = [f for f in feats if f.id not in 
def fix_gsac(g, notes):
    """
    Rewrite the `Name` and `Note` attributes of a GSAC feature in place.

    Only `gene` and `mRNA` features are touched; any other type returns
    immediately without modification.

    g: feature exposing `type`, `attributes` and `update_attributes()`.
    notes: mapping from a gene ID to the note value recorded for that gene.

    For a gene, the existing `Name` value becomes the `Note`. For an mRNA, the
    `Note` is looked up in `notes` under the first `Parent` entry. In both
    cases `Name` is then overwritten with the `ID` value.
    """
    if g.type not in ("gene", "mRNA"):
        return

    attrs = g.attributes
    if g.type == "gene":
        annotation = attrs["Name"]
    else:
        # mRNA: inherit the note recorded for its (first) parent gene
        annotation = notes[attrs["Parent"][0]]

    attrs["Name"] = attrs["ID"]
    attrs["Note"] = annotation
    g.update_attributes()
def chain(args):
    """
    %prog chain gffile > chained.gff

    Fill in parent features by chaining child features and return extent of the
    child coordinates.
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser), so implementation notes live in comments.
    #
    # Pass 1 groups every feature of `--chain_ftype` under a key made of
    # (seqid, group id) and rewrites it as a child (new ID/Parent, type mapped
    # through `valid_gff_parent_child`). Features of any other type are echoed
    # unchanged. Pass 2 emits one synthesized parent line per group, spanning
    # the extent of its children, followed by those children.
    valid_merge_op = ("sum", "min", "max", "mean", "collapse")

    p = OptionParser(chain.__doc__)
    p.add_argument(
        "--key",
        dest="attrib_key",
        default=None,
        help="Attribute to use as `key` for chaining operation",
    )
    p.add_argument(
        "--chain_ftype",
        default="cDNA_match",
        help="GFF feature type to use for chaining operation",
    )
    p.add_argument(
        "--parent_ftype",
        default=None,
        help="GFF feature type to use for the chained coordinates",
    )
    p.add_argument(
        "--break",
        dest="break_chain",
        action="store_true",
        help="Break long chains which are non-contiguous",
    )
    p.add_argument(
        "--transfer_attrib",
        dest="attrib_list",
        help="Attributes to transfer to parent feature; accepts comma"
        + " separated list of attribute names",
    )
    p.add_argument(
        "--transfer_score",
        dest="score_merge_op",
        choices=valid_merge_op,
        help="Transfer value stored in score field to parent feature."
        + " Score is reported based on chosen operation",
    )
    p.set_outfile()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (gffile,) = args
    attrib_key = opts.attrib_key
    attrib_list = opts.attrib_list
    # Split once here rather than once per feature inside the loop.
    transfer_attribs = attrib_list.split(",") if attrib_list else []
    score_merge_op = opts.score_merge_op
    break_chain = opts.break_chain

    chain_ftype = opts.chain_ftype
    parent_ftype = opts.parent_ftype if opts.parent_ftype else chain_ftype

    gffdict = {}
    fw = must_open(opts.outfile, "w")
    gff = Gff(gffile)
    if break_chain:
        # ctr: per-group-id counter of how many separate runs were seen so far
        ctr, prev_gid = dict(), None
    for g in gff:
        if g.type != chain_ftype:
            print(g, file=fw)
            continue

        gid = g.accn
        if attrib_key:
            assert (
                attrib_key in g.attributes
            ), "Attribute `{0}` not present in GFF3".format(attrib_key)
            gid = g.get_attr(attrib_key)
        curr_gid = gid
        if break_chain:
            # A new run starts whenever the group id differs from the one on
            # the previous chained feature; each run gets its own suffix.
            if prev_gid != curr_gid:
                if curr_gid not in ctr:
                    ctr[curr_gid] = 0
                else:
                    ctr[curr_gid] += 1
            gid = "{0}:{1}".format(gid, ctr[curr_gid])
        gkey = (g.seqid, gid)
        if gkey not in gffdict:
            gffdict[gkey] = {
                "seqid": g.seqid,
                "source": g.source,
                "strand": g.strand,
                "type": parent_ftype,
                "coords": [],
                "children": [],
                "score": [],
                "attrs": DefaultOrderedDict(set),
            }
            gffdict[gkey]["attrs"]["ID"].add(gid)
        group = gffdict[gkey]

        for a in transfer_attribs:
            if a in g.attributes:
                # move the attribute values from the child up to the parent
                group["attrs"][a].update(g.attributes[a])
                del g.attributes[a]

        if break_chain:
            # Keeps the original substring test against the raw option string.
            _attrib = "Alias" if attrib_list and ("Name" not in attrib_list) else "Name"
            group["attrs"][_attrib].add(curr_gid)

        group["coords"].append((g.start, g.end))
        if score_merge_op:
            if is_number(g.score):
                group["score"].append(float(g.score))
                g.score = "."

        g.attributes["Parent"] = [gid]
        g.attributes["ID"] = ["{0}-{1}".format(gid, len(group["children"]) + 1)]
        g.type = valid_gff_parent_child[g.type]
        g.update_attributes()
        group["children"].append(g)
        if break_chain:
            prev_gid = curr_gid

    for gkey, v in sorted(gffdict.items()):
        start, stop = range_minmax(v["coords"])

        score = "."
        scores = sorted(v["score"])
        # Only merge when at least one numeric score was collected: min()/max()
        # raise on an empty list, the mean would divide by zero, and a
        # "collapse" of nothing would leave the score column empty. In that
        # case the parent keeps the "." placeholder.
        if score_merge_op and scores:
            if score_merge_op == "sum":
                score = sum(scores)
            elif score_merge_op == "min":
                score = min(scores)
            elif score_merge_op == "max":
                score = max(scores)
            elif score_merge_op == "mean":
                score = sum(scores, 0.0) / len(scores)
            elif score_merge_op == "collapse":
                score = ",".join(str(x) for x in scores)

        # The 9th column is a throwaway placeholder; the real attributes are
        # assigned right after construction.
        g = GffLine(
            "\t".join(
                str(x)
                for x in [
                    v["seqid"],
                    v["source"],
                    v["type"],
                    start,
                    stop,
                    score,
                    v["strand"],
                    ".",
                    None,
                ]
            )
        )
        g.attributes = v["attrs"]
        g.update_attributes()

        print(g, file=fw)

        for child in v["children"]:
            print(child, file=fw)

    fw.close()
+ """ + from jcvi.formats.obo import GODag_from_SO, validate_term + + valid_multiparent_ops = ["split", "merge"] + + p = OptionParser(format.__doc__) + + g1 = p.add_argument_group("Parameter(s) used to modify GFF attributes (9th column)") + g1.add_argument("--name", help="Add Name attribute from two-column file") + g1.add_argument("--note", help="Add Note attribute from two-column file") + g1.add_argument( + "--add_attribute", + dest="attrib_files", + help="Add new attribute(s) " + + "from two-column file(s); attribute name comes from filename; " + + "accepts comma-separated list of files", + ) + g1.add_argument( + "--add_dbxref", + dest="dbxref_files", + help="Add new Dbxref value(s) (DBTAG:ID) " + + "from two-column file(s). DBTAG comes from filename, ID comes from 2nd column; " + + "accepts comma-separated list of files", + ) + g1.add_argument( + "--nostrict", + default=False, + action="store_true", + help="Disable strict parsing of GFF file and/or mapping file", + ) + g1.add_argument( + "--remove_attr", + dest="remove_attrs", + help="Specify attributes to remove; " + + "accepts comma-separated list of attribute names", + ) + g1.add_argument( + "--copy_id_attr_to_name", + default=False, + action="store_true", + help="Copy `ID` attribute value to `Name`, when `Name` is not defined", + ) + g1.add_argument( + "--invent_name_attr", + default=False, + action="store_true", + help="Invent `Name` attribute for 2nd level child features; " + + "Formatted like PARENT:FEAT_TYPE:FEAT_INDEX", + ) + g1.add_argument( + "--no_keep_attr_order", + default=False, + action="store_true", + help="Do not maintain attribute order", + ) + + g2 = p.add_argument_group("Parameter(s) used to modify content within columns 1-8") + g2.add_argument( + "--seqid", + help="Switch seqid from two-column file. If not" + + " a file, value will globally replace GFF seqid", + ) + g2.add_argument( + "--source", + help="Switch GFF source from two-column file. 
If not" + + " a file, value will globally replace GFF source", + ) + g2.add_argument( + "--type", + help="Switch GFF feature type from two-column file. If not" + + " a file, value will globally replace GFF type", + ) + g2.add_argument( + "--fixphase", + default=False, + action="store_true", + help="Change phase 1<->2, 2<->1", + ) + + g3 = p.add_argument_group( + "Other parameter(s) to perform manipulations to the GFF file content" + ) + g3.add_argument( + "--unique", default=False, action="store_true", help="Make IDs unique" + ) + g3.add_argument( + "--chaindup", + default=None, + dest="duptype", + help="Chain duplicate features of a particular GFF3 `type`," + + " sharing the same ID attribute", + ) + g3.add_argument( + "--multiparents", + default=None, + choices=valid_multiparent_ops, + help="Split/merge identical features (same `seqid`, `source`, `type`, `coord-range`, `strand`, `phase`) mapping to multiple parents", + ) + g3.add_argument( + "--remove_feats", help="Comma separated list of features to remove by type" + ) + g3.add_argument( + "--remove_feats_by_ID", + help="List of features to remove by ID;" + + " accepts comma-separated list or list file", + ) + g3.add_argument( + "--gsac", + default=False, + action="store_true", + help="Fix GSAC GFF3 file attributes", + ) + g3.add_argument( + "--invent_protein_feat", + default=False, + action="store_true", + help="Invent a protein feature span (chain CDS feats)", + ) + g3.add_argument( + "--process_ftype", + default=None, + type=str, + help="Specify feature types to process; " + "accepts comma-separated list of feature types", + ) + g3.add_argument( + "--gff3", default=False, action="store_true", help="Print output in GFF3 format" + ) + g3.add_argument( + "--make_gff_store", + default=False, + action="store_true", + help="Store entire GFF file in memory during first iteration", + ) + + p.set_outfile() + p.set_SO_opts() + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + 
(gffile,) = args + mapfile = opts.seqid + names = opts.name + note = opts.note + source = opts.source + ftype = opts.type + attrib_files = opts.attrib_files.split(",") if opts.attrib_files else None + dbxref_files = opts.dbxref_files.split(",") if opts.dbxref_files else None + remove_attrs = opts.remove_attrs.split(",") if opts.remove_attrs else None + process_ftype = opts.process_ftype.split(",") if opts.process_ftype else None + gsac = opts.gsac + assert not ( + opts.unique and opts.duptype + ), "Cannot use `--unique` and `--chaindup` together" + assert not ( + opts.type and opts.duptype + ), "Cannot use `--type` and `--chaindup` together" + unique = opts.unique + duptype = opts.duptype + fixphase = opts.fixphase + phaseT = {"1": "2", "2": "1"} + remove_feats = opts.remove_feats.split(",") if opts.remove_feats else None + remove_feats_by_ID = None + if opts.remove_feats_by_ID: + remove_feats_by_ID = ( + LineFile(opts.remove_feats_by_ID, load=True).lines + if op.isfile(opts.remove_feats_by_ID) + else opts.remove_feats_by_ID.split(",") + ) + strict = False if opts.nostrict else True + make_gff_store = True if gffile in ("-", "stdin") else opts.make_gff_store + assert not ( + opts.copy_id_attr_to_name and opts.invent_name_attr + ), "Cannot use `--copy_id_attr_to_name` and `--invent_name_attr` together" + copy_id_attr_to_name = opts.copy_id_attr_to_name + invent_name_attr = opts.invent_name_attr + invent_protein_feat = opts.invent_protein_feat + compute_signature = False + + outfile = opts.outfile + + mapping = None + mod_attrs = set() + if mapfile and op.isfile(mapfile): + mapping = DictFile(mapfile, delimiter="\t", strict=strict) + mod_attrs.add("ID") + if note: + note = DictFile(note, delimiter="\t", strict=strict) + mod_attrs.add("Note") + if source and op.isfile(source): + source = DictFile(source, delimiter="\t", strict=strict) + if ftype and op.isfile(ftype): + ftype = DictFile(ftype, delimiter="\t", strict=strict) + if names: + names = DictFile(names, 
delimiter="\t", strict=strict) + mod_attrs.add("Name") + + if attrib_files: + attr_values = {} + for fn in attrib_files: + attr_name = op.basename(fn).rsplit(".", 1)[0] + if attr_name not in reserved_gff_attributes: + attr_name = attr_name.lower() + attr_values[attr_name] = DictFile(fn, delimiter="\t", strict=strict) + mod_attrs.add(attr_name) + if dbxref_files: + dbxref_values = {} + for fn in dbxref_files: + dbtag = op.basename(fn).rsplit(".", 1)[0] + dbxref_values[dbtag] = DictFile(fn, delimiter="\t", strict=strict) + mod_attrs.add("Dbxref") + + if remove_attrs: + mod_remove_attrs = [] + for remove_attr in remove_attrs: + if remove_attr in mod_attrs: + mod_remove_attrs.append(remove_attr) + + if mod_remove_attrs: + logger.error( + "Attributes `%s` cannot be removed and modified", + ",".join(mod_remove_attrs), + ) + sys.exit() + + if gsac: # setting gsac will force IDs to be unique + unique = True + notes = {} + + remove = set() + if ( + unique + or duptype + or remove_feats + or remove_feats_by_ID + or opts.multiparents == "merge" + or invent_name_attr + or make_gff_store + or invent_protein_feat + ): + if unique: + dupcounts = defaultdict(int) + seen = defaultdict(int) + newparentid = {} + elif duptype: + dupranges = AutoVivification() + skip = defaultdict(int) + if opts.multiparents == "merge": + merge_feats = AutoVivification() + if invent_name_attr: + ft = GffFeatureTracker() + elif copy_id_attr_to_name: + pass + if invent_protein_feat: + cds_track = {} + if opts.multiparents == "merge" or invent_name_attr: + make_gff_store = compute_signature = True + gff = Gff( + gffile, + keep_attr_order=(not opts.no_keep_attr_order), + make_gff_store=make_gff_store, + compute_signature=compute_signature, + strict=strict, + ) + for g in gff: + if process_ftype and g.type not in process_ftype: + continue + id = g.accn + if remove_feats and g.type in remove_feats: + remove.add(id) + if remove_feats_by_ID and id in remove_feats_by_ID: + remove.add(id) + if unique: + 
dupcounts[id] += 1 + elif duptype and g.type == duptype: + dupranges[g.seqid][id][g.idx] = (g.start, g.end) + if opts.multiparents == "merge" and g.type != "CDS": # don't merge CDS + pp = g.get_attr("Parent", first=False) + if pp and len(pp) > 0: + for parent in pp: + if parent not in remove: + sig = g.sign + if sig not in merge_feats: + merge_feats[sig]["parents"] = [] + if parent not in merge_feats[sig]["parents"]: + merge_feats[sig]["parents"].append(parent) + if invent_name_attr: + parent, iso = atg_name(g.get_attr("Parent"), retval="locus,iso") + if not parent: + parent = g.get_attr("Parent") + ft.track(parent, g) + if invent_protein_feat: + if g.type == "CDS": + cds_parent = g.get_attr("Parent") + if cds_parent not in cds_track: + cds_track[cds_parent] = [] + cds_track[cds_parent].append((g.start, g.end)) + + if opts.verifySO: + so, _ = GODag_from_SO() + valid_soterm = {} + + fw = must_open(outfile, "w") + if not make_gff_store: + gff = Gff(gffile, keep_attr_order=(not opts.no_keep_attr_order), strict=strict) + for g in gff: + if process_ftype and g.type not in process_ftype: + print(g, file=fw) + continue + + id = g.accn + + if opts.multiparents == "merge" and g.type != "CDS": # don't merge CDS + sig = g.sign + if len(merge_feats[sig]["parents"]) > 1: + if "candidate" not in merge_feats[sig]: + merge_feats[sig]["candidate"] = id + g.set_attr("Parent", merge_feats[sig]["parents"]) + else: + continue + + if remove_feats or remove_feats_by_ID: + if id in remove: + continue + else: + if "Parent" in g.attributes: + keep, parent = [], g.get_attr("Parent", first=False) + for i, pid in enumerate(parent): + if pid not in remove: + keep.append(parent[i]) + else: + remove.add(id) + if len(keep) == 0: + continue + parent = g.set_attr("Parent", keep) + + if remove_attrs: + for remove_attr in remove_attrs: + if remove_attr in g.attributes: + g.set_attr(remove_attr, None) + + if opts.verifySO: + if g.type not in valid_soterm: + valid_soterm[g.type] = validate_term( + 
g.type, so=so, method=opts.verifySO + ) + ntype = valid_soterm[g.type] + if ntype and g.type != ntype: + g.type = ntype + + origid = g.seqid + if fixphase: + phase = g.phase + g.phase = phaseT.get(phase, phase) + + if mapfile: + if isinstance(mapping, dict): + if origid in mapping: + g.seqid = mapping[origid] + else: + logger.error("%s not found in `%s`. ID unchanged.", origid, mapfile) + else: + g.seqid = mapfile + + if source: + if isinstance(source, dict) and g.source in source: + g.source = source[g.source] + else: + g.source = source + + if names: + if id in names: + g.set_attr("Name", names[id]) + + if note: + name = g.get_attr("Name") + tag = None + if id in note: + tag = note[id] + elif name and name in note: + tag = note[name] + + if tag: + g.set_attr("Note", tag, update=False) + + if attrib_files: + for attr_name in attr_values: + name = g.get_attr("Name") + if id in attr_values[attr_name]: + g.set_attr(attr_name, attr_values[attr_name][id]) + elif name and name in attr_values[attr_name]: + g.set_attr(attr_name, attr_values[attr_name][name]) + + if dbxref_files: + for dbtag in dbxref_values: + if id in dbxref_values[dbtag]: + g.set_attr( + "Dbxref", dbxref_values[dbtag][id], dbtag=dbtag, append=True + ) + + if unique: + if dupcounts[id] > 1: + seen[id] += 1 + old_id = id + id = "{0}-{1}".format(old_id, seen[old_id]) + newparentid[old_id] = id + g.set_attr("ID", id) + + if "Parent" in g.attributes: + parent = g.attributes["Parent"][0] + if dupcounts[parent] > 1: + g.set_attr("Parent", newparentid[parent]) + + if duptype: + if duptype == g.type and len(dupranges[g.seqid][id]) > 1: + p = sorted(dupranges[g.seqid][id]) + s, e = dupranges[g.seqid][id][p[0]][ + 0:2 + ] # get coords of first encountered feature + if g.start == s and g.end == e and p[0] == g.idx: + r = [dupranges[g.seqid][id][x] for x in dupranges[g.seqid][id]] + g.start, g.end = range_minmax(r) + else: + skip[(g.seqid, g.idx, id, g.start, g.end)] = 1 + + if gsac and g.type == "gene": + notes[id] 
= g.attributes["Name"] + + if ftype: + if isinstance(ftype, dict) and g.type in ftype: + g.type = ftype[g.type] + else: + g.type = ftype + + if invent_name_attr: + ft.store_symbol(g) + if re.search(ft.ftype, g.type): + parent, iso = atg_name(g.get_attr("Parent"), retval="locus,iso") + if not parent: + parent = g.get_attr("Parent") + if parent in ft.tracker: + fidx = ft.feat_index( + parent, g.type, g.strand, (g.start, g.end, g.sign) + ) + symbol = ft.get_symbol(parent) + attr = "ID" if symbol == parent else "Name" + g.set_attr(attr, "{0}:{1}:{2}".format(symbol, g.type, fidx + 1)) + if opts.multiparents == "merge" and attr == "Name": + g.set_attr("ID", "{0}:{1}:{2}".format(parent, g.type, fidx + 1)) + elif copy_id_attr_to_name: + if "Name" not in g.attributes.keys(): + g.set_attr("Name", g.get_attr("ID")) + + protein_feat = None + if invent_protein_feat: + if g.type == "mRNA": + if id in cds_track: + pstart, pstop = range_minmax(cds_track[id]) + protein_feat = GffLine( + "\t".join( + str(x) + for x in [ + g.seqid, + g.source, + "protein", + pstart, + pstop, + ".", + g.strand, + ".", + "ID={0}-Protein;Name={0};Derives_from={0}".format(id), + ] + ) + ) + elif g.type == "CDS": + parent = g.get_attr("Parent") + if parent in cds_track: + _parent = [parent, "{0}-Protein".format(parent)] + g.set_attr("Parent", _parent) + + pp = g.get_attr("Parent", first=False) + if ( + opts.multiparents == "split" and (pp and len(pp) > 1) and g.type != "CDS" + ): # separate features with multiple parents + id = g.get_attr("ID") + for i, parent in enumerate(pp): + if id: + g.set_attr("ID", "{0}-{1}".format(id, i + 1)) + g.set_attr("Parent", parent, update=True, urlquote=True) + if gsac: + fix_gsac(g, notes) + print(g, file=fw) + else: + if g.gff3 and not opts.gff3: + opts.gff3 = True + g.update_attributes(gff3=opts.gff3) + if gsac: + fix_gsac(g, notes) + if duptype == g.type and skip[(g.seqid, g.idx, id, g.start, g.end)] == 1: + continue + print(g, file=fw) + if g.type == "mRNA" and 
def fixboundaries(args):
    """
    %prog fixboundaries gffile --type="gene" --child_ftype="mRNA" > gffile.fixed

    Adjust the boundary coordinates of parents features based on
    range chained child features, extracting their min and max values
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser), so implementation notes live in comments.
    p = OptionParser(fixboundaries.__doc__)
    p.add_argument(
        "--type",
        default="gene",
        type=str,
        help="Feature type for which to adjust boundaries",
    )
    p.add_argument(
        "--child_ftype",
        default="mRNA",
        type=str,
        help="Child featuretype(s) to use for identifying boundaries",
    )
    p.set_outfile()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (gffile,) = args
    gffdb = make_index(gffile)
    # --child_ftype accepts a comma-separated list; split it once, not per feature
    child_ftypes = opts.child_ftype.split(",")

    fw = must_open(opts.outfile, "w")
    for f in gffdb.all_features(order_by=("seqid", "start")):
        if f.featuretype == opts.type:
            child_coords = [
                (c.start, c.stop)
                for cftype in child_ftypes
                for c in gffdb.children(f, featuretype=cftype, order_by="start")
            ]
            # A parent with no children of the requested type(s) has nothing to
            # derive boundaries from; keep its coordinates as they are instead
            # of handing an empty list to range_minmax (defined elsewhere in
            # the project; presumably a min/max over the ranges - verify).
            if child_coords:
                f.start, f.stop = range_minmax(child_coords)

        # every feature is written out, adjusted or not
        print(f, file=fw)

    fw.close()
def match_Nth_child(f1c, f2c, N=1, slop=False):
    """
    Compare the N-th (1-based) child feature of two child lists.

    f1c, f2c: indexable sequences of child features; each feature is read for
        `featuretype`, `strand`, `start` and `stop`.
    N: 1-based position of the child to compare in both lists.
    slop: when True, a terminal child only has to agree on its inner boundary:
        the first child ("F") is compared on `stop`, the last child ("L") on
        `start`. Which end a lone UTR child represents is derived from its
        featuretype prefix (`five_prime` or not) and strand.

    Returns True/False. Without `slop`, or when no terminal position can be
    determined, the result is the exact span comparison `match_span(f1, f2)`.
    A lone `exon` child under `slop` returns the negation of the exact span
    match, as in the original implementation.
    """
    i = N - 1
    f1, f2 = f1c[i], f2c[i]

    if slop:
        # Npos stays None when the child is not recognisably terminal (lone UTR
        # child with a strand other than +/-, lone child of another type, or N
        # strictly inside the list). Previously that path read an unassigned
        # local and raised UnboundLocalError; it now falls through to the
        # strict span comparison below.
        Npos = None
        if 1 == len(f1c):
            if f1.featuretype.endswith("UTR"):
                if f1.strand == "+":
                    Npos = "F" if f1.featuretype.startswith("five_prime") else "L"
                elif f1.strand == "-":
                    Npos = "L" if f1.featuretype.startswith("five_prime") else "F"
            elif f1.featuretype == "exon":
                return not match_span(f1, f2)
        elif N == 1:
            Npos = "F"
        elif N == len(f1c):
            Npos = "L"

        if Npos == "F":
            return f1.stop == f2.stop
        elif Npos == "L":
            return f1.start == f2.start

    return match_span(f1, f2)
def import_feats(gffile, type="gene"):
    """
    Load the features of one type from a GFF file, ordered by position.

    gffile: path handed to `Gff` for parsing.
    type: feature type to keep; every other record is dropped.

    Returns a list of the matching records sorted by (seqid, start).
    """
    selected = [feat for feat in Gff(gffile) if feat.type == type]
    logger.debug("A total of %d %s features imported.", len(selected), type)
    return sorted(selected, key=lambda feat: (feat.seqid, feat.start))
+ """ + supported_modes = ("span", "score") + p = OptionParser(uniq.__doc__) + p.add_argument("--type", default="gene", help="Types of features to non-redundify") + p.add_argument("--mode", default="span", choices=supported_modes, help="Pile mode") + p.add_argument("--best", default=1, type=int, help="Use best N features") + p.add_argument( + "--name", + default=False, + action="store_true", + help="Non-redundify Name attribute", + ) + p.add_argument( + "--iter", + default="2", + choices=("1", "2"), + help="Number of iterations to grab children", + ) + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gffile,) = args + mode = opts.mode + bestn = opts.best + + allgenes = import_feats(gffile, opts.type) + g = get_piles(allgenes) + + bestids = set() + for group in g: + if mode == "span": + sort_key = lambda x: -x.span + else: + sort_key = lambda x: -float(x.score) + + group.sort(key=sort_key) + seen = set() + for x in group: + if len(seen) >= bestn: + break + + name = x.attributes["Name"][0] if opts.name else x.accn + if name in seen: + continue + + seen.add(name) + bestids.add(x.accn) + + populate_children(opts.outfile, bestids, gffile, iter=opts.iter) + + +def populate_children(outfile, ids, gffile, iter="2", types=None): + ids = set(ids) + fw = must_open(outfile, "w") + logger.debug("A total of %d features selected.", len(ids)) + logger.debug("Populate children. Iteration 1..") + gff = Gff(gffile) + children = set() + for g in gff: + if types and g.type in types: + ids.add(g.accn) + if "Parent" not in g.attributes: + continue + for parent in g.attributes["Parent"]: + if parent in ids: + children.add(g.accn) + + if iter == "2": + logger.debug("Populate grand children. 
def sort(args):
    """
    %prog sort gffile

    Sort gff file using plain old unix sort based on [chromosome, start coordinate].
    or topologically based on hierarchy of features using the gt (genometools) toolkit
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser), so implementation notes live in comments.
    valid_sort_methods = ("unix", "topo")

    p = OptionParser(sort.__doc__)
    p.add_argument(
        "--method",
        default="unix",
        choices=valid_sort_methods,
        help="Specify sort method",
    )
    p.add_argument(
        "-i",
        dest="inplace",
        default=False,
        action="store_true",
        help="If doing a unix sort, perform sort inplace",
    )
    p.set_tmpdir()
    p.set_outfile()
    p.set_home("gt")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (gffile,) = args
    sortedgff = opts.outfile
    if opts.inplace:
        # In-place only makes sense for a unix sort of a real file on disk.
        if opts.method == "topo" or (
            opts.method == "unix" and gffile in ("-", "stdin")
        ):
            logger.error(
                "Cannot perform inplace sort when method is `topo`"
                + " or method is `unix` and input is `stdin` stream"
            )
            sys.exit()

    if opts.method == "unix":
        # sort by seqid (column 1), then numerically by start (column 4)
        cmd = "sort"
        cmd += " -k1,1 -k4,4n {0}".format(gffile)
        if opts.tmpdir:
            cmd += " -T {0}".format(opts.tmpdir)
        if opts.inplace:
            # Fixed: this used to be `" -o {0}".gffile`, an attribute access on
            # a str that raised AttributeError whenever -i was given.
            cmd += " -o {0}".format(gffile)
            # sort writes the file itself via -o, so no outfile for sh()
            sortedgff = None
        sh(cmd, outfile=sortedgff)
    elif opts.method == "topo":
        GT_HOME = opts.gt_home
        if not op.isdir(GT_HOME):
            logger.error("GT_HOME=%s directory does not exist", GT_HOME)
            sys.exit()
        cmd = "{0}".format(op.join(GT_HOME, "bin", "gt"))
        cmd += " gff3 -sort -tidy -retainids -addids no {0}".format(gffile)
        sh(cmd, outfile=sortedgff)
written.", nfeats) + + +def frombed(args): + """ + %prog frombed bed_file [--options] > gff_file + + Convert bed to gff file. In bed, the accn will convert to key='ID' + Default type will be `match` and default source will be `source` + """ + p = OptionParser(frombed.__doc__) + p.add_argument("--type", default="match", help="GFF feature type") + p.add_argument("--source", default="default", help="GFF source qualifier") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (bedfile,) = args + bed = Bed(bedfile) + + for b in bed: + print(b.gffline(type=opts.type, source=opts.source)) + + +def fromsoap(args): + """ + %prog fromsoap soapfile > gff_file + + """ + p = OptionParser(fromsoap.__doc__) + p.add_argument("--type", default="nucleotide_match", help="GFF feature type") + p.add_argument("--source", default="soap", help="GFF source qualifier") + p.set_fixchrnames(orgn="maize") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (soapfile,) = args + pad0 = len(str(sum(1 for line in open(soapfile)))) + + fw = must_open(opts.outfile, "w") + fp = must_open(soapfile) + for idx, line in enumerate(fp): + if opts.fix_chr_name: + from jcvi.utils.cbook import fixChromName + + line = fixChromName(line, orgn=opts.fix_chr_name) + + atoms = line.strip().split("\t") + attributes = "ID=match{0};Name={1}".format(str(idx).zfill(pad0), atoms[0]) + start, end = int(atoms[8]), int(atoms[5]) + int(atoms[8]) - 1 + seqid = atoms[7] + + print( + "\t".join( + str(x) + for x in ( + seqid, + opts.source, + opts.type, + start, + end, + ".", + atoms[6], + ".", + attributes, + ) + ), + file=fw, + ) + + +def gtf(args): + """ + %prog gtf gffile + + Convert gff to gtf file. In gtf, only exon/CDS features are important. The + first 8 columns are the same as gff, but in the attributes field, we need to + specify "gene_id" and "transcript_id". 
+ """ + p = OptionParser(gtf.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gffile,) = args + gff = Gff(gffile) + transcript_info = AutoVivification() + for g in gff: + if g.type.endswith(("RNA", "transcript")): + if "ID" in g.attributes and "Parent" in g.attributes: + transcript_id = g.get_attr("ID") + gene_id = g.get_attr("Parent") + elif "mRNA" in g.attributes and "Gene" in g.attributes: + transcript_id = g.get_attr("mRNA") + gene_id = g.get_attr("Gene") + else: + transcript_id = g.get_attr("ID") + gene_id = transcript_id + transcript_info[transcript_id]["gene_id"] = gene_id + transcript_info[transcript_id]["gene_type"] = g.type + continue + + if g.type not in valid_gff_to_gtf_type.keys(): + continue + + try: + transcript_id = g.get_attr("Parent", first=False) + except IndexError: + transcript_id = g.get_attr("mRNA", first=False) + + g.type = valid_gff_to_gtf_type[g.type] + for tid in transcript_id: + if tid not in transcript_info: + continue + gene_type = transcript_info[tid]["gene_type"] + if not gene_type.endswith("RNA") and not gene_type.endswith("transcript"): + continue + gene_id = transcript_info[tid]["gene_id"] + g.attributes = OrderedDict( + [("gene_id", [gene_id]), ("transcript_id", [tid])] + ) + g.update_attributes(gtf=True, urlquote=False) + + print(g) + + +def merge(args): + """ + %prog merge gffiles + + Merge several gff files into one. When only one file is given, it is assumed + to be a file with a list of gff files. 
+ """ + p = OptionParser(merge.__doc__) + p.add_argument( + "--seq", + default=False, + action="store_true", + help="Print FASTA sequences at the end", + ) + p.set_outfile() + + opts, args = p.parse_args(args) + + nargs = len(args) + if nargs < 1: + sys.exit(not p.print_help()) + + if nargs == 1: + (listfile,) = args + fp = open(listfile) + gffiles = [x.strip() for x in fp] + else: + gffiles = args + + outfile = opts.outfile + + deflines = set() + fw = must_open(outfile, "w") + fastarecs = {} + for gffile in natsorted(gffiles, key=lambda x: op.basename(x)): + logger.debug(gffile) + fp = open(gffile) + for row in fp: + row = row.rstrip() + if not row or row[0] == "#": + if row == FastaTag: + break + if row in deflines: + continue + else: + deflines.add(row) + + print(row, file=fw) + + if not opts.seq: + continue + + f = Fasta(gffile, lazy=True) + for key, rec in f.iteritems_ordered(): + if key in fastarecs: + continue + fastarecs[key] = rec + + if opts.seq: + print(FastaTag, file=fw) + SeqIO.write(fastarecs.values(), fw, "fasta") + + fw.close() + + +def extract(args): + """ + %prog extract gffile + + --contigs: Extract particular contig(s) from the gff file. If multiple contigs are + involved, use "," to separate, e.g. "contig_12,contig_150"; or provide a file + with multiple contig IDs, one per line + --names: Process particular ID(s) from the gff file. 
If multiple IDs are + involved, use "," to separate; or provide a file with multiple IDs, one per line + """ + p = OptionParser(extract.__doc__) + p.add_argument("--contigs", help="Extract features from certain contigs") + p.add_argument("--names", help="Extract features with certain names") + p.add_argument( + "--types", + type=str, + default=None, + help="Extract features of certain feature types", + ) + p.add_argument( + "--children", + default=0, + choices=["1", "2"], + help="Specify number of iterations: `1` grabs children, " + + "`2` grabs grand-children", + ) + p.add_argument("--tag", default="ID", help="Scan the tags for the names") + p.add_argument( + "--fasta", default=False, action="store_true", help="Write FASTA if available" + ) + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gffile,) = args + contigfile = opts.contigs + namesfile = opts.names + typesfile = opts.types + nametag = opts.tag + + contigID = parse_multi_values(contigfile) + names = parse_multi_values(namesfile) + types = parse_multi_values(typesfile) + outfile = opts.outfile + + if opts.children: + assert types is not None or names is not None, "Must set --names or --types" + if names is None: + names = list() + populate_children(outfile, names, gffile, iter=opts.children, types=types) + return + + fp = must_open(gffile) + fw = must_open(opts.outfile, "w") + for row in fp: + atoms = row.split() + if len(atoms) == 0: + continue + tag = atoms[0] + if row[0] == "#": + if row.strip() == "###": + continue + if not (tag == RegionTag and contigID and atoms[1] not in contigID): + print(row.rstrip(), file=fw) + if tag == FastaTag: + break + continue + + b = GffLine(row) + attrib = b.attributes + if contigID and tag not in contigID: + continue + if types and b.type in types: + _id = b.accn + if _id not in names: + names.append(_id) + if names is not None: + if nametag not in attrib: + continue + if attrib[nametag][0] not in names: + 
continue + + print(row.rstrip(), file=fw) + + if not opts.fasta: + return + + f = Fasta(gffile) + for s in contigID: + if s in f: + SeqIO.write([f[s]], fw, "fasta") + + +def split(args): + """ + %prog split gffile outdir + + Split the gff into one contig per file. Will also take sequences if the file + contains FASTA sequences. + """ + p = OptionParser(split.__doc__) + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + gffile, outdir = args + mkdir(outdir) + + g = Gff(gffile) + seqids = g.seqids + + for s in seqids: + outfile = op.join(outdir, s + ".gff") + extract([gffile, "--contigs=" + s, "--outfile=" + outfile]) + + +def note(args): + """ + %prog note gffile > tabfile + + Extract certain attribute field for each feature. + """ + p = OptionParser(note.__doc__) + p.add_argument( + "--type", + default=None, + help="Only process certain types, multiple types allowed with comma", + ) + p.add_argument( + "--attribute", + default="Parent,Note", + help="Attribute field to extract, multiple fields allowd with comma", + ) + p.add_argument("--AED", type=float, help="Only extract lines with AED score <=") + p.add_argument( + "--exoncount", + default=False, + action="store_true", + help="Get the exon count for each mRNA feat", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gffile,) = args + type = opts.type + if type: + type = type.split(",") + + exoncounts = {} + if opts.exoncount: + g = make_index(gffile) + for feat in g.features_of_type("mRNA"): + nexons = 0 + for c in g.children(feat.id, 1): + if c.featuretype != "exon": + continue + nexons += 1 + exoncounts[feat.id] = nexons + + attrib = opts.attribute.split(",") + + gff = Gff(gffile) + seen = set() + AED = opts.AED + for g in gff: + if type and g.type not in type: + continue + if AED is not None and float(g.attributes["_AED"][0]) > AED: + continue + keyval = [g.accn] + [ + ",".join(g.attributes.get(x, ["nan"])) for x in attrib + 
] + if exoncounts: + nexons = exoncounts.get(g.accn, 0) + keyval.append(str(nexons)) + keyval = tuple(keyval) + if keyval not in seen: + print("\t".join(keyval)) + seen.add(keyval) + + +def splicecov(args): + """ + %prog splicecov annotation.gff3 junctions.bed + + Given an annotation GFF file (containing introns) and a + TopHat junctions.bed file (preprocessed using formats.bed.juncs(), + each intron gets tagged with the JUNC identifier and read coverage. + + Output is a summary table listing for each gene locus, the isoform number, + number of splice junctions and {average, median, min & max} read coverage + across the junctions. + """ + from tempfile import mkstemp + from pybedtools import BedTool + from jcvi.utils.cbook import SummaryStats + + p = OptionParser(splicecov.__doc__) + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + ( + gfffile, + juncsbed, + ) = args + tagged = "{0}.{1}.gff3".format(gfffile.rsplit(".", 1)[0], "tag_introns") + + gff3, junc = BedTool(gfffile), BedTool(juncsbed) + ab = gff3.intersect(junc, wao=True, f=1.0, s=True) + abfh = must_open(ab.fn) + + seen = set() + scov = AutoVivification() + + fh, tmpgff = mkstemp(suffix=".gff3") + fw = must_open(tmpgff, "w") + for line in abfh: + args = line.strip().split("\t") + g = GffLine("\t".join(str(x) for x in args[:9])) + if g.type == "intron" and args[10] != -1: + ispan, jspan = g.span, int(args[11]) - int(args[10]) + if ispan == jspan: + g.set_attr("ID", args[12], update=True) + g.score = int(args[13]) + + pparts = g.get_attr("Parent").split(".") + locus, iso = pparts[0], ".".join(pparts[1:]) + seen.add(iso) + if not scov[locus][iso]: + scov[locus][iso] = [] + scov[locus][iso].append(g.score) + else: + continue + print(g, file=fw) + fw.close() + + format([tmpgff, "--unique", "-o", tagged]) + os.unlink(tmpgff) + + isos = sorted(list(seen)) + fw = must_open(opts.outfile, "w") + h1, h2, stats = ["#"], ["#locus"], ["N", "mean", "median", 
"min", "max"] + for iso in isos: + h1.extend([str(iso)] + [""] * (len(stats) - 1)) + h2.extend(stats) + print("\t".join(str(x) for x in h1), file=fw) + print("\t".join(str(x) for x in h2), file=fw) + for locus in scov.keys(): + out = [locus] + for iso in isos: + if iso in scov[locus].keys(): + juncs = scov[locus][iso] + jstats = SummaryStats(juncs, dtype=int) + out.extend( + [jstats.size, jstats.mean, jstats.median, jstats.min, jstats.max] + ) + else: + out.extend(["-"] * len(stats)) + print("\t".join(str(x) for x in out), file=fw) + fw.close() + + +def bed(args): + """ + %prog bed gff_file [--options] + + Parses the start, stop locations of the selected features out of GFF and + generate a bed file + """ + p = OptionParser(bed.__doc__) + p.add_argument( + "--type", + dest="type", + default="gene", + help="Feature type to extract, use comma for multiple, and `all` for all", + ) + p.add_argument("--key", default="ID", help="Key in the attributes to extract") + p.add_argument("--accn", help="Use fixed accn in the 4th column") + p.add_argument("--source", help="Source to extract from, use comma for multiple") + p.add_argument( + "--span", + default=False, + action="store_true", + help="Use feature span in the score column", + ) + p.add_argument( + "--score_attrib", + dest="score_attrib", + default=False, + help="Attribute whose value is to be used as score in `bedline`", + ) + p.add_argument( + "--append_source", + default=False, + action="store_true", + help="Append GFF source name to extracted key value", + ) + p.add_argument( + "--append_ftype", + default=False, + action="store_true", + help="Append GFF feature type to extracted key value", + ) + p.add_argument( + "--append_attrib", + default=None, + help="Append attribute to extracted key value", + ) + p.add_argument( + "--nosort", + default=False, + action="store_true", + help="Do not sort the output bed file", + ) + p.add_argument( + "--primary_only", + default=False, + action="store_true", + help="Only retains 
a single transcript per gene", + ) + p.add_argument( + "--parent_key", + default="Parent", + help="Parent gene key to group with --primary_only", + ) + p.add_argument( + "--human_chr", + default=False, + action="store_true", + help="Only allow 1-22XY, and add `chr` prefix to seqid", + ) + p.add_argument( + "--ensembl_cds", + default=False, + action="store_true", + help="Use transcript_name.exon_number as accn", + ) + p.set_outfile() + + opts, args = p.parse_args(args) + if len(args) != 1: + sys.exit(not p.print_help()) + + (gffile,) = args + key = opts.key or None + accn = opts.accn + span = opts.span + primary_only = opts.primary_only + parent_key = opts.parent_key + human_chr = opts.human_chr + ensembl_cds = opts.ensembl_cds + if opts.type and opts.type != "all": + type = set(x.strip() for x in opts.type.split(",")) + else: + type = set() + if opts.source: + source = set(x.strip() for x in opts.source.split(",")) + else: + source = set() + if ensembl_cds: + type = {"CDS"} + + gff = Gff( + gffile, + key=key, + parent_key=parent_key, + append_source=opts.append_source, + append_ftype=opts.append_ftype, + append_attrib=opts.append_attrib, + score_attrib=opts.score_attrib, + ) + b = Bed() + seen_parents = set() # used with --primary_only + seen = set() # used with --ensembl_cds + skipped_identical_range = 0 + skipped_non_primary = 0 + + for g in gff: + if type and g.type not in type: + continue + if source and g.source not in source: + continue + if primary_only: + if g.parent in seen_parents: + skipped_non_primary += 1 + continue + elif g.parent: + seen_parents.add(g.parent) + + bl = g.bedline + if accn: + bl.accn = accn + if span: + bl.score = bl.span + if human_chr: + if bl.seqid not in VALID_HUMAN_CHROMOSMES: + continue + bl.seqid = "chr" + bl.seqid + if ensembl_cds: + if g.get_attr("gene_biotype") != "protein_coding": + continue + bl.accn = "{0}.{1}".format( + g.get_attr("transcript_name"), g.get_attr("exon_number") + ) + position = (bl.seqid, bl.start, bl.end) 
+ if position in seen: + skipped_identical_range += 1 + continue + seen.add(position) + + b.append(bl) + + sorted = not opts.nosort + b.print_to_file(opts.outfile, sorted=sorted) + logger.debug( + "Extracted %d features (type=%s id=%s parent=%s)", + len(b), + ",".join(type), + key, + parent_key, + ) + if primary_only: + logger.debug("Skipped non-primary: %d", skipped_non_primary) + if ensembl_cds: + logger.debug("Skipped due to identical range: %d", skipped_identical_range) + + +def make_index(gff_file): + """ + Make a sqlite database for fast retrieval of features. + """ + import gffutils + + db_file = gff_file + ".db" + + if need_update(gff_file, db_file): + cleanup(db_file) + logger.debug("Indexing `%s`", gff_file) + gffutils.create_db(gff_file, db_file, merge_strategy="create_unique") + else: + logger.debug("Load index `%s`", gff_file) + + return gffutils.FeatureDB(db_file) + + +def get_parents(gff_file, parents): + gff = Gff(gff_file) + for g in gff: + if g.type not in parents: + continue + yield g + + +def children(args): + """ + %prog children gff_file + + Get the children that have the same parent. + """ + p = OptionParser(children.__doc__) + p.add_argument( + "--parents", + default="gene", + help="list of features to extract, use comma to separate (e.g. 'gene,mRNA')", + ) + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gff_file,) = args + g = make_index(gff_file) + parents = set(opts.parents.split(",")) + + for feat in get_parents(gff_file, parents): + cc = [c.id for c in g.children(feat.id, 1)] + if len(cc) <= 1: + continue + + print("\t".join(str(x) for x in (feat.id, feat.start, feat.stop, "|".join(cc)))) + + +def load(args): + """ + %prog load gff_file fasta_file [--options] + + Parses the selected features out of GFF, with subfeatures concatenated. 
+ For example, to get the CDS sequences, do this: + $ %prog load athaliana.gff athaliana.fa --parents mRNA --children CDS + + To get 500bp upstream of a genes Transcription Start Site (TSS), do this: + $ %prog load athaliana.gff athaliana.fa --feature=upstream:TSS:500 + + Switch TSS with TrSS for Translation Start Site. + + To get 500bp downstream of a gene's Transcription End Site (TES), do this: + $ %prog load athaliana.gff athaliana.fa --feature=downstream:TES:500 + + To get up- or downstream sequences of a certain max length not overlapping + with the next feature, use `--avoidFeatures`. Features may be avoided on both + strands or on the strand containing each feature, use either "both_strands" or + "strand_specific" + $ %prog load athaliana.gff athaliana.fa --feature=downstream:TES:500 --avoidFeatures=both_strands + """ + from datetime import datetime as dt + from jcvi.formats.fasta import Seq, SeqRecord + + # can request output fasta sequence id to be picked from following attributes + valid_id_attributes = ["ID", "Name", "Parent", "Alias", "Target", "orig_protein_id"] + + valid_avoid_features = ["both_strands", "strand_specific"] + + p = OptionParser(load.__doc__) + p.add_argument( + "--parents", + dest="parents", + default="mRNA", + help="list of features to extract, use comma to separate (e.g." + + "'gene,mRNA')", + ) + p.add_argument( + "--children", + dest="children", + default="CDS", + help="list of features to extract, use comma to separate (e.g." + + "'five_prime_UTR,CDS,three_prime_UTR')", + ) + p.add_argument( + "--feature", + dest="feature", + help="feature type to extract (e.g. `--feature=CDS`). Extract " + + "up- or downstream using " + + "upstream|downstream:TSS|TrSS|TES|TrES:length " + + "(e.g. 
`--feature=upstream:TSS:500`)", + ) + p.add_argument( + "--avoidFeatures", + default=None, + choices=["both_strands", "strand_specific"], + help="Specify whether or not to avoid up or downstream features", + ) + p.add_argument( + "--id_attribute", + choices=valid_id_attributes, + help="The attribute field to extract and use as FASTA sequence ID", + ) + p.add_argument( + "--desc_attribute", + default="Note", + help="The attribute field to extract and use as FASTA sequence description", + ) + p.add_argument( + "--full_header", + default=None, + choices=["default", "tair"], + help="Specify if full FASTA header (with seqid, coordinates and datestamp) should be generated", + ) + + g1 = p.add_argument_group("Optional parameters (if generating full header)") + g1.add_argument( + "--sep", + dest="sep", + default=" ", + help="Specify separator used to delimiter header elements", + ) + g1.add_argument( + "--datestamp", + dest="datestamp", + help="Specify a datestamp in the format YYYYMMDD or automatically pick `today`", + ) + g1.add_argument( + "--conf_class", + dest="conf_class", + default=False, + action="store_true", + help="Specify if `conf_class` attribute should be parsed and placed in the header", + ) + + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(p.print_help()) + + gff_file, fasta_file = args + + if opts.feature: + ( + opts.feature, + opts.parent, + opts.children, + site, + fLen, + flag, + error_msg, + ) = parse_feature_param(opts.feature) + if flag: + sys.exit(error_msg) + if opts.avoidFeatures: + if opts.avoidFeatures not in valid_avoid_features: + sys.exit("[error] avoidFeatures must be one of {valid_avoid_features}") + + parents = set(opts.parents.split(",")) + children_list = set(opts.children.split(",")) + + """ + In a situation where we want to extract sequence for only the top-level + parent feature, specify feature type of parent == child + """ + skipChildren = ( + True if 
len(parents.symmetric_difference(children_list)) == 0 else False + ) + + id_attr = opts.id_attribute + desc_attr = opts.desc_attribute + sep = opts.sep + + import gffutils + + g = make_index(gff_file) + f = Fasta(fasta_file, index=False) + seqlen = {} + for seqid, size in f.itersizes(): + seqlen[seqid] = size + + fw = must_open(opts.outfile, "w") + + for feat in get_parents(gff_file, parents): + desc = "" + if desc_attr: + fparent = ( + feat.attributes["Parent"][0] if "Parent" in feat.attributes else None + ) + if fparent: + try: + g_fparent = g[fparent] + except gffutils.exceptions.FeatureNotFoundError: + logger.error("%s not found in index .. skipped", fparent) + continue + if desc_attr in g_fparent.attributes: + desc = ",".join(g_fparent.attributes[desc_attr]) + if not desc and desc_attr in feat.attributes: + desc = ",".join(feat.attributes[desc_attr]) + + if opts.full_header: + desc_parts = [] + desc_parts.append(desc) + + if opts.conf_class and "conf_class" in feat.attributes: + desc_parts.append(feat.attributes["conf_class"][0]) + + if opts.full_header == "tair": + orient = "REVERSE" if feat.strand == "-" else "FORWARD" + feat_coords = "{0}:{1}-{2} {3} LENGTH=[LEN]".format( + feat.seqid, feat.start, feat.end, orient + ) + else: + (s, e) = ( + (feat.start, feat.end) + if (feat.strand == "+") + else (feat.end, feat.start) + ) + feat_coords = "{0}:{1}-{2}".format(feat.seqid, s, e) + desc_parts.append(feat_coords) + + datestamp = ( + opts.datestamp + if opts.datestamp + else "{0}{1}{2}".format(dt.now().year, dt.now().month, dt.now().day) + ) + desc_parts.append(datestamp) + + desc = sep.join(str(x) for x in desc_parts) + desc = "".join(str(x) for x in (sep, desc)).strip() + + if opts.feature == "upstream" or opts.feature == "downstream": + start, stop = get_coords( + opts.feature, site, fLen, seqlen[feat.seqid], feat, children_list, g + ) + + overlap = None + if opts.avoidFeatures: + stranded = opts.avoidFeatures == "strand_specific" + start, stop, overlap = 
update_coords_avoidFeatures( + stranded, opts.feature, site, fLen, start, stop, feat, g + ) + + if not start or not stop or overlap: + continue + + feat_seq = f.sequence( + dict( + chr=feat.seqid, + start=start, + stop=stop, + strand=feat.strand, + ) + ) + + (s, e) = (start, stop) if feat.strand == "+" else (stop, start) + seq_loc = str(feat.seqid) + ":" + str(s) + "-" + str(e) + desc = sep.join( + str(x) + for x in (desc, seq_loc, "FLANKLEN=" + str(abs(stop - start) + 1)) + ) + else: + children = [] + if not skipChildren: + for c in g.children(feat.id, 1): + if c.featuretype not in children_list: + continue + child = f.sequence( + dict(chr=c.chrom, start=c.start, stop=c.stop, strand=c.strand) + ) + children.append((child, c)) + + if not children: + print( + "[warning] %s has no children with type %s" + % (feat.id, ",".join(children_list)), + file=sys.stderr, + ) + continue + else: + child = f.sequence( + dict( + chr=feat.seqid, + start=feat.start, + stop=feat.end, + strand=feat.strand, + ) + ) + children.append((child, feat)) + + # sort children in incremental position + children.sort(key=lambda x: x[1].start) + # reverse children if negative strand + if feat.strand == "-": + children.reverse() + feat_seq = "".join(x[0] for x in children) + + desc = desc.replace('"', "") + + id = ( + ",".join(feat.attributes[id_attr]) + if id_attr and feat.attributes[id_attr] + else feat.id + ) + + if opts.full_header == "tair": + desc = desc.replace("[LEN]", str(len(feat_seq))) + + rec = SeqRecord(Seq(feat_seq), id=id, description=desc) + SeqIO.write([rec], fw, "fasta") + fw.flush() + + +def parse_feature_param(feature): + """ + Take the --feature param (coming from gff.load() and parse it. + Returns feature, parents and children terms. 
+ + Also returns length of up or downstream sequence (and start site) requested + + If erroneous, returns a flag and error message to be displayed on exit + """ + # can request up- or downstream sequence only from the following valid sites + valid_sites = ["TSS", "TrSS", "TES", "TrES"] + + site, fLen = None, None + flag, error_msg = None, None + parents, children = None, None + if re.match(r"upstream", feature) or re.match(r"downstream", feature): + parents, children = "mRNA", "CDS" + feature, site, fLen = re.search(r"([A-z]+):([A-z]+):(\S+)", feature).groups() + + if not is_number(fLen): + flag, error_msg = ( + 1, + "Error: len `" + fLen + "` should be an integer", + ) + + fLen = int(fLen) + if fLen < 0: + flag, error_msg = ( + 1, + "Error: len `" + str(fLen) + "` should be > 0", + ) + + if site not in valid_sites: + flag, error_msg = ( + 1, + f"Error: site `{site}` not valid. Please choose from {valid_sites}", + ) + elif feature == "upstream" and site not in ["TSS", "TrSS"]: + flag, error_msg = ( + 1, + f"Error: site `{site}` not valid for upstream. Please choose from `TSS TrSS`", + ) + elif feature == "downstream" and site not in ["TES", "TrES"]: + flag, error_msg = ( + 1, + f"Error: site `{site}` not valid for downstream. 
Please use `TES`", + ) + elif feature == "CDS": + parents, children = "mRNA", "CDS" + else: + flag, error_msg = 1, "Error: unrecognized option --feature=" + feature + + return feature, parents, children, site, fLen, flag, error_msg + + +def get_coords(feature, site, fLen, seqlen, feat, children_list, gffdb): + """ + Subroutine takes feature, site, length, reference sequence length, + parent mRNA feature (GffLine object), list of child feature types + and a GFFutils.GFFDB object as the input + + If upstream of TSS is requested, use the parent feature coords + to extract the upstream sequence + + If upstream of TrSS is requested, iterates through all the + children (CDS features stored in the sqlite GFFDB) and use child + feature coords to extract the upstream sequence + + If downstream of TES is requested, use parent feature coords to + extract the downstream sequence + + If downstream of TrES is requested, iterates through all the + children (CDS features stored in the sqlite GFFDB) and use child + feature coords to extract the downstream sequence + + If success, returns the start and stop coordinates + else, returns None + """ + if site in ["TSS", "TES"]: + if feature == "upstream" and site == "TSS": + (start, stop) = ( + (feat.start - fLen, feat.start - 1) + if feat.strand == "+" + else (feat.end + 1, feat.end + fLen) + ) + if feature == "downstream" and site == "TES": + (start, stop) = ( + (feat.end + 1, feat.end + fLen) + if feat.strand == "+" + else (feat.start - fLen, feat.start - 1) + ) + elif site in ["TrSS", "TrES"]: + children = [] + for c in gffdb.children(feat.id, 1): + if c.featuretype not in children_list: + continue + children.append((c.start, c.stop)) + + if not children: + print( + "[warning] %s has no children with type %s" + % (feat.id, ",".join(children_list)), + file=sys.stderr, + ) + return None, None + + cds_start, cds_stop = range_minmax(children) + if feature == "upstream" and site == "TrSS": + (start, stop) = ( + (cds_start - fLen, 
cds_start - 1) + if feat.strand == "+" + else (cds_stop + 1, cds_stop + fLen) + ) + elif feature == "downstream" and site == "TrES": + (start, stop) = ( + (cds_stop + 1, cds_stop + fLen) + if feat.strand == "+" + else (cds_start - fLen, cds_start - 1) + ) + + if feat.strand == "+" and start < 1: + start = 1 + elif feat.strand == "-" and stop > seqlen: + stop = seqlen + + actual_len = stop - start + 1 + + if actual_len < fLen: + print( + "[warning] sequence upstream of {0} ({1} bp) is less than upstream length {2}".format( + feat.id, actual_len, fLen + ), + file=sys.stderr, + ) + return None, None + + return start, stop + + +def update_coords_avoidFeatures( + stranded, feature, site, fLen, start, stop, feat, gffdb +): + """ + Subroutine takes start and stop coordinates for a given feature and updates the + coordinates to avoid overlapping with unrelated up- or downstream features. + + This is done on a strand-dependent or -independent manner based on the value of + --avoidFeatures. + + Returns, updated start and stop coordinates for loading sequences. + + Genes with overlapping neighbor features raise a flag and the feature is skipped. 
+ """ + flag = None + collisions = [] + s = feat.strand if stranded else (None) + + allChildren = [] + for c in gffdb.children(feat.parent): + allChildren.append(c.id) + + for r in gffdb.region(seqid=feat.seqid, start=start, end=stop, strand=s): + if r.id in allChildren or r.id == feat.parent: + continue + + if feature == "upstream" and feat.strand == "+": + collisions.append(r.end) + elif feature == "upstream" and feat.strand == "-": + collisions.append(r.start) + elif feature == "downstream" and feat.strand == "+": + collisions.append(r.start) + elif feature == "downstream" and feat.strand == "-": + collisions.append(r.end) + + if site in ["TrSS", "TrES"]: + children = [] + for c in gffdb.children(feat.id, 1): + if c.featuretype != "CDS": + continue + children.append((c.start, c.stop)) + + if not children: + feat_start = feat.start + feat_end = feat.end + else: + feat_start, feat_end = range_minmax(children) + else: + feat_start = feat.start + feat_end = feat.end + + # Identify up- or downstream features that overlap with the current feature. Skip these... + if len(collisions) > 0: + if feature == "upstream" and feat.strand == "+": + start = max(collisions) + if start > feat_start: + flag = 1 + elif feature == "upstream" and feat.strand == "-": + stop = min(collisions) + if stop < feat_end: + flag = 1 + elif feature == "downstream" and feat.strand == "+": + stop = min(collisions) + if stop < feat_end: + flag = 1 + elif feature == "downstream" and feat.strand == "-": + start = max(collisions) + if start > feat_start: + flag = 1 + + if flag: + print( + "Overlap detected while searching {0}. Skipping {1}:{2} strand:{3}".format( + feature, feat.parent, feat.id, feat.strand + ), + file=sys.stderr, + ) + else: + print( + "[avoidFeatures] a feature {0} of {1} is within {2} bp. 
Using {0} length of {3} bp".format( + feature, feat.id, fLen, abs(start - stop) + 1 + ), + file=sys.stderr, + ) + + return start, stop, flag + + +def bed12(args): + """ + %prog bed12 gffile > bedfile + + Produce bed12 file for coding features. The exons will be converted to blocks. + The CDS range will be shown between thickStart to thickEnd. For reference, + bed format consists of the following fields: + + 1. chrom + 2. chromStart + 3. chromEnd + 4. name + 5. score + 6. strand + 7. thickStart + 8. thickEnd + 9. itemRgb + 10. blockCount + 11. blockSizes + 12. blockStarts + """ + p = OptionParser(bed12.__doc__) + p.add_argument("--parent", default="mRNA", help="Top feature type") + p.add_argument("--block", default="exon", help="Feature type for regular blocks") + p.add_argument("--thick", default="CDS", help="Feature type for thick blocks") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (gffile,) = args + parent, block, thick = opts.parent, opts.block, opts.thick + outfile = opts.outfile + + g = make_index(gffile) + fw = must_open(outfile, "w") + + for f in g.features_of_type(parent): + chrom = f.chrom + chromStart = f.start - 1 + chromEnd = f.stop + name = f.id + score = 0 + strand = f.strand + # When there is no thick part, thickStart and thickEnd are usually set + # to the chromStart position + # + thickStart = chromStart + thickEnd = chromStart + blocks = [] + + for c in g.children(name, 1): + cstart, cend = c.start - 1, c.stop + + if c.featuretype == block: + blockStart = cstart - chromStart + blockSize = cend - cstart + blocks.append((blockStart, blockSize)) + + elif c.featuretype == thick: + thickStart = min(thickStart, cstart) + thickEnd = max(thickEnd, cend) + + blocks.sort() + blockStarts, blockSizes = zip(*blocks) + blockCount = len(blocks) + blockSizes = ",".join(str(x) for x in blockSizes) + "," + blockStarts = ",".join(str(x) for x in blockStarts) + "," + itemRgb = 0 + + print( + 
"\t".join( + str(x) + for x in ( + chrom, + chromStart, + chromEnd, + name, + score, + strand, + thickStart, + thickEnd, + itemRgb, + blockCount, + blockSizes, + blockStarts, + ) + ), + file=fw, + ) + + +if __name__ == "__main__": + main() diff --git a/jcvi/formats/html.py b/jcvi/formats/html.py new file mode 100644 index 00000000..657b7fca --- /dev/null +++ b/jcvi/formats/html.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Parse html pages. +""" +import os.path as op +import sys + +from urllib.parse import urljoin + +from BeautifulSoup import BeautifulSoup + +from ..apps.base import ActionDispatcher, OptionParser, download, logger + + +def main(): + + actions = ( + ("table", "convert HTML tables to csv"), + ("links", "extract all links from web page"), + ("gallery", "convert a folder of figures to a HTML table"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def gallery(args): + """ + %prog gallery folder link_prefix + + Convert a folder of figures to a HTML table. For example: + + $ python -m jcvi.formats.html gallery Paper-figures/ + https://dl.dropboxusercontent.com/u/15937715/Data/Paper-figures/ + + Maps the images from local to remote. + """ + from more_itertools import grouper + from jcvi.apps.base import iglob + + p = OptionParser(gallery.__doc__) + p.add_argument("--columns", default=3, type=int, help="How many cells per row") + p.add_argument("--width", default=200, type=int, help="Image width") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + folder, link_prefix = args + width = opts.width + images = iglob(folder, "*.jpg,*.JPG,*.png") + td = '{0}
' + print("") + for ims in grouper(images, opts.columns): + print(''.format(width + 5)) + for im in ims: + if not im: + continue + im = op.basename(im) + pf = im.split(".")[0].replace("_", "-") + link = link_prefix.rstrip("/") + "/" + im + print(td.format(pf, link, width)) + print("") + print("
") + + +def links(args): + """ + %prog links url + + Extract all the links "" from web page. + """ + p = OptionParser(links.__doc__) + p.add_argument( + "--img", + default=False, + action="store_true", + help="Extract tags", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (url,) = args + img = opts.img + + htmlfile = download(url) + page = open(htmlfile).read() + soup = BeautifulSoup(page) + + tag = "img" if img else "a" + src = "src" if img else "href" + aa = soup.findAll(tag) + for a in aa: + link = a.get(src) + link = urljoin(url, link) + print(link) + + +def unescape(s, unicode_action="replace"): + """ + Unescape HTML strings, and convert & etc. + """ + from html.parser import HTMLParser + + hp = HTMLParser.HTMLParser() + s = hp.unescape(s) + s = s.encode("ascii", unicode_action) + s = s.replace("\n", "").strip() + return s + + +def table(args): + """ + %prog table page.html + + Convert HTML tables to csv. + """ + import csv + + p = OptionParser(table.__doc__) + p.set_sep(sep=",") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (htmlfile,) = args + page = open(htmlfile).read() + soup = BeautifulSoup(page) + + for i, tabl in enumerate(soup.findAll("table")): + nrows = 0 + csvfile = htmlfile.rsplit(".", 1)[0] + ".{0}.csv".format(i) + writer = csv.writer(open(csvfile, "w"), delimiter=opts.sep) + rows = tabl.findAll("tr") + for tr in rows: + cols = tr.findAll("td") + if not cols: + cols = tr.findAll("th") + + row = [] + for td in cols: + try: + cell = "".join(td.find(text=True)) + cell = unescape(cell) + except TypeError: + cell = "" + row.append(cell) + writer.writerow(row) + nrows += 1 + logger.debug("Table with %d rows written to `%s`.", nrows, csvfile) + + +if __name__ == "__main__": + main() diff --git a/jcvi/formats/maf.py b/jcvi/formats/maf.py new file mode 100644 index 00000000..dd63b1d8 --- /dev/null +++ b/jcvi/formats/maf.py @@ -0,0 +1,286 @@ +#!/usr/bin/env python 
+# -*- coding: UTF-8 -*- + +""" +MAF format specification: + +""" +import sys + +from bisect import bisect +from dataclasses import dataclass + +from Bio import AlignIO +from Bio import SeqIO +from bx import interval_index_file +from bx.align import maf + +from ..apps.base import ActionDispatcher, OptionParser, need_update +from ..apps.lastz import blastz_score_to_ncbi_expectation, blastz_score_to_ncbi_bits + +from .base import BaseFile, logger + + +FLANK = 60 + + +class Maf(BaseFile, dict): + def __init__(self, filename, index=False): + super().__init__(filename) + + indexfile = filename + ".idx" + if index: + if need_update(filename, indexfile): + self.build_index(filename, indexfile) + + self.index = maf.Index(filename, indexfile) + + fp = open(filename) + self.reader = maf.Reader(fp) + + def build_index(self, filename, indexfile): + """ + Recipe from Brad Chapman's blog + + """ + indexes = interval_index_file.Indexes() + in_handle = open(filename) + + reader = maf.Reader(in_handle) + while True: + pos = reader.file.tell() + rec = next(reader) + if rec is None: + break + for c in rec.components: + indexes.add( + c.src, + c.forward_strand_start, + c.forward_strand_end, + pos, + max=c.src_size, + ) + + index_handle = open(indexfile, "w") + indexes.write(index_handle) + index_handle.close() + + +@dataclass +class Breakpoint: + arec: str + astart: int + brec: str + bstart: int + + def __str__(self): + return f"{self.arec}:{self.astart}-{self.brec}:{self.bstart}" + + +def main(): + + actions = ( + ("bed", "convert MAF to BED format"), + ("blast", "convert MAF to BLAST tabular format"), + ("breakpoints", "find breakpoints in MAF and 'simulate' chimeric contigs"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def breakpoints(args): + """ + %prog breakpoints A.B.maf A.fa B.fa AB 1000000 2000000 + + Find breakpoints in MAF and 'simulate' chimeric contigs in `AB.fa`. + Breakpoints are 'roughly' around the user defined positions. 
The idea is + to simulate chimeric contigs, which are useful for testing algorithms, + e.g. klassify. + """ + p = OptionParser(breakpoints.__doc__) + p.add_argument( + "--minsize", + default=10000, + type=int, + help="Minimum size of alignment to consider", + ) + opts, args = p.parse_args(args) + + if len(args) not in (5, 6): + sys.exit(not p.print_help()) + + maf_file, a_fasta, b_fasta, ab = args[:4] + bps = sorted(int(x) for x in args[4:]) + minsize = opts.minsize + + filtered_msa = [] + for msa in AlignIO.parse(maf_file, "maf"): + arec, brec = msa + if brec.annotations["size"] < minsize: + continue + filtered_msa.append((brec.annotations["start"], arec, brec)) + logger.info("Total alignments: %d", len(filtered_msa)) + + final = [] + # Load the sequences + ar = next(SeqIO.parse(a_fasta, "fasta")) + br = next(SeqIO.parse(b_fasta, "fasta")) + for bp in bps: + i = bisect(filtered_msa, (bp,)) + _, arec, brec = filtered_msa[i] + logger.info("%s", arec) + logger.info("%s", brec) + assert len(arec) == len(brec) + # Find the midpoint, safe to crossover there + midpoint = len(arec) // 2 + aseq = arec.seq[:midpoint] + astart = arec.annotations["start"] + len(aseq) - aseq.count("-") + logger.info("%s|%s", aseq[-FLANK:], arec.seq[midpoint:][:FLANK]) + bseq = brec.seq[:midpoint] + bstart = brec.annotations["start"] + len(bseq) - bseq.count("-") + logger.info("%s|%s", bseq[-FLANK:], brec.seq[midpoint:][:FLANK]) + bpt = Breakpoint(arec.id, astart, brec.id, bstart) + logger.info("-" * FLANK * 2 + ">") + logger.info("%s|%s", ar.seq[:astart][-FLANK:], br.seq[bstart:][:FLANK]) + final.append(bpt) + + logger.info("Breakpoints found: %s", final) + if len(final) == 2: + bp1, bp2 = final[:2] + # ====-------======= + # bp1 bp2 + abseq = ( + ar.seq[: bp1.astart] + + br.seq[bp1.bstart : bp2.bstart] + + ar.seq[bp2.astart :] + ) + elif len(final) == 1: + bp = final[0] + abseq = ar.seq[: bp.astart] + br.seq[bp.bstart :] + abrec = SeqIO.SeqRecord(abseq, id=ab, description="") + ab_fasta = 
f"{ab}.fa" + SeqIO.write([abrec], ab_fasta, "fasta") + logger.info("Writing to %s", ab_fasta) + + +def bed(args): + """ + %prog bed maffiles > out.bed + + Convert a folder of maf alignments to the bed features + then useful to check coverage, etc. + """ + p = OptionParser(bed.__doc__) + _, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(p.print_help()) + + flist = args + prefix = flist[0].split(".")[0] + + j = 0 + for f in flist: + reader = Maf(f).reader + for rec in reader: + a, b = rec.components + + for a, tag in zip((a, b), "ab"): + name = "{0}_{1:07d}{2}".format(prefix, j, tag) + print( + "\t".join( + str(x) + for x in ( + a.src, + a.forward_strand_start, + a.forward_strand_end, + name, + ) + ) + ) + + j += 1 + + +def alignment_details(a, b): + nmatch = 0 + nmismatch = 0 + ngaps = 0 + + assert len(a) == len(b) + l = len(a) + + for i in range(l): + if a[i] == b[i]: + nmatch += 1 + elif a[i] == "-" or b[i] == "-": + ngaps += 1 + else: + nmismatch += 1 + + pctid = 100.0 * nmatch / l + return pctid, nmismatch, ngaps + + +def maf_to_blast8(f): + """ + Convert a MAF file to BLAST tabular format. + """ + reader = Maf(f).reader + for rec in reader: + a, b = rec.components + query = a.src + subject = b.src + qstart = a.forward_strand_start + qstop = a.forward_strand_end + sstart = b.forward_strand_start + sstop = b.forward_strand_end + score = rec.score + + evalue = blastz_score_to_ncbi_expectation(score) + score = blastz_score_to_ncbi_bits(score) + evalue, score = "{0:.2g}".format(evalue), "{0:.1f}".format(score) + hitlen = len(a.text) + + pctid, nmismatch, ngaps = alignment_details(a.text, b.text) + print( + "\t".join( + str(x) + for x in ( + query, + subject, + pctid, + hitlen, + nmismatch, + ngaps, + qstart, + qstop, + sstart, + sstop, + evalue, + score, + ) + ) + ) + + +def blast(args): + """ + %prog blast maffiles > out.blast + + From a folder of .maf files, generate .blast file with tabular format. 
+ """ + p = OptionParser(blast.__doc__) + _, args = p.parse_args(args) + + if len(args) == 0: + sys.exit(p.print_help()) + + flist = args + + for f in flist: + maf_to_blast8(f) + + +if __name__ == "__main__": + main() diff --git a/jcvi/formats/obo.py b/jcvi/formats/obo.py new file mode 100755 index 00000000..001df4e6 --- /dev/null +++ b/jcvi/formats/obo.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +%prog obo_file + +Parses obo_file and plot GO lineage +""" +import sys + +from collections import deque +from functools import partial +from typing import IO, Optional + +from goatools.obo_parser import GODag + +from ..apps.base import OptionParser, logger + +GO_URL = "http://purl.obolibrary.org/obo/go/go-basic.obo" +SO_URL = ( + "http://obo.cvs.sourceforge.net/viewvc/obo/obo/ontology/genomic-proteomic/so.obo" +) + + +def load_GODag(obo_url: str, prt: Optional[IO] = None) -> (GODag, str): + """ + Load given obo url and returns GODag object. + + Args: + obo_url (str): URL to the remote OBO file. + prt (Optional[IO]): IO stream to print verbose information. + + Returns: + (GODag, str): GODag object that contains the dict, and path to the downloaded OBO file. 
+ """ + + from jcvi.apps.base import download + + so_file = download(obo_url, debug=False) + + return GODag(so_file, prt=prt), so_file + + +GODag_from_GO = partial(load_GODag, obo_url=GO_URL) +GODag_from_SO = partial(load_GODag, obo_url=SO_URL) + + +def validate_term(term, so=None, method="verify"): + """ + Validate an SO term against so.obo + """ + if so is None: + so, _ = GODag_from_SO() + + oterm = term + valid_names = set(x.name for x in so.values()) + if term not in valid_names: + if "resolve" in method: + if "_" in term: + tparts = deque(term.split("_")) + tparts.pop() if "prefix" in method else tparts.popleft() + nterm = "_".join(tparts).strip() + term = validate_term(nterm, so=so, method=method) + if term is None: + return None + else: + logger.error("Term `%s` does not exist", term) + sys.exit(1) + + if oterm != term: + logger.debug("Resolved term `%s` to `%s`", oterm, term) + return term + + +if __name__ == "__main__": + p = OptionParser(__doc__) + p.add_argument( + "--term", + help="Write the parents and children of this query term", + ) + + opts, args = p.parse_args() + + if len(args) != 1: + sys.exit(p.print_help()) + + (obo_file,) = args + + def description(record): + level = "level-{:>02}".format(record.level) + desc = "{} [{}]".format(record.name, record.namespace) + if record.is_obsolete: + desc += " obsolete" + alt_ids = ",".join(record.alt_ids) + return "\t".join((record.item_id, level, desc, alt_ids)) + + g = GODag(obo_file, prt=None) + header = "\t".join(("#id", "level", "name", "alt_ids")) + print(header) + for rec in sorted(set(g.values()), key=lambda x: x.item_id): + print(description(rec)) + + # run a test case + if opts.term: + rec = g.query_term(opts.term, verbose=True) + g.draw_lineage([rec]) diff --git a/jcvi/formats/paf.py b/jcvi/formats/paf.py new file mode 100644 index 00000000..aefc93b5 --- /dev/null +++ b/jcvi/formats/paf.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# +# paf.py +# formats +# +# Created by 
Haibao Tang on 09/03/20 +# Copyright © 2020 Haibao Tang. All rights reserved. +# + +import sys + +from ..apps.base import ActionDispatcher, OptionParser, logger + +from .base import must_open + + +class PAFLine: + """ + PAF specification + https://github.com/lh3/miniasm/blob/master/PAF.md + """ + + __slots__ = ( + "query", + "qsize", + "qstart", + "qstop", + "orientation", + "subject", + "ssize", + "sstart", + "sstop", + "nmatch", + "hitlen", + "mapq", + ) + + def __init__(self, row): + args = row.split() + self.query = args[0] + self.qsize = int(args[1]) + self.qstart = int(args[2]) + 1 + self.qstop = int(args[3]) + self.orientation = args[4] + self.subject = args[5] + self.ssize = int(args[6]) + self.sstart = int(args[7]) + 1 + self.sstop = int(args[8]) + self.nmatch = int(args[9]) + self.hitlen = int(args[10]) + self.mapq = int(args[11]) + + @property + def sbedline(self): + return "\t".join( + str(x) + for x in ( + self.subject, + self.sstart - 1, + self.sstop, + self.query, + self.hitlen, + self.orientation, + ) + ) + + @property + def qbedline(self): + return "\t".join( + str(x) + for x in ( + self.query, + self.qstart - 1, + self.qstop, + self.subject, + self.hitlen, + self.orientation, + ) + ) + + +def bed(args): + """ + %prog bed paffile + + Print out BED file based on coordinates in BLAST PAF results. By default, + write out subject positions. Use --swap to write query positions. 
+ """ + from jcvi.formats.bed import sort as sort_bed + + p = OptionParser(bed.__doc__) + p.add_argument( + "--swap", default=False, action="store_true", help="Write query positions" + ) + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (paffile,) = args + write_qbed = opts.swap + bedfile = "{}.{}.bed".format( + paffile.rsplit(".", 1)[0], "query" if write_qbed else "subject" + ) + with must_open(paffile) as fp, open(bedfile, "w") as fw: + for row in fp: + b = PAFLine(row) + if write_qbed: + print(b.qbedline, file=fw) + else: + print(b.sbedline, file=fw) + + logger.debug("File written to `%s`.", bedfile) + sort_bed([bedfile, "-i"]) + return bedfile + + +def main(): + actions = (("bed", "get BED file from PAF"),) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +if __name__ == "__main__": + main() diff --git a/jcvi/formats/pdf.py b/jcvi/formats/pdf.py new file mode 100644 index 00000000..abe4a010 --- /dev/null +++ b/jcvi/formats/pdf.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Manipulate PDF files, using PyPDF2 library. +""" +import sys + +from natsort import natsorted + +from pypdf import PdfMerger, parse_filename_page_ranges +from pypdf.pagerange import PageRange + +from ..apps.base import ActionDispatcher, OptionParser, cleanup, logger + +from .base import must_open + +PAGE_RANGE_HELP = PageRange.__init__.__doc__ + + +def main(): + + actions = (("cat", "concatenate pages from pdf files into a single pdf file"),) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def cat(args): + """ + %prog cat *.pdf -o output.pdf + + Concatenate pages from pdf files into a single pdf file. + + Page ranges refer to the previously-named file. + A file not followed by a page range means all the pages of the file. + + PAGE RANGES are like Python slices. 
+ {page_range_help} + EXAMPLES + pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1 + Concatenate all of head.pdf, all but page seven of content.pdf, + and the last page of tail.pdf, producing output.pdf. + + pdfcat chapter*.pdf >book.pdf + You can specify the output file by redirection. + + pdfcat chapter?.pdf chapter10.pdf >book.pdf + In case you don't want chapter 10 before chapter 2. + """ + p = OptionParser(cat.__doc__.format(page_range_help=PAGE_RANGE_HELP)) + p.add_argument( + "--nosort", default=False, action="store_true", help="Do not sort file names" + ) + p.add_argument( + "--cleanup", + default=False, + action="store_true", + help="Remove individual pdfs after merging", + ) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + outfile = opts.outfile + if outfile in args: + args.remove(outfile) + + should_sort = not opts.nosort + if not all(x.endswith(".pdf") for x in args): + should_sort = False + logger.debug("Not sorting filenames because non-pdf args") + + if should_sort: + args = natsorted(args) + + filename_page_ranges = parse_filename_page_ranges(args) + nfiles = len(filename_page_ranges) + merger = PdfMerger() + with must_open(outfile, "wb") as fw: + in_fs = {} + try: + for filename, page_range in filename_page_ranges: + logger.debug("%s: %s", filename, page_range) + if filename not in in_fs: + in_fs[filename] = open(filename, "rb") + merger.append(in_fs[filename], pages=page_range) + except Exception as e: + logger.error("Error while reading %s: %s", filename, e) + sys.exit(1) + merger.write(fw) + logger.info("Extracted %d files into `%s`", nfiles, outfile) + + if opts.cleanup: + logger.debug("Cleaning up %d files", nfiles) + cleanup(args) + + +if __name__ == "__main__": + main() diff --git a/jcvi/formats/psl.py b/jcvi/formats/psl.py new file mode 100755 index 00000000..df1774c8 --- /dev/null +++ b/jcvi/formats/psl.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- 
+ +""" +Classes to handle the .psl files +""" +import math +import re +import sys + +from ..apps.base import ActionDispatcher, OptionParser + +from .base import LineFile, must_open + + +class PslLine(object): + def __init__(self, sline): + args = sline.strip().split() + self.nargs = len(args) + self.matches = int(args[0]) + self.misMatches = int(args[1]) + self.repMatches = int(args[2]) + self.nCount = int(args[3]) + self.qNumInsert = int(args[4]) + self.qBaseInsert = int(args[5]) + self.tNumInsert = int(args[6]) + self.tBaseInsert = int(args[7]) + self.qstrand, self.strand = args[8], None + m = re.match(r"(?P[+-]?)(?P[+-])", self.qstrand) + if m: + self.qstrand, self.strand = m.group("qs"), m.group("gs") + self.qName = args[9] + self.qSize = int(args[10]) + self.qStart = int(args[11]) + self.qEnd = int(args[12]) + self.tName = args[13] + self.tSize = int(args[14]) + self.tStart = int(args[15]) + self.tEnd = int(args[16]) + self.blockCount = int(args[17]) + self.blockSizes = [int(x) for x in args[18].strip().split(",")[:-1]] + self.qStarts = [int(x) for x in args[19].strip().split(",")[:-1]] + self.tStarts = [int(x) for x in args[20].strip().split(",")[:-1]] + + def __str__(self): + args = [ + self.matches, + self.misMatches, + self.repMatches, + self.nCount, + self.qNumInsert, + self.qBaseInsert, + self.tNumInsert, + self.tBaseInsert, + self.strand, + self.qName, + self.qSize, + self.qStart, + self.qEnd, + self.tName, + self.tSize, + self.tStart, + self.tEnd, + self.blockCount, + self.blockSizes, + self.qStarts, + self.tStarts, + ] + + s = "\t".join(str(x) for x in args) + return s + + def __getitem__(self, key): + return getattr(self, key) + + @property + def qspan(self): + return self.qEnd - self.qStart + + @property + def tspan(self): + return self.tEnd - self.tStart + + @property + def score(self): + sizeMult = self._sizeMult + + return ( + sizeMult * (self.matches + (self.repMatches >> 1)) + - sizeMult * self.misMatches + - self.qNumInsert + - self.tNumInsert 
+ ) + + @property + def coverage(self): + return ( + 100 + * (self.matches + self.misMatches + self.repMatches + self.nCount) + / self.qSize + ) + + def swap(self): + self.qName, self.qSize, self.tName, self.tSize = ( + self.tName, + self.tSize, + self.qName, + self.qSize, + ) + + self.qStart, self.qEnd, self.tStart, self.tEnd = ( + self.tStart, + self.tEnd, + self.qStart, + self.qEnd, + ) + + self.qStarts, self.tStarts = self.tStarts, self.qStarts + + @property + def _sizeMult(self): + """ + decide the size multiplier based on sequence space (protein/nucleotide) + """ + return 3 if self._isProtein else 1 + + @property + def _isProtein(self): + """ + check if blockSizes and scores are in the protein space or not + """ + last = self.blockCount - 1 + return ( + (self.tEnd == self.tStarts[last] + 3 * self.blockSizes[last]) + and self.strand == "+" + ) or ( + ( + self.tStart + == self.tSize - (self.tStarts[last] + 3 * self.blockSizes[last]) + and self.strand == "-" + ) + ) + + def _milliBad(self, ismRNA=False): + """ + calculate badness in parts per thousand + i.e. 
number of non-identical matches + """ + sizeMult = self._sizeMult + + qAlnSize, tAlnSize = self.qspan * sizeMult, self.tspan + alnSize = min(qAlnSize, tAlnSize) + if alnSize <= 0: + return 0 + + sizeDiff = qAlnSize - tAlnSize + if sizeDiff < 0: + sizeDiff = 0 if ismRNA else -sizeDiff + + insertFactor = self.qNumInsert + if not ismRNA: + insertFactor += self.tNumInsert + + total = (self.matches + self.repMatches + self.misMatches) * sizeMult + + return ( + ( + 1000 + * ( + self.misMatches * sizeMult + + insertFactor + + round(3 * math.log(1 + sizeDiff)) + ) + ) + / total + if total != 0 + else 0 + ) + + def pct_id(self, simple=None): + return ( + 100.00 - self._milliBad(ismRNA=True) * 0.1 + if not simple + else 100.00 * self.matches / (self.matches + self.misMatches) + ) + # else 100.00 * self.score / self.qSize + + def gffline( + self, + source="GMAP", + type="match_part", + primary_tag="Parent", + alt_score=None, + suffix=".match", + count=0, + ): + + score = "." if type == "match_part" else "{0:.2f}".format(self.score) + + target = " ".join(str(x) for x in [self.qName, self.qStart, self.qEnd]) + + attributes = [ + primary_tag + "=" + self.qName + suffix + str(count), + "Target=" + target, + ] + if primary_tag == "ID": + attributes.extend( + [ + "identity={0:.2f}".format(self.pct_id(simple=alt_score)), + "coverage={0:.2f}".format(self.coverage), + ] + ) + attrs = ";".join(str(x) for x in attributes) + + line = "\t".join( + str(x) + for x in [ + self.tName, + source, + type, + self.tStart, + self.tEnd, + score, + self.strand, + ".", + attrs, + ] + ) + return line + + @property + def bed12line(self): + color = "255,0,0" + self.blockStarts = ",".join([str(x - self.tStart) for x in self.tStarts]) + line = "\t".join( + str(x) + for x in ( + self.tName, + self.tStart, + self.tEnd, + self.qName, + "{0:.2f}".format(self.pct_id()), + self.strand, + self.tStart, + self.tEnd, + color, + self.blockCount, + ",".join(str(bs) for bs in self.blockSizes), + self.blockStarts, + ) + 
) + return line + + +class Psl(LineFile): + def __init__(self, filename=None): + super().__init__(filename) + self.mCounts = {} # dict to hold match counts + if not filename: + return + + for line in must_open(filename): + if not re.match(r"\d+", line[0]): + continue + self.append(PslLine(line)) + + def trackMatches(self, id): + self.mCounts[id] = self.mCounts.get(id, 0) + 1 + + def getMatchCount(self, id): + return self.mCounts[id] + + +def main(): + + actions = ( + ("gff", "convert psl to gff3 format"), + ("bed", "convert psl to bed12 format"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def bed(args): + """ + %prog bed pslfile + + Convert to bed format. + """ + p = OptionParser(bed.__doc__) + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (pslfile,) = args + fw = must_open(opts.outfile, "w") + + psl = Psl(pslfile) + for p in psl: + print(p.bed12line, file=fw) + + +def gff(args): + """ + %prog gff pslfile + + Convert to gff format. 
+ """ + p = OptionParser(gff.__doc__) + p.add_argument("--source", default="GMAP", help="specify GFF source") + p.add_argument( + "--type", + default="EST_match", + help="specify GFF feature type", + ) + p.add_argument("--suffix", default=".match", help="match ID suffix") + p.add_argument( + "--swap", + default=False, + action="store_true", + help="swap query and target features", + ) + p.add_argument( + "--simple_score", + default=False, + action="store_true", + help="calculate a simple percent score", + ) + p.set_outfile() + + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (pslfile,) = args + fw = must_open(opts.outfile, "w") + + print("##gff-version 3", file=fw) + psl = Psl(pslfile) + for p in psl: + if opts.swap: + p.swap() + + psl.trackMatches(p.qName) + # switch from 0-origin to 1-origin + p.qStart += 1 + p.tStart += 1 + + print( + p.gffline( + source=opts.source, + type=opts.type, + suffix=opts.suffix, + primary_tag="ID", + alt_score=opts.simple_score, + count=psl.getMatchCount(p.qName), + ), + file=fw, + ) + + # create an empty PslLine() object and load only + # the targetName, queryName and strand info + part = PslLine("\t".join(str(x) for x in [0] * p.nargs)) + part.tName, part.qName, part.strand = p.tName, p.qName, p.strand + + nparts = len(p.qStarts) + for n in range(nparts): + part.qStart, part.tStart, aLen = ( + p.qStarts[n] + 1, + p.tStarts[n] + 1, + p.blockSizes[n], + ) + part.qEnd = part.qStart + aLen - 1 + part.tEnd = part.tStart + aLen - 1 + + if part.strand == "-": + part.qStart = p.qSize - (p.qStarts[n] + p.blockSizes[n]) + 1 + part.qEnd = p.qSize - p.qStarts[n] + + print( + part.gffline( + source=opts.source, + suffix=opts.suffix, + count=psl.getMatchCount(part.qName), + ), + file=fw, + ) + + +if __name__ == "__main__": + main() diff --git a/jcvi/formats/pyblast.py b/jcvi/formats/pyblast.py new file mode 100644 index 00000000..23926f52 --- /dev/null +++ b/jcvi/formats/pyblast.py @@ -0,0 +1,101 @@ 
+#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Python implementation of BlastLine, an alternative Cython implementation is +available in .cblast.BlastLine, which may be up to 2x faster +""" + + +class BlastLine(object): + __slots__ = ( + "query", + "subject", + "pctid", + "hitlen", + "nmismatch", + "ngaps", + "qstart", + "qstop", + "sstart", + "sstop", + "evalue", + "score", + "qseqid", + "sseqid", + "qi", + "si", + "orientation", + ) + + def __init__(self, sline): + args = sline.split("\t") + self.query = args[0] + self.subject = args[1] + self.pctid = float(args[2]) + self.hitlen = int(args[3]) + self.nmismatch = int(args[4]) + self.ngaps = int(args[5]) + self.qstart = int(args[6]) + self.qstop = int(args[7]) + self.sstart = int(args[8]) + self.sstop = int(args[9]) + if len(args) > 10: + self.evalue = float(args[10]) + self.score = float(args[11]) + + self.orientation = "+" + if self.qstart > self.qstop: + self.qstart, self.qstop = self.qstop, self.qstart + self.orientation = "-" + if self.sstart > self.sstop: + self.sstart, self.sstop = self.sstop, self.sstart + self.orientation = "-" + + @property + def has_score(self): + return hasattr(self, "score") + + def __repr__(self): + return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % ( + self.query, + self.subject, + self.evalue, + self.score, + ) + + def __str__(self): + if self.has_score: + args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + else: + args = [getattr(self, attr) for attr in BlastLine.__slots__[:10]] + if self.orientation == "-": + args[8], args[9] = args[9], args[8] + return "\t".join(str(x) for x in args) + + @property + def swapped(self): + """ + Swap query and subject. 
+ """ + args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] + args[0:2] = [self.subject, self.query] + args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] + if self.orientation == "-": + args[8], args[9] = args[9], args[8] + b = "\t".join(str(x) for x in args) + return BlastLine(b) + + @property + def bedline(self): + return "\t".join( + str(x) + for x in ( + self.subject, + self.sstart - 1, + self.sstop, + self.query, + self.score, + self.orientation, + ) + ) diff --git a/jcvi/formats/sam.py b/jcvi/formats/sam.py new file mode 100644 index 00000000..01b4d904 --- /dev/null +++ b/jcvi/formats/sam.py @@ -0,0 +1,1025 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +SAM alignment format. There are other tools that handles better SAM and BAM. +This script simply parses the lines in SAM into human readable fields. + +http://samtools.sourceforge.net/SAM1.pdf +""" +import os +import os.path as op +import sys + +from collections import defaultdict +from itertools import groupby + +from ..apps.base import ( + ActionDispatcher, + OptionParser, + PIPE, + Popen, + cleanup, + get_abs_path, + glob, + logger, + mkdir, + need_update, + popen, + sh, +) +from ..utils.cbook import fill +from ..assembly.base import Astat + +from .base import LineFile, must_open +from .fasta import Fasta +from .sizes import Sizes + + +class SamLine(object): + def __init__(self, row): + + args = row.strip().split("\t") + self.qname = args[0] + self.flag = int(args[1]) + self.rname = args[2] + self.pos = args[3] + self.mapq = args[4] + self.cigar = args[5] + self.mrnm = args[6] + self.mpos = args[7] + self.isize = args[8] + self.seq = args[9] + self.qual = args[10] + self.extra = args[11:] + + def __str__(self): + return "\t".join( + str(x) + for x in ( + self.qname, + self.flag, + self.rname, + self.pos, + self.mapq, + self.cigar, + self.mrnm, + self.mpos, + self.isize, + self.seq, + self.qual, + "\t".join(self.extra), + ) + ) + + @property + def orientation(self): + 
return "-" if self.flag & 0x10 == 0 else "+" + + def update_readname(self): + if self.flag & 0x40 == 0: + tag = "/1" + elif self.flag & 0x80 == 0: + tag = "/2" + else: + tag = "" + self.qname += tag + + @property + def pairline(self): + qpos = self.cigar.split("H", 1)[0] + return "%s:%s\t%s:%s" % (self.qname, qpos, self.rname, self.pos) + + +class Sam(LineFile): + def __init__(self, filename, callback=None): + super().__init__(filename) + fp = open(filename) + for row in fp: + if row[0] == "@": + continue + s = SamLine(row) + if callback: + callback(s) + + +def output_bam(cmd, outfile, cpus=8): + bam = outfile.endswith(".bam") + if not bam: + return cmd + " > {0}".format(outfile) + + outcmd, mflag = ("samtools view -bS", "-@ {0}".format(cpus)) + cmd += " | {0} {1} - > {2}".format(outcmd, mflag, outfile) + + return cmd + + +class GenomeCoverageLine(object): + def __init__(self, row): + args = row.split() + self.seqid = args[0] + self.depth = int(args[1]) + self.positions = int(args[2]) + self.length = int(args[3]) + self.freq = float(args[4]) + + +class GenomeCoverageFile(LineFile): + def __init__(self, filename): + super().__init__(filename) + fp = open(filename) + for row in fp: + self.append(GenomeCoverageLine(row)) + + def iter_coverage_seqid(self): + for seqid, lines in groupby(self, key=lambda x: x.seqid): + lines = list(lines) + length = lines[0].length + counts = 0 + for r in lines: + counts += r.depth * r.positions + yield seqid, counts * 1.0 / length + + +def get_prefix(readfile, dbfile): + rdpf = op.basename(readfile).replace(".gz", "").rsplit(".", 1)[0] + dbpf = op.basename(dbfile).split(".")[0] + return ".".join((rdpf, dbpf)) + + +def get_samfile( + readfile, dbfile, bam=False, mapped=False, unmapped=False, bowtie=False +): + prefix = get_prefix(readfile, dbfile) + ext = ".bam" if bam else ".sam" + samfile = prefix + ext + ext = ".fastq" if bowtie else ext + mapped = (prefix + ".mapped" + ext) if mapped else None + unmapped = (prefix + ".unmapped" + 
def get_minibam_bed(bamfile, bedfile, minibam=None):
    """Extract the alignments that fall in the regions of a BED file.

    samtools view -L could do the work, but it is NOT random access. Here we
    are processing multiple regions sequentially. See also:

    https://www.biostars.org/p/49306/

    Args:
        bamfile: input BAM file.
        bedfile: BED file; one `samtools view` is issued per line, using
            columns 1-3 as `seqid:start-end`.
        minibam: name of the output BAM. When omitted, it is derived from the
            basename of `bamfile` and the prefix of `bedfile`.

    Returns:
        Name of the sorted and indexed output BAM file.
    """
    pf = op.basename(bedfile).split(".")[0]
    minibamfile = minibam or op.basename(bamfile).replace(".bam", ".{}.bam".format(pf))
    # Derive the scratch SAM name from the resolved output name: `minibam`
    # itself is None by default and has no .replace().
    minisamfile = minibamfile.replace(".bam", ".sam")
    baifile = minibamfile + ".bai"
    if op.exists(baifile):
        sh("rm {}".format(baifile))

    # Start the scratch SAM with the header of the source BAM
    cmd = "samtools view -H {} > {}".format(bamfile, minisamfile)
    sh(cmd)

    # Append the records of every region, one `samtools view` per BED line.
    # Raw strings keep the backslashes in front of the braces literally.
    cmd = "cat {}".format(bedfile)
    cmd += " | perl -lane 'print \"$F[0]:$F[1]-$F[2]\"'"
    cmd += r" | xargs -n1 -t -I \{\}"
    cmd += " samtools view {}".format(bamfile)
    cmd += r" \{\} >> " + minisamfile
    sh(cmd)

    cmd = "samtools view {} -b".format(minisamfile)
    cmd += " | samtools sort -"
    cmd += " -o {0}".format(minibamfile)

    sh(cmd)
    sh("samtools index {0}".format(minibamfile))
    return minibamfile
`bwasw` and list multi-hit reads"), + ("noclip", "remove clipped reads from bam"), + ("ace", "convert sam file to ace"), + ("consensus", "convert bam alignments to consensus FASTA"), + ("fpkm", "calculate FPKM values from BAM file"), + ("coverage", "calculate depth for BAM file"), + ("vcf", "call SNPs on a set of bam files"), + ("mapped", "extract mapped/unmapped reads from samfile"), + ("count", "count the number of reads mapped using htseq"), + ("merge", "merge bam files"), + # Convenience function + ("index", "convert to bam, sort and then index"), + ("mini", "extract mini-bam for a single region"), + ) + + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def fastq(args): + """ + %prog fastq bamfile prefix + + Convert BAM files to paired FASTQ files. + """ + p = OptionParser(fastq.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bamfile, pf = args + singletons = pf + ".se.fastq" + a = pf + ".read1.fastq" + b = pf + ".read2.fastq" + + cmd = "samtools collate -uOn 128 {} tmp-prefix".format(bamfile) + cmd += " | samtools fastq -s {} -1 {} -2 {} -".format(singletons, a, b) + sh(cmd) + + if os.stat(singletons).st_size == 0: # singleton file is empty + cleanup(singletons) + return a, b + + +def mini(args): + """ + %prog mini bamfile region + + Extract mini-bam for a single region. + """ + p = OptionParser(mini.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bamfile, region = args + get_minibam(bamfile, region) + + +def noclip(args): + """ + %prog noclip bamfile + + Remove clipped reads from BAM. 
def append(args):
    """
    %prog append bamfile

    Append /1 or /2 to read names. Useful for using the Tophat2 bam file for
    training AUGUSTUS gene models.
    """
    # With --prepend, the given string plus "_" is put in front of every read
    # name instead of appending the mate suffix.
    p = OptionParser(append.__doc__)
    p.add_argument("--prepend", help="Prepend string to read names")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (bamfile,) = args
    prepend = opts.prepend

    icmd = "samtools view -h {0}".format(bamfile)
    bamfile = bamfile.rsplit(".", 1)[0] + ".append.bam"
    ocmd = "samtools view -b -@ 64 - -o {0}".format(bamfile)
    writer = Popen(ocmd, stdin=PIPE)
    try:
        for row in popen(icmd):
            if row[0] == "@":
                # Header lines are passed through untouched
                print(row.strip(), file=writer.stdin)
            else:
                s = SamLine(row)
                if prepend:
                    s.qname = prepend + "_" + s.qname
                else:
                    s.update_readname()
                print(s, file=writer.stdin)
    finally:
        # Signal end-of-input and wait for samtools to finish, otherwise the
        # BAM may still be incomplete when this function returns.
        # NOTE(review): assumes the project's Popen() returns a
        # subprocess.Popen-like object - confirm.
        writer.stdin.close()
        writer.wait()
def count(args):
    """
    %prog count bamfile gtf

    Count the number of reads mapped using `htseq-count`.
    """
    p = OptionParser(count.__doc__)
    p.add_argument("--type", default="exon", help="Only count feature type")
    p.set_cpus(cpus=8)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bamfile, gtf = args
    cpus = opts.cpus
    pf = bamfile.split(".")[0]
    countfile = pf + ".count"
    if not need_update(bamfile, countfile):
        return

    # htseq-count is fed a name-sorted SAM, built in two steps from the BAM
    nsorted = pf + "_nsorted"
    nsortedbam, nsortedsam = nsorted + ".bam", nsorted + ".sam"
    if need_update(bamfile, nsortedsam):
        # Name the output with -o, like the other `samtools sort` calls in
        # this module, instead of the legacy `sort in.bam out_prefix` form.
        cmd = "samtools sort -@ {0} -n {1} -o {2}".format(cpus, bamfile, nsortedbam)
        sh(cmd)
        cmd = "samtools view -@ {0} -h {1}".format(cpus, nsortedbam)
        sh(cmd, outfile=nsortedsam)

    if need_update(nsortedsam, countfile):
        cmd = "htseq-count --stranded=no --minaqual=10"
        cmd += " -t {0}".format(opts.type)
        cmd += " {0} {1}".format(nsortedsam, gtf)
        sh(cmd, outfile=countfile)
def fpkm(args):
    """
    %prog fpkm fastafile *.bam

    Calculate FPKM values from BAM file.
    """
    p = OptionParser(fpkm.__doc__)
    opts, args = p.parse_args(args)

    if len(args) < 2:
        sys.exit(not p.print_help())

    fastafile = args[0]
    bamfiles = args[1:]
    # Create a DUMMY gff file for cuffdiff
    gffile = fastafile.rsplit(".", 1)[0] + ".gff"
    if need_update(fastafile, gffile):
        # Load the FASTA before creating the output, so that a failure here
        # does not leave an empty GFF behind that later looks up to date.
        f = Fasta(fastafile, lazy=True)
        with open(gffile, "w") as fw:
            # One "transcript" feature spanning each whole sequence
            for key, size in f.itersizes_ordered():
                print(
                    "\t".join(
                        str(x)
                        for x in (
                            key,
                            "dummy",
                            "transcript",
                            1,
                            size,
                            ".",
                            ".",
                            ".",
                            "ID=" + key,
                        )
                    ),
                    file=fw,
                )
        logger.debug("Dummy GFF created: {0}".format(gffile))

    cmd = "cuffdiff {0} {1}".format(gffile, " ".join(bamfiles))
    sh(cmd)
See also: + https://cbc.brown.edu/blog/consensus-vcf/ + """ + valid_callers = ("bcftools", "gatk4") + p = OptionParser(consensus.__doc__) + p.add_argument( + "--nosort", default=False, action="store_true", help="Do not sort the BAM files" + ) + p.add_argument( + "--caller", + default="bcftools", + choices=valid_callers, + help="Use consensus caller", + ) + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + fastafile, bamfile = args + pf = bamfile.rsplit(".", 1)[0] + cnsfile = pf + ".cns.fasta" + vcfgzfile = pf + ".vcf.gz" + vcf_args = [fastafile, bamfile, "-o", vcfgzfile] + if opts.nosort: + vcf_args += ["--nosort"] + vcf(vcf_args) + if opts.caller == "bcftools": + cmd = "bcftools consensus -f {} -o {} {}".format(fastafile, cnsfile, vcfgzfile) + else: + cmd = "gatk4 FastaAlternateReferenceMaker -R {} -O {} -V {}".format( + fastafile, cnsfile, vcfgzfile + ) + sh(cmd) + + +def vcf(args): + """ + %prog vcf fastafile bamfiles > out.vcf.gz + + Call SNPs on bam files. + """ + from jcvi.apps.grid import Jobs + + valid_callers = ("mpileup", "freebayes") + p = OptionParser(vcf.__doc__) + p.set_outfile(outfile="out.vcf.gz") + p.add_argument( + "--nosort", default=False, action="store_true", help="Do not sort the BAM files" + ) + p.add_argument( + "--caller", default="mpileup", choices=valid_callers, help="Use variant caller" + ) + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + fastafile = args[0] + bamfiles = args[1:] + caller = opts.caller + + unsorted = [x for x in bamfiles if ".sorted." 
def breakpoint(r):
    """Yield the positions where an alignment switches between clipped and
    non-clipped CIGAR operations.

    `r` is expected to provide `cigartuples` (a list of (operation, length)
    pairs, or None) and `is_reverse`. Positions are cumulative operation
    lengths counted from the start of the CIGAR; for reverse-strand
    alignments they are counted from the other end instead. Nothing is
    yielded when the record has no CIGAR.

    NOTE: the name shadows the builtin breakpoint() within this module; it is
    kept because chimera() calls it by this name.
    """
    cigartuples = r.cigartuples
    if not cigartuples:
        return

    clip_ops = (4, 5)  # soft clip, hard clip
    total_length = sum(length for _, length in cigartuples)
    prev_clipped = None  # None until the first operation has been seen
    consumed = 0
    for operation, length in cigartuples:
        clipped = operation in clip_ops
        if prev_clipped is not None and clipped != prev_clipped:
            yield total_length - consumed if r.is_reverse else consumed
        prev_clipped = clipped
        consumed += length
+ """ + import pysam + from natsort import natsorted + + p = OptionParser(chimera.__doc__) + p.set_verbose() + opts, args = p.parse_args(args) + if len(args) != 1: + sys.exit(not p.print_help()) + + (samfile,) = args + samfile = pysam.AlignmentFile(samfile) + rstore = defaultdict(list) + hstore = defaultdict(int) + for r in samfile.fetch(): + rstore[r.query_name] += list(breakpoint(r)) + hstore[r.query_name] += 1 + if opts.verbose: + print( + r.query_name, + "+-"[r.is_reverse], + sum(l for o, l in r.cigartuples), + r.cigarstring, + list(breakpoint(r)), + file=sys.stderr, + ) + + for rn, bps in natsorted(rstore.items()): + bps = "|".join(str(x) for x in sorted(bps)) if bps else "na" + print("\t".join((rn, str(hstore[rn]), bps))) + + +def index(args): + """ + %prog index samfile/bamfile + + If SAM file, convert to BAM, sort and then index, using SAMTOOLS + """ + p = OptionParser(index.__doc__) + p.add_argument( + "--fasta", dest="fasta", default=None, help="add @SQ header to the BAM file" + ) + p.add_argument( + "--unique", + default=False, + action="store_true", + help="only retain uniquely mapped reads", + ) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(p.print_help()) + + (samfile,) = args + cpus = opts.cpus + fastafile = opts.fasta + if fastafile: + assert op.exists(fastafile) + + bamfile = samfile.replace(".sam", ".bam") + if fastafile: + faifile = fastafile + ".fai" + if need_update(fastafile, faifile): + sh("samtools faidx {0}".format(fastafile)) + cmd = "samtools view -bt {0} {1} -o {2}".format(faifile, samfile, bamfile) + else: + cmd = "samtools view -bS {0} -o {1}".format(samfile, bamfile) + + cmd += " -@ {0}".format(cpus) + if opts.unique: + cmd += " -q 1" + + if samfile.endswith(".sam") and need_update(samfile, bamfile): + sh(cmd) + + # Already sorted? 
def cigar_to_seq(a, gap="*"):
    """
    Accepts a pysam row.

    cigar alignment is presented as a list of tuples (operation,length). For
    example, the tuple [ (0,3), (1,5), (0,2) ] refers to an alignment with 3
    matches, 5 insertions and another 2 matches.

    Op BAM Description
    M 0 alignment match (can be a sequence match or mismatch)
    I 1 insertion to the reference
    D 2 deletion from the reference
    N 3 skipped region from the reference
    S 4 soft clipping (clipped sequences present in SEQ)
    H 5 hard clipping (clipped sequences NOT present in SEQ)
    P 6 padding (silent deletion from padded reference)
    = 7 sequence match
    X 8 sequence mismatch

    Convert the read sequence based on the cigar, so that the result lines up
    with the reference. For example, SEQ "ACGTACGT" with cigar
    [ (0,4), (2,2), (0,4) ] and gap "*" gives ("ACGT**ACGT", 2).

    Matches (M, =, X) copy bases from SEQ, deletions become `gap` characters,
    skipped regions become "N", and insertions and clips contribute nothing.

    Returns a tuple (sequence, number of gap characters added), or (None, 0)
    when the row has no cigar. Raises NotImplementedError for padding (P) and
    unknown operations.
    """
    seq, cigar = a.seq, a.cigar
    npadded = 0
    if cigar is None:
        return None, npadded

    start = 0  # offset in `seq` of the next base that has not been used yet
    subseqs = []
    for operation, length in cigar:
        if operation in (0, 7, 8):  # match / sequence match / mismatch
            subseqs.append(seq[start : start + length])
            start += length
        elif operation in (1, 4):  # insertion, soft clip
            # These bases are in SEQ but are left out of the result
            start += length
        elif operation == 2:  # deletion
            subseqs.append(gap * length)
            npadded += length
        elif operation == 3:  # skipped
            # No bases of SEQ are used up, so `start` must not move
            subseqs.append("N" * length)
        elif operation == 5:  # hard clip
            # Clipped bases are NOT present in SEQ: nothing to skip
            pass
        else:
            raise NotImplementedError(
                "Unsupported CIGAR operation: {0}".format(operation)
            )

    return "".join(subseqs), npadded
+ """ + p = OptionParser(ace.__doc__) + p.add_argument( + "--splitdir", + dest="splitdir", + default="outRoot", + help="split the ace per contig to dir", + ) + p.add_argument( + "--unpaired", + dest="unpaired", + default=False, + help="remove read pairs on the same contig", + ) + p.add_argument( + "--minreadno", + dest="minreadno", + default=3, + type=int, + help="minimum read numbers per contig", + ) + p.add_argument( + "--minctgsize", + dest="minctgsize", + default=100, + type=int, + help="minimum contig size per contig", + ) + p.add_argument( + "--astat", + default=False, + action="store_true", + help="create .astat to list repetitiveness", + ) + p.add_argument( + "--readids", + default=False, + action="store_true", + help="create file of mapped and unmapped ids", + ) + + from pysam import Samfile + + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bamfile, fastafile = args + astat = opts.astat + readids = opts.readids + + f = Fasta(fastafile) + prefix = bamfile.split(".")[0] + acefile = prefix + ".ace" + readsfile = prefix + ".reads" + astatfile = prefix + ".astat" + + logger.debug("Load {0}".format(bamfile)) + s = Samfile(bamfile, "rb") + + ncontigs = s.nreferences + genomesize = sum(x for a, x in f.itersizes()) + logger.debug("Total {0} contigs with size {1} base".format(ncontigs, genomesize)) + qual = "20" # default qual + + totalreads = sum(s.count(x) for x in s.references) + logger.debug("Total {0} reads mapped".format(totalreads)) + + fw = open(acefile, "w") + if astat: + astatfw = open(astatfile, "w") + if readids: + readsfw = open(readsfile, "w") + + print("AS {0} {1}".format(ncontigs, totalreads), file=fw) + print(file=fw) + + for i, contig in enumerate(s.references): + cseq = f[contig] + nbases = len(cseq) + + mapped_reads = [x for x in s.fetch(contig) if not x.is_unmapped] + nreads = len(mapped_reads) + + nsegments = 0 + print("CO {0} {1} {2} {3} U".format(contig, nbases, nreads, nsegments), file=fw) + 
class Sizes(LineFile):
    """
    Two-column .sizes file, often generated by `faSize -detailed`
    contigID size

    When given a file that does not end in `.sizes`, the file is read as
    FASTA and a `<filename>.sizes` file is written next to it (when missing
    or out of date) and used instead. With `select`, only contigs of at least
    that size are kept in the in-memory tables.
    """

    def __init__(self, filename, select=None):
        assert op.exists(filename), "File `{0}` not found".format(filename)

        # filename can be both .sizes file or FASTA formatted file
        sizesname = filename

        if not filename.endswith(".sizes"):
            sizesname = filename + ".sizes"
            filename = get_abs_path(filename)
            if need_update(filename, sizesname):
                from jcvi.formats.fasta import Fasta

                f = Fasta(filename)
                with open(sizesname, "w") as fw:
                    for k, size in f.itersizes_ordered():
                        print("\t".join((k, str(size))), file=fw)

            filename = sizesname

        assert filename.endswith(".sizes")

        super().__init__(filename)
        # Kept open on purpose: iter_sizes()/iter_names() rewind and re-read
        # it. Release it with close().
        self.fp = open(filename)
        self.filename = filename

        # get sizes for individual contigs, both in list and dict
        # this is to preserve the input order in the sizes file
        sizes = list(self.iter_sizes())
        if select:
            assert select > 0
            sizes = [x for x in sizes if x[1] >= select]
        self.sizes_mapping = dict(sizes)

        # get cumulative sizes, both in list and dict
        # zip(*[]) yields nothing to unpack, so an empty file (or a `select`
        # that removes every contig) needs an explicit empty pair.
        ctgs, sizes = zip(*sizes) if sizes else ((), ())
        self.sizes = sizes
        cumsizes = np.cumsum([0] + list(sizes))
        self.ctgs = ctgs
        self.cumsizes = cumsizes
        self.cumsizes_mapping = dict(zip(ctgs, cumsizes))

    def __len__(self):
        return len(self.sizes)

    def get_size(self, ctg):
        return self.sizes_mapping[ctg]

    def get_cumsize(self, ctg):
        return self.cumsizes_mapping[ctg]

    def close(self, clean=False):
        """Close the underlying file; with `clean`, also remove it."""
        self.fp.close()
        if clean:
            cleanup(self.filename)

    @property
    def mapping(self):
        return self.sizes_mapping

    @property
    def totalsize(self):
        return sum(self.sizes)

    def iter_sizes(self):
        """Yield (contig, size) for every line of the file, in file order."""
        self.fp.seek(0)
        for row in self.fp:
            if not row.strip():
                continue  # tolerate blank lines
            ctg, size = row.split()[:2]
            yield ctg, int(size)

    def iter_names(self):
        """Yield the contig name of every line of the file, in file order."""
        self.fp.seek(0)
        for row in self.fp:
            if not row.strip():
                continue  # tolerate blank lines
            ctg, size = row.split()[:2]
            yield ctg

    def get_position(self, ctg, pos):
        """Return `pos` offset by the cumulative size of the contigs before
        `ctg`, or None when `ctg` is unknown."""
        if ctg not in self.cumsizes_mapping:
            return None
        return self.cumsizes_mapping[ctg] + pos

    def get_breaks(self):
        """Yield (contig, cumulative start, cumulative end) for each contig."""
        for i, ctg in enumerate(self.ctgs):
            yield ctg, self.cumsizes[i], self.cumsizes[i + 1]

    @property
    def summary(self):
        from jcvi.assembly.base import calculate_A50

        ctgsizes = self.sizes
        a50, l50, n50 = calculate_A50(ctgsizes)
        return sum(ctgsizes), l50, n50
read/contig length distribution"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def histogram(args): + """ + %prog histogram [reads.fasta|reads.fastq] + + Plot read length distribution for reads. The plot would be similar to the + one generated by SMRT-portal, for example: + + http://blog.pacificbiosciences.com/2013/10/data-release-long-read-shotgun.html + + Plot has two axes - corresponding to pdf and cdf, respectively. Also adding + number of reads, average/median, N50, and total length. + """ + from jcvi.utils.cbook import human_size, thousands, SUFFIXES + from jcvi.formats.fastq import fasta + from jcvi.graphics.histogram import stem_leaf_plot + from jcvi.graphics.base import ( + plt, + markup, + human_formatter, + human_base_formatter, + savefig, + set2, + set_ticklabels_helvetica, + ) + + p = OptionParser(histogram.__doc__) + p.set_histogram( + vmax=50000, bins=100, xlabel="Read length", title="Read length distribution" + ) + p.add_argument("--ylabel1", default="Counts", help="Label of y-axis on the left") + p.add_argument( + "--color", + default="0", + choices=[str(x) for x in range(8)], + help="Color of bars, which is an index 0-7 in brewer set2", + ) + opts, args, iopts = p.set_image_options(args, figsize="6x6", style="dark") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + fastafile, qualfile = fasta([fastafile, "--seqtk"]) + sizes = Sizes(fastafile) + all_sizes = sorted(sizes.sizes) + xmin, xmax, bins = opts.vmin, opts.vmax, opts.bins + left, height = stem_leaf_plot(all_sizes, xmin, xmax, bins) + + plt.figure(1, (iopts.w, iopts.h)) + ax1 = plt.gca() + + width = (xmax - xmin) * 0.5 / bins + color = set2[int(opts.color)] + ax1.bar(left, height, width=width, linewidth=0, fc=color, align="center") + ax1.set_xlabel(markup(opts.xlabel)) + ax1.set_ylabel(opts.ylabel1) + + ax2 = ax1.twinx() + cur_size = 0 + total_size, l50, n50 = sizes.summary + cdf = {} + hsize = human_size(total_size) + tag = hsize[-2:] + unit 
= 1000 ** SUFFIXES[1000].index(tag) + + for x in all_sizes: + if x not in cdf: + cdf[x] = (total_size - cur_size) * 1.0 / unit + cur_size += x + x, y = zip(*sorted(cdf.items())) + ax2.plot(x, y, "-", color="darkslategray") + ylabel2 = "{0} above read length".format(tag) + ax2.set_ylabel(ylabel2) + + for ax in (ax1, ax2): + set_ticklabels_helvetica(ax) + ax.set_xlim((xmin - width / 2, xmax + width / 2)) + + tc = "gray" + axt = ax1.transAxes + xx, yy = 0.95, 0.95 + ma = "Total bases: {0}".format(hsize) + mb = "Total reads: {0}".format(thousands(len(sizes))) + mc = "Average read length: {0}bp".format(thousands(np.mean(all_sizes))) + md = "Median read length: {0}bp".format(thousands(np.median(all_sizes))) + me = "N50 read length: {0}bp".format(thousands(l50)) + for t in (ma, mb, mc, md, me): + print(t, file=sys.stderr) + ax1.text(xx, yy, t, color=tc, transform=axt, ha="right") + yy -= 0.05 + + ax1.set_title(markup(opts.title)) + # Seaborn removes ticks for all styles except 'ticks'. Now add them back: + ax1.tick_params( + axis="x", + direction="out", + length=3, + left=False, + right=False, + top=False, + bottom=True, + ) + ax1.xaxis.set_major_formatter(human_base_formatter) + ax1.yaxis.set_major_formatter(human_formatter) + figname = sizes.filename + ".pdf" + savefig(figname) + + +def extract(args): + """ + %prog extract idsfile sizesfile + + Extract the lines containing only the given IDs. + """ + p = OptionParser(extract.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + idsfile, sizesfile = args + sizes = Sizes(sizesfile).mapping + fp = open(idsfile) + for row in fp: + name = row.strip() + size = sizes[name] + print("\t".join(str(x) for x in (name, size))) + + +def agp(args): + """ + %prog agp + + Convert the sizes file to a trivial AGP file. 
class UniqueLiftover(object):
    def __init__(self, chainfile):
        """
        This object will perform unique single positional liftovers - it will only lift over chromosome positions that
        map unique to the new genome and if the strand hasn't changed.
        Note: You should run a VCF Normalization sweep on all lifted over CPRAs to check for variants that need to be
        re-normalized, and to remove variants where the REF now doesn't match after a liftover.
        The combination of these steps will ensure high quality liftovers. However, it should be noted that this won't
        prevent the situation where multiple positions in the old genome pile up uniquely in the new genome, so one
        needs to check for this.
        It's organised as an object rather than a collection of functions so that the LiftOver chainfile
        only gets opened/passed once and not for every position to be lifted over.
        :param chainfile: A string containing the path to the local UCSC .gzipped chainfile
        :return:
        """

        self.liftover = LiftOver(chainfile)

    def liftover_cpra(self, chromosome, position, verbose=False):
        """
        Given chromosome, position in 1-based co-ordinates,
        This will use pyliftover to liftover a CPRA, will return a (c,p) tuple, or (None, None) if no unique
        and strand maintaining liftover is possible
        :param chromosome: string with the chromosome as it's represented in the from_genome
        :param position: position on chromosome (will be cast to int)
        :param verbose: log the reason of a failed liftover, for debugging
        :return: ((str) chromosome, (int) position) or (None, None) if no liftover
        """

        chromosome = str(chromosome)
        position = int(position)

        # Perform the liftover lookup, shift the position by 1 as pyliftover deals in 0-based co-ords
        new = self.liftover.convert_coordinate(chromosome, position - 1)

        # Every failure mode sets a message, so the verbose branch below can
        # never see an unbound name.
        if new is None:
            # new will be NoneType when the chromosome doesn't exist in the chainfile
            exception_string = "Chromosome '{}' provided not in chain file".format(
                chromosome
            )
        elif not new:
            # An empty result: the chromosome is known, but nothing came back
            # for this position
            exception_string = "{},{} does not lift over to any position".format(
                chromosome, position
            )
        elif len(new) != 1:
            exception_string = "{},{} lifts over to multiple positions: {}".format(
                chromosome, position, new
            )
        elif new[0][2] != "+":
            exception_string = "{},{} has a flipped strand in liftover: {}".format(
                chromosome, position, new
            )
        else:
            # Unique hit on the same strand: use the lifted-over co-ordinates
            new_chromosome = str(new[0][0])
            # Shift the position forward by one to convert back to a 1-based co-ords
            new_position = int(new[0][1]) + 1
            return new_chromosome, new_position

        if verbose:
            logger.error(exception_string)
        return None, None
+ """ + import pyfasta + + p = OptionParser(validate.__doc__) + p.add_argument("--prefix", help="Add prefix to seqid") + opts, args = p.parse_args(args) + + vcffile, fastafile = args + pf = opts.prefix + genome = pyfasta.Fasta(fastafile, record_class=pyfasta.MemoryRecord) + fp = must_open(vcffile) + match_ref = match_alt = total = 0 + for row in fp: + if row[0] == "#": + continue + seqid, pos, id, ref, alt = row.split()[:5] + total += 1 + if pf: + seqid = pf + seqid + pos = int(pos) + if seqid not in genome: + continue + true_ref = genome[seqid][pos - 1] + if total % 100000 == 0: + print(total, "sites parsed", file=sys.stderr) + if ref == true_ref: + match_ref += 1 + elif alt == true_ref: + match_alt += 1 + + logger.debug("Match REF: {}".format(percentage(match_ref, total))) + logger.debug("Match ALT: {}".format(percentage(match_alt, total))) + + +def uniq(args): + """ + %prog uniq vcffile + + Retain only the first entry in vcf file. + """ + from urllib.parse import parse_qs + + p = OptionParser(uniq.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (vcffile,) = args + fp = must_open(vcffile) + data = [] + for row in fp: + if row[0] == "#": + print(row.strip()) + continue + v = VcfLine(row) + data.append(v) + + for pos, vv in groupby(data, lambda x: x.pos): + vv = list(vv) + if len(vv) == 1: + print(vv[0]) + continue + bestv = max(vv, key=lambda x: float(parse_qs(x.info)["R2"][0])) + print(bestv) + + +def sample(args): + """ + %prog sample vcffile 0.9 + + Sample subset of vcf file. 
+ """ + from random import random + + p = OptionParser(sample.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + vcffile, ratio = args + ratio = float(ratio) + fp = open(vcffile) + pf = vcffile.rsplit(".", 1)[0] + kept = pf + ".kept.vcf" + withheld = pf + ".withheld.vcf" + fwk = open(kept, "w") + fww = open(withheld, "w") + nkept = nwithheld = 0 + for row in fp: + if row[0] == "#": + print(row.strip(), file=fwk) + continue + if random() < ratio: + nkept += 1 + print(row.strip(), file=fwk) + else: + nwithheld += 1 + print(row.strip(), file=fww) + logger.debug("{0} records kept to `{1}`".format(nkept, kept)) + logger.debug("{0} records withheld to `{1}`".format(nwithheld, withheld)) + + +def get_vcfstanza(fastafile, sampleid="SAMP_001"): + from jcvi.formats.base import timestamp + + # VCF spec + m = "##fileformat=VCFv4.1\n" + m += "##fileDate={0}\n".format(timestamp()) + m += "##source={0}\n".format(__file__) + m += "##reference=file://{0}\n".format(op.abspath(fastafile).strip("/")) + m += '##INFO=\n' + m += '##INFO=\n' + m += '##FORMAT=\n' + m += '##FORMAT=\n' + header = "CHROM POS ID REF ALT QUAL FILTER INFO FORMAT\n".split() + [sampleid] + m += "#" + "\t".join(header) + return m + + +def fromimpute2(args): + """ + %prog fromimpute2 impute2file fastafile 1 + + Convert impute2 output to vcf file. 
Imputed file looks like: + + --- 1:10177:A:AC 10177 A AC 0.451 0.547 0.002 + """ + p = OptionParser(fromimpute2.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + impute2file, fastafile, chr = args + fasta = Fasta(fastafile) + print(get_vcfstanza(fastafile)) + fp = open(impute2file) + seen = set() + for row in fp: + snp_id, rsid, pos, ref, alt, aa, ab, bb = row.split() + pos = int(pos) + if pos in seen: + continue + seen.add(pos) + code = max((float(aa), "0/0"), (float(ab), "0/1"), (float(bb), "1/1"))[-1] + tag = "PR" if snp_id == chr else "IM" + print( + "\t".join( + str(x) + for x in ( + chr, + pos, + rsid, + ref, + alt, + ".", + ".", + tag, + "GT:GP", + code + ":" + ",".join((aa, ab, bb)), + ) + ) + ) + + +def read_rsid(seqid, legend): + if seqid in ["Y", "MT"]: + return {} + # Read rsid + fp = open(legend) + # rs145072688:10352:T:TA + register = {} + for row in fp: + atoms = row.strip().split(":") + if len(atoms) == 4: + rsid, pos, ref, alt = atoms + else: + continue + pos = int(pos) + # Use position for non-rsid + rsids = [pos] if rsid == seqid else [rsid, pos] + for rsid in rsids: + if rsid in register: + pos1, ref1, alt1 = register[rsid] + if alt not in alt1: + register[rsid][-1].append(alt) + else: + register[rsid] = (pos, ref, [alt]) + logger.debug( + "A total of {0} sites imported from `{1}`".format(len(register), legend) + ) + return register + + +def from23andme(args): + """ + %prog from23andme txtfile 1 + + Convert from23andme file to vcf file. 
+ + --ref points to the folder that contains chr1.rsids + + $ zcat 1000GP_Phase3/1000GP_Phase3_chr1.legend.gz \\ + | cut -d" " -f1 | grep ":" > chr1.rsids + """ + p = OptionParser(from23andme.__doc__) + p.set_ref() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + txtfile, seqid = args + ref_dir = opts.ref + fastafile = op.join(ref_dir, "hs37d5.fa") + fasta = Fasta(fastafile) + + pf = txtfile.rsplit(".", 1)[0] + px = CM[seqid] + chrvcf = pf + ".{0}.vcf".format(px) + legend = op.join(ref_dir, "1000GP_Phase3/{0}.rsids".format(px)) + register = read_rsid(seqid, legend) + + fw = open(chrvcf, "w") + print(get_vcfstanza(fastafile, txtfile), file=fw) + + fp = open(txtfile) + seen = set() + duplicates = skipped = missing = 0 + for row in fp: + if row[0] == "#": + continue + rsid, chr, pos, genotype = row.split() + if chr != seqid: + continue + pos = int(pos) + if (chr, pos) in seen: + duplicates += 1 + continue + seen.add((chr, pos)) + genotype = list(genotype) + if "-" in genotype: # missing daa + missing += 1 + continue + + # Y or MT + if not register: + assert len(genotype) == 1 + ref = fasta[chr][pos - 1].seq.upper() + if "D" in genotype or "I" in genotype: + skipped += 1 + continue + genotype = genotype[0] + code = "0/0" if ref == genotype else "1/1" + alt = "." 
if ref == genotype else genotype + print( + "\t".join( + str(x) + for x in (chr, pos, rsid, ref, alt, ".", ".", "PR", "GT", code) + ), + file=fw, + ) + continue + + # If rsid is seen in the db, use that + if rsid in register: + pos, ref, alt = register[rsid] + elif pos in register: + pos, ref, alt = register[pos] + else: + skipped += 1 # Not in reference panel + continue + + assert fasta[chr][pos - 1 : pos + len(ref) - 1].seq.upper() == ref + # Keep it bi-allelic + not_seen = [x for x in alt if x not in genotype] + while len(alt) > 1 and not_seen: + alt.remove(not_seen.pop()) + if len(alt) > 1: + alt = [alt[0]] + alleles = [ref] + alt + + if len(genotype) == 1: + genotype = [genotype[0]] * 2 + + alt = ",".join(alt) or "." + if "D" in genotype or "I" in genotype: + max_allele = max((len(x), x) for x in alleles)[1] + alleles = [("I" if x == max_allele else "D") for x in alleles] + assert "I" in alleles and "D" in alleles + a, b = genotype + try: + ia, ib = alleles.index(a), alleles.index(b) + except ValueError: # alleles not seen + logger.error( + "{0}: alleles={1}, genotype={2}".format(rsid, alleles, genotype) + ) + skipped += 1 + continue + code = "/".join(str(x) for x in sorted((ia, ib))) + + print( + "\t".join( + str(x) for x in (chr, pos, rsid, ref, alt, ".", ".", "PR", "GT", code) + ), + file=fw, + ) + + logger.debug( + "duplicates={0} skipped={1} missing={2}".format(duplicates, skipped, missing) + ) + + +def refallele(args): + """ + %prog refallele vcffile > out.refAllele + + Make refAllele file which can be used to convert PLINK file to VCF file. 
+ """ + p = OptionParser(refallele.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (vcffile,) = args + fp = open(vcffile) + for row in fp: + if row[0] == "#": + continue + atoms = row.split() + marker = "{0}:{1}".format(*atoms[:2]) + ref = atoms[3] + print("\t".join((marker, ref))) + + +def location(args): + """ + %prog location bedfile fastafile + + Given SNP locations, summarize the locations in the sequences. For example, + find out if there are more 3`-SNPs than 5`-SNPs. + """ + from jcvi.formats.bed import BedLine + from jcvi.graphics.histogram import stem_leaf_plot + + p = OptionParser(location.__doc__) + p.add_argument( + "--dist", + default=100, + type=int, + help="Distance cutoff to call 5` and 3`", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bedfile, fastafile = args + dist = opts.dist + sizes = Sizes(fastafile).mapping + fp = open(bedfile) + fiveprime = threeprime = total = 0 + percentages = [] + for row in fp: + b = BedLine(row) + pos = b.start + size = sizes[b.seqid] + if pos < dist: + fiveprime += 1 + if size - pos < dist: + threeprime += 1 + total += 1 + percentages.append(100 * pos / size) + + m = "Five prime (within {0}bp of start codon): {1}\n".format(dist, fiveprime) + m += "Three prime (within {0}bp of stop codon): {1}\n".format(dist, threeprime) + m += "Total: {0}".format(total) + print(m, file=sys.stderr) + + bins = 10 + title = "Locations within the gene [0=Five-prime, 100=Three-prime]" + stem_leaf_plot(percentages, 0, 100, bins, title=title) + + +def summary(args): + """ + %prog summary txtfile fastafile + + The txtfile can be generated by: %prog mstmap --noheader --freq=0 + + Tabulate on all possible combinations of genotypes and provide results + in a nicely-formatted table. Give a fastafile for SNP rate (average + # of SNPs per Kb). 
+ + Only three-column file is supported: + locus_id intra- genotype inter- genotype + """ + from jcvi.utils.cbook import thousands + from jcvi.utils.table import tabulate + + p = OptionParser(summary.__doc__) + p.add_argument("--counts", help="Print SNP counts in a txt file") + p.add_argument("--bed", help="Print SNPs locations in a bed file") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + txtfile, fastafile = args + bedfw = open(opts.bed, "w") if opts.bed else None + + fp = open(txtfile) + header = next(fp).split() # Header + snps = defaultdict(list) # contig => list of loci + combinations = defaultdict(int) + intraSNPs = interSNPs = 0 + distinctSet = set() # set of genes that show A-B pattern + ref, alt = header[1:3] + snpcounts, goodsnpcounts = defaultdict(int), defaultdict(int) + for row in fp: + atoms = row.split() + assert len(atoms) == 3, "Only three-column file is supported" + locus, intra, inter = atoms + ctg, pos = locus.rsplit(".", 1) + pos = int(pos) + snps[ctg].append(pos) + snpcounts[ctg] += 1 + + if intra == "X": + intraSNPs += 1 + if inter in ("B", "X"): + interSNPs += 1 + if intra == "A" and inter == "B": + distinctSet.add(ctg) + goodsnpcounts[ctg] += 1 + # Tabulate all possible combinations + intra = ref + "-" + intra + inter = alt + "-" + inter + combinations[(intra, inter)] += 1 + + if bedfw: + print("\t".join(str(x) for x in (ctg, pos - 1, pos, locus)), file=bedfw) + + if bedfw: + logger.debug("SNP locations written to `{0}`.".format(opts.bed)) + bedfw.close() + + nsites = sum(len(x) for x in snps.values()) + sizes = Sizes(fastafile) + bpsize = sizes.totalsize + snprate = lambda a: a * 1000.0 / bpsize + m = "Dataset `{0}` contains {1} contigs ({2} bp).\n".format( + fastafile, len(sizes), thousands(bpsize) + ) + m += "A total of {0} SNPs within {1} contigs ({2} bp).\n".format( + nsites, len(snps), thousands(sum(sizes.mapping[x] for x in snps.keys())) + ) + m += "SNP rate: {0:.1f}/Kb, 
".format(snprate(nsites)) + m += "IntraSNPs: {0} ({1:.1f}/Kb), InterSNPs: {2} ({3:.1f}/Kb)".format( + intraSNPs, snprate(intraSNPs), interSNPs, snprate(interSNPs) + ) + print(m, file=sys.stderr) + print(tabulate(combinations), file=sys.stderr) + + leg = "Legend: A - homozygous same, B - homozygous different, X - heterozygous" + print(leg, file=sys.stderr) + + tag = (ref + "-A", alt + "-B") + distinctSNPs = combinations[tag] + tag = str(tag).replace("'", "") + print( + "A total of {0} disparate {1} SNPs in {2} contigs.".format( + distinctSNPs, tag, len(distinctSet) + ), + file=sys.stderr, + ) + + if not opts.counts: + return + + snpcountsfile = opts.counts + fw = open(snpcountsfile, "w") + header = "\t".join(("Contig", "#_SNPs", "#_AB_SNP")) + print(header, file=fw) + + assert sum(snpcounts.values()) == nsites + assert sum(goodsnpcounts.values()) == distinctSNPs + + for ctg in sorted(snps.keys()): + snpcount = snpcounts[ctg] + goodsnpcount = goodsnpcounts[ctg] + print("\t".join(str(x) for x in (ctg, snpcount, goodsnpcount)), file=fw) + + fw.close() + logger.debug("SNP counts per contig is written to `{0}`.".format(snpcountsfile)) + + +g2x = {"0/0": "A", "0/1": "X", "1/1": "B", "./.": "-", ".": "-"} + + +def encode_genotype(s, mindepth=3, depth_index=2, nohet=False): + """ + >>> encode_genotype("1/1:128,18,0:6:18") # homozygote B + 'B' + >>> encode_genotype("0/1:0,0,0:0:3") # missing data + '-' + >>> encode_genotype("0/1:128,0,26:7:22") # heterozygous A/B + 'X' + """ + atoms = s.split(":") + if len(atoms) < 3: + return g2x[atoms[0]] + + inferred = atoms[0] + depth = int(atoms[depth_index]) + if depth < mindepth: + return "-" + if inferred == "0/0": + return "A" + if inferred == "0/1": + return "-" if nohet else "X" + if inferred == "1/1": + return "B" + return "-" + + +def mstmap(args): + """ + %prog mstmap bcffile/vcffile > matrixfile + + Convert bcf/vcf format to mstmap input. 
+ """ + from jcvi.assembly.geneticmap import MSTMatrix + + p = OptionParser(mstmap.__doc__) + p.add_argument( + "--dh", + default=False, + action="store_true", + help="Double haploid population, no het", + ) + p.add_argument( + "--freq", + default=0.2, + type=float, + help="Allele must be above frequency", + ) + p.add_argument( + "--mindepth", + default=3, + type=int, + help="Only trust genotype calls with depth", + ) + p.add_argument( + "--missing_threshold", + default=0.25, + type=float, + help="Fraction missing must be below", + ) + p.add_argument( + "--noheader", + default=False, + action="store_true", + help="Do not print MSTmap run parameters", + ) + p.add_argument( + "--pv4", + default=False, + action="store_true", + help="Enable filtering strand-bias, tail distance bias, etc.", + ) + p.add_argument( + "--freebayes", + default=False, + action="store_true", + help="VCF output from freebayes", + ) + p.set_sep(sep=".", help="Use separator to simplify individual names") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (vcffile,) = args + if vcffile.endswith(".bcf"): + bcffile = vcffile + vcffile = bcffile.rsplit(".", 1)[0] + ".vcf" + cmd = "bcftools view {0}".format(bcffile) + cmd += " | vcfutils.pl varFilter" + if not opts.pv4: + cmd += " -1 0 -2 0 -3 0 -4 0 -e 0" + if need_update(bcffile, vcffile): + sh(cmd, outfile=vcffile) + + freq = opts.freq + sep = opts.sep + depth_index = 1 if opts.freebayes else 2 + + ptype = "DH" if opts.dh else "RIL6" + nohet = ptype == "DH" + fp = open(vcffile) + genotypes = [] + for row in fp: + if row[:2] == "##": + continue + atoms = row.split() + if row[0] == "#": + ind = [x.split(sep)[0] for x in atoms[9:]] + nind = len(ind) + mh = ["locus_name"] + ind + continue + + marker = "{0}.{1}".format(*atoms[:2]) + + geno = atoms[9:] + geno = [ + encode_genotype( + x, mindepth=opts.mindepth, depth_index=depth_index, nohet=nohet + ) + for x in geno + ] + assert len(geno) == nind 
+ f = 1.0 / nind + + if geno.count("A") * f < freq: + continue + if geno.count("B") * f < freq: + continue + if geno.count("-") * f > opts.missing_threshold: + continue + + genotype = [marker] + geno + genotypes.append(genotype) + + mm = MSTMatrix(genotypes, mh, ptype, opts.missing_threshold) + mm.write(opts.outfile, header=(not opts.noheader)) + + +def liftover(args): + """ + %prog liftover old.vcf hg19ToHg38.over.chain.gz new.vcf + + Lift over coordinates in vcf file. + """ + p = OptionParser(liftover.__doc__) + p.add_argument( + "--newid", default=False, action="store_true", help="Make new identifiers" + ) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + oldvcf, chainfile, newvcf = args + ul = UniqueLiftover(chainfile) + num_excluded = 0 + fp = open(oldvcf) + fw = open(newvcf, "w") + for row in fp: + row = row.strip() + if row[0] == "#": + if row.startswith("##source="): + row = "##source={0}".format(__file__) + elif row.startswith("##reference="): + row = "##reference=hg38" + elif row.startswith("##contig="): + continue + print(row.strip(), file=fw) + continue + + v = VcfLine(row) + # GRCh37.p2 has the same MT sequence as hg38 (but hg19 is different) + if v.seqid == "MT": + v.seqid = "chrM" + print(v, file=fw) + continue + + try: + new_chrom, new_pos = ul.liftover_cpra(CM[v.seqid], v.pos) + except: + num_excluded += 1 + continue + + if new_chrom is not None and new_pos is not None: + v.seqid, v.pos = new_chrom, new_pos + if opts.newid: + v.rsid = "{0}:{1}".format(new_chrom.replace("chr", ""), new_pos) + print(v, file=fw) + else: + num_excluded += 1 + + logger.debug("Excluded {0}".format(num_excluded)) + + +if __name__ == "__main__": + main() diff --git a/jcvi/graphics/__init__.py b/jcvi/graphics/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/jcvi/graphics/__main__.py b/jcvi/graphics/__main__.py new file mode 100644 index 00000000..41088d82 --- /dev/null +++ b/jcvi/graphics/__main__.py @@ -0,0 
+1,11 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +Suite of visualization tools for dot-plots, histograms, karytotypes, macro-/micro-synteny plots, seed counting using GRABSEEDS, etc. +""" + +from ..apps.base import dmain + + +if __name__ == "__main__": + dmain(__file__) diff --git a/jcvi/graphics/align.py b/jcvi/graphics/align.py new file mode 100644 index 00000000..a8ab05a7 --- /dev/null +++ b/jcvi/graphics/align.py @@ -0,0 +1,554 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +%prog demo + +Illustrate three different types of alignments. +- Pairwise sequence alignment, aka, "dot plot" +- Read alignment, similar to the visualization of a BAM file +- Optical map alignment, matchings between restriction fragments +""" + + +import sys + +from bisect import bisect +from random import choice, randint + +from more_itertools import pairwise + +from ..apps.base import OptionParser +from ..utils.range import range_overlap + +from .base import FancyArrow, Rectangle, plt, savefig, normalize_axes +from .chromosome import Chromosome, HorizontalChromosome +from .glyph import BaseGlyph, GeneGlyph + + +class BaseAlign(object): + def __init__(self, fig, xywh, xpad=0, ypad=0, xmax=100): + x, y, w, h = xywh + self.ax = fig.add_axes(xywh) + self.sax = fig.add_axes( + [x + xpad * w, y + ypad * h, (1 - 2 * xpad) * w, (1 - 2 * ypad) * h] + ) + self.amax = self.bmax = xmax + self.a = [(1, xmax)] + self.b = [(1, xmax)] + self.apatch = self.bpatch = None + self.apatchcolor = self.bpatchcolor = "darkslategrey" + self.xpad = xpad + self.ypad = ypad + self.canvas = 1 - 2 * xpad + + def convert(self, pos, xmax): + return self.xpad + pos * self.canvas / xmax + + def invert(self, a, b): + self.a = [(1, a), (a, b), (b, self.amax)] + self.b = [(1, a), (b, a), (b, self.bmax)] + self.apatch = (self.convert(a, self.amax), self.convert(b, self.amax)) + self.bpatch = (self.convert(a, self.bmax), self.convert(b, self.bmax)) + self.bpatchcolor = "y" + + def delete(self, a, b): + 
self.bmax -= b - a + self.a = [(1, a), (b, self.amax)] + self.b = [(1, a), (a, self.bmax)] + self.apatch = (self.convert(a, self.amax), self.convert(b, self.amax)) + + def duplicate(self, a, b, gap=0): + self.bmax += b - a + gap + self.a = [(1, b), (a, self.amax)] + self.b = [(1, b), (b + gap, self.bmax)] + self.apatch = (self.convert(a, self.amax), self.convert(b, self.amax)) + self.bpatch = ( + self.convert(a, self.bmax), + self.convert(b, self.bmax), + self.convert(b + gap, self.bmax), + self.convert(2 * b - a + gap, self.bmax), + ) + self.bpatchcolor = "tomato" + + +class PairwiseAlign(BaseAlign): + def __init__(self, fig, xywh, xpad=0.15, ypad=0.15): + super().__init__(fig, xywh, xpad, ypad) + + def draw(self, width=0.03): + HorizontalChromosome( + self.ax, + self.xpad, + 1 - self.xpad, + self.ypad - 0.05, + height=width * 1.5, + patch=self.apatch, + lw=2, + ) + Chromosome( + self.ax, + self.xpad - 0.05, + self.ypad, + 1 - self.ypad, + width=width, + patch=self.bpatch, + patchcolor=self.bpatchcolor, + lw=2, + ) + for a, b in zip(self.a, self.b): + self.sax.plot(a, b, "-", color="darkslategrey", lw=2) + self.sax.set_xticklabels([]) + self.sax.set_yticklabels([]) + self.sax.set_xlim((1, self.amax)) + self.sax.set_ylim((1, self.bmax)) + normalize_axes(self.ax) + + +class ReadAlign(BaseAlign): + def __init__(self, fig, xywh, xpad=0.05, ypad=0.2, readlen=6, gap=3): + super().__init__(fig, xywh, xpad, ypad) + self.readlen = readlen + self.gap = gap + self.reads = [] + self.ymax = 12 + self.ntracks = 0 + self.layout(1, self.amax) + + def layout(self, start, end, maxtracks=8): + readrange = 2 * self.readlen + self.gap + end -= readrange + assert start < end, "end must be > start + readlen" + reads = [] + for x in range(100): + pos = randint(start, end) + reads.append(PairedRead(pos, readlen=self.readlen, gap=self.gap)) + reads, ntracks = self.arrange(reads, self.ntracks, maxtracks=maxtracks) + self.reads += reads + self.ntracks += ntracks + + def arrange(self, reads, 
ntracks, maxtracks=8): + track_ends = [0] + reads.sort(key=lambda x: x.start) + for r in reads: + m = min(track_ends) + mi = track_ends.index(m) + if r.start > m + 0.005: + track_ends[mi] = r.end + else: + if len(track_ends) >= maxtracks: + continue + track_ends.append(r.end) + mi = len(track_ends) - 1 + r.set_y(ntracks + mi) + ntracks = len(track_ends) + reads = [x for x in reads if x.y is not None] + return reads, ntracks + + def remove(self, a, b, maxtracks=0): + self.reads = [ + r + for r in self.reads + if not (a <= r.start <= b and a <= r.end <= b and r.y >= maxtracks) + ] + + def draw(self, width=0.03): + HorizontalChromosome( + self.ax, + self.xpad, + 1 - self.xpad, + self.ypad - width / 2, + height=width * 1.5, + patch=self.apatch, + lw=2, + ) + for r in self.reads: + r.draw(self.sax) + self.sax.set_xlim((1, self.amax)) + self.sax.set_ylim((-1, self.ymax)) + normalize_axes(self.ax) + self.sax.set_axis_off() + + def highlight(self, a, b): + self.apatch = (self.convert(a, self.amax), self.convert(b, self.amax)) + self.sax.plot((a, a), (-1, self.ntracks), "m-", lw=2) + self.sax.plot((b, b), (-1, self.ntracks), "m-", lw=2) + + def invert(self, a, b): + reads = [] + for r in self.reads: + r.set_y(None) + keep = True + if r.start < a < r.end or r.start < b < r.end: + adist, bdist = abs(a - r.mid), abs(b - r.mid) + flipr = r.r2 if adist > bdist else r.r1 + flipr.x1 = a + b - flipr.x1 + flipr.x2 = a + b - flipr.x2 + flipr.color = "y" + if adist > self.gap and bdist > self.gap: + keep = False + if keep: + reads.append(r) + self.reads, self.ntracks = self.arrange(reads, 0) + self.highlight(a, b) + + def delete(self, a, b): + self.remove(a, b) + for r in self.reads: + r.breakpoint(a, "g", "lightgrey") + r.breakpoint(b, "lightgrey", "g") + self.highlight(a, b) + + def duplicate(self, a, b, gap=0): + self.layout(1, self.amax, maxtracks=4) + self.remove(1, a, maxtracks=6) + self.remove(b, self.amax, maxtracks=6) + for r in self.reads: + r.paint(a, b, "tomato") + 
r.breakpoint(a, "k", "tomato") + r.breakpoint(b, "tomato", "k") + r.breakpoint(a, "lightgrey", "tomato", ystart=6) + r.breakpoint(b, "tomato", "lightgrey", ystart=6) + self.highlight(a, b) + + +class OpticalMapAlign(BaseAlign): + def __init__(self, fig, xywh, xpad=0.05, ypad=0.3): + super().__init__(fig, xywh, xpad, ypad) + om = self.from_silico() + self.om1 = OpticalMapTrack(self.sax, om) + self.om2 = OpticalMapTrack(self.sax, om, ystart=-3, color="orange") + + def from_silico(self, filename="Ecoli.silico", nfrags=25): + fp = open(filename) + next(fp) + ar = [0] + [int(x) for x in next(fp).split()] + sizes = [] # Only retain frags beyond certain size + for a, b in pairwise(ar): + size = b - a + if size < max(ar[:nfrags]) / 100: + continue + sizes.append(size) + + sizes = [choice(sizes) for x in range(nfrags)] + return sizes + + def draw(self): + self.om1.draw() + self.om2.draw() + self.sax.set_xlim(0, self.om1.amax) + self.sax.set_ylim(-8, 8) + normalize_axes(self.ax) + self.sax.set_axis_off() + + def invert(self, a, b): + ai, bi = self.om2.invert(a, b) + self.om1.highlight(ai, bi, "lightslategrey") + self.om2.highlight(ai, bi, "y", arrow_inverse=True) + + def delete(self, a, b): + ai, bi = self.om2.delete(a, b) + self.om1.highlight(ai, bi, "lightslategrey") + self.om2.highlight(ai, bi, None) + + def duplicate(self, a, b, gap=0): + (ai, bi), (ci, di) = self.om1.duplicate(a, b) + (ai, bi), (ci, di) = self.om2.duplicate(a, b) + self.om1.highlight(ai, bi, None) + self.om1.highlight(ci, di, "lightslategrey") + self.om2.highlight(ai, bi, "tomato") + self.om2.highlight(ci, di, "tomato") + + +class OpticalMapTrack(BaseGlyph): + def __init__(self, ax, sizes, ystart=0, color="darkslategrey", height=1, wiggle=3): + + super().__init__(ax) + self.ax = ax + self.sizes = sizes[:] + self.ystart = ystart + self.height = height + self.color = color + self.wiggle = wiggle + self.make_wiggles() + + def draw(self): + ar = self.ar + pad = self.pad + pads = 0 + for (a, b), w, color in 
zip(pairwise(ar), self.wiggles, self.colors): + yf = self.ystart + w * 1.0 / self.wiggle + if color: + p = Rectangle((a + pads, yf), b - a, self.height, color=color) + self.append(p) + pads += pad + self.add_patches() + + def get_endpoints(self, a, b, xmax=100): + ar = self.ar + a, b = max(ar) * a / xmax, max(ar) * b / xmax + return bisect(ar, a) - 1, bisect(ar, b) + + def invert(self, a, b): + ai, bi = self.get_endpoints(a, b) + bb = self.sizes[ai:bi] + self.sizes = self.sizes[:ai] + bb[::-1] + self.sizes[bi:] + return ai, bi + + def delete(self, a, b): + return self.get_endpoints(a, b) + + def duplicate(self, a, b): + ai, bi = self.get_endpoints(a, b) + ai += self.wiggle / 2 + bi += self.wiggle / 2 + ci, di = ai - self.wiggle, ai + bb = self.sizes[ai:bi] + bs = len(bb) + self.sizes = self.sizes[:ci] + bb + self.sizes[ci:] + self.make_wiggles() + return (ci, ci + bs), (di + bs, di + 2 * bs) + + def highlight(self, ai, bi, color, arrow_inverse=False): + self.colors[ai:bi] = [color] * (bi - ai) + ar = self.ar + a, b = ar[ai], ar[bi] + a += self.pad * (ai - 1) + b += self.pad * (bi - 1) + if self.ystart < 0: + yy = self.ystart - 2 + shape = "left" + else: + yy = self.ystart + 4 + shape = "right" + if arrow_inverse: + a, b = b, a + shape = "right" if shape == "left" else "left" + if not color: + return + p = FancyArrow( + a, + yy, + b - a, + 0, + fc=color, + lw=0, + shape=shape, + length_includes_head=True, + width=1, + head_length=abs(b - a) * 0.15, + head_width=3, + ) + self.ax.add_patch(p) + + @property + def amax(self): + return sum(self.sizes) + (self.length - 1) * self.pad + + @property + def length(self): + return len(self.sizes) + + @property + def ar(self): + cumsizes = [0] + for a in self.sizes: + cumsizes.append(cumsizes[-1] + a) + return cumsizes + + def make_wiggles(self): + ar = [self.wiggle / 2 + 1] + while len(ar) <= self.length: + ar += range(self.wiggle, 0, -1) + self.wiggles = ar[: self.length] + self.colors = [self.color] * self.length + ar = 
self.ar + self.pad = max(ar) / 100 + + +class SingleRead(object): + def __init__(self, start, readlen, sign=1): + self.x1 = start + self.x2 = start + sign * readlen + self.y = None + self.color = "k" + self.broken = None + + @property + def sign(self): + return 1 if self.x2 >= self.x1 else -1 + + @property + def start(self): + return min(self.x1, self.x2) + + @property + def end(self): + return max(self.x1, self.x2) + + @property + def span(self): + return self.end - self.start + 1 + + def draw(self, ax, height=0.6): + if self.broken is None: + GeneGlyph( + ax, + self.x1, + self.x2, + self.y, + height, + tip=2, + color=self.color, + gradient=True, + ) + else: + a, lcolor, rcolor = self.broken + if self.sign < 0: + lcolor, rcolor = rcolor, lcolor + GeneGlyph( + ax, self.x1, a, self.y, height, tip=0, color=lcolor, gradient=True + ) + GeneGlyph( + ax, a, self.x2, self.y, height, tip=2, color=rcolor, gradient=True + ) + + def breakpoint(self, a, lcolor, rcolor): + if a > self.end: + self.color = lcolor + elif a < self.start: + self.color = rcolor + else: + self.broken = (a, lcolor, rcolor) + + +class PairedRead(object): + def __init__(self, start, readlen, gap): + self.r1 = SingleRead(start, readlen) + self.r2 = SingleRead(start + gap + 2 * readlen, readlen, sign=-1) + self.color = "k" + self.y = None + + @property + def start(self): + return min(self.r1.start, self.r2.start) + + @property + def end(self): + return max(self.r1.end, self.r2.end) + + @property + def i1(self): + return min(self.r1.end, self.r2.end) + + @property + def i2(self): + return max(self.r1.start, self.r2.start) + + @property + def mid(self): + return (self.start + self.end) * 0.5 + + def set_y(self, y): + self.y = y + self.r1.y = self.r2.y = y + + def draw(self, ax): + self.r1.draw(ax) + self.r2.draw(ax) + ax.plot((self.i1, self.i2), (self.y, self.y), "-", color=self.color) + + def paint(self, a, b, color): + if range_overlap((0, self.start + 1, self.end - 1), (0, a, b)): + self.r1.color = 
self.r2.color = self.color = color + + def breakpoint(self, a, lcolor, rcolor, ystart=0): + if not self.start < a < self.end: + return + if self.y < ystart: + return + self.color = lcolor if a > self.mid else rcolor + self.r1.breakpoint(a, lcolor, rcolor) + self.r2.breakpoint(a, lcolor, rcolor) + + +def main(): + p = OptionParser(__doc__) + opts, args, iopts = p.set_image_options(figsize="9x7") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (mode,) = args + assert mode == "demo" + + a, b = 30, 70 + pad = 0.08 + w = 0.31 + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + # Row separators + yy = 1 - pad + for i in range(3): + root.plot((0, 1), (yy, yy), "-", lw=2, color="lightgray") + yy -= w + + # Row headers + xx = pad * 0.6 + yy = 1 - pad - 0.5 * w + for title in ("Inversion", "Indel", "Duplication"): + root.text(xx, yy, title, ha="center", va="center") + yy -= w + + # Column headers + xx = pad + 0.5 * w + yy = 1 - pad / 2 + for title in ("Assembly alignment", "Read alignment", "Optical map alignment"): + root.text(xx, yy, title, ha="center", va="center") + xx += w + + p = PairwiseAlign(fig, [pad, 2 * w, w, w]) + p.invert(a, b) + p.draw() + + p = PairwiseAlign(fig, [pad, w, w, w]) + p.delete(a, b) + p.draw() + + p = PairwiseAlign(fig, [pad, 0, w, w]) + p.duplicate(a, b, gap=5) + p.draw() + + p = ReadAlign(fig, [pad + w, 2 * w, w, w]) + p.invert(a, b) + p.draw() + + p = ReadAlign(fig, [pad + w, w, w, w]) + p.delete(a, b) + p.draw() + + p = ReadAlign(fig, [pad + w, 0, w, w]) + p.duplicate(a, b) + p.draw() + + p = OpticalMapAlign(fig, [pad + 2 * w, 2 * w, w, w]) + p.invert(a, b) + p.draw() + + p = OpticalMapAlign(fig, [pad + 2 * w, w, w, w]) + p.delete(a, b) + p.draw() + + p = OpticalMapAlign(fig, [pad + 2 * w, 0, w, w]) + p.duplicate(a, b) + p.draw() + + normalize_axes(root) + + image_name = mode + "." 
def covlen(args):
    """
    %prog covlen covfile fastafile

    Plot coverage vs length. `covfile` is two-column listing contig id and
    depth of coverage.
    """
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from jcvi.formats.base import DictFile

    p = OptionParser(covlen.__doc__)
    p.add_argument("--maxsize", default=1000000, type=int, help="Max contig size")
    # The help text used to be a copy of the --maxsize one
    p.add_argument("--maxcov", default=100, type=int, help="Max contig coverage")
    p.add_argument("--color", default="m", help="Color of the data points")
    p.add_argument(
        "--kind",
        default="scatter",
        choices=("scatter", "reg", "resid", "kde", "hex"),
        help="Kind of plot to draw",
    )
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 2:
        sys.exit(not p.print_help())

    covfile, fastafile = args
    cov = DictFile(covfile, cast=float)
    s = Sizes(fastafile)
    maxsize, maxcov = opts.maxsize, opts.maxcov

    # Keep only the contigs that fall within both plotting limits; contigs
    # missing from covfile are treated as having zero coverage.
    data = []
    for ctg, size in s.iter_sizes():
        c = cov.get(ctg, 0)
        if size > maxsize or c > maxcov:
            continue
        data.append((size, c))

    if not data:
        # zip(*[]) cannot be unpacked into x, y; fail with a readable message
        logger.error(
            "No contigs within --maxsize=%d and --maxcov=%d", maxsize, maxcov
        )
        sys.exit(1)

    x, y = zip(*data)
    x = np.array(x)
    y = np.array(y)
    logger.debug("X size {0}, Y size {1}".format(x.size, y.size))

    xlab, ylab = "Length", "Coverage of depth (X)"
    df = pd.DataFrame({xlab: x, ylab: y})
    # x/y must be passed by keyword: newer seaborn takes `data` as the first
    # positional argument. `stat_func` is gone from seaborn and None was its
    # default, so it is simply omitted.
    sns.jointplot(
        x=xlab,
        y=ylab,
        kind=opts.kind,
        data=df,
        xlim=(0, maxsize),
        ylim=(0, maxcov),
        edgecolor="w",
        color=opts.color,
    )

    figname = covfile + ".pdf"
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
+ """ + from jcvi.formats.bed import mates, bedpe + + p = OptionParser(coverage.__doc__) + p.add_argument("--ymax", default=None, type=int, help="Limit ymax") + p.add_argument( + "--spans", + default=False, + action="store_true", + help="BED files already contain clone spans", + ) + opts, args, iopts = p.set_image_options(args, figsize="8x5") + + if len(args) < 3: + sys.exit(not p.print_help()) + + fastafile, ctg = args[0:2] + bedfiles = args[2:] + + sizes = Sizes(fastafile) + size = sizes.mapping[ctg] + + plt.figure(1, (iopts.w, iopts.h)) + ax = plt.gca() + + bins = 100 # smooth the curve + lines = [] + legends = [] + not_covered = [] + yy = 0.9 + for bedfile, c in zip(bedfiles, "rgbcky"): + if not opts.spans: + pf = bedfile.rsplit(".", 1)[0] + matesfile = pf + ".mates" + if need_update(bedfile, matesfile): + matesfile, matesbedfile = mates([bedfile, "--lib"]) + + bedspanfile = pf + ".spans.bed" + if need_update(matesfile, bedspanfile): + bedpefile, bedspanfile = bedpe( + [bedfile, "--span", "--mates={0}".format(matesfile)] + ) + bedfile = bedspanfile + + bedsum = Bed(bedfile).sum(seqid=ctg) + notcoveredbases = size - bedsum + + legend = bedfile.split(".")[0] + msg = "{0}: {1} bp not covered".format(legend, thousands(notcoveredbases)) + not_covered.append(msg) + print(msg, file=sys.stderr) + ax.text(0.1, yy, msg, color=c, size=9, transform=ax.transAxes) + yy -= 0.08 + + cov = Coverage(bedfile, sizes.filename) + x, y = cov.get_plot_data(ctg, bins=bins) + (line,) = ax.plot(x, y, "-", color=c, lw=2, alpha=0.5) + lines.append(line) + legends.append(legend) + + leg = ax.legend(lines, legends, shadow=True, fancybox=True) + leg.get_frame().set_alpha(0.5) + + ylabel = "Average depth per {0}Kb".format(size / bins / 1000) + ax.set_xlim(0, size) + ax.set_ylim(0, opts.ymax) + ax.set_xlabel(ctg) + ax.set_ylabel(ylabel) + set_human_base_axis(ax) + + figname = "{0}.{1}.pdf".format(fastafile, ctg) + savefig(figname, dpi=iopts.dpi, iopts=iopts) + + +def scaffolding(ax, scaffoldID, 
def scaffold(args):
    """
    %prog scaffold scaffold.fasta synteny.blast synteny.sizes synteny.bed
                         physicalmap.blast physicalmap.sizes physicalmap.bed

    As evaluation of scaffolding, visualize external line of evidences:
    * Plot synteny to an external genome
    * Plot alignments to physical map
    * Plot alignments to genetic map (TODO)

    Each trio defines one panel to be plotted. blastfile defines the matchings
    between the evidences vs scaffolds. Then the evidence sizes, and evidence
    bed to plot dot plots.

    This script will plot a dot in the dot plot in the corresponding location
    the plots are one contig/scaffold per plot.
    """
    from more_itertools import grouper

    p = OptionParser(scaffold.__doc__)
    p.add_argument(
        "--cutoff",
        type=int,
        default=1000000,
        help="Plot scaffolds with size larger than",
    )
    p.add_argument(
        "--highlights",
        help="A set of regions in BED format to highlight",
    )
    opts, args, iopts = p.set_image_options(args, figsize="14x8", dpi=150)

    # One scaffold FASTA followed by complete (blast, sizes, bed) trios
    if len(args) < 4 or len(args) % 3 != 1:
        sys.exit(not p.print_help())

    highlights = opts.highlights
    scafsizes = Sizes(args[0])
    trios = list(grouper(args[1:], 3))
    trios = [(a, Sizes(b), Bed(c)) for a, b, c in trios]
    hlbed = Bed(highlights) if highlights else None

    for scaffoldID, scafsize in scafsizes.iter_sizes():
        if scafsize < opts.cutoff:
            continue
        logger.debug("Loading {0} (size={1})".format(scaffoldID, thousands(scafsize)))

        # Write a one-line sizes file so that a Sizes object holding only the
        # current scaffold can be built from it.
        tmpname = scaffoldID + ".sizes"
        with open(tmpname, "w") as tmp:
            tmp.write("{0}\t{1}".format(scaffoldID, scafsize))

        tmpsizes = Sizes(tmpname)
        tmpsizes.close(clean=True)

        # Without --highlights nothing is highlighted. This name used to be
        # left unbound in that case, which raised UnboundLocalError below.
        subhighlights = list(hlbed.sub_bed(scaffoldID)) if hlbed else None

        imagename = ".".join((scaffoldID, opts.format))
        plot_one_scaffold(
            scaffoldID,
            tmpsizes,
            None,
            trios,
            imagename,
            iopts,
            highlights=subhighlights,
        )
+ """ + from jcvi.graphics.glyph import Bezier + + p = OptionParser(qc.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(p.print_help()) + + (prefix,) = args + scf = prefix + + # All these files *must* be present in the current folder + fastafile = prefix + ".fasta" + sizesfile = prefix + ".sizes" + gapsbedfile = prefix + ".gaps.bed" + bedfile = prefix + ".bed" + bedpefile = prefix + ".bedpe" + pairsbedfile = prefix + ".pairs.bed" + + sizes = Sizes(fastafile).mapping + size = sizes[scf] + + fig = plt.figure(1, (8, 5)) + root = fig.add_axes([0, 0, 1, 1]) + + # the scaffold + root.add_patch(Rectangle((0.1, 0.15), 0.8, 0.03, fc="k")) + + # basecoverage and matecoverage + ax = fig.add_axes([0.1, 0.45, 0.8, 0.45]) + + bins = 200 # Smooth the curve + basecoverage = Coverage(bedfile, sizesfile) + matecoverage = Coverage(pairsbedfile, sizesfile) + + x, y = basecoverage.get_plot_data(scf, bins=bins) + (baseline,) = ax.plot(x, y, "g-") + x, y = matecoverage.get_plot_data(scf, bins=bins) + (mateline,) = ax.plot(x, y, "r-") + legends = ("Base coverage", "Mate coverage") + leg = ax.legend((baseline, mateline), legends, shadow=True, fancybox=True) + leg.get_frame().set_alpha(0.5) + ax.set_xlim(0, size) + + # draw the read pairs + fp = open(bedpefile) + pairs = [] + for row in fp: + scf, astart, aend, scf, bstart, bend, clonename = row.split() + astart, bstart = int(astart), int(bstart) + aend, bend = int(aend), int(bend) + start = min(astart, bstart) + 1 + end = max(aend, bend) + pairs.append((start, end)) + + bpratio = 0.8 / size + cutoff = 1000 # inserts smaller than this are not plotted + # this convert from base => x-coordinate + pos = lambda x: (0.1 + x * bpratio) + ypos = 0.15 + 0.03 + for start, end in pairs: + dist = end - start + + if dist < cutoff: + continue + + dist = min(dist, 10000) + # 10Kb == .25 canvas height + height = 0.25 * dist / 10000 + xstart = pos(start) + xend = pos(end) + p0 = (xstart, ypos) + p1 = (xstart, ypos + height) + p2 
def A50(args):
    """
    %prog A50 contigs_A.fasta contigs_B.fasta ...

    Plots A50 graphics, see blog post (http://blog.malde.org/index.php/a50/)
    """
    p = OptionParser(A50.__doc__)
    p.add_argument(
        "--overwrite",
        default=False,
        action="store_true",
        help="overwrite .rplot file if exists",
    )
    p.add_argument(
        "--cutoff",
        default=0,
        type=int,
        dest="cutoff",
        help="use contigs above certain size",
    )
    p.add_argument(
        "--stepsize",
        default=10,
        type=int,
        dest="stepsize",
        help="stepsize for the distribution",
    )
    opts, args = p.parse_args(args)

    if not args:
        # Same non-zero usage exit status as the other actions in this module
        sys.exit(not p.print_help())

    import numpy as np
    from jcvi.utils.table import loadtable

    stepsize = opts.stepsize  # use stepsize to speed up drawing
    rplot = "A50.rplot"
    if not op.exists(rplot) or opts.overwrite:
        header = "\t".join(("index", "cumsize", "fasta"))
        statsheader = ("Fasta", "L50", "N50", "Min", "Max", "Average", "Sum", "Counts")
        statsrows = []
        # The context manager closes the file even if a FASTA fails to load
        with open(rplot, "w") as fw:
            print(header, file=fw)
            for fastafile in args:
                f = Fasta(fastafile, index=False)
                ctgsizes = np.array([length for _, length in f.itersizes()])

                a50, l50, n50 = calculate_A50(ctgsizes, cutoff=opts.cutoff)
                cmin, cmax, cmean = min(ctgsizes), max(ctgsizes), np.mean(ctgsizes)
                csum, counts = np.sum(ctgsizes), len(ctgsizes)
                cmean = int(round(cmean))
                statsrows.append((fastafile, l50, n50, cmin, cmax, cmean, csum, counts))

                logger.debug("`{0}` ctgsizes: {1}".format(fastafile, ctgsizes))

                tag = "{0} (L50={1})".format(
                    op.basename(fastafile).rsplit(".", 1)[0], l50
                )
                logger.debug(tag)

                # Sample every `stepsize`-th point; cumulative size is in Mb
                for i, s in zip(range(0, len(a50), stepsize), a50[::stepsize]):
                    print("\t".join((str(i), str(s / 1000000.0), tag)), file=fw)

        # The statistics only exist when the .rplot file was (re)generated,
        # so the table is reported inside this branch.
        table = loadtable(statsheader, statsrows)
        print(table, file=sys.stderr)

    generate_plot(rplot)
class TextHandler(object):
    """Pick font sizes from the rendered height of a sample label.

    `heights` holds (height, fontsize) pairs, one per probed font size, in
    the order they were measured.
    """

    def __init__(self, fig, usetex: bool = True):
        self.heights = []
        try:
            self.build_height_array(fig, usetex=usetex)
        except ValueError as e:
            logger.debug(
                "Failed to init heights (error: %s). Variable label sizes skipped.", e
            )

    @classmethod
    def get_text_width_height(cls, fig, txt="chr01", size=12, usetex: bool = True):
        """Return (width, height) of `txt` after mapping its extents through
        the inverse of `fig.transFigure`."""
        path = mpl.textpath.TextPath((0, 0), txt, size=size, usetex=usetex)
        box = path.get_extents()
        to_figure = fig.transFigure.inverted()
        left, bottom = to_figure.transform((box.xmin, box.ymin))
        right, top = to_figure.transform((box.xmax, box.ymax))
        return right - left, top - bottom

    def build_height_array(self, fig, start=1, stop=36, usetex: bool = True):
        """Measure the sample label at each font size in [start, stop]."""
        fontsize = start
        while fontsize <= stop:
            _, text_height = TextHandler.get_text_width_height(
                fig, size=fontsize, usetex=usetex
            )
            self.heights.append((text_height, fontsize))
            fontsize += 1

    def select_fontsize(self, height, minsize=1, maxsize=12):
        """Return the font size whose measured height precedes `height` in
        the table, capped at `maxsize`.

        When no heights were measured, fall back to `maxsize` for heights
        above 0.01 and `minsize` otherwise.
        """
        from bisect import bisect_left

        if not self.heights:
            if height > 0.01:
                return maxsize
            return minsize

        # (height,) sorts before any (height, size) pair with an equal height
        idx = bisect_left(self.heights, (height,))
        if idx == 0:
            chosen = minsize
        else:
            chosen = self.heights[idx - 1][1]
        return min(chosen, maxsize)
def adjust_extent(extent: Extent, root_extent: Extent) -> Extent:
    """
    Map `extent`, given as (x, y, width, height) fractions of the root axes,
    into the coordinate system in which `root_extent` itself is expressed.
    """
    root_x, root_y, root_width, root_height = root_extent
    frac_x, frac_y, frac_width, frac_height = extent
    # Offsets are shifted by the root origin; sizes are only scaled
    left = root_x + frac_x * root_width
    bottom = root_y + frac_y * root_height
    width = frac_width * root_width
    height = frac_height * root_height
    return left, bottom, width, height
def shorten(s, maxchar=20, mid="..."):
    """Abbreviate `s` to `maxchar` characters by replacing its middle with `mid`.

    The string is returned unchanged when it already fits, or when `mid`
    alone would use up the whole `maxchar` budget.
    """
    budget = maxchar - len(mid)  # characters of `s` that can be kept
    if len(s) <= maxchar or budget <= 0:
        return s
    head = budget // 2
    tail = budget - head  # the tail receives the odd character, if any
    return s[:head] + mid + s[-tail:]
def normalize_axes(*axes):
    """
    Put every given axes on the unit square and hide its decorations.
    """
    unit_interval = (0, 1)
    for canvas in axes:
        canvas.set_xlim(*unit_interval)
        canvas.set_ylim(*unit_interval)
        canvas.set_axis_off()
def savefig(figname, dpi=150, iopts=None, cleanup=True):
    """Save the current pyplot figure to `figname`, exiting on failure.

    Args:
        figname: Output path. Its extension selects the output format; names
            without an extension are written as PDF.
        dpi (int, optional): Resolution passed to `plt.savefig`. Defaults to 150.
        iopts (optional): When given, its string form is appended to the
            "Figure saved" debug message.
        cleanup (bool, optional): Reset matplotlib rc settings to their
            defaults after saving. Defaults to True.

    On any exception raised while saving, the error is logged, an output file
    smaller than 1000 bytes is removed, and the process exits with status 1.
    """
    # splitext only inspects the last path component, so a dot in a directory
    # name ("out.d/figure") is not mistaken for an extension.
    try:
        ext = op.splitext(figname)[1]
    except TypeError:
        # Not a str/path-like target: fall back to the default format
        ext = ""
    fmt = ext.lstrip(".").lower() or "pdf"

    try:
        logger.debug("Matplotlib backend is: %s", mpl.get_backend())
        plt.savefig(figname, dpi=dpi, format=fmt)
    except Exception as e:
        logger.error("savefig failed with message:\n%s", e)
        logger.info("Try running again with --notex option to disable latex.")
        # A failed save may leave a truncated file behind; drop it
        if op.exists(figname) and op.getsize(figname) < 1000:
            logger.debug("Cleaning up empty file: %s", figname)
            remove(figname)
        sys.exit(1)

    msg = f"Figure saved to `{figname}`"
    if iopts:
        msg += f" {iopts}"
    logger.debug(msg)

    if cleanup:
        plt.rcdefaults()
def markup(s: str):
    """
    Change the string to latex format, and italicize the text between *.

    The string is returned untouched when `text.usetex` is off or when it
    contains a `$`. Each `*...*` pair is italicized on its own, so a label
    such as `*A. thaliana* vs *O. sativa*` yields two separate `\\textit{}`
    groups.
    """
    if not rcParams["text.usetex"]:
        return s
    if "$" in s:
        return s
    s = latex(s)
    # Non-greedy: a greedy `.*` would swallow everything between the first and
    # the last asterisk, merging several italic spans into one.
    s = re.sub(r"\*(.*?)\*", r"\\textit{\1}", s)
    return s
def asciiplot(x, y, digit=1, width=50, title=None, char="="):
    """
    Print out a horizontal plot using ASCII chars.
    width is the textwidth (height) of the plot.

    One row is written to stderr per (x, y) pair; the bar of the largest y
    value is `width` characters long and the others are scaled against it.
    """
    ax = np.array(x)
    ay = np.array(y)

    if title:
        # The placeholder was missing, so the title itself was never printed
        print("[bold white]{}".format(title), file=sys.stderr)

    az = ay * width // ay.max()
    tx = [asciiaxis(x, digit=digit) for x in ax]
    rjust = max([len(x) for x in tx]) + 1

    for x, y, z in zip(tx, ay, az):
        x = x.rjust(rjust)
        y = y or ""
        # int() keeps the repeat count valid when y holds floats
        z = "[green]{}".format(char * int(z))
        print("{} | {} {}".format(x, z, y), file=sys.stderr)
+ + Args: + ax (pyplot.axes): Matplotlib axis + M (np.array): 2D numpy-array + breaks (List[int]): Positions of chromosome starts. Can be None. + iopts (OptionParser options): Graphical options passed in from commandline + groups (List, optional): [(start, end, [(position, seqid)], color)]. Defaults to []. + plot_breaks (bool): Whether to plot white breaks. Defaults to False. + cmap (str | Colormap, optional): Colormap. Defaults to None, which uses cubehelix. + binsize (int, optional): Resolution of the heatmap. + """ + cmap = cmap or sns.cubehelix_palette(rot=0.5, as_cmap=True) + ax.imshow(M, cmap=cmap, interpolation="none") + _, xmax = ax.get_xlim() + xlim = (0, xmax) + if plot_breaks: + for b in breaks[:-1]: + ax.plot([b, b], xlim, "w-") + ax.plot(xlim, [b, b], "w-") + + def simplify_seqid(seqid): + seqid = seqid.replace("_", "") + if seqid[:3].lower() == "chr": + seqid = seqid[3:] + return seqid.lstrip("0") + + for start, end, position_seqids, color in groups: + # Plot a square + ax.plot([start, start], [start, end], "-", color=color) + ax.plot([start, end], [start, start], "-", color=color) + ax.plot([start, end], [end, end], "-", color=color) + ax.plot([end, end], [start, end], "-", color=color) + for position, seqid in position_seqids: + seqid = simplify_seqid(seqid) + ax.text(position, end, seqid, ha="center", va="top") + + ax.set_xlim(xlim) + ax.set_ylim((xlim[1], xlim[0])) # Flip the y-axis so the origin is at the top + ax.set_xticklabels(ax.get_xticks(), family="Helvetica", color="gray") + ax.set_yticklabels(ax.get_yticks(), family="Helvetica", color="gray", rotation=90) + ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True) + if binsize is not None: + formatter = ticker.FuncFormatter( + lambda x, pos: human_readable(int(x) * binsize, pos, base=True) + ) + ax.xaxis.set_major_formatter(formatter) + ax.yaxis.set_major_formatter(formatter) + title = f"Resolution = {human_size(binsize, precision=0)} per bin" + ax.set_xlabel(title) + + 
def discrete_rainbow(N=7, cmap=cm.Set1, usepreset=True, shuffle=False, plot=False):
    """
    Return a discrete colormap and the set of colors.

    modified from

    cmap: colormap instance, eg. cm.jet.
    N: Number of colors.

    Example
    >>> x = resize(arange(100), (5,100))
    >>> djet = cmap_discretize(cm.jet, 5)
    >>> imshow(x, cmap=djet)

    See available matplotlib colormaps at:

    If N>20 the sampled colors might not be very distinctive.
    If you want to error and try anyway, set usepreset=False
    """
    import random
    from scipy import interpolate

    if usepreset:
        if 0 < N <= 5:
            cmap = cm.gist_rainbow
        elif N <= 20:
            cmap = cm.Set1
        else:
            sys.exit(discrete_rainbow.__doc__)

    # NOTE(review): `_segmentdata` is a private attribute and is assumed to
    # exist on the chosen colormap - confirm for cm.Set1 on the matplotlib
    # version in use.
    cdict = cmap._segmentdata.copy()
    # N colors
    colors_i = np.linspace(0, 1.0, N)
    # N+1 indices
    indices = np.linspace(0, 1.0, N + 1)
    rgbs = []
    for key in ("red", "green", "blue"):
        # Find the N colors
        D = np.array(cdict[key])
        I = interpolate.interp1d(D[:, 0], D[:, 1])
        colors = I(colors_i)
        rgbs.append(colors)
        # Place these colors at the correct indices.
        A = np.zeros((N + 1, 3), float)
        A[:, 0] = indices
        A[1:, 1] = colors
        A[:-1, 2] = colors
        # Create a tuple for the dictionary.
        cdict[key] = tuple(tuple(row) for row in A)

    # Materialize the palette: a bare zip() iterator cannot be shuffled in
    # place, has no len() for print_colors(), and would be exhausted after a
    # single pass by the caller.
    palette = list(zip(*rgbs))

    if shuffle:
        random.shuffle(palette)

    if plot:
        print_colors(palette)

    # Return (colormap object, RGB tuples)
    return mpl.colors.LinearSegmentedColormap("colormap", cdict, 1024), palette
"%.1f" % v, ha="center", va="top", size=10) + + +def write_messages(ax, messages: List[str], ypad: float = 0.04): + """ + Write text on canvas, usually on the top right corner. + """ + tc = "gray" + axt = ax.transAxes + yy = 0.95 + for msg in messages: + ax.text(0.95, yy, markup(msg), color=tc, transform=axt, ha="right") + yy -= ypad + + +def quickplot_ax( + ax, + data, + xmin, + xmax, + xlabel, + title=None, + ylabel="Counts", + counts=True, + percentage=True, + highlight=None, +): + # TODO: redundant with quickplot(), need to be refactored. + if percentage: + total_length = sum(data.values()) + data = dict((k, v * 100.0 / total_length) for (k, v) in data.items()) + + left, height = zip(*sorted(data.items())) + pad = max(height) * 0.01 + c1, c2 = "darkslategray", "tomato" + if counts: + for l, h in zip(left, height): + if xmax and l > xmax: + break + tag = str(int(h)) + rotation = 90 + if percentage: + tag = append_percentage(tag) if int(tag) > 0 else "" + rotation = 0 + color = c1 + if highlight is not None and l in highlight: + color = c2 + ax.text( + l, + h + pad, + tag, + color=color, + size=8, + ha="center", + va="bottom", + rotation=rotation, + ) + if xmax is None: + xmax = max(left) + + ax.bar(left, height, align="center", fc=c1) + if highlight: + for h in highlight: + ax.bar([h], [data[h]], align="center", ec=c2, fc=c2) + + ax.set_xlabel(markup(xlabel)) + if ylabel: + ax.set_ylabel(markup(ylabel)) + if title: + ax.set_title(markup(title)) + ax.set_xlim((xmin - 0.5, xmax + 0.5)) + if percentage: + ax.set_ylim(0, 100) + + +def quickplot( + data, + xmin, + xmax, + xlabel, + title, + ylabel="Counts", + figname="plot.pdf", + counts=True, + print_stats=True, +): + """ + Simple plotting function - given a dictionary of data, produce a bar plot + with the counts shown on the plot. 
+ """ + plt.figure(1, (6, 6)) + left, height = zip(*sorted(data.items())) + pad = max(height) * 0.01 + if counts: + for l, h in zip(left, height): + if xmax and l > xmax: + break + plt.text( + l, + h + pad, + str(h), + color="darkslategray", + size=8, + ha="center", + va="bottom", + rotation=90, + ) + if xmax is None: + xmax = max(left) + + plt.bar(left, height, align="center") + plt.xlabel(markup(xlabel)) + plt.ylabel(markup(ylabel)) + plt.title(markup(title)) + plt.xlim((xmin - 0.5, xmax + 0.5)) + + # Basic statistics + messages = [] + counts_over_xmax = sum([v for k, v in data.items() if k > xmax]) + if counts_over_xmax: + messages += ["Counts over xmax({0}): {1}".format(xmax, counts_over_xmax)] + kk = [] + for k, v in data.items(): + kk += [k] * v + messages += ["Total: {0}".format(np.sum(height))] + messages += ["Maximum: {0}".format(np.max(kk))] + messages += ["Minimum: {0}".format(np.min(kk))] + messages += ["Average: {0:.2f}".format(np.mean(kk))] + messages += ["Median: {0}".format(np.median(kk))] + ax = plt.gca() + if print_stats: + write_messages(ax, messages) + + set_human_axis(ax) + set_ticklabels_helvetica(ax) + savefig(figname) diff --git a/jcvi/graphics/blastplot.py b/jcvi/graphics/blastplot.py new file mode 100755 index 00000000..998278fb --- /dev/null +++ b/jcvi/graphics/blastplot.py @@ -0,0 +1,345 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +%prog blastfile --qsizes query.sizes --ssizes subject.sizes + +Visualize the blastfile in a dotplot. At least one of --qsizes and --qbed must +be specified, also at least one of --ssizes and --sbed. The --sizes options help +to define the molecule border as well as the drawing order. The --bed options +help to position names maker (e.g. genes) onto the dot plot. So depending on +whether you are BLASTing raw sequences or makers, you need to place --sizes or +--bed options. 
+""" + +import os.path as op +import sys + +from random import sample + +import numpy as np + +from ..apps.base import OptionParser, logger +from ..formats.base import is_number +from ..formats.bed import Bed, BedLine +from ..formats.blast import BlastLine +from ..formats.sizes import Sizes + +from .base import Rectangle, plt, savefig, set_human_base_axis + + +DotStyles = ("line", "circle", "dot") + + +def rename_seqid(seqid): + seqid = seqid.split("_")[-1] + seqid = seqid.replace("supercont", "s") + seqid = seqid.replace("contig", "c").replace("scaffold", "s") + return "c{}".format(int(seqid)) if is_number(seqid, int) else seqid + + +def blastplot( + ax, + blastfile, + qsizes, + ssizes, + qbed, + sbed, + style="dot", + sampleN=None, + baseticks=False, + insetLabels=False, + stripNames=False, + highlights=None, +): + + assert style in DotStyles + fp = open(blastfile) + + qorder = qbed.order if qbed else None + sorder = sbed.order if sbed else None + + data = [] + + for row in fp: + b = BlastLine(row) + query, subject = b.query, b.subject + + if stripNames: + query = query.rsplit(".", 1)[0] + subject = subject.rsplit(".", 1)[0] + + if qorder: + if query not in qorder: + continue + qi, q = qorder[query] + query = q.seqid + qstart, qend = q.start, q.end + else: + qstart, qend = b.qstart, b.qstop + + if sorder: + if subject not in sorder: + continue + si, s = sorder[subject] + subject = s.seqid + sstart, send = s.start, s.end + else: + sstart, send = b.sstart, b.sstop + + qi = qsizes.get_position(query, qstart) + qj = qsizes.get_position(query, qend) + si = ssizes.get_position(subject, sstart) + sj = ssizes.get_position(subject, send) + + if None in (qi, si): + continue + data.append(((qi, qj), (si, sj))) + + if sampleN: + if len(data) > sampleN: + data = sample(data, sampleN) + + if not data: + return logger.error("no blast data imported") + + xsize, ysize = qsizes.totalsize, ssizes.totalsize + logger.debug("xsize=%d ysize=%d" % (xsize, ysize)) + + if style == "line": 
+ for a, b in data: + ax.plot(a, b, "ro-", mfc="w", mec="r", ms=3) + else: + data = [(x[0], y[0]) for x, y in data] + x, y = zip(*data) + + if style == "circle": + ax.plot(x, y, "mo", mfc="w", mec="m", ms=3) + elif style == "dot": + ax.scatter(x, y, s=3, lw=0) + + xlim = (0, xsize) + ylim = (ysize, 0) # invert the y-axis + + xchr_labels, ychr_labels = [], [] + ignore = True # tag to mark whether to plot chr name (skip small ones) + ignore_size_x = ignore_size_y = 0 + + # plot the chromosome breaks + logger.debug("xbreaks={0} ybreaks={1}".format(len(qsizes), len(ssizes))) + for seqid, beg, end in qsizes.get_breaks(): + ignore = abs(end - beg) < ignore_size_x + if ignore: + continue + seqid = rename_seqid(seqid) + + xchr_labels.append((seqid, (beg + end) / 2, ignore)) + ax.plot([end, end], ylim, "-", lw=1, color="grey") + + for seqid, beg, end in ssizes.get_breaks(): + ignore = abs(end - beg) < ignore_size_y + if ignore: + continue + seqid = rename_seqid(seqid) + + ychr_labels.append((seqid, (beg + end) / 2, ignore)) + ax.plot(xlim, [end, end], "-", lw=1, color="grey") + + # plot the chromosome labels + for label, pos, ignore in xchr_labels: + if not ignore: + if insetLabels: + ax.text(pos, 0, label, size=8, ha="center", va="top", color="grey") + else: + pos = 0.1 + pos * 0.8 / xsize + root.text( + pos, + 0.91, + label, + size=10, + ha="center", + va="bottom", + rotation=45, + color="grey", + ) + + # remember y labels are inverted + for label, pos, ignore in ychr_labels: + if not ignore: + if insetLabels: + continue + pos = 0.9 - pos * 0.8 / ysize + root.text(0.91, pos, label, size=10, va="center", color="grey") + + # Highlight regions based on a list of BedLine + qhighlights = shighlights = None + if highlights: + if isinstance(highlights[0], BedLine): + shighlights = highlights + elif len(highlights) == 2: + qhighlights, shighlights = highlights + + if qhighlights: + for hl in qhighlights: + hls = qsizes.get_position(hl.seqid, hl.start) + 
ax.add_patch(Rectangle((hls, 0), hl.span, ysize, fc="r", alpha=0.2, lw=0)) + if shighlights: + for hl in shighlights: + hls = ssizes.get_position(hl.seqid, hl.start) + ax.add_patch(Rectangle((0, hls), xsize, hl.span, fc="r", alpha=0.2, lw=0)) + + if baseticks: + + def increaseDensity(a, ratio=4): + assert len(a) > 1 + stepsize = a[1] - a[0] + newstepsize = int(stepsize / ratio) + return np.arange(0, a[-1], newstepsize) + + # Increase the density of the ticks + xticks = ax.get_xticks() + yticks = ax.get_yticks() + xticks = increaseDensity(xticks, ratio=2) + yticks = increaseDensity(yticks, ratio=2) + ax.set_xticks(xticks) + + # Plot outward ticklines + for pos in xticks[1:]: + if pos > xsize: + continue + pos = 0.1 + pos * 0.8 / xsize + root.plot((pos, pos), (0.08, 0.1), "-", color="grey", lw=2) + + for pos in yticks[1:]: + if pos > ysize: + continue + pos = 0.9 - pos * 0.8 / ysize + root.plot((0.09, 0.1), (pos, pos), "-", color="grey", lw=2) + + ax.set_xlim(xlim) + ax.set_ylim(ylim) + + # beautify the numeric axis + for tick in ax.get_xticklines() + ax.get_yticklines(): + tick.set_visible(False) + + set_human_base_axis(ax) + + plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="gray", size=10) + plt.setp(ax.get_yticklabels(), rotation=90) + + +if __name__ == "__main__": + + from jcvi.formats.bed import sizes + + p = OptionParser(__doc__) + p.add_argument("--qsizes", help="Path to two column qsizes file") + p.add_argument("--ssizes", help="Path to two column ssizes file") + p.add_argument("--qbed", help="Path to qbed") + p.add_argument("--sbed", help="Path to sbed") + p.add_argument( + "--qselect", + default=0, + type=int, + help="Minimum size of query contigs to select", + ) + p.add_argument( + "--sselect", + default=0, + type=int, + help="Minimum size of subject contigs to select", + ) + p.add_argument("--qh", help="Path to highlight bed for query") + p.add_argument("--sh", help="Path to highlight bed for subject") + p.add_argument( + "--dotstyle", + 
default="dot", + choices=DotStyles, + help="Style of the dots", + ) + p.add_argument( + "--proportional", + default=False, + action="store_true", + help="Make image width:height equal to seq ratio", + ) + p.add_argument( + "--stripNames", + default=False, + action="store_true", + help="Remove trailing .? from gene names", + ) + p.add_argument( + "--nmax", + default=None, + type=int, + help="Only plot maximum of N dots", + ) + opts, args, iopts = p.set_image_options(figsize="8x8", style="dark", dpi=150) + + qsizes, ssizes = opts.qsizes, opts.ssizes + qbed, sbed = opts.qbed, opts.sbed + proportional = opts.proportional + + if len(args) != 1: + sys.exit(not p.print_help()) + + if qbed: + qsizes = qsizes or sizes([qbed]) + qbed = Bed(qbed) + if sbed: + ssizes = ssizes or sizes([sbed]) + sbed = Bed(sbed) + + assert qsizes and ssizes, "You must specify at least one of --sizes of --bed" + + qsizes = Sizes(qsizes, select=opts.qselect) + ssizes = Sizes(ssizes, select=opts.sselect) + + (blastfile,) = args + + image_name = op.splitext(blastfile)[0] + "." 
+ opts.format + plt.rcParams["xtick.major.pad"] = 16 + plt.rcParams["ytick.major.pad"] = 16 + + # Fix the width + xsize, ysize = qsizes.totalsize, ssizes.totalsize + + # get highlight beds + qh, sh = opts.qh, opts.sh + qh = Bed(qh) if qh else None + sh = Bed(sh) if sh else None + highlights = (qh, sh) if qh or sh else None + + ratio = ysize * 1.0 / xsize if proportional else 1 + width = iopts.w + height = iopts.h * ratio + fig = plt.figure(1, (width, height)) + root = fig.add_axes([0, 0, 1, 1]) # the whole canvas + ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) # the dot plot + + blastplot( + ax, + blastfile, + qsizes, + ssizes, + qbed, + sbed, + style=opts.dotstyle, + sampleN=opts.nmax, + baseticks=True, + stripNames=opts.stripNames, + highlights=highlights, + ) + + # add genome names + to_ax_label = lambda fname: op.basename(fname).split(".")[0] + gx, gy = [to_ax_label(x.filename) for x in (qsizes, ssizes)] + ax.set_xlabel(gx, size=16) + ax.set_ylabel(gy, size=16) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + savefig(image_name, dpi=iopts.dpi, iopts=iopts) diff --git a/jcvi/graphics/chromosome.py b/jcvi/graphics/chromosome.py new file mode 100644 index 00000000..6aa6fada --- /dev/null +++ b/jcvi/graphics/chromosome.py @@ -0,0 +1,730 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Legacy script to plot distribution of certain classes onto chromosomes. Adapted +from the script used in the Tang et al. PNAS 2010 paper, sigma figure. 
+""" +import sys + +from itertools import groupby +from math import ceil +from typing import Optional, Tuple + +import numpy as np + +from natsort import natsorted + +from ..apps.base import OptionParser, datafile, logger, sample_N +from ..formats.base import DictFile, get_number +from ..formats.bed import Bed +from ..formats.sizes import Sizes + +from .base import ( + CirclePolygon, + Polygon, + Rectangle, + latex, + markup, + normalize_axes, + plt, + savefig, + set1_n, + set3_n, +) +from .glyph import BaseGlyph, plot_cap + + +class Chromosome(BaseGlyph): + # Chromosome styles: rect - rectangle, roundrect - rounded rectangle, auto - + # automatically pick the best style + Styles = ("auto", "rect", "roundrect") + + def __init__( + self, + ax, + x, + y1, + y2, + width=0.015, + ec="k", + patch=None, + patchcolor="lightgrey", + lw=1, + zorder=2, + ): + """ + Chromosome with positions given in (x, y1) => (x, y2) + + The chromosome can also be patched, e.g. to show scaffold composition in + alternating shades. Use a list of starting locations to segment. 
+ """ + y1, y2 = sorted((y1, y2)) + super().__init__(ax) + pts, r = self.get_pts(x, y1, y2, width) + self.append(Polygon(pts, fill=False, lw=lw, ec=ec, zorder=zorder)) + if patch: + rr = r * 0.9 # Shrink a bit for the patches + # First patch is colored if there is an even number of patches, otherwise not colored + start = len(patch) % 2 + for i in range(start, len(patch), 2): + if i + 1 > len(patch) - 1: + continue + p1, p2 = patch[i], patch[i + 1] + self.append( + Rectangle((x - rr, p1), 2 * rr, p2 - p1, lw=0, fc=patchcolor) + ) + + self.add_patches() + + def get_pts(self, x, y1, y2, width): + w = width / 2 + r = width / (3**0.5) + + pts = [] + pts += plot_cap((x, y1 + r), np.radians(range(210, 330)), r) + pts += [[x + w, y1 + r / 2], [x + w, y2 - r / 2]] + pts += plot_cap((x, y2 - r), np.radians(range(30, 150)), r) + pts += [[x - w, y2 - r / 2], [x - w, y1 + r / 2]] + + return pts, r + + +class HorizontalChromosome(BaseGlyph): + def __init__( + self, + ax, + x1, + x2, + y, + height=0.015, + ec="k", + patch=None, + patchcolor="lightgrey", + lw=1, + fc=None, + zorder=2, + style="auto", + ): + """ + Horizontal version of the Chromosome glyph above. 
+ """ + assert style in Chromosome.Styles, f"Unknown style `{style}`" + + x1, x2 = sorted((x1, x2)) + super().__init__(ax) + pts, r = self.get_pts(x1, x2, y, height, style=style) + self.append(Polygon(pts, fill=False, lw=lw, ec=ec, zorder=zorder + 1)) + + if fc: + pts, r = self.get_pts(x1, x2, y, height / 2, style=style) + self.append(Polygon(pts, fc=fc, lw=0, zorder=zorder)) + if patch: + rr = r * 0.9 # Shrink a bit for the patches + # First patch is colored if there is an even number of patches, otherwise not colored + start = len(patch) % 2 + for i in range(start, len(patch), 2): + if i + 1 > len(patch) - 1: + continue + p1, p2 = patch[i], patch[i + 1] + self.append( + Rectangle((p1, y - rr), p2 - p1, 2 * rr, lw=0, fc=patchcolor) + ) + + self.add_patches() + + def get_pts(self, x1, x2, y, height, style="auto") -> Tuple[list, float]: + h = height / 2 + r = height / (3**0.5) + + if style == "rect" or ( + style == "auto" and x2 - x1 < 2 * height + ): # rectangle for small chromosomes + return [[x1, y + h], [x1, y - h], [x2, y - h], [x2, y + h]], r + + pts = [] + pts += plot_cap((x1 + r, y), np.radians(range(120, 240)), r) + pts += [[x1 + r / 2, y - h], [x2 - r / 2, y - h]] + pts += plot_cap((x2 - r, y), np.radians(range(-60, 60)), r) + pts += [[x2 - r / 2, y + h], [x1 + r / 2, y + h]] + + return pts, r + + +class ChromosomeWithCentromere(object): + def __init__(self, ax, x, y1, y2, y3, width=0.015, fc="k", fill=False, zorder=2): + """ + Chromosome with centromeres at y2 position + """ + pts = [] + r = width * 0.5 + pts += plot_cap((x, y1 - r), np.radians(range(180)), r) + pts += [[x - r, y1 - r], [x - r, y2 + r]] + pts += plot_cap((x, y2 + r), np.radians(range(180, 360)), r) + pts += [[x + r, y2 + r], [x + r, y1 - r]] + ax.add_patch(Polygon(pts, fc=fc, fill=fill, zorder=zorder)) + pts = [] + pts += plot_cap((x, y2 - r), np.radians(range(180)), r) + pts += [[x - r, y2 - r], [x - r, y3 + r]] + pts += plot_cap((x, y3 + r), np.radians(range(180, 360)), r) + pts += [[x 
+ r, y3 + r], [x + r, y2 - r]] + ax.add_patch(Polygon(pts, fc=fc, fill=fill, zorder=zorder)) + ax.add_patch( + CirclePolygon((x, y2), radius=r * 0.5, fc="k", ec="k", zorder=zorder) + ) + + +class ChromosomeMap(object): + """ + Line plots along the chromosome. + """ + + def __init__( + self, + fig, + root, + xstart, + xend, + ystart, + yend, + pad, + ymin, + ymax, + bins, + title, + subtitle, + patchstart=None, + ): + width, height = xend - xstart, yend - ystart + + y = ystart - pad + HorizontalChromosome(root, xstart, xend, y, patch=patchstart, height=0.03) + + # Gauge + lsg = "lightslategrey" + root.plot( + [xstart - pad, xstart - pad], [ystart, ystart + height], lw=2, color=lsg + ) + root.plot([xend + pad, xend + pad], [ystart, ystart + height], lw=2, color=lsg) + root.text( + (xstart + xend) / 2, + ystart + height + 2 * pad, + title, + ha="center", + va="center", + color=lsg, + ) + + iv = (ymax - ymin) / bins + iv_height = height / bins + val = ymin + yy = ystart + while val <= ymax: + root.text(xstart - 2 * pad, yy, str(val), ha="right", va="center", size=10) + val += iv + yy += iv_height + + root.text( + (xstart + xend) / 2, y - 0.05, subtitle, ha="center", va="center", color=lsg + ) + + self.axes = fig.add_axes([xstart, ystart, width, height]) + + +class GeneticMap(BaseGlyph): + def __init__( + self, ax, x, y1, y2, markers, unit="cM", tip=0.008, fc="k", flip=False + ): + super().__init__(ax) + # tip = length of the ticks + y1, y2 = sorted((y1, y2)) + ax.plot([x, x], [y1, y2], "-", color=fc, lw=2) + _, max_chr_len = max(markers, key=lambda x: x[-1]) + r = y2 - y1 + ratio = r / max_chr_len + marker_pos = {} + for marker_name, cm in markers: + yy = (y1 + ratio * cm) if flip else (y2 - ratio * cm) + ax.plot((x - tip, x + tip), (yy, yy), "-", color=fc) + marker_pos[marker_name] = yy + self.marker_pos = marker_pos + + t = tip / 2 + end_cm_labels = ( + ((y2 + t, max_chr_len, "bottom"), (y1 - t, 0, "top")) + if flip + else ((y2 + t, 0, "bottom"), (y1 - t, 
max_chr_len, "top")) + ) + for yy, cm, va in end_cm_labels: + label = "{0} {1}".format(int(cm), unit) + ax.text(x, yy, label, color="gray", va=va, ha="center") + + +class Gauge(BaseGlyph): + def __init__( + self, + ax, + x, + y1, + y2, + max_chr_len, + step=1e6, + tip=0.008, + extra=0.006, + fc="lightslategray", + ): + """ + Args: + ax (matplotlib.Axes): axes + x (float): x position + y1 (float): y start position + y2 (float): y end position + max_chr_len (int): maximum chromosome size + step (int): step to show the ticks + tip (float): length of the ticks + extra (float): offset for the unit label + fc (str): face color of the glyph + """ + super().__init__(ax) + ax.plot([x, x], [y1, y2], "-", color=fc, lw=2) + r = y2 - y1 + yy = y2 + gauge = int(ceil(max_chr_len / step)) + ratio = r / max_chr_len + yinterval = 2 * ratio * step + for g in range(0, gauge, 2): + if g % 10: + ax.plot((x, x + tip), (yy, yy), "-", color=fc) + else: + ax.plot((x - tip, x + tip), (yy, yy), "-", color=fc, lw=2) + ax.text(x + tip + extra, yy, g, color="gray", va="center") + yy -= yinterval + ax.text(x, yy - 0.03, "Mb", color="gray", va="center") + + +def canvas2px(coord, dmn, dpi): + """ + Convert matplotlib canvas coordinate to pixels + """ + return int(round(coord * dmn * dpi)) + + +def write_ImageMapLine(tlx, tly, brx, bry, w, h, dpi, chr, segment_start, segment_end): + """ + Write out an image map area line with the coordinates passed to this + function + + """ + tlx, brx = [canvas2px(x, w, dpi) for x in (tlx, brx)] + tly, bry = [canvas2px(y, h, dpi) for y in (tly, bry)] + chr, _ = chr.split(":") + return ( + '" + ) + + +def get_hg38_chromsizes(filename=datafile("hg38.chrom.sizes")): + chromsizes = DictFile(filename) + chromsizes = dict((k, int(v)) for k, v in chromsizes.items()) + return chromsizes + + +def get_color(tag): + if "neg" in tag: + return "w", 1 + if "acen" in tag: + return "r", 1 + try: + alpha = int(tag[4:]) * 1.0 / 100 + except: + return "w", 1 + return "k", alpha + + 
+def draw_cytoband( + ax, chrom, filename=datafile("hg38.band.txt"), ymid=0.5, width=0.99, height=0.11 +): + import pandas as pd + + bands = pd.read_csv(filename, sep="\t") + chrombands = bands[bands["#chrom"] == chrom] + data = [] + for _, (chr, start, end, name, gie) in chrombands.iterrows(): + data.append((chr, start, end, name, gie)) + chromsize = max(x[2] for x in data) + scale = width * 1.0 / chromsize + xstart, ystart = (1 - width) / 2, ymid - height / 2 + bp_to_pos = lambda x: xstart + x * scale + in_acen = False + for chr, start, end, name, gie in data: + color, alpha = get_color(gie) + bplen = end - start + if "acen" in gie: + if in_acen: + xys = [ + (bp_to_pos(start), ymid), + (bp_to_pos(end), ystart), + (bp_to_pos(end), ystart + height), + ] + else: + xys = [ + (bp_to_pos(start), ystart), + (bp_to_pos(start), ystart + height), + (bp_to_pos(end), ymid), + ] + p = Polygon(xys, closed=True, ec="k", fc=color, alpha=alpha) + in_acen = True + else: + p = Rectangle( + (bp_to_pos(start), ystart), + bplen * scale, + height, + ec="k", + fc=color, + alpha=alpha, + ) + # print bp_to_pos(end) + ax.add_patch(p) + ax.text( + bp_to_pos((start + end) / 2), + ymid + height * 0.8, + name, + rotation=40, + color="lightslategray", + ) + + ax.text(0.5, ystart - height, chrom, size=16, ha="center", va="center") + + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + ax.set_axis_off() + + +def main(): + """ + %prog bedfile id_mappings + + Takes a bedfile that contains the coordinates of features to plot on the + chromosomes, and `id_mappings` file that map the ids to certain class. Each + class will get assigned a unique color. `id_mappings` file is optional (if + omitted, will not paint the chromosome features, except the centromere). + + The extent of the chromosomes are given by --sizes, which contains + chrsize, one per line. If not specified, the extent of the chromosomes + are assumed to be the end for the last feature, which might be an underestimate. 
+ """ + + p = OptionParser(main.__doc__) + p.add_argument( + "--sizes", help="FASTA sizes file, which contains chrsize, one per line" + ) + g = p.add_argument_group("Display accessories") + g.add_argument( + "--title", + help="title of the image", + ) + g.add_argument( + "--gauge", + default=False, + action="store_true", + help="draw a gauge with size label", + ) + + g = p.add_argument_group("HTML image map") + g.add_argument( + "--imagemap", + default=False, + action="store_true", + help="generate an HTML image map associated with the image", + ) + g.add_argument( + "--winsize", + default=50000, + type=int, + help="if drawing an imagemap, specify the window size (bases) of each map element ", + ) + + g = p.add_argument_group("Color legend") + g.add_argument( + "--nolegend", + dest="legend", + default=True, + action="store_false", + help="Do not generate color legend", + ) + g.add_argument( + "--mergedist", default=0, type=int, help="Merge regions closer than " + ) + g.add_argument("--empty", help="Write legend for unpainted region") + + opts, args, iopts = p.set_image_options(figsize="6x6", dpi=300) + + if len(args) not in (1, 2): + sys.exit(p.print_help()) + + bedfile = args[0] + mappingfile = None + if len(args) == 2: + mappingfile = args[1] + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + + draw_chromosomes( + root, + bedfile, + sizes=opts.sizes, + iopts=iopts, + mergedist=opts.mergedist, + winsize=opts.winsize, + imagemap=opts.imagemap, + mappingfile=mappingfile, + gauge=opts.gauge, + legend=opts.legend, + empty=opts.empty, + title=opts.title, + ) + + normalize_axes(root) + + prefix = bedfile.rsplit(".", 1)[0] + figname = prefix + "." 
+ opts.format + savefig(figname, dpi=iopts.dpi, iopts=iopts) + + +def draw_chromosomes( + root, + bedfile, + sizes, + iopts, + mergedist: int, + winsize: int, + imagemap: bool = False, + mappingfile: Optional[str] = None, + gauge: bool = False, + legend: bool = True, + empty: bool = False, + title: Optional[str] = None, +): + bed = Bed(bedfile) + prefix = bedfile.rsplit(".", 1)[0] + + if imagemap: + imgmapfile = prefix + ".map" + mapfh = open(imgmapfile, "w") + print('', file=mapfh) + + if mappingfile: + mappings = DictFile(mappingfile, delimiter="\t") + classes = sorted(set(mappings.values())) + preset_colors = ( + DictFile(mappingfile, keypos=1, valuepos=2, delimiter="\t") + if DictFile.num_columns(mappingfile) >= 3 + else {} + ) + else: + classes = sorted(set(x.accn for x in bed)) + mappings = dict((x, x) for x in classes) + preset_colors = {} + + logger.debug("A total of %d classes found: %s", len(classes), ",".join(classes)) + + # Assign colors to classes + ncolors = max(3, min(len(classes), 12)) + palette = set1_n if ncolors <= 8 else set3_n + colorset = palette(number=ncolors) + colorset = sample_N(colorset, len(classes), seed=iopts.seed) + class_colors = dict(zip(classes, colorset)) + class_colors.update(preset_colors) + logger.debug("Assigned colors: %s", class_colors) + + chr_lens = {} + centromeres = {} + if sizes: + chr_lens = Sizes(sizes).sizes_mapping + else: + for b, blines in groupby(bed, key=lambda x: x.seqid): + blines = list(blines) + maxlen = max(x.end for x in blines) + chr_lens[b] = maxlen + + for b in bed: + accn = b.accn + if accn == "centromere": + centromeres[b.seqid] = b.start + if accn in mappings: + b.accn = mappings[accn] + else: + b.accn = "-" + + chr_number = len(chr_lens) + if centromeres: + assert chr_number == len( + centromeres + ), "chr_number = {}, centromeres = {}".format(chr_number, centromeres) + + r = 0.7 # width and height of the whole chromosome set + xstart, ystart = 0.15, 0.85 + xinterval = r / chr_number + xwidth = 
xinterval * 0.5 # chromosome width + max_chr_len = max(chr_lens.values()) + ratio = r / max_chr_len # canvas / base + + # first the chromosomes + chr_locations = {} + for a, (chr, clen) in enumerate(natsorted(chr_lens.items())): + xx = xstart + a * xinterval + 0.5 * xwidth + chr_locations[chr] = xx + root.text(xx, ystart + 0.01, str(get_number(chr)), ha="center") + if centromeres: + yy = ystart - centromeres[chr] * ratio + ChromosomeWithCentromere( + root, xx, ystart, yy, ystart - clen * ratio, width=xwidth + ) + else: + Chromosome(root, xx, ystart, ystart - clen * ratio, width=xwidth) + + alpha = 1 + # color the regions + for chr in sorted(chr_lens.keys()): + excess = 0 + bac_list = [] + prev_end, prev_klass = 0, None + xx = chr_locations[chr] - 0.5 * xwidth + for b in bed.sub_bed(chr): + klass = b.accn + if klass == "centromere": + continue + start = b.start + end = b.end + if start < prev_end + mergedist and klass == prev_klass: + start = prev_end + yystart = ystart - end * ratio + yyend = ystart - start * ratio + root.add_patch( + Rectangle( + (xx, yystart), + xwidth, + yyend - yystart, + fc=class_colors.get(klass, "lightslategray"), + lw=0, + alpha=alpha, + ) + ) + prev_end, prev_klass = b.end, klass + + if imagemap: + # `segment` : size of current BAC being investigated + `excess` + # `excess` : left-over bases from the previous BAC, as a result of + # iterating over `winsize` regions of `segment` + if excess == 0: + segment_start = start + segment = (end - start + 1) + excess + while True: + if segment < winsize: + bac_list.append(b.accn) + excess = segment + break + segment_end = segment_start + winsize - 1 + tlx, tly, brx, bry = ( + xx, + (1 - ystart) + segment_start * ratio, + xx + xwidth, + (1 - ystart) + segment_end * ratio, + ) + print( + "\t" + + write_ImageMapLine( + tlx, + tly, + brx, + bry, + iopts.w, + iopts.h, + iopts.dpi, + chr + ":" + ",".join(bac_list), + segment_start, + segment_end, + ), + file=mapfh, + ) + + segment_start += winsize + 
segment -= winsize + bac_list = [] + + if imagemap and excess > 0: + bac_list.append(b.accn) + segment_end = end + tlx, tly, brx, bry = ( + xx, + (1 - ystart) + segment_start * ratio, + xx + xwidth, + (1 - ystart) + segment_end * ratio, + ) + print( + "\t" + + write_ImageMapLine( + tlx, + tly, + brx, + bry, + iopts.w, + iopts.h, + iopts.dpi, + chr + ":" + ",".join(bac_list), + segment_start, + segment_end, + ), + file=mapfh, + ) + + if imagemap: + print("", file=mapfh) + mapfh.close() + logger.debug("Image map written to `%s`", mapfh.name) + + if gauge: + xstart, ystart = 0.9, 0.85 + Gauge(root, xstart, ystart - r, ystart, max_chr_len) + + if "centromere" in class_colors: + del class_colors["centromere"] + + # class legends, four in a row + if legend: + xstart = 0.1 + xinterval = 0.8 / len(class_colors) + xwidth = 0.04 + yy = 0.08 + for klass, cc in sorted(class_colors.items()): + if klass == "-": + continue + root.add_patch( + Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0, alpha=alpha) + ) + root.text(xstart + xwidth + 0.01, yy, latex(klass), fontsize=10) + xstart += xinterval + + if empty: + root.add_patch(Rectangle((xstart, yy), xwidth, xwidth, fill=False, lw=1)) + root.text(xstart + xwidth + 0.01, yy, empty, fontsize=10) + + if title: + root.text(0.5, 0.95, markup(title), ha="center", va="center") + + +if __name__ == "__main__": + main() diff --git a/jcvi/graphics/coverage.py b/jcvi/graphics/coverage.py new file mode 100644 index 00000000..05179642 --- /dev/null +++ b/jcvi/graphics/coverage.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +%prog chrC01 chr.sizes data + +Read coverage histogram, similar to wiggle plot. Data contains all the track +data in the form of tab-delimited (x, y) lists. 
+""" + +import os.path as op +import sys + +import numpy as np + +from ..apps.base import OptionParser, glob, logger +from ..formats.sizes import Sizes + +from .base import ( + Rectangle, + adjust_spines, + get_map, + mb_float_formatter, + mb_formatter, + plt, + savefig, +) + + +class XYtrack(object): + def __init__(self, ax, datafile, color=None, ymax=40): + self.ax = ax + self.xy = [] + fp = open(datafile) + for row in fp: + atoms = row.split() + self.xy.append([int(atoms[0]), float(atoms[1])]) + fp.close() + + self.x, self.y = zip(*self.xy) + logger.debug("File `{0}` imported (records={1}).".format(datafile, len(self.x))) + self.color = color or "k" + self.ymax = ymax + + @property + def mapping(self): + return dict(zip(self.x, self.y)) + + def interpolate(self, maxsize, unit=10000): + maxsize = int(maxsize) + for pos in range(unit, maxsize + unit, unit): + if pos in self.x: + continue + self.xy.append([pos, 0]) + self.xy.sort() + self.x, self.y = zip(*self.xy) + logger.debug("After interpolate: {0}".format(len(self.x))) + + def cap(self, ymax): + self.xy = [[a, 0] if b > ymax else [a, b] for a, b in self.xy] + self.x, self.y = zip(*self.xy) + + def draw(self): + ax = self.ax + color = self.color + ax.plot(self.x, self.y, lw=0) + ax.fill_between(self.x, self.y, color=color, lw=0) + ax.set_ylim(0, self.ymax) + ax.set_axis_off() + + def import_hlfile(self, hlfile, chr, unit=10000, diverge=("r", "g")): + rr, gg = diverge + fp = open(hlfile) + imported = 0 + mapping = self.mapping + for row in fp: + if row.strip() == "": + continue + seqid, start, end, tag = row.split() + if seqid != chr: + continue + start = int(start) + end = int(end) + if tag == "double": + self.highlight(mapping, start, end, color=rr, unit=unit) + else: + self.highlight(mapping, start, end, color=gg, unit=unit) + imported += 1 + logger.debug("Imported {0} regions from file `{1}`.".format(imported, hlfile)) + + def highlight(self, mapping, start, end, color="r", unit=10000, zorder=10): + ax = 
self.ax + x = range(start, end + unit, unit) + y = [mapping[z] for z in x] + # Mask the highlight region so that they don't appear in background + for a in self.xy: + if start <= a[0] <= end: + a[1] = 0 + self.x, self.y = zip(*self.xy) + ax.plot(x, y, lw=0) + ax.fill_between(x, y, color=color, lw=0, zorder=zorder) + + def vlines(self, xs, color="m"): + for x in xs: + self.ax.plot((x, x), (0, self.ymax), "-", color=color, lw=2) + + +class Coverage(object): + def __init__( + self, + fig, + root, + canvas, + chr, + xlim, + datadir, + order=None, + hlsuffix=None, + palette=None, + cap=50, + gauge="bottom", + plot_label=True, + plot_chr_label=True, + gauge_step=5000000, + vlines=None, + labels_dict={}, + diverge=("r", "g"), + ): + x, y, w, h = canvas + p = 0.01 + root.add_patch( + Rectangle( + (x - p, y - p), + w + 2 * p, + h + 2 * p, + lw=1, + fill=False, + ec="darkslategray", + zorder=10, + ) + ) + datafiles = glob(op.join(datadir, chr + "*")) + + if order: + datafiles = [z for z in datafiles if z.split(".")[1] in order] + datafiles.sort(key=lambda x: order.index(x.split(".")[1])) + + ntracks = len(datafiles) + yinterval = h / ntracks + yy = y + h + + if palette is None: + # Get the palette + set2 = get_map("Set2", "qualitative", ntracks).mpl_colors + else: + set2 = [palette] * ntracks + + if gauge == "top": + gauge_ax = fig.add_axes([x, yy + p, w, 0.0001]) + adjust_spines(gauge_ax, ["top"]) + tpos = yy + 0.07 + elif gauge == "bottom": + gauge_ax = fig.add_axes([x, y - p, w, 0.0001]) + adjust_spines(gauge_ax, ["bottom"]) + tpos = y - 0.07 + + start, end = xlim + if gauge: + fs = gauge_step < 1000000 + setup_gauge_ax(gauge_ax, start, end, gauge_step, float_formatter=fs) + + if plot_chr_label: + root.text( + x + w / 2, + tpos, + chr, + ha="center", + va="center", + color="darkslategray", + size=16, + ) + + yys = [] + for label, datafile, c in zip(order, datafiles, set2): + yy -= yinterval + yys.append(yy) + ax = fig.add_axes([x, yy, w, yinterval * 0.9]) + xy = 
XYtrack(ax, datafile, color=c) + xy.interpolate(end) + xy.cap(ymax=cap) + if vlines: + xy.vlines(vlines) + if hlsuffix: + hlfile = op.join(datadir, ".".join((label, hlsuffix))) + xy.import_hlfile(hlfile, chr, diverge=diverge) + if plot_label: + label = labels_dict.get(label, label.capitalize()) + label = r"\textit{{{0}}}".format(label) + root.text(x - 0.015, yy + yinterval / 2, label, ha="right", va="center") + xy.draw() + ax.set_xlim(*xlim) + + self.yys = yys + + +def setup_gauge_ax(gauge_ax, start, end, gauge_step, float_formatter=False): + gauge_ax.set_xlim(start, end) + formatter = mb_float_formatter if float_formatter else mb_formatter + gauge_ax.xaxis.set_major_formatter(formatter) + gauge_ax.xaxis.set_ticks(np.arange(start + gauge_step, end, gauge_step)) + gauge_ax.yaxis.set_ticks([]) + + +def main(): + p = OptionParser(__doc__) + p.add_argument("--order", help="The order to plot the tracks, comma-separated") + opts, args, iopts = p.set_image_options() + + if len(args) != 3: + sys.exit(not p.print_help()) + + chr, sizes, datadir = args + order = opts.order + hlsuffix = opts.hlsuffix + if order: + order = order.split(",") + sizes = Sizes(sizes) + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + canvas = (0.12, 0.35, 0.8, 0.35) + chr_size = sizes.get_size(chr) + Coverage( + fig, root, canvas, chr, (0, chr_size), datadir, order=order, hlsuffix=hlsuffix + ) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + image_name = chr + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +if __name__ == "__main__": + main() diff --git a/jcvi/graphics/dotplot.py b/jcvi/graphics/dotplot.py new file mode 100755 index 00000000..59dbbf16 --- /dev/null +++ b/jcvi/graphics/dotplot.py @@ -0,0 +1,549 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +%prog [anchorfile|ksfile] --qbed query.bed --sbed subject.bed + +visualize the anchorfile in a dotplot. 
anchorfile contains two columns +indicating gene pairs, followed by an optional column (e.g. Ks value). + +The option --colormap specifies the block color to highlight certain blocks in +a file. Block ids are 1-based (non-digit chars will be removed). For example, below +requests that block 1 is class 'sigma' and block 2 is class 'tau'. + +1 sigma +2 tau +3 tau + +These classes will be mapped to auto-assigned colors and figure legend added to +the bottom of the figure. + +*Important* + +Before running this script it is recommended to check/install +TeX Live (http://www.tug.org/texlive/) and +Ghostscript (http://www.ghostscript.com/) +see more here: http://matplotlib.sourceforge.net/users/usetex.html +""" + +import os.path as op +import string +import sys + +from copy import deepcopy +from random import sample +from typing import Optional + +from ..apps.base import OptionParser, logger, need_update +from ..compara.base import AnchorFile +from ..compara.synteny import batch_scan, check_beds, get_orientation +from ..utils.cbook import seqid_parse, thousands + +from .base import ( + Rectangle, + TextHandler, + draw_cmap, + latex, + markup, + normalize_axes, + plt, + savefig, + set_human_axis, + set1, +) + + +class Palette(dict): + def __init__(self, palettedict=None, palettefile=None): + """Instantiate a palette to map from block_id to color + + Args: + palettedict (Dict, optional): Get the mapping from a dict. Defaults to None. + palettefile (str, optional): Get the mapping from a two-column file. Defaults to None. 
+ """ + super().__init__() + if palettedict is not None: + self.update(palettedict) + if palettefile is None: + return + + pal = "rbcygmk" + + fp = open(palettefile) + for row in fp: + a, b = row.split() + a = "".join(x for x in a if x in string.digits) + a = int(a) + self[a] = b + + self.categories = sorted(set(self.values())) + self.colors = dict(zip(self.categories, pal)) + + logger.debug( + "Color info ({0} categories) imported for {1} blocks.".format( + len(self.colors), len(self) + ) + ) + logger.debug(str(self.colors)) + + for k, v in self.items(): # Update from categories to colors + self[k] = self.colors[v] + + @classmethod + def from_block_orientation( + cls, anchorfile, qbed, sbed, forward_color="#e7298a", reverse_color="#3690c0" + ): + """Generate a palette which contains mapping from block_id (1-based) to colors. + + Args: + anchorfile (str): Path to the .anchors file + qbed (BedFile): Query BED + sbed (BedFile): Subject BED + forward_color (str, optional): Color of forward block. Defaults to "#e7298a". + reverse_color (str, optional): Color of reverse block. Defaults to "#3690c0". 
+ """ + ac = AnchorFile(anchorfile) + blocks = ac.blocks + palette = {} + qorder = qbed.order + sorder = sbed.order + + for i, block in enumerate(blocks): + block_id = i + 1 + + a, b, _ = zip(*block) + a = [qorder[x] for x in a] + b = [sorder[x] for x in b] + ia, _ = zip(*a) + ib, _ = zip(*b) + + orientation = get_orientation(ia, ib) + palette[block_id] = reverse_color if orientation == "-" else forward_color + return cls(palettedict=palette) + + +def draw_box(clusters, ax, color="b"): + for cluster in clusters: + xrect, yrect = zip(*cluster) + xmin, xmax, ymin, ymax = min(xrect), max(xrect), min(yrect), max(yrect) + ax.add_patch( + Rectangle( + (xmin, ymin), xmax - xmin, ymax - ymin, ec=color, fc="y", alpha=0.5 + ) + ) + + +def plot_breaks_and_labels( + fig, + root, + ax, + gx, + gy, + xsize, + ysize, + qbreaks, + sbreaks, + sep=True, + chrlw=0.1, + sepcolor="g", + minfont=5, + stdpf=True, + chpf=True, + usetex: bool = True, +): + xlim = (0, xsize) + ylim = (ysize, 0) # invert the y-axis + + # Tag to mark whether to plot chr name (skip small ones) + xchr_labels, ychr_labels = [], [] + th = TextHandler(fig, usetex=usetex) + + # plot the chromosome breaks + for seqid, beg, end in qbreaks: + xsize_ratio = abs(end - beg) * 0.8 / xsize + fontsize = th.select_fontsize(xsize_ratio) + if chpf: + seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2]) + + xchr_labels.append((seqid, (beg + end) / 2, fontsize)) + if sep: + ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor) + + for seqid, beg, end in sbreaks: + ysize_ratio = abs(end - beg) * 0.8 / ysize + fontsize = th.select_fontsize(ysize_ratio) + if chpf: + seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2]) + + ychr_labels.append((seqid, (beg + end) / 2, fontsize)) + if sep: + ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor) + + # plot the chromosome labels + for label, pos, fontsize in xchr_labels: + pos = 0.1 + pos * 0.8 / xsize + if fontsize >= minfont: + root.text( + pos, + 0.91, + latex(label), + 
size=fontsize, + ha="center", + va="bottom", + rotation=45, + color="grey", + ) + + # remember y labels are inverted + for label, pos, fontsize in ychr_labels: + pos = 0.9 - pos * 0.8 / ysize + if fontsize >= minfont: + root.text(0.91, pos, latex(label), size=fontsize, va="center", color="grey") + + # Plot the frame + ax.plot(xlim, [0, 0], "-", lw=chrlw, color=sepcolor) + ax.plot(xlim, [ysize, ysize], "-", lw=chrlw, color=sepcolor) + ax.plot([0, 0], ylim, "-", lw=chrlw, color=sepcolor) + ax.plot([xsize, xsize], ylim, "-", lw=chrlw, color=sepcolor) + + ax.set_xlim(xlim) + ax.set_ylim(ylim) + + ax.set_xlabel(gx, size=16) + ax.set_ylabel(gy, size=16) + + # beautify the numeric axis + for tick in ax.get_xticklines() + ax.get_yticklines(): + tick.set_visible(False) + + set_human_axis(ax) + + plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="gray", size=10) + + return xlim, ylim + + +def downsample(data, sample_number: int = 10000): + """ + Downsample the data to a manageable size for plotting. + """ + npairs = len(data) + # Only show random subset + if npairs > sample_number: + logger.debug( + "Showing a random subset of %d data points (total %d) for clarity.", + sample_number, + npairs, + ) + data = sample(data, sample_number) + return data + + +def dotplot( + anchorfile: str, + qbed, + sbed, + fig, + root, + ax, + vmin: float = 0, + vmax: float = 1, + is_self: bool = False, + synteny: bool = False, + cmap_text: Optional[str] = None, + cmap="copper", + genomenames=None, + sample_number: int = 10000, + minfont: int = 5, + palette: Optional[Palette] = None, + chrlw: float = 0.1, + title: Optional[str] = None, + sep: bool = True, + sepcolor: str = "g", + stdpf: bool = True, + chpf: bool = True, + usetex: bool = True, +): + """ + Draw a dotplot from an anchor file. 
+ """ + fp = open(anchorfile, encoding="utf-8") + # add genome names + if genomenames: + gx, gy = genomenames.split("_") + else: + to_ax_label = lambda fname: op.basename(fname).split(".")[0] + gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)] + + # Stylize the axis labels + gx, gy = markup(gx), markup(gy) + + qorder = qbed.order + sorder = sbed.order + + data = [] + if cmap_text: + logger.debug("Capping values within [%.1f, %.1f]", vmin, vmax) + + block_id = 0 + block_color = None + for row in fp: + atoms = row.split() + if row[0] == "#": + block_id += 1 + block_color = palette.get(block_id, "k") if palette else None + continue + + # first two columns are query and subject, and an optional third column + if len(atoms) < 2: + continue + + query, subject = atoms[:2] + value = atoms[-1] + + if cmap_text: + try: + value = float(value) + except ValueError: + value = vmax + + if value < vmin: + continue + if value > vmax: + continue + else: + value = 0 + + if query not in qorder: + continue + if subject not in sorder: + continue + + qi, q = qorder[query] + si, s = sorder[subject] + + nv = block_color or value + data.append((qi, si, nv)) + if is_self: # Mirror image + data.append((si, qi, nv)) + + npairs = len(data) + data = downsample(data, sample_number=sample_number) + x, y, c = zip(*data) + + if palette: + ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0) + else: + ax.scatter( + x, y, c=c, edgecolors="none", s=2, lw=0, cmap=cmap, vmin=vmin, vmax=vmax + ) + + if synteny: + clusters = batch_scan(data, qbed, sbed) + draw_box(clusters, ax) + + if cmap_text: + draw_cmap(root, cmap_text, vmin, vmax, cmap=cmap) + + xsize, ysize = len(qbed), len(sbed) + logger.debug("xsize=%d ysize=%d", xsize, ysize) + qbreaks = qbed.get_breaks() + sbreaks = sbed.get_breaks() + xlim, _ = plot_breaks_and_labels( + fig, + root, + ax, + gx, + gy, + xsize, + ysize, + qbreaks, + sbreaks, + sep=sep, + chrlw=chrlw, + sepcolor=sepcolor, + minfont=minfont, + stdpf=stdpf, + chpf=chpf, + 
usetex=usetex, + ) + + # create a diagonal to separate mirror image for self comparison + if is_self: + ax.plot(xlim, (0, ysize), "m-", alpha=0.5, lw=2) + + if palette and hasattr( + palette, "colors" + ): # bottom-left has the palette, if available + colors = palette.colors + xstart, ystart = 0.1, 0.05 + for category, c in sorted(colors.items()): + root.add_patch(Rectangle((xstart, ystart), 0.03, 0.02, lw=0, fc=c)) + root.text(xstart + 0.04, ystart, category, color=c) + xstart += 0.1 + + if title is None: + title = f"Inter-genomic comparison: {gx} vs {gy}" + if is_self: + title = f"Intra-genomic comparison within {gx}" + npairs //= 2 + title += f" ({thousands(npairs)} gene pairs)" + root.set_title(title, x=0.5, y=0.96, color="k") + if title: + logger.debug("Dot plot title: %s", title) + normalize_axes(root) + + +def subset_bed(bed, seqids): + + newbed = deepcopy(bed) + del newbed[:] + for b in bed: + if b.seqid not in seqids: + continue + newbed.append(b) + return newbed + + +def dotplot_main(args): + p = OptionParser(__doc__) + p.set_beds() + p.add_argument( + "--synteny", + default=False, + action="store_true", + help="Run a fast synteny scan and display blocks", + ) + p.add_argument("--cmaptext", help="Draw colormap box on the bottom-left corner") + p.add_argument( + "--vmin", + dest="vmin", + type=float, + default=0, + help="Minimum value in the colormap", + ) + p.add_argument( + "--vmax", + dest="vmax", + type=float, + default=2, + help="Maximum value in the colormap", + ) + p.add_argument( + "--nmax", + dest="sample_number", + type=int, + default=10000, + help="Maximum number of data points to plot", + ) + p.add_argument( + "--minfont", + type=int, + default=4, + help="Do not render labels with size smaller than", + ) + p.add_argument("--colormap", help="Two column file, block id to color mapping") + p.add_argument( + "--colororientation", + action="store_true", + default=False, + help="Color the blocks based on orientation, similar to mummerplot", + ) + 
p.add_argument( + "--nosort", + default=False, + action="store_true", + help="Do not sort the seqids along the axes", + ) + p.add_argument( + "--nosep", default=False, action="store_true", help="Do not add contig lines" + ) + p.add_argument("--title", help="Title of the dot plot") + p.set_dotplot_opts() + p.set_outfile(outfile=None) + opts, args, iopts = p.set_image_options( + args, figsize="9x9", style="dark", dpi=90, cmap="copper" + ) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (anchorfile,) = args + qbed, sbed, qorder, sorder, is_self = check_beds( + anchorfile, p, opts, sorted=(not opts.nosort) + ) + + palette = opts.colormap + if palette: + palette = Palette(palettefile=palette) + elif opts.colororientation: + palette = Palette.from_block_orientation(anchorfile, qbed, sbed) + + cmaptext = opts.cmaptext + if anchorfile.endswith(".ks"): + from ..compara.ks import KsFile + + logger.debug("Anchors contain Ks values") + cmaptext = cmaptext or "*Ks* values" + anchorksfile = anchorfile + ".anchors" + if need_update(anchorfile, anchorksfile): + ksfile = KsFile(anchorfile) + ksfile.print_to_anchors(anchorksfile) + anchorfile = anchorksfile + + if opts.skipempty: + ac = AnchorFile(anchorfile) + if is_self: + qseqids = sseqids = set() + else: + qseqids, sseqids = set(), set() + + for pair in ac.iter_pairs(): + q, s = pair[:2] + _, q = qorder[q] + _, s = sorder[s] + qseqids.add(q.seqid) + sseqids.add(s.seqid) + + if is_self: + qbed = sbed = subset_bed(qbed, qseqids) + else: + qbed = subset_bed(qbed, qseqids) + sbed = subset_bed(sbed, sseqids) + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) # the whole canvas + ax = fig.add_axes((0.1, 0.1, 0.8, 0.8)) # the dot plot + + dotplot( + anchorfile, + qbed, + sbed, + fig, + root, + ax, + vmin=opts.vmin, + vmax=opts.vmax, + is_self=is_self, + synteny=opts.synteny, + cmap_text=opts.cmaptext, + cmap=iopts.cmap, + genomenames=opts.genomenames, + sample_number=opts.sample_number, + 
minfont=opts.minfont, + palette=palette, + sep=(not opts.nosep), + sepcolor=set1[int(opts.theme)], + title=opts.title, + stdpf=(not opts.nostdpf), + chpf=(not opts.nochpf), + usetex=iopts.usetex, + ) + + image_name = opts.outfile or (op.splitext(anchorfile)[0] + "." + opts.format) + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + fig.clear() + + +if __name__ == "__main__": + dotplot_main(sys.argv[1:]) diff --git a/jcvi/graphics/glyph.py b/jcvi/graphics/glyph.py new file mode 100644 index 00000000..d6de01a1 --- /dev/null +++ b/jcvi/graphics/glyph.py @@ -0,0 +1,761 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Gradient gene features +""" + +import os.path as op +import sys + +from random import choice, shuffle, random, randint + +import numpy as np + +from ..apps.base import OptionParser, ActionDispatcher +from ..utils.grouper import Grouper + +from .base import ( + CirclePolygon, + Ellipse, + FancyArrowPatch, + Polygon, + Rectangle, + get_map, + plt, + savefig, + set3, +) + + +tstep = 0.05 +Timing = np.arange(0, 1 + tstep, tstep) +arrowprops = dict( + arrowstyle="fancy", + fc="lightslategray", + ec="lightslategray", + connectionstyle="arc3,rad=-0.05", +) + + +class Bezier(object): + """ + Cubic bezier curve, see the math: + + p0 : origin, p1, p2 :control, p3: destination + """ + + def __init__(self, ax, p0, p1, p2, p3, color="m", alpha=0.2): + pts = (p0, p1, p2, p3) + px, py = zip(*pts) + xt = self.get_array(px) + yt = self.get_array(py) + + ax.plot(xt, yt, "-", color=color, alpha=alpha) + + def get_array(self, pts, t=Timing): + p0, p1, p2, p3 = pts + + # Get the coeffiencients + c = 3 * (p1 - p0) + b = 3 * (p2 - p1) - c + a = p3 - p0 - c - b + + tsquared = t**2 + tcubic = tsquared * t + return a * tcubic + b * tsquared + c * t + p0 + + +class RoundLabel(object): + """Round rectangle around the text label""" + + def __init__(self, ax, x1, x2, t, lw=0, fill=False, fc="lavender", **kwargs): + ax.text( + x1, + x2, + t, + ha="center", + 
bbox=dict(boxstyle="round", fill=fill, fc=fc, lw=lw), + **kwargs + ) + + +class RoundRect(object): + """Round rectangle directly""" + + def __init__(self, ax, xy, width, height, shrink=0.1, label=None, **kwargs): + shrink *= height + x, y = xy + pts = [] + # plot the four rounded cap one by one + pts += plot_cap( + (x + width - shrink, y + height - shrink), np.radians(range(0, 90)), shrink + ) + pts += [[x + width - shrink, y + height], [x + shrink, y + height]] + pts += plot_cap( + (x + shrink, y + height - shrink), np.radians(range(90, 180)), shrink + ) + pts += [[x, y + height - shrink], [x, y + shrink]] + pts += plot_cap((x + shrink, y + shrink), np.radians(range(180, 270)), shrink) + pts += [[x + shrink, y], [x + width - shrink, y]] + pts += plot_cap( + (x + width - shrink, y + shrink), np.radians(range(270, 360)), shrink + ) + pts += [[x + width, y + shrink], [x + width, y + height - shrink]] + p1 = Polygon(pts, **kwargs) + ax.add_patch(p1) + # add a white transparency ellipse filter + if label: + ax.text( + x + width / 2, + y + height / 2, + label, + size=10, + ha="center", + va="center", + color="w", + ) + + +class DoubleSquare(object): + """Square with a double-line margin""" + + def __init__(self, ax, x, y, radius=0.01, **kwargs): + d = radius * 1.5 + ax.add_patch(Rectangle((x - d, y - d), 2 * d, 2 * d, fc="w", ec="k", zorder=10)) + d = radius + ax.add_patch(Rectangle((x - d, y - d), 2 * d, 2 * d, zorder=10, **kwargs)) + + +class DoubleCircle(object): + """Circle with a double-line margin""" + + def __init__(self, ax, x, y, radius=0.01, **kwargs): + ax.add_patch(CirclePolygon((x, y), radius * 1.4, resolution=50, fc="w", ec="k")) + ax.add_patch(CirclePolygon((x, y), radius, resolution=50, **kwargs)) + + +def get_asymmetry(ax, radius): + """Calculates asymmetry of x and y axes. For axes that do not keep equal aspect ratio. 
+ + Args: + ax (Axes): matplotlib axes + radius (float): + """ + x0, y0 = ax.transAxes.transform((0, 0)) # Lower left in pixels + x1, y1 = ax.transAxes.transform((1, 1)) # Upper right in pixels + dx = x1 - x0 + dy = y1 - y0 + maxd = max(dx, dy) + width = radius * maxd / dx + height = radius * maxd / dy + return width, height + + +class TextCircle(object): + """Circle with a character wrapped in""" + + def __init__( + self, + ax, + x, + y, + label, + radius=0.02, + fc="k", + color="w", + size=12, + zorder=4, + fontweight="bold", + **kwargs + ): + width, height = get_asymmetry(ax, radius) + circle = Ellipse((x, y), width, height, fc=fc, ec=fc, zorder=zorder, **kwargs) + ax.add_patch(circle) + ax.text( + x, + y, + label, + ha="center", + va="center", + color=color, + size=size, + zorder=zorder + 1, + fontweight=fontweight, + **kwargs + ) + + +class BasePalette(dict): + """Base class for coloring gene glyphs""" + + palette: dict + + def get_color_and_zorder(self, feature: str) -> tuple: + """Get color and zorder based on the orientation. + + Args: + feature (str): orientation, name etc. + + Returns: + (str, int): color and zorder for the given orientation + """ + color = self.palette.get(feature) + return color, 4 + + +class OrientationPalette(BasePalette): + """Color gene glyphs with forward/reverse""" + + forward, backward = "b", "g" # Genes with different orientations + palette = {"+": forward, "-": backward} + + +class OrthoGroupPalette(BasePalette): + """Color gene glyphs with random orthogroup color""" + + grouper: Grouper + palette = set3 + + def __init__(self, grouper: Grouper): + """Initialize with grouper instance indicating orthogroup assignments. + + Args: + grouper (Grouper): Orthogroup assignments + """ + super().__init__() + self.grouper = grouper + + def get_color_and_zorder(self, feature: str) -> tuple: + """Get color based on orthogroup assignement of a gene. 
+ + Args: + feature (str): Name of the gene + + Returns: + str: color and zorder for the given gene_name based on the assignment + """ + if feature not in self.grouper: + return "gray", 3 + group = self.grouper[feature] + # Any gene part of an orthogroup gets a higher zorder + return self.palette[hash(group) % len(self.palette)], 4 + + +class BaseGlyph(list): + def __init__(self, ax): + super().__init__() + self.ax = ax + + def add_patches(self): + for p in self: + self.ax.add_patch(p) + + def set_transform(self, tr): + for p in self: + p.set_transform(tr) + + +class Glyph(BaseGlyph): + Styles = ("box", "arrow") + Palette = ("orientation", "orthogroup") + ArrowStyle = "Simple,head_length=1.5,head_width=7,tail_width=7" + + def __init__( + self, + ax, + x1, + x2, + y, + height=0.04, + gradient=True, + fc="gray", + ec="gainsboro", + lw=0, + style="box", + **kwargs + ): + """Draw a region that represent an interval feature, e.g. gene or repeat + + Args: + ax (matplotlib.axis): matplot axis object + x1 (float): start coordinate + x2 (float): end coordinate + y (float): y coordinate. Note that the feature is horizontally drawn. + height (float, optional): Height of the feature. Defaults to 0.04. + gradient (bool, optional): Shall we draw color gradient on the box? Defaults to True. + fc (str, optional): Face color of the feature. Defaults to "gray". + style (str, optional): Style, either box|arrow. Defaults to "box". 
+ """ + + super().__init__(ax) + width = x2 - x1 + # Frame around the gradient rectangle + p1 = (x1, y - 0.5 * height) + if style == "arrow": + patch = FancyArrowPatch( + (x1, y), + (x2, y), + shrinkA=0, + shrinkB=0, + arrowstyle=self.ArrowStyle, + fc=fc, + ec=ec, + lw=lw, + **kwargs + ) + else: + patch = Rectangle(p1, width, height, fc=fc, ec=ec, lw=lw, **kwargs) + self.append(patch) + + # Several overlaying patches + if gradient: + for cascade in np.arange(0.1, 0.55, 0.05): + p1 = (x1, y - height * cascade) + self.append( + Rectangle( + p1, + width, + 2 * cascade * height, + fc="w", + lw=0, + alpha=0.1, + **kwargs + ) + ) + + self.add_patches() + + +class ExonGlyph(BaseGlyph): + """Multiple rectangles linked together.""" + + def __init__(self, ax, x, y, mrnabed, exonbeds, height=0.03, ratio=1, align="left"): + super().__init__(ax) + start, end = mrnabed.start, mrnabed.end + xa = lambda a: x + (a - start) * ratio + xb = lambda a: x - (end - a) * ratio + xc = xa if align == "left" else xb + + Glyph(ax, xc(start), xc(end), y, height=height / 3) + for b in exonbeds: + bstart, bend = b.start, b.end + Glyph(ax, xc(bstart), xc(bend), y, fc="orange") + + +class GeneGlyph(BaseGlyph): + """Draws an oriented gene symbol, with color gradient, to represent genes""" + + def __init__( + self, + ax, + x1, + x2, + y, + height, + gradient=True, + tip=0.0025, + color="k", + shadow=False, + **kwargs + ): + super().__init__(ax) + # Figure out the polygon vertices first + orientation = 1 if x1 < x2 else -1 + level = 10 + tip = min(tip, abs(x1 - x2)) + # Frame + p1 = (x1, y - height * 0.5) + p2 = (x2 - orientation * tip, y - height * 0.5) + p3 = (x2, y) + p4 = (x2 - orientation * tip, y + height * 0.5) + p5 = (x1, y + 0.5 * height) + if "fc" not in kwargs: + kwargs["fc"] = color + if "ec" not in kwargs: + kwargs["ec"] = color + P = Polygon([p1, p2, p3, p4, p5], **kwargs) + self.append(P) + + if gradient: + zz = kwargs.get("zorder", 1) + zz += 1 + # Patch (apply white mask) + for 
cascade in np.arange(0, 0.5, 0.5 / level): + p1 = (x1, y - height * cascade) + p2 = (x2 - orientation * tip, y - height * cascade) + p3 = (x2, y) + p4 = (x2 - orientation * tip, y + height * cascade) + p5 = (x1, y + height * cascade) + self.append( + Polygon([p1, p2, p3, p4, p5], fc="w", lw=0, alpha=0.2, zorder=zz) + ) + + if shadow: + import matplotlib.patheffects as pe + + P.set_path_effects([pe.withSimplePatchShadow((1, -1), alpha=0.4)]) + + self.add_patches() + + +class CartoonRegion(object): + """ + Draw a collection of GeneGlyphs along chromosome. + """ + + def __init__(self, n, k=12): + # Chromosome + self.n = n + self.orientations = [choice([-1, 1]) for i in range(n)] + self.assign_colors(k) + + def draw(self, ax, x, y, gene_len=0.012, strip=True, color=True): + if strip: + self.strip() + + t = gene_len * 1.2 + length = t * (self.n + 1) + x1, x2 = x - length / 2, x + length / 2 + self.x1, self.x2 = x1, x2 + self.y = y + ax.plot((x1, x2), (y, y), color="gray", lw=2, zorder=1) + bit = 0.008 + xs = (x1 - 2 * bit, x1 - bit, x2 + bit, x2 + 2 * bit) + ax.plot(xs, [y] * 4, ".", lw=2, color="gray") + pos = np.arange(x1 + t, x2, t)[: self.n] + assert len(pos) == self.n, "len(pos) = {0}".format(len(pos)) + + gl = gene_len / 2 + for x, c, o in zip(pos, self.colors, self.orientations): + x1, x2 = x - gl, x + gl + if o < 0: + x1, x2 = x2, x1 + if not color and c != "k": + c = "w" + GeneGlyph( + ax, + x1, + x2, + y, + gene_len, + color=c, + ec="k", + gradient=False, + shadow=True, + zorder=10, + ) + + def assign_colors(self, k): + from matplotlib.colors import rgb2hex + + colorset = get_map("Paired", "qualitative", k).mpl_colors + colorset = [rgb2hex(x) for x in colorset] + cs = colorset + ["w"] * (self.n - k - 1) + shuffle(cs) + self.colors = cs[: self.n / 2] + ["k"] + cs[self.n / 2 :] + lf, p, rf = self.find_k() + self.exchange(lf, p - 2) + self.exchange(rf, p + 2) + + def exchange(self, p1, p2): + self.colors[p1], self.colors[p2] = self.colors[p2], self.colors[p1] + 
self.orientations[p1], self.orientations[p2] = ( + self.orientations[p2], + self.orientations[p1], + ) + + def delete(self, p, waiver=None): + if waiver and self.colors[p] in waiver: + return + self.colors.pop(p) + self.orientations.pop(p) + self.n -= 1 + + def insert(self, p): + self.colors.insert(p, "w") + self.orientations.insert(p, choice([-1, 1])) + self.n += 1 + + def truncate(self, b, e): + b = max(b, 0) + e = min(self.n, e) + self.colors = self.colors[b:e] + self.orientations = self.orientations[b:e] + self.n = e - b + + def assign_flankers(self): + lf, p, rf = self.find_k() + self.flanks = [self.colors[lf], self.colors[rf]] + return p + + def truncate_between_flankers(self, target=0): + try: + lf, rf = self.flanks + except: + self.assign_flankers() + lf, rf = self.flanks + lf = self.colors.index(lf) if lf in self.colors else -1 + rf = self.colors.index(rf) if rf in self.colors else -1 + assert lf >= 0 or rf >= 0 + if rf < 0: + rf = lf + if lf < 0: + lf = rf + if rf + 1 - lf < target: + gap = target - rf - 1 + lf + lf -= gap / 2 + rf += gap / 2 + self.truncate(lf, rf + 1) + + def strip(self): + while self.colors[0] == "w": + self.delete(0) + while self.colors[-1] == "w": + self.delete(self.n - 1) + + def find_k(self): + p = self.colors.index("k") + lf = max(i for i, c in enumerate(self.colors[:p]) if c != "w") + rf = min(i for i, c in enumerate(self.colors[p + 1 :]) if c != "w") + return lf, p, rf + p + 1 + + def evolve(self, mode="S", target=10): + n = self.n + assert mode in ("S", "F", "G") + keep_k = mode == "S" + p = self.assign_flankers() + waiver = self.flanks[:] + if mode == "S": + waiver += ["k"] + if mode == "F": + self.delete(p) + elif mode == "G": + left_score = sum(1 for x in self.colors[:p] if x != "w") + right_score = sum(1 for x in self.colors[p + 1 :] if x != "w") + if left_score > right_score: + self.colors[: p + 1] = ["w"] * (p + 1) + else: + self.colors[p:] = ["w"] * (self.n - p) + while self.nonwhites > target: + if random() > 0.35: + 
self.delete(randint(0, self.n - 1), waiver=waiver) + if random() > 0.65 and self.n < n * 0.8: + self.insert(randint(0, self.n - 1)) + + @property + def nonwhites(self): + return sum(1 for x in self.colors if x != "w") + + +def plot_cap(center, t, r): + x, y = center + return zip(x + r * np.cos(t), y + r * np.sin(t)) + + +def main(): + actions = ( + ("demo", "run a demo to showcase some common usages of various glyphs"), + ("gff", "draw exons for genes based on gff files"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def get_cds_beds(gffile, noUTR=False): + from jcvi.formats.gff import Gff + + mrnabed = None + cdsbeds = [] + gf = Gff(gffile) + for g in gf: + if g.type == "mRNA": + mrnabed = g.bedline + elif g.type == "CDS": + cdsbeds.append(g.bedline) + + if noUTR: + mrnabed.start = min(x.start for x in cdsbeds) + mrnabed.end = max(x.end for x in cdsbeds) + + return mrnabed, cdsbeds + + +def get_setups(gffiles, canvas=0.6, noUTR=False): + setups = [] + for gffile in gffiles: + genename = op.basename(gffile).rsplit(".", 1)[0] + mrnabed, cdsbeds = get_cds_beds(gffile, noUTR=noUTR) + setups.append((genename, mrnabed, cdsbeds)) + + genenames, mrnabeds, cdsbedss = zip(*setups) + maxspan = max(x.span for x in mrnabeds) + ratio = canvas / maxspan + return setups, ratio + + +def gff(args): + """ + %prog gff *.gff + + Draw exons for genes based on gff files. Each gff file should contain only + one gene, and only the "mRNA" and "CDS" feature will be drawn on the canvas. 
+ """ + align_choices = ("left", "center", "right") + p = OptionParser(gff.__doc__) + p.add_argument( + "--align", default="left", choices=align_choices, help="Horizontal alignment" + ) + p.add_argument( + "--noUTR", default=False, action="store_true", help="Do not plot UTRs" + ) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + fig = plt.figure(1, (8, 5)) + root = fig.add_axes([0, 0, 1, 1]) + + gffiles = args + ngenes = len(gffiles) + + canvas = 0.6 + setups, ratio = get_setups(gffiles, canvas=canvas, noUTR=opts.noUTR) + align = opts.align + xs = 0.2 if align == "left" else 0.8 + yinterval = canvas / ngenes + ys = 0.8 + tip = 0.01 + for genename, mrnabed, cdsbeds in setups: + ExonGlyph(root, xs, ys, mrnabed, cdsbeds, ratio=ratio, align=align) + if align == "left": + root.text(xs - tip, ys, genename, ha="right", va="center") + elif align == "right": + root.text(xs + tip, ys, genename, ha="left", va="center") + ys -= yinterval + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + figname = "exons.pdf" + savefig(figname, dpi=300) + + +def demo(args): + """ + %prog demo + + Draw sample gene features to illustrate the various fates of duplicate + genes - to be used in a book chapter. 
+ """ + p = OptionParser(demo.__doc__) + opts, args = p.parse_args(args) + + fig = plt.figure(1, (8, 5)) + root = fig.add_axes([0, 0, 1, 1]) + + panel_space = 0.23 + dup_space = 0.025 + # Draw a gene and two regulatory elements at these arbitrary locations + locs = [ + (0.5, 0.9), # ancestral gene + (0.5, 0.9 - panel_space + dup_space), # identical copies + (0.5, 0.9 - panel_space - dup_space), + (0.5, 0.9 - 2 * panel_space + dup_space), # degenerate copies + (0.5, 0.9 - 2 * panel_space - dup_space), + (0.2, 0.9 - 3 * panel_space + dup_space), # sub-functionalization + (0.2, 0.9 - 3 * panel_space - dup_space), + (0.5, 0.9 - 3 * panel_space + dup_space), # neo-functionalization + (0.5, 0.9 - 3 * panel_space - dup_space), + (0.8, 0.9 - 3 * panel_space + dup_space), # non-functionalization + (0.8, 0.9 - 3 * panel_space - dup_space), + ] + + default_regulator = "gm" + regulators = [ + default_regulator, + default_regulator, + default_regulator, + "wm", + default_regulator, + "wm", + "gw", + "wb", + default_regulator, + "ww", + default_regulator, + ] + + width = 0.24 + for i, (xx, yy) in enumerate(locs): + regulator = regulators[i] + x1, x2 = xx - 0.5 * width, xx + 0.5 * width + Glyph(root, x1, x2, yy) + if i == 9: # upper copy for non-functionalization + continue + + # coding region + x1, x2 = xx - 0.16 * width, xx + 0.45 * width + Glyph(root, x1, x2, yy, fc="k") + + # two regulatory elements + x1, x2 = xx - 0.4 * width, xx - 0.28 * width + for xx, fc in zip((x1, x2), regulator): + if fc == "w": + continue + + DoubleCircle(root, xx, yy, fc=fc) + + rotation = 30 + tip = 0.02 + if i == 0: + ya = yy + tip + root.text(x1, ya, "Flower", rotation=rotation, va="bottom") + root.text(x2, ya, "Root", rotation=rotation, va="bottom") + elif i == 7: + ya = yy + tip + root.text(x2, ya, "Leaf", rotation=rotation, va="bottom") + + # Draw arrows between panels (center) + arrow_dist = 0.08 + ar_xpos = 0.5 + for ar_ypos in (0.3, 0.53, 0.76): + root.annotate( + " ", + (ar_xpos, ar_ypos), 
+ (ar_xpos, ar_ypos + arrow_dist), + arrowprops=arrowprops, + ) + + ar_ypos = 0.3 + for ar_xpos in (0.2, 0.8): + root.annotate( + " ", (ar_xpos, ar_ypos), (0.5, ar_ypos + arrow_dist), arrowprops=arrowprops + ) + + # Duplication, Degeneration + xx = 0.6 + ys = (0.76, 0.53) + processes = ("Duplication", "Degeneration") + for yy, process in zip(ys, processes): + root.text(xx, yy + 0.02, process, fontweight="bold") + + # Label of fates + xs = (0.2, 0.5, 0.8) + fates = ("Subfunctionalization", "Neofunctionalization", "Nonfunctionalization") + yy = 0.05 + for xx, fate in zip(xs, fates): + RoundLabel(root, xx, yy, fate) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + figname = "demo.pdf" + savefig(figname, dpi=300) + + +if __name__ == "__main__": + main() diff --git a/jcvi/graphics/grabseeds.py b/jcvi/graphics/grabseeds.py new file mode 100644 index 00000000..756a2d78 --- /dev/null +++ b/jcvi/graphics/grabseeds.py @@ -0,0 +1,881 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Image processing pipelines for phenotyping projects. 
+""" +import json +import os.path as op +import string +import sys + +from collections import Counter +from datetime import date +from math import cos, pi, sin +from typing import Any, List, Optional, Tuple + +import numpy as np + +from ..apps.base import setup_magick_home + +# Attempt to set MACICK_HOME ENV variable if imagemagick installed with homebrew on Mac +setup_magick_home() + +from PIL.Image import open as iopen +from pyefd import elliptic_fourier_descriptors +from pytesseract import image_to_string +from scipy.ndimage import binary_fill_holes, distance_transform_edt +from scipy.optimize import fmin_bfgs as fmin +from skimage.color import gray2rgb, rgb2gray +from skimage.feature import canny, peak_local_max +from skimage.filters import roberts, sobel, threshold_otsu +from skimage.measure import find_contours, regionprops, label +from skimage.morphology import disk, closing +from skimage.segmentation import clear_border, watershed +from wand.image import Image +from webcolors import rgb_to_hex, normalize_integer_triplet + +from ..algorithms.formula import get_kmeans, reject_outliers +from ..apps.base import ( + ActionDispatcher, + OptionParser, + datadir, + logger, + iglob, + mkdir, +) +from ..formats.base import must_open +from ..formats.pdf import cat +from ..utils.webcolors import closest_color + +from .base import ( + Rectangle, + latex, + load_image, + normalize_axes, + plt, + savefig, + set_helvetica_axis, +) + + +np.seterr(all="ignore") + +RGBTuple = Tuple[int, int, int] + + +class Seed(object): + """ + Seed object with metrics. 
+ """ + + def __init__( + self, + imagename: str, + accession: str, + seedno: int, + rgb: RGBTuple, + props: Any, + efds: np.ndarray, + exif: dict, + ): + self.imagename = imagename + self.accession = accession + self.seedno = seedno + y, x = props.centroid + self.x, self.y = int(round(x)), int(round(y)) + self.location = f"{self.x}|{self.y}" + self.area = int(round(props.area)) + self.length = int(round(props.major_axis_length)) + self.width = int(round(props.minor_axis_length)) + self.props = props + self.efds = efds + self.circularity = 4 * pi * props.area / props.perimeter**2 + self.rgb = rgb + self.colorname = closest_color(rgb) + self.datetime = exif.get("exif:DateTimeOriginal", date.today()) + self.rgbtag = triplet_to_rgb(rgb) + self.pixeltag = f"length={self.length} width={self.width} area={self.area}" + self.hashtag = " ".join((self.rgbtag, self.colorname)) + self.calibrated = False + + def __str__(self): + fields = [ + self.imagename, + self.datetime, + self.accession, + self.seedno, + self.location, + self.area, + f"{self.circularity:.2f}", + self.length, + self.width, + self.colorname, + self.rgbtag, + ] + if self.calibrated: + fields += [ + self.pixelcmratio, + self.rgbtransform, + self.correctedlength, + self.correctedwidth, + self.correctedcolorname, + self.correctedrgb, + ] + fields += [",".join(f"{x:.3f}" for x in self.efds)] + return "\t".join(str(x) for x in fields) + + @classmethod + def header(cls, calibrated: bool = False) -> str: + """ + Return header line for the TSV file. + """ + fields = ( + "ImageName DateTime Accession SeedNum Location " + "Area Circularity Length(px) Width(px) ColorName RGB".split() + ) + if calibrated: + fields += ( + "PixelCMratio RGBtransform Length(cm)" + " Width(cm) CorrectedColorName CorrectedRGB".split() + ) + fields += ["EllipticFourierDescriptors"] + return "\t".join(fields) + + def calibrate(self, pixel_cm_ratio: float, tr: np.ndarray): + """ + Calibrate pixel-inch ratio and color adjustment. 
def is_overlapping(mask1: dict, mask2: dict, threshold=0.5):
    """
    Check if bounding boxes of mask1 and mask2 overlap more than the given
    threshold.

    Each mask is a dict whose "bbox" entry unpacks as (x, y, width, height).
    The overlap is measured as the intersection area divided by the area of
    the smaller of the two boxes.

    Returns True when that fraction is strictly greater than `threshold`.
    A box with zero (or negative) area cannot be measured against and is
    reported as not overlapping instead of raising ZeroDivisionError.
    """
    x1, y1, w1, h1 = mask1["bbox"]
    x2, y2, w2, h2 = mask2["bbox"]

    # Extent of the shared region along each axis; clamped at 0 when disjoint
    x_overlap = max(0, min(x1 + w1, x2 + w2) - max(x1, x2))
    y_overlap = max(0, min(y1 + h1, y2 + h2) - max(y1, y2))
    intersection = x_overlap * y_overlap

    smaller_area = min(w1 * h1, w2 * h2)
    if smaller_area <= 0:
        # Degenerate box: the ratio is undefined, treat as no overlap
        return False
    return intersection / smaller_area > threshold
def total_error(x: np.ndarray, colormap: Tuple[Tuple[np.ndarray, np.ndarray]]) -> float:
    """
    Root of the summed squared distances between transformed and expected colors.

    `x` holds the nine entries of a 3x3 matrix. Every (observed, expected)
    pair in `colormap` contributes the squared Euclidean norm of
    `matrix @ observed - expected`.
    """
    transform = np.reshape(x, (3, 3))
    squared_sum = 0
    for observed, expected in colormap:
        residual = np.dot(transform, observed) - expected
        squared_sum += np.linalg.norm(residual) ** 2
    return squared_sum**0.5
+ - `calibrate.JPG` is the photo containig a colorchecker + - `boxsize` is the measured size for the boxes on printed colorchecker, in + squared centimeter (cm2) units + """ + xargs = args[2:] + p = OptionParser(calibrate.__doc__) + _, args, _ = add_seeds_options(p, args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + imagefile, boxsize = args + boxsize = float(boxsize) + + # Read in color checker + colorcheckerfile = op.join(datadir, "colorchecker.txt") + colorchecker = [] + expected = 0 + with open(colorcheckerfile, encoding="utf-8") as file: + for row in file: + boxes = row.split() + colorchecker.append(boxes) + expected += len(boxes) + + folder = op.split(imagefile)[0] + objects = seeds([imagefile, f"--outdir={folder}"] + xargs) + nseeds = len(objects) + logger.debug("Found %d boxes (expected=%d)", nseeds, expected) + assert ( + expected - 4 <= nseeds <= expected + 4 + ), f"Number of boxes drastically different from {expected}" + + # Calculate pixel-cm ratio + boxes = [t.area for t in objects] + reject = reject_outliers(boxes) + retained_boxes = [b for r, b in zip(reject, boxes) if not r] + mbox = np.median(retained_boxes) # in pixels + pixel_cm_ratio = (mbox / boxsize) ** 0.5 + logger.debug("Median box size: %d pixels. 
Measured box size: %d cm2", mbox, boxsize) + logger.debug("Pixel-cm ratio: %.2f", pixel_cm_ratio) + + xs = [t.x for t in objects] + ys = [t.y for t in objects] + xs = [float(itemx) for itemx in xs] + ys = [float(itemy) for itemy in ys] + idx_xs = get_kmeans(xs, 6) + idx_ys = get_kmeans(ys, 4) + for xi, yi, s in zip(idx_xs, idx_ys, objects): + s.rank = (yi, xi) + + objects.sort(key=lambda x: x.rank) + + colormap = [] + for s in objects: + x, y = s.rank + observed, expected = s.rgb, rgb_to_triplet(colorchecker[x][y]) + colormap.append((np.array(observed), np.array(expected))) + + # Color transfer + tr0 = np.eye(3).flatten() + print("Initial distance:", total_error(tr0, colormap), file=sys.stderr) + tr = fmin(total_error, tr0, args=(colormap,)) + tr.resize((3, 3)) + print("RGB linear transform:\n", tr, file=sys.stderr) + calib = {"PixelCMratio": pixel_cm_ratio, "RGBtransform": tr.tolist()} + + jsonfile = op.join(folder, "calibrate.json") + fw = must_open(jsonfile, "w") + print(json.dumps(calib, indent=4), file=fw) + fw.close() + logger.debug("Calibration specs written to `%s`.", jsonfile) + + return jsonfile + + +def add_seeds_options(p, args): + """ + Add options to the OptionParser for seeds() and batchseeds() functions. + """ + g1 = p.add_argument_group("Image manipulation") + g1.add_argument("--rotate", default=0, type=int, help="Rotate degrees clockwise") + g1.add_argument( + "--rows", default=":", help="Crop rows e.g. `:800` from first 800 rows" + ) + g1.add_argument( + "--cols", default=":", help="Crop cols e.g. `-800:` from last 800 cols" + ) + g1.add_argument("--labelrows", help="Label rows e.g. `:800` from first 800 rows") + g1.add_argument("--labelcols", help="Label cols e.g. 
`-800: from last 800 rows") + valid_colors = ("red", "green", "blue", "purple", "yellow", "orange", "INVERSE") + g1.add_argument( + "--changeBackground", + default=0, + choices=valid_colors, + help="Changes background color", + ) + + g2 = p.add_argument_group("Object recognition") + g2.add_argument( + "--minsize", + default=0.2, + type=float, + help="Min percentage of object to image", + ) + g2.add_argument( + "--maxsize", default=20, type=float, help="Max percentage of object to image" + ) + g2.add_argument( + "--count", default=100, type=int, help="Report max number of objects" + ) + g2.add_argument( + "--watershed", + default=False, + action="store_true", + help="Run watershed to segment touching objects", + ) + + g3 = p.add_argument_group("De-noise") + valid_filters = ("canny", "otsu", "roberts", "sam", "sobel") + g3.add_argument( + "--filter", + default="canny", + choices=valid_filters, + help="Edge detection algorithm", + ) + g3.add_argument( + "--sigma", + default=1, + type=int, + help="Canny edge detection sigma, higher for noisy image", + ) + g3.add_argument( + "--kernel", + default=2, + type=int, + help="Edge closure, higher if the object edges are dull", + ) + g3.add_argument( + "--border", default=5, type=int, help="Remove image border of certain pixels" + ) + g3.add_argument( + "--sam-checkpoint", default="sam_vit_h_4b8939.pth", help="SAM checkpoint file" + ) + + g4 = p.add_argument_group("Output") + g4.add_argument("--calibrate", help="JSON file to correct distance and color") + g4.add_argument( + "--edges", + default=False, + action="store_true", + help="Visualize edges in middle PDF panel", + ) + g4.add_argument( + "--outdir", default=".", help="Store intermediate images and PDF in folder" + ) + g4.add_argument("--prefix", help="Output prefix") + g4.add_argument( + "--noheader", default=False, action="store_true", help="Do not print header" + ) + opts, args, iopts = p.set_image_options(args, figsize="12x6", style="white") + + return opts, args, 
def p_round(n: int, precision: int = 5) -> int:
    """
    Snap `n` to a multiple of `precision`.

    `precision` is coerced to int first; the number of multiples is chosen
    with the built-in round().
    """
    step = int(precision)
    multiples = round(n / float(step))
    return int(multiples) * step
def convert_background(pngfile: str, new_background: str):
    """
    Replace the estimated background color of an image with a new color.

    `new_background` is one of the named colors below or "INVERSE" (255 minus
    the estimated background, per channel). Any other non-empty value paints
    the background black. A falsy value disables the conversion.

    Returns the path of the rewritten image (`<name>_bgxform<ext>` next to
    the input), or `pngfile` unchanged when no conversion was requested.
    """
    if not new_background:
        return pngfile

    named_colors = {
        "red": (255, 0, 0),
        "green": (0, 255, 0),
        "blue": (0, 0, 255),
        "yellow": (255, 255, 0),
        "purple": (255, 0, 255),
        "orange": (255, 165, 0),
    }

    _name, _ext = op.splitext(op.basename(pngfile))
    newfile = op.join(op.dirname(pngfile), _name + "_bgxform" + _ext)

    img = iopen(pngfile)
    pixels = list(img.getdata())

    # Tolerance window: 80% of the standard deviation of the values 0..254
    std_rgb = np.std(np.arange(255)) * 0.8

    # Estimate the background one channel at a time. The value lists are
    # rebuilt for every band so that one channel's pixels never leak into the
    # average of the next one.
    obcolor = [0, 0, 0]
    for t in range(3):
        band = list(img.getdata(band=t))

        # First pass: average of the values above the tolerance
        bright = [value for value in band if value > std_rgb]
        if bright:
            obcolor[t] = sum(bright) // len(bright)

        # Second pass: refine using only values close to the first estimate;
        # keep the first estimate when nothing falls inside the window
        lower, upper = obcolor[t] - std_rgb, obcolor[t] + std_rgb
        nearby = [value for value in band if lower < value < upper]
        if nearby:
            obcolor[t] = sum(nearby) // len(nearby)

    # Selection of colors based on option parser
    obcolor = tuple(obcolor)
    if new_background == "INVERSE":
        nbcolor = tuple(255 - channel for channel in obcolor)
    else:
        nbcolor = named_colors.get(new_background, (0, 0, 0))

    # Change Background Color: repaint pixels whose channels all lie within
    # the tolerance of the estimated background
    for idx, pixel in enumerate(pixels):
        if all(o - std_rgb <= p <= o + std_rgb for o, p in zip(obcolor, pixel)):
            pixels[idx] = nbcolor
    img.putdata(pixels)
    # NOTE: the data is always encoded as PNG, whatever extension `newfile`
    # inherited from the input
    img.save(newfile, "PNG")
    return newfile
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    # The docstring above is also the usage text handed to OptionParser.
    #
    # Steps: parse options, preprocess the image (background swap, resize,
    # crop, optional label crop), segment objects with the selected filter,
    # measure each retained region, write one TSV row per object and save a
    # four-panel diagnostic figure. Returns the list of Seed objects.
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (pngfile,) = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir and outdir != ".":
        mkdir(outdir)
    if calib:
        # Close the calibration file once parsed instead of leaking the handle
        fp = must_open(calib)
        try:
            calib = json.load(fp)
        finally:
            fp.close()
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)
    nbcolor = opts.changeBackground
    pngfile = convert_background(pngfile, nbcolor)
    resizefile, mainfile, labelfile, exif = convert_image(
        pngfile,
        pf,
        outdir=outdir,
        rotate=opts.rotate,
        rows=rows,
        cols=cols,
        labelrows=labelrows,
        labelcols=labelcols,
    )
    oimg = load_image(resizefile)
    img = load_image(mainfile)

    _, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, nrows=1, figsize=(iopts.w, iopts.h))
    # Edge detection
    img_gray = rgb2gray(img)
    # NOTE: `w` and `h` hold shape[0] and shape[1] respectively; the axis
    # limits set further down use them in the same order
    w, h = img_gray.shape
    canvas_size = w * h
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))

    logger.debug("Running %s edge detection …", ff)
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "otsu":
        thresh = threshold_otsu(img_gray)
        edges = img_gray > thresh
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)

    # Only the edge-based filters produce these; they stay None for SAM so
    # the plotting code below can tell whether they are available
    closed = None
    coordinates = None
    if ff == "sam":
        masks = sam(img, opts.sam_checkpoint)
        filtered_masks = [
            mask for mask in masks if min_size <= mask["area"] <= max_size
        ]
        deduplicated_masks = deduplicate_masks(filtered_masks)
        logger.info(
            "SAM: %d (raw) → %d (size filtered) → %d (deduplicated)",
            len(masks),
            len(filtered_masks),
            len(deduplicated_masks),
        )
        labels = np.zeros(img_gray.shape, dtype=int)
        for i, mask in enumerate(deduplicated_masks):
            labels[mask["segmentation"]] = i + 1
        labels = clear_border(labels)
    else:
        edges = clear_border(edges, buffer_size=opts.border)
        selem = disk(kernel)
        closed = closing(edges, selem) if kernel else edges
        filled = binary_fill_holes(closed)

        # Watershed algorithm
        if opts.watershed:
            distance = distance_transform_edt(filled)
            # peak_local_max() no longer accepts `indices=False`; build the
            # boolean peak mask from the returned coordinates instead
            coordinates = peak_local_max(distance, threshold_rel=0.05)
            local_maxi = np.zeros(distance.shape, dtype=bool)
            local_maxi[tuple(coordinates.T)] = True
            markers, nmarkers = label(local_maxi, return_num=True)
            logger.debug("Identified %d watershed markers", nmarkers)
            labels = watershed(closed, markers, mask=filled)
        else:
            labels = label(filled)

    # Object size filtering
    logger.debug(
        "Find objects with pixels between %d (%.2f%%) and %d (%d%%)",
        min_size,
        opts.minsize,
        max_size,
        opts.maxsize,
    )

    # Plotting
    ax1.set_title("Original picture")
    ax1.imshow(oimg)

    params = rf"{ff}, $\sigma$={sigma}, $k$={kernel}"
    if opts.watershed:
        params += ", watershed"
    ax2.set_title(f"Edge detection\n({params})")
    ax2_img = labels
    if opts.edges and closed is not None:
        ax2_img = gray2rgb(closed)
    elif coordinates is not None:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], "g.")
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title("Object detection")
    ax3.imshow(img)

    filename = op.basename(pngfile)
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    rp.sort(key=lambda x: x.area, reverse=True)
    nb_labels = len(rp)
    logger.debug("A total of %d objects identified.", nb_labels)
    objects = []
    for i, props in enumerate(rp, start=1):
        if i > opts.count:
            break

        contour = find_contours(labels == props.label, 0.5)[0]
        efds = efd_feature(contour)
        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = sin(orientation) * major / 2
        major_dy = cos(orientation) * major / 2
        minor_dx = cos(orientation) * minor / 2
        minor_dy = -sin(orientation) * minor / 2
        ax2.plot((x0 - major_dx, x0 + major_dx), (y0 - major_dy, y0 + major_dy), "r-")
        ax2.plot((x0 - minor_dx, x0 + minor_dx), (y0 - minor_dy, y0 + minor_dy), "r-")
        ax2.plot(contour[:, 1], contour[:, 0], "y-")

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * 0.35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        # Clamp the window start at 0: a negative start would wrap around to
        # the far side of the image and typically yield an empty sample
        square = img[max(y0d - d, 0) : (y0d + d), max(x0d - d, 0) : (x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logger.debug(
            "Seed #%d: %d pixels (%d sampled) - %.2f%%",
            i,
            npixels,
            len(pixels),
            100.0 * npixels / canvas_size,
        )

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, efds, exif))
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle(
            (minc, minr), maxc - minc, maxr - minr, fill=False, ec="w", lw=1
        )
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) // 2, (minr + maxr) // 2
        ax3.text(mc, mr, f"{i}", color="w", ha="center", va="center", size=6)

    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(0.1, 0.92, f"File: {latex(filename)}", color="g")
    ax4.text(0.1, 0.86, f"Label: {latex(accession)}", color="m")
    yy = 0.8
    # NOTE(review): this handle is never closed here; it may be a shared
    # stream such as stdout - confirm must_open() semantics before closing it
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrated=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        # Only the first seven objects get a swatch in the summary panel
        if i > 7:
            continue
        ax4.text(0.01, yy, str(i), va="center", bbox=dict(fc="none", ec="k"))
        ax4.text(0.1, yy, o.pixeltag, va="center")
        yy -= 0.04
        ax4.add_patch(
            Rectangle((0.1, yy - 0.025), 0.12, 0.05, lw=0, fc=rgb_to_hex(o.rgb))
        )
        ax4.text(0.27, yy, o.hashtag, va="center")
        yy -= 0.06
    ax4.text(
        0.1,
        yy,
        f"(A total of {nb_labels} objects displayed)",
        color="darkslategray",
    )
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        set_helvetica_axis(ax)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects
def parse_csv(csvfile, vmin=0, groups=False):
    """
    Read a heatmap matrix from a CSV file.

    Layout: an optional first row of group names (when `groups` is true),
    then a row of column names, then one row per item whose first cell is
    the row name. The first cell of the header rows is ignored. Empty group
    cells inherit the nearest non-empty group to their left. Blank lines in
    the data section are skipped.

    Every value is converted to float and raised to at least `vmin`.

    Returns (groups, rows, cols, data): `groups` is the filled-in list of
    group names, or the `groups` argument unchanged when it is falsy;
    `rows` and `cols` are lists of names; `data` is a NumPy array.
    """
    import csv

    # newline="" is what the csv module expects; the `with` block guarantees
    # the file is closed even if a value fails to parse
    with open(csvfile, newline="") as fp:
        reader = csv.reader(fp)
        if groups:
            groups = next(reader)[1:]
            # Fill in empty cells in groups
            filled_groups = []
            lastg = ""
            for g in groups:
                g = g.strip() or lastg
                filled_groups.append(g)
                lastg = g
            groups = filled_groups

        rows = []
        cols = next(reader)[1:]
        data = []
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line
                continue
            rows.append(row[0])
            data.append([max(vmin, float(x)) for x in row[1:]])

    data = np.array(data)

    return groups, rows, cols, data
ncols = len(rows), len(cols) + + xinterval = 0.7 / ncols + yinterval = 0.7 / max(nrows, ncols) + + plt.xticks(range(ncols), cols, rotation=45, size=10, ha="center") + plt.yticks(range(nrows), rows, size=10) + + for x in ax.get_xticklines() + ax.get_yticklines(): + x.set_visible(False) + + ax.set_xlim(-0.5, ncols - 0.5) + + t = [1, 10, 100, 1000, 10000] + pad = 0.06 + if opts.horizontalbar: + ypos = 0.5 * (1 - nrows * yinterval) - pad + axcolor = fig.add_axes([0.3, ypos, 0.4, 0.02]) + orientation = "horizontal" + else: + axcolor = fig.add_axes([0.9, 0.3, 0.02, 0.4]) + orientation = "vertical" + fig.colorbar(im, cax=axcolor, ticks=t, orientation=orientation) + + if groups: + groups = [(key, len(list(nn))) for key, nn in groupby(groups)] + yy = 0.5 + 0.5 * nrows / ncols * 0.7 + 0.06 + e = 0.005 + sep = -0.5 + + for k, kl in groups: + # Separator in the array area + sep += kl + ax.plot([sep, sep], [-0.5, nrows - 0.5], "w-", lw=2) + # Group labels on the top + kl *= xinterval + root.plot([xstart + e, xstart + kl - e], [yy, yy], "-", color="gray", lw=2) + root.text(xstart + 0.5 * kl, yy + e, k, ha="center", color="gray") + xstart += kl + + if rowgroups: + from jcvi.graphics.glyph import TextCircle + + xpos = 0.04 + tip = 0.015 + assert rgroups + ystart = 1 - 0.5 * (1 - nrows * yinterval) + for gname, start, end in rgroups: + start = ystart - start * yinterval + end = ystart - (end + 1) * yinterval + start -= tip / 3 + end += tip / 3 + + # Bracket the groups + root.plot((xpos, xpos + tip), (start, start), "k-", lw=2) + root.plot((xpos, xpos), (start, end), "k-", lw=2) + root.plot((xpos, xpos + tip), (end, end), "k-", lw=2) + TextCircle(root, xpos, 0.5 * (start + end), gname) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + image_name = pf + "." + opts.cmap + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +if __name__ == "__main__": + main() diff --git a/jcvi/graphics/histogram.py b/jcvi/graphics/histogram.py new file mode 100644 index 00000000..65ad7a25 --- /dev/null +++ b/jcvi/graphics/histogram.py @@ -0,0 +1,387 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Use R ggplot2 library to plot histogram, also contains an ASCII histogram (use +--text) when invoking histogram(). +""" +import os.path as op +import sys + +from math import log, ceil +from collections import defaultdict + +import numpy as np + +from ..apps.base import OptionParser, logger +from ..apps.r import RTemplate +from ..formats.base import DictFile + +from .base import asciiplot, quickplot + + +histogram_header = """ +library(ggplot2) +vmin <- $vmin +vmax <- $vmax +data <- read.table('$numberfile', skip=$skip) +data <- data[data >= vmin] +data <- data[data <= vmax] +data <- data.frame($xlabel=data) +m <- ggplot(data, aes(x=$xlabel)) + + theme(plot.title=element_text(size=11, colour="darkblue")) +""" + +histogram_template = ( + histogram_header + + """ +m + geom_histogram(colour="darkgreen", fill="$fill", binwidth=(vmax-vmin)/$bins) + +labs(title='$title') +ggsave('$outfile') +""" +) + +histogram_log_template = ( + histogram_header + + """ +library(scales) +m + geom_histogram(colour="darkgreen", fill="$fill", binwidth=0.33) + +labs(title='$title') + +scale_x_continuous(trans=log${base}_trans()) +ggsave('$outfile') +""" +) + +histogram_multiple_template = """ +library(ggplot2) +vmin <- $vmin +vmax <- $vmax +data <- read.table('$numberfile', header=T, sep="\t", skip=$skip) +""" + +histogram_multiple_template_a = ( + histogram_multiple_template + + """ +m <- ggplot(data, aes(x=$xlabel, fill=grp)) +m + geom_bar(binwidth=(vmax-vmin)/$bins, position="dodge") + +xlim(vmin, vmax) + +labs(title='$title') + +ggsave('$outfile') +""" +) + +histogram_multiple_template_b = ( + histogram_multiple_template + + """ +m <- ggplot(data, 
def get_data(filename, vmin=None, vmax=None, skip=0, col=0):
    """
    Load one column of numbers from a whitespace-delimited text file.

    Args:
        filename (str): Path to the input file.
        vmin: Inclusive lower bound; when None the minimum of the data is used.
        vmax: Inclusive upper bound; when None the maximum of the data is used.
        skip (int): Number of leading lines to discard before reading data.
        col (int): 0-based index of the whitespace-separated column to read.

    Returns:
        tuple: ``(data, vmin, vmax)`` where ``data`` is a numpy array holding
        only the values within ``[vmin, vmax]``.
    """
    from jcvi.utils.cbook import SummaryStats

    # Read the file in a single pass and close it right away; the previous
    # implementation opened it twice and never closed either handle.
    with open(filename) as fp:
        for _ in range(skip):
            next(fp)
        rows = fp.readlines()

    # The first data row decides the numeric type for the whole file: a "."
    # anywhere in that row selects float, otherwise int.
    ntype = float if rows and "." in rows[0] else int

    data = np.array([ntype(row.split()[col]) for row in rows])
    # Summary statistics are reported on the full column, before any clipping.
    stats = SummaryStats(data, title=filename)
    print(stats, file=sys.stderr)

    vmin = min(data) if vmin is None else vmin
    vmax = max(data) if vmax is None else vmax
    data = data[(data >= vmin) & (data <= vmax)]

    return data, vmin, vmax
def histogram_multiple(
    numberfiles,
    vmin,
    vmax,
    xlabel,
    title,
    outfmt="pdf",
    tags=None,
    bins=20,
    skip=0,
    ascii=False,
    facet=False,
    fill="white",
    prefix="",
):
    """
    Generate histogram using number from numberfile, and only numbers in the
    range of (vmin, vmax). First combining multiple files.

    The input files are concatenated into one tab-separated file (value, grp)
    named after the joined basenames, which is then handed to an R template.

    Args:
        numberfiles (list): Paths of the input files.
        vmin, vmax: Requested plotting range; either may be None, in which
            case the bound is taken from the data across all files.
        xlabel (str): Column header / x-axis label.
        title (str): Plot title.
        outfmt (str): Extension of the output image.
        tags (str): Optional comma-separated group names, one per input file.
        bins (int): Number of bins.
        skip (int): Number of leading lines skipped when scanning each file.
        ascii (bool): Print text histograms instead of calling R.
        facet (bool): Use the faceted template rather than the dodged one.
        fill (str): Fill color of the bins.
        prefix (str): Optional prefix prepended to the output file name.
    """
    if ascii:
        return texthistogram(numberfiles, vmin, vmax, title=title, bins=bins, skip=skip)

    newfile = "_".join(op.basename(x).split(".")[0] for x in numberfiles)

    if tags:
        tags = tags.split(",")

    # Query every file with the bounds the caller asked for (possibly None) and
    # aggregate afterwards. Comparing None against a number, as the previous
    # min(vmin, va) / max(vmax, vb) did, raises TypeError on Python 3.
    requested_vmin, requested_vmax = vmin, vmax
    lows, highs = [], []

    with open(newfile, "w") as fw:
        print("{0}\tgrp".format(xlabel), file=fw)
        for i, f in enumerate(numberfiles):
            _, va, vb = get_data(f, requested_vmin, requested_vmax, skip=skip)
            lows.append(va)
            highs.append(vb)

            tag = tags[i] if tags else op.basename(f).rsplit(".", 1)[0]
            with open(f) as fp:
                for row in fp:
                    print("\t".join((row.strip(), tag)), file=fw)

    if lows:
        vmin, vmax = min(lows), max(highs)

    # NOTE: the R template is filled from locals(), so the names vmin, vmax,
    # numberfile, outfile, skip, xlabel, bins, title and fill must stay local.
    numberfile = newfile
    outfile = numberfile + "." + outfmt
    if prefix:
        outfile = prefix + outfile
    htemplate = (
        histogram_multiple_template_b if facet else histogram_multiple_template_a
    )
    rtemplate = RTemplate(htemplate, locals())
    rtemplate.run()
+ """ + allowed_format = ("emf", "eps", "pdf", "png", "ps", "raw", "rgba", "svg", "svgz") + p = OptionParser(main.__doc__) + p.add_argument("--skip", default=0, type=int, help="skip the first several lines") + p.add_argument("--col", default=0, type=int, help="Get the n-th column") + p.set_histogram() + p.add_argument( + "--tags", + dest="tags", + default=None, + help="tags for data if multiple input files, comma sep", + ) + p.add_argument( + "--ascii", + default=False, + action="store_true", + help="print ASCII text stem-leaf plot", + ) + p.add_argument( + "--base", + default="0", + choices=("0", "2", "10"), + help="use logarithm axis with base, 0 to disable", + ) + p.add_argument( + "--facet", + default=False, + action="store_true", + help="place multiple histograms side-by-side", + ) + p.add_argument("--fill", default="white", help="color of the bin") + p.add_argument( + "--format", + default="pdf", + choices=allowed_format, + help="Generate image of format", + ) + p.add_argument( + "--quick", + default=False, + action="store_true", + help="Use quick plot, assuming bins are already counted", + ) + p.add_argument( + "--noprintstats", + default=False, + action="store_true", + help="Write basic stats when using --quick", + ) + opts, args = p.parse_args() + + if len(args) < 1: + sys.exit(not p.print_help()) + + skip = opts.skip + vmin, vmax = opts.vmin, opts.vmax + bins = opts.bins + xlabel, title = opts.xlabel, opts.title + title = title or args[0] + base = int(opts.base) + fileno = len(args) + + if opts.quick: + assert fileno == 1, "Single input file expected using --quick" + filename = args[0] + figname = filename.rsplit(".", 1)[0] + ".pdf" + data = DictFile(filename, keycast=int, cast=int) + quickplot( + data, + vmin, + vmax, + xlabel, + title, + figname=figname, + print_stats=(not opts.noprintstats), + ) + return + + if fileno == 1: + histogram( + args[0], + vmin, + vmax, + xlabel, + title, + outfmt=opts.format, + bins=bins, + skip=skip, + ascii=opts.ascii, + 
base=base, + fill=opts.fill, + col=opts.col, + ) + else: + histogram_multiple( + args, + vmin, + vmax, + xlabel, + title, + outfmt=opts.format, + tags=opts.tags, + bins=bins, + skip=skip, + ascii=opts.ascii, + facet=opts.facet, + fill=opts.fill, + ) + + +if __name__ == "__main__": + main() diff --git a/jcvi/graphics/karyotype.py b/jcvi/graphics/karyotype.py new file mode 100644 index 00000000..723628b7 --- /dev/null +++ b/jcvi/graphics/karyotype.py @@ -0,0 +1,476 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +%prog seqids layout + +Illustrate macrosynteny between tracks which represent individual genomes. + +seqids contain the chromosomes to plot. Each line correspond to a track. +layout provides configuration for placement of tracks and mapping file between tracks. + +Layout file example - first section specify how to draw each track. Then the "edges" +section specify which connections to draw. + +# y, xstart, xend, rotation, color, label, va, bed, label_va +.6, .1, .4, 0, m, Grape, top, grape.bed, center +.4, .3, .6, 60, k, Athaliana, top, athaliana.bed, center +# edges +e, 0, 1, athaliana.grape.4x1.simple +""" + + +import sys + +from typing import List, Optional + +from ..apps.base import OptionParser, logger +from ..compara.synteny import SimpleFile +from ..formats.bed import Bed + +from .base import ( + AbstractLayout, + markup, + mpl, + normalize_axes, + plt, + savefig, + update_figname, +) +from .chromosome import Chromosome, HorizontalChromosome +from .glyph import TextCircle +from .synteny import Shade, ymid_offset + + +class LayoutLine(object): + def __init__(self, row, delimiter=",", generank=True): + args = row.rstrip().split(delimiter) + args = [x.strip() for x in args] + + self.empty = False + if len(args) < 8: + self.empty = True + return + self.y = float(args[0]) + self.xstart = float(args[1]) + self.xend = float(args[2]) + self.rotation = int(args[3]) + self.color = args[4] + self.label = args[5] + self.va = args[6] + self.bed = 
def make_circle_name(sid, rev):
    """Extract a succinct label based on sid.

    Only the part after the last underscore is considered, with any "chr" /
    "Chr" removed. If that part contains digits, the first group of digits is
    returned (leading zeros dropped). Otherwise the first letter, upper-cased,
    is returned. When stripping leaves nothing (e.g. the seqid is just "chr"),
    the label falls back to the first letter of the unstripped name instead of
    failing.

    If sid is in rev, then '-' gets appended to the label.

    Args:
        sid (str): seqid
        rev (set[str]): Set of seqids that are reversed

    Returns:
        str: Short label for the sid
    """
    import re

    # Membership must be tested on the full seqid, before it is shortened.
    in_reverse = sid in rev
    short = sid.rsplit("_", 1)[-1]
    stripped = short.replace("chr", "").replace("Chr", "")

    digits = re.search(r"\d+", stripped)
    if digits:
        label = str(int(digits.group()))
    else:
        # Progressively less processed fallbacks; slicing keeps an empty
        # seqid from raising IndexError.
        label = (stripped or short or sid)[:1].upper()

    if in_reverse:
        label += "-"
    return label
class ShadeManager(object):
    """Draw the syntenic shades for every edge declared in a layout."""

    def __init__(self, ax, tracks, layout, heightpad=0, style="curve"):
        """
        Args:
            ax: Axes to draw on.
            tracks (list): Track objects, indexed by the edge endpoints.
            layout: Layout whose ``edges`` are (i, j, blocks, samearc) tuples.
            heightpad (float): Vertical offset of the shade from each track.
            style (str): Shade style passed through to ``Shade``.
        """
        self.style = style
        for i, j, blocks, samearc in layout.edges:
            # if same track (duplication shades), shall we draw above or below?
            self.draw_blocks(
                ax, blocks, tracks[i], tracks[j], samearc=samearc, heightpad=heightpad
            )

    def draw_blocks(
        self, ax, blocks, atrack, btrack, samearc: Optional[str], heightpad=0
    ):
        """Draw one shade per block between ``atrack`` and ``btrack``.

        Blocks with an endpoint that cannot be placed on its track are skipped.
        """
        for a, b, c, d, _, _, highlight in blocks:
            p = atrack.get_coords(a), atrack.get_coords(b)
            q = btrack.get_coords(c), btrack.get_coords(d)
            # Track.get_coords() returns [None, None] for a seqid that is not
            # on the track, so the x coordinate of every endpoint has to be
            # inspected; testing the pair itself against None never matches.
            if any(pt is None or pt[0] is None for pt in (*p, *q)):
                continue

            ymid_pad = ymid_offset(samearc)
            if heightpad:
                # Move the shade edges away from the chromosomes, towards
                # the other track.
                if atrack.y < btrack.y:
                    p[0][1] = p[1][1] = atrack.y + heightpad
                    q[0][1] = q[1][1] = btrack.y - heightpad
                else:
                    p[0][1] = p[1][1] = atrack.y - heightpad
                    q[0][1] = q[1][1] = btrack.y + heightpad

            # Highlighted blocks are outlined and drawn on top of the others.
            zorder = 2 if highlight else 1
            lw = 1 if highlight else 0
            Shade(
                ax,
                p,
                q,
                ymid_pad,
                highlight=highlight,
                alpha=1,
                fc="gainsboro",
                ec="gainsboro",
                lw=lw,
                zorder=zorder,
                style=self.style,
            )
chr3- + di = lambda x: x[:-1] if x[-1] == "-" else x + # Comments can cause layout and seqids to be out of sync + # https://github.com/tanghaibao/jcvi/issues/676 + for i, row in enumerate(_ for _ in fp if not _.startswith("#") and _.strip()): + logger.info("Processing `%s` (track %d)", row.strip(), i) + t = layout[i] + # There can be comments in seqids file: + # https://github.com/tanghaibao/jcvi/issues/335 + seqids = row.split("#", 1)[0].rstrip().split(",") + t.rev = set(x[:-1] for x in seqids if x[-1] == "-") + seqids = [di(x) for x in seqids] + if t.empty: + continue + + bed = t.bed + self.generank = generank + if generank: + sz = dict((x, len(list(bed.sub_bed(x)))) for x in seqids) + else: + sz = sizes or dict( + (x, max(z.end for z in list(bed.sub_bed(x)))) for x in seqids + ) + assert sz is not None, "sizes not available and cannot be inferred" + t.seqids = seqids + # validate if all seqids are non-empty + for k, v in sz.items(): + if v == 0: + logger.error("Size of `%s` is empty. 
def main(args: List[str]):
    """Command-line entry point: draw a karyotype figure and return its file name.

    The usage text shown to the user is the module docstring.
    """
    p = OptionParser(__doc__)

    # Plain on/off switches, registered in the order they appear in the help.
    switches = (
        ("--basepair", "Use base pair position instead of gene rank"),
        ("--keep-chrlabels", "Keep chromosome labels"),
        ("--nocircles", "Do not plot chromosome circles"),
    )
    for flag, description in switches:
        p.add_argument(flag, default=False, action="store_true", help=description)

    p.add_argument(
        "--shadestyle",
        default="curve",
        choices=Shade.Styles,
        help="Style of syntenic wedges",
    )
    p.add_argument(
        "--chrstyle",
        default="auto",
        choices=Chromosome.Styles,
        help="Style of chromosome labels",
    )
    p.set_outfile("karyotype.pdf")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    # Exactly two positional arguments are required: seqids and layout.
    if len(args) != 2:
        sys.exit(not p.print_help())
    seqidsfile, layoutfile = args

    canvas = plt.figure(1, (iopts.w, iopts.h))
    root = canvas.add_axes((0, 0, 1, 1))

    drawing_options = dict(
        keep_chrlabels=opts.keep_chrlabels,
        plot_circles=(not opts.nocircles),
        shadestyle=opts.shadestyle,
        chrstyle=opts.chrstyle,
        generank=(not opts.basepair),
        seed=iopts.seed,
    )
    Karyotype(root, seqidsfile, layoutfile, **drawing_options)
    normalize_axes(root)

    image_name = update_figname(opts.outfile, iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)

    return image_name
class BinLine:
    """One whitespace-separated record: chromosome, length and bin length."""

    def __init__(self, row):
        fields = row.split()
        self.chr = fields[0]
        self.len = float(fields[1])
        self.binlen = int(fields[2])

    def __str__(self):
        # Tab-separated, in the same column order as the input.
        columns = (self.chr, self.len, self.binlen)
        return "\t".join(map(str, columns))

    def subtract(self, o):
        """Reduce this record's bin length by the length of ``o``."""
        self.binlen = self.binlen - o.len
class ChrInfoLine:
    """One delimited record: seqid, color and an optional display name."""

    def __init__(self, row, delimiter=","):
        fields = list(map(str.strip, row.split(delimiter)))
        self.name = fields[0]
        self.color = fields[1]
        # The third column is optional; without it the seqid is shown as is.
        self.new_name = fields[2] if len(fields) > 2 else self.name
def mosdepth(args):
    """
    %prog mosdepth mosdepth.global.dist.txt groups

    Plot depth vs. coverage per chromosome. Inspired by mosdepth plot. See also:
    https://github.com/brentp/mosdepth
    """
    sns.set_style("darkgrid")

    p = OptionParser(mosdepth.__doc__)
    p.add_argument("--maxdepth", default=100, type=int, help="Maximum depth to plot")
    p.add_argument(
        "--logscale", default=False, action="store_true", help="Use log-scale on depth"
    )
    opts, args, iopts = p.set_image_options(args, style="dark", figsize="6x8")

    if len(args) != 2:
        # Same idiom as the other commands here, so that a usage error does
        # not exit with a success status.
        sys.exit(not p.print_help())

    # Read in datasets
    distfile, groupsfile = args
    dists = parse_distfile(distfile)
    groups = parse_groupsfile(groupsfile)
    logscale = opts.logscale

    # Construct a composite figure with N tracks indicated in the groups
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes((0, 0, 1, 1))

    rows = len(groups)
    ypad = 0.05
    yinterval = (1 - 2 * ypad) / (rows + 1)
    yy = 1 - ypad

    ax = None  # stays None when the groups file is empty
    for group_idx, (chrs, colors) in enumerate(groups):
        yy -= yinterval
        ax = fig.add_axes((0.15, yy, 0.7, yinterval * 0.85))
        for c, color in zip(chrs, colors):
            # Test membership first: indexing the defaultdict would insert an
            # empty Counter, and unpacking zip() of nothing fails.
            if c not in dists:
                logger.warning("No depth records for %s, skipped", c)
                continue
            cdata = dists[c].items()
            logger.debug("Importing %d records for %s", len(cdata), c)
            cx, cy = zip(*sorted(cdata))
            ax.plot(cx, cy, "-", color=color)
        if logscale:
            # `base` is the current keyword; the per-axis `basex` spelling is
            # no longer accepted by recent Matplotlib.
            ax.set_xscale("log", base=2)
        ax.set_xlim(1 if logscale else 0, opts.maxdepth)
        ax.get_yaxis().set_visible(False)
        # Only the bottom track keeps its x axis
        if group_idx != rows - 1:
            ax.get_xaxis().set_visible(False)

        # Add legend to the right of the canvas
        label_pad = 0.02
        label_yy = yy + yinterval
        for c, color in zip(chrs, colors):
            label_yy -= label_pad
            root.text(0.92, label_yy, c, color=color, ha="center", va="center")

    root.text(
        0.1,
        0.5,
        "Proportion of bases at coverage",
        rotation=90,
        color="darkslategray",
        ha="center",
        va="center",
    )
    root.text(0.5, 0.05, "Coverage", color="darkslategray", ha="center", va="center")
    normalize_axes(root)
    # Applies to the last (bottom) track only
    if ax is not None:
        adjust_spines(ax, ["bottom"], outward=True)

    pf = "mosdepth"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
+ c=c, + edgecolors="none", + s=8, + lw=0, + ) + logger.debug("Obtained %d data points with depth data", len(data)) + + # Per seqid median + medians = {} + for seqid, values in data_by_seqid.items(): + c = chrinfo[seqid].color if seqid in chrinfo else defaultcolor + seqid_start = starts[seqid] + seqid_end = ends[seqid] + seqid_median = np.median(values) + medians[seqid] = seqid_median + if median_line: + ax.plot( + (seqid_start, seqid_end), + (seqid_median, seqid_median), + "-", + lw=4, + color=c, + alpha=0.5, + ) + + # Vertical lines for all the breaks + for pos in starts.values(): + ax.plot((pos, pos), (0, maxdepth), "-", lw=1, color=sepcolor) + + # Beautify the numeric axis + for tick in ax.get_xticklines() + ax.get_yticklines(): + tick.set_visible(False) + + median_depth_y = 0.88 + chr_label_y = 0.08 + rotation = 20 if len(label_positions) > 10 else 0 + for seqid, position in label_positions: + xpos = 0.1 + position * 0.8 / xsize + c = chrinfo[seqid].color if seqid in chrinfo else defaultcolor + newseqid = chrinfo[seqid].new_name if seqid in chrinfo else seqid + if draw_seqids: + root.text( + xpos, + chr_label_y, + newseqid, + color=c, + ha="center", + va="center", + rotation=rotation, + ) + seqid_median = medians[seqid] + if median_line: + root.text( + xpos, + median_depth_y, + str(int(seqid_median)), + color=c, + ha="center", + va="center", + ) + + # Plot the regions of interest + if roi: + for chrom, pos, name in roi: + if chrom not in starts: + continue + x = starts[chrom] + pos + # TODO: Remove this special case + color = {"II": "tomato", "low qual": "g"}.get(name, "gray") + ax.plot((x, x), (0, maxdepth), "-", lw=2, color=color) + + # Add an arrow to the right of the plot, indicating these are median depths + if median_line: + root.text( + 0.91, + 0.88, + r"$\leftarrow$median", + color="lightslategray", + va="center", + ) + + if title: + root.text( + 0.95, + 0.5, + markup(title), + color="darkslategray", + ha="center", + va="center", + size=15, + ) + if 
def read_roi(roi_file: str) -> Dict[str, List[Tuple[str, int, str]]]:
    """
    Read the regions of interest file, and return a dict of filename => regions.

    Each non-blank line holds at least three comma-separated fields,
    ``filename,chrom:start-end,name``; extra fields are ignored and whitespace
    around fields is dropped. Every region is stored as
    ``(chrom, midpoint, name)``, where midpoint is the integer mean of start
    and end.
    """
    roi = defaultdict(list)
    nregions = 0
    with open(roi_file, encoding="utf-8") as fp:
        for row in fp:
            row = row.strip()
            if not row:
                # Blank lines used to break the three-way unpacking below
                continue
            filename, region, name = [x.strip() for x in row.split(",")[:3]]
            chrom, start_end = region.split(":", 1)
            start, end = start_end.split("-")
            region = (chrom, (int(start) + int(end)) // 2, name)
            roi[filename].append(region)
            nregions += 1
    # Report regions, not the number of distinct files (which is len(roi))
    logger.info("Read %d regions of interest", nregions)
    return roi
def depth(args):
    """
    %prog depth *.regions.bed.gz

    Plot the mosdepth regions BED file. We recommend to generate this BED file
    by (please adjust the --by parameter to your required resolution):

    $ mosdepth --no-per-base --use-median --fast-mode --by 1000000 sample.wgs
    sample.bam

    Use --chrinfo to specify a colormap between seqid, desired color, and
    optionally a new name. For example:

    chr01A, #c51b7d, 1A
    chr01B, #4d9221, 1B
    ...

    Only seqids that are in the colormap will be plotted, in the order that's
    given in the file. When --chrinfo is not set, every seqid will be drawn in
    black.

    Can take multiple BED files as input and then plot all of them in a
    composite figure.
    """
    # The docstring above doubles as the usage text of the command.
    p = OptionParser(depth.__doc__)
    p.add_argument(
        "--chrinfo", help="Comma-separated mappings between seqid, color, new_name"
    )
    p.add_argument(
        "--titleinfo",
        help="Comma-separated titles mappings between filename, title",
    )
    p.add_argument("--maxdepth", default=100, type=int, help="Maximum depth to show")
    p.add_argument(
        "--logscale", default=False, action="store_true", help="Use log-scale on depth"
    )
    p.add_argument(
        "--no-median-line",
        default=False,
        action="store_true",
        help="Do not plot median depth line",
    )
    p.add_argument(
        "--calculate-coverage",
        default=False,
        action="store_true",
        help="Calculate genome coverage",
    )
    p.add_argument(
        "--roi",
        # Three fields per line are required by the reader of this file
        help="File that contains regions of interest, "
        "format: filename,chr:start-end,name",
    )
    p.set_outfile("depth.pdf")
    opts, args, iopts = p.set_image_options(args, style="dark", figsize="14x4")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bedfiles = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes((0, 0, 1, 1))

    # One horizontal band per BED file, stacked from the top of the canvas.
    npanels = len(bedfiles)
    yinterval = 1.0 / npanels
    ypos = 1 - yinterval
    panel_roots, panel_axes = [], []
    for _ in range(npanels):
        # A single panel draws its annotations directly on the canvas axes
        panel_root = root if npanels == 1 else fig.add_axes((0, ypos, 1, yinterval))
        panel_ax = fig.add_axes((0.1, ypos + 0.2 * yinterval, 0.8, 0.65 * yinterval))
        panel_roots.append(panel_root)
        panel_axes.append(panel_ax)
        ypos -= yinterval

    draw_multi_depth(
        root,
        panel_roots,
        panel_axes,
        bedfiles,
        opts.chrinfo,
        opts.titleinfo,
        opts.maxdepth,
        opts.logscale,
        median_line=not opts.no_median_line,
        calculate_coverage=opts.calculate_coverage,
        roi=opts.roi,
    )

    image_name = opts.outfile
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return image_name
+ """ + p.add_argument("--window", default=500000, type=int, help="Size of window") + p.add_argument("--shift", default=100000, type=int, help="Size of shift") + p.add_argument("--subtract", help="Subtract bases from window") + p.add_argument( + "--nomerge", default=False, action="store_true", help="Do not merge features" + ) + + +def check_window_options(opts): + """ + Check the window options, and return the values. + """ + window = opts.window + shift = opts.shift + subtract = opts.subtract + assert window % shift == 0, "--window must be divisible by --shift" + logger.debug( + "Line/stack-plot options: window=%d shift=%d subtract=%s", + window, + shift, + subtract, + ) + merge = not opts.nomerge + + return window, shift, subtract, merge + + +def get_beds(s: List[str], binned: bool = False) -> List[str]: + """ + Get the bed files for each feature, and return them as a list. + """ + return [x + ".bed" for x in s] if not binned else [x for x in s] + + +def linearray(binfile, chr, window, shift): + mn = binfile.mapping[chr] + m, _ = zip(*mn) + + m = np.array(m, dtype=float) + w = window // shift + m = moving_sum(m, window=w) + return m + + +def lineplot(ax, binfiles, nbins, chr, window, shift, color="br"): + assert len(binfiles) <= 2, "A max of two line plots are supported" + + t = np.arange(nbins) + bf = binfiles[0] + m = linearray(bf, chr, window, shift) + ax.plot(t, m, "{0}-".format(color[0]), lw=2) + + formatter = ticker.FuncFormatter( + lambda x, pos: human_readable_base(int(x) * shift, pos) + ) + ax.xaxis.set_major_formatter(formatter) + for tl in ax.get_xticklabels(): + tl.set_color("darkslategray") + + label = bf.filename.split(".")[0] + perw = "per {0}".format(human_size(window, precision=0)) + ax.set_ylabel(label + " " + perw, color=color[0]) + + if len(binfiles) == 2: + ax2 = ax.twinx() + bf = binfiles[1] + m = linearray(bf, chr, window, shift) + ax2.plot(t, m, "{0}-".format(color[1]), lw=2) + # Differentiate tick labels through colors + for tl in 
ax.get_yticklabels(): + tl.set_color(color[0]) + for tl in ax2.get_yticklabels(): + tl.set_color(color[1]) + + label = bf.filename.split(".")[0] + ax2.set_ylabel(label + " " + perw, color=color[1]) + + ax.set_xlim(0, nbins) + + +def composite(args): + """ + %prog composite fastafile chr1 + + Combine line plots, feature bars and alt-bars, different data types + specified in options. Inputs must be BED-formatted. Three types of viz are + currently supported: + + --lines: traditional line plots, useful for plotting feature freq + --bars: show where the extent of features are + --altbars: similar to bars, yet in two alternating tracks, e.g. scaffolds + """ + p = OptionParser(composite.__doc__) + p.add_argument("--lines", help="Features to plot in lineplot") + p.add_argument("--bars", help="Features to plot in bars") + p.add_argument("--altbars", help="Features to plot in alt-bars") + p.add_argument( + "--fatten", + default=False, + action="store_true", + help="Help visualize certain narrow features", + ) + p.add_argument( + "--mode", + default="span", + choices=("span", "count", "score"), + help="Accumulate feature based on", + ) + add_window_options(p) + opts, args, iopts = p.set_image_options(args, figsize="8x5") + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, chr = args + window, shift, _, merge = check_window_options(opts) + linebeds, barbeds, altbarbeds = [], [], [] + fatten = opts.fatten + if opts.lines: + lines = opts.lines.split(",") + linebeds = get_beds(lines) + if opts.bars: + bars = opts.bars.split(",") + barbeds = get_beds(bars) + if opts.altbars: + altbars = opts.altbars.split(",") + altbarbeds = get_beds(altbars) + + linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode, merge=merge) + + margin = 0.12 + clen = Sizes(fastafile).mapping[chr] + nbins, _ = get_nbins(clen, shift) + + plt.rcParams["xtick.major.size"] = 0 + plt.rcParams["ytick.major.size"] = 0 + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = 
fig.add_axes((0, 0, 1, 1)) + + root.text(0.5, 0.95, chr, ha="center", color="darkslategray") + + xstart, xend = margin, 1 - margin + xlen = xend - xstart + ratio = xlen / clen + # Line plots + ax = fig.add_axes((xstart, 0.6, xlen, 0.3)) + lineplot(ax, linebins, nbins, chr, window, shift) + + # Bar plots + yy = 0.5 + yinterval = 0.08 + xs = lambda x: xstart + ratio * x + r = 0.01 + fattend = 0.0025 + for bb in barbeds: + root.text(xend + 0.01, yy, bb.split(".")[0], va="center") + HorizontalChromosome(root, xstart, xend, yy, height=0.02) + bb = Bed(bb) + for b in bb: + start, end = xs(b.start), xs(b.end) + span = end - start + if fatten and span < fattend: + span = fattend + + root.add_patch( + Rectangle((start, yy - r), span, 2 * r, lw=0, fc="darkslategray") + ) + yy -= yinterval + + # Alternative bar plots + offset = r / 2 + for bb in altbarbeds: + root.text(xend + 0.01, yy, bb.split(".")[0], va="center") + bb = Bed(bb) + for b in bb: + start, end = xs(b.start), xs(b.end) + span = end - start + if span < 0.0001: + continue + offset = -offset + root.add_patch( + Rectangle( + (start, yy + offset), end - start, 0.003, lw=0, fc="darkslategray" + ) + ) + yy -= yinterval + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + image_name = chr + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def multilineplot(args): + """ + %prog multilineplot fastafile chr1 + + Combine multiple line plots in one vertical stack + Inputs must be BED-formatted. 
+ + --lines: traditional line plots, useful for plotting feature freq + """ + p = OptionParser(multilineplot.__doc__) + p.add_argument("--lines", help="Features to plot in lineplot") + p.add_argument("--colors", help="List of colors matching number of input bed files") + p.add_argument( + "--mode", + default="span", + choices=("span", "count", "score"), + help="Accumulate feature based on", + ) + p.add_argument( + "--binned", + default=False, + action="store_true", + help="Specify whether the input is already binned; " + + "if True, input files are considered to be binfiles", + ) + p.add_argument("--ymax", type=int, help="Set Y-axis max") + add_window_options(p) + opts, args, iopts = p.set_image_options(args, figsize="8x5") + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, chr = args + window, shift, _, merge = check_window_options(opts) + linebeds = [] + colors = opts.colors + if opts.lines: + lines = opts.lines.split(",") + assert len(colors) == len(lines), ( + "Number of chosen colors must match" + " number of input bed files" + ) + linebeds = get_beds(lines, binned=opts.binned) + + linebins = get_binfiles( + linebeds, fastafile, shift, mode=opts.mode, binned=opts.binned, merge=merge + ) + + clen = Sizes(fastafile).mapping[chr] + nbins, _ = get_nbins(clen, shift) + + plt.rcParams["xtick.major.size"] = 0 + plt.rcParams["ytick.major.size"] = 0 + plt.rcParams["figure.figsize"] = iopts.w, iopts.h + + fig, axarr = plt.subplots(nrows=len(lines)) + if len(linebeds) == 1: + axarr = (axarr,) + fig.suptitle(latex(chr), color="darkslategray") + + for i, ax in enumerate(axarr): + lineplot( + ax, + [linebins[i]], + nbins, + chr, + window, + shift, + color="{0}{1}".format(colors[i], "r"), + ) + + if opts.ymax: + ax.set_ylim(0, opts.ymax) + + plt.subplots_adjust(hspace=0.5) + + image_name = chr + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def draw_heatmaps( + fig, + root, + root_extent: Extent, + fastafile: str, + chr: str, + stacks: List[str], + heatmaps: List[str], + window: int, + shift: int, + cmap: Colormap, + subtract: Optional[int] = None, + merge: bool = False, + meres: Optional[str] = None, +): + """ + Draw heatmap for the given chromosome. + """ + stackbeds = get_beds(stacks) + heatmapbeds = get_beds(heatmaps) + stackbins = get_binfiles( + stackbeds, fastafile, shift, subtract=subtract, merge=merge + ) + heatmapbins = get_binfiles( + heatmapbeds, fastafile, shift, subtract=subtract, merge=merge + ) + + margin = 0.06 + inner = 0.015 + clen = Sizes(fastafile).mapping[chr] + + # Gauge + ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin) + yinterval = 0.3 + xx = margin + yy = 1 - margin + yy -= yinterval + xlen = clen / ratio + cc = chr + if "_" in chr: + ca, cb = chr.split("_") + cc = ca[0].upper() + cb + + root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray)) + extent = (xx, yy, xlen, yinterval - inner) + adjusted = adjust_extent(extent, root_extent) + ax = fig.add_axes(adjusted) + + nbins, _ = get_nbins(clen, shift) + + owindow = clen / 100 + if owindow > window: + window = owindow // shift * shift + + stackplot(ax, stackbins, nbins, palette, chr, window, shift) + ax.text( + 0.05, + 0.9, + cc, + va="top", + zorder=100, + transform=ax.transAxes, + bbox=dict(boxstyle="round", fc="w", alpha=0.5), + ) + + # Legends + xx += xlen + 0.01 + yspace = (yinterval - inner) / (len(stackbins) + 1) + yy = 1 - margin - yinterval + for s, p in zip(stacks, palette): + s = s.replace("_", " ") + s = Registration.get(s, s) + + yy += yspace + root.add_patch(Rectangle((xx, yy), inner, inner, color=p, lw=0)) + root.text(xx + 1.5 * inner, yy, s, size=10) + + yh = 0.05 # Heatmap height + # Heatmaps + xx = margin + yy = 1 - margin - yinterval - inner + for s, p in zip(heatmaps, heatmapbins): + s = s.replace("_", " ") 
+ s = Registration.get(s, s) + + yy -= yh + m = stackarray(p, chr, window, shift) + + Y = np.array([m, m]) + root.imshow( + Y, + extent=(xx, xx + xlen, yy, yy + yh - inner), + interpolation="nearest", + aspect="auto", + cmap=cmap, + ) + root.text(xx + xlen + 0.01, yy, s, size=10) + + yy -= yh + + if meres: + bed = Bed(meres) + for b in bed: + if b.seqid != chr: + continue + pos = (b.start + b.end) / 2 + cpos = pos / ratio + xx = margin + cpos + accn = b.accn.capitalize() + root.add_patch(CirclePolygon((xx, yy), radius=0.01, fc="m", ec="m")) + root.text(xx + 0.014, yy, accn, va="center", color="m") + + normalize_axes(root) + + +def heatmap(args): + """ + %prog heatmap fastafile chr1 + + Combine stack plot with heatmap to show abundance of various tracks along + given chromosome. Need to give multiple beds to --stacks and --heatmaps + """ + p = OptionParser(heatmap.__doc__) + p.add_argument( + "--stacks", + default="Exons,Introns,DNA_transposons,Retrotransposons", + help="Features to plot in stackplot", + ) + p.add_argument( + "--heatmaps", + default="Copia,Gypsy,hAT,Helitron,Introns,Exons", + help="Features to plot in heatmaps", + ) + p.add_argument("--meres", default=None, help="Extra centromere / telomere features") + add_window_options(p) + opts, args, iopts = p.set_image_options(args, figsize="8x5") + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, chr = args + window, shift, subtract, merge = check_window_options(opts) + + stacks = opts.stacks.split(",") + heatmaps = opts.heatmaps.split(",") + + fig = plt.figure(1, (iopts.w, iopts.h)) + root_extent = (0, 0, 1, 1) + root = fig.add_axes(root_extent) + + draw_heatmaps( + fig, + root, + root_extent, + fastafile, + chr, + stacks, + heatmaps, + window, + shift, + iopts.cmap, + subtract, + merge, + meres=opts.meres, + ) + + image_name = chr + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def draw_gauge(ax, margin: float, maxl: int, rightmargin: Optional[float] = None): + """ + Draw a gauge on the top of the canvas, showing the scale of the chromosome. + """ + rightmargin = rightmargin or margin + ax.plot([margin, 1 - rightmargin], [1 - margin, 1 - margin], "k-", lw=2) + + best_stride = autoscale(maxl) + nintervals = maxl / best_stride + + xx, yy = margin, 1 - margin + tip = 0.005 + xinterval = (1 - margin - rightmargin) / nintervals + l = human_size(best_stride) + if l[-1] == "b": + suffix = target = l[-2:] + + for i in range(0, maxl + 1, best_stride): + l = human_size(i, precision=0, target=target) + if l[-1] == "b": + l, suffix = l[:-2], l[-2:] + ax.plot([xx, xx], [yy, yy + tip], "k-", lw=2) + ax.text(xx, yy + 2 * tip, l, ha="center", size=13) + xx += xinterval + + xx += 4 * tip - xinterval + ax.text(xx + tip, yy + 2 * tip, suffix) + + return best_stride / xinterval + + +def get_binfiles( + inputfiles: List[str], + fastafile: str, + shift: int, + mode: str = "span", + subtract: Optional[int] = None, + binned: bool = False, + merge: bool = True, +): + """ + Get binfiles from input files. If not binned, then bin them first. + """ + if not binned: + binopts = [f"--binsize={shift}"] + binopts.append(f"--mode={mode}") + if subtract: + binopts.append(f"--subtract={subtract}") + if not merge: + binopts.append("--nomerge") + binfiles = [bins([x, fastafile] + binopts) for x in inputfiles if op.exists(x)] + else: + binfiles = inputfiles + binfiles = [BinFile(x) for x in binfiles] + + return binfiles + + +def stackarray(binfile: BinFile, chr: str, window: int, shift: int): + """ + Get stack array from binfile for the given chr. 
+ """ + mn = binfile.mapping[chr] + m, n = zip(*mn) + + m = np.array(m, dtype=float) + n = np.array(n, dtype=float) + + w = window // shift + nw = m.shape[0] + if nw < w: + logger.info("%s length < window, using %d bins instead of %d", chr, nw, w) + w = nw + m = moving_sum(m, window=w) + n = moving_sum(n, window=w) + m /= n + + return m + + +def stackplot( + ax, + binfiles: List[BinFile], + nbins: int, + palette: List[str], + chr: str, + window: int, + shift: int, +): + """ + Plot stackplot on the given axes, using data from binfiles. + """ + t = np.arange(nbins, dtype=float) + 0.5 + m = np.zeros(nbins, dtype=float) + zorders = range(10)[::-1] + for binfile, p, z in zip(binfiles, palette, zorders): + s = stackarray(binfile, chr, window, shift) + m += s + ax.fill_between(t, m, color=p, lw=0, zorder=z) + + ax.set_xlim(0, nbins) + ax.set_ylim(0, 1) + ax.set_axis_off() + + +def draw_stacks( + fig, + root, + root_extent: Extent, + stacks: List[str], + fastafile: str, + window: int, + shift: int, + top: int, + merge: bool = True, + subtract: Optional[int] = None, + switch: Optional[DictFile] = None, +): + """ + Draw stack plot. 
+ """ + bedfiles = get_beds(stacks) + binfiles = get_binfiles(bedfiles, fastafile, shift, subtract=subtract, merge=merge) + + sizes = Sizes(fastafile) + s = list(sizes.iter_sizes())[:top] + maxl = max(x[1] for x in s) + margin = 0.08 + inner = 0.02 # y distance between tracks + + # Gauge + ratio = draw_gauge(root, margin, maxl) + + # Per chromosome + yinterval = (1 - 2 * margin) / (top + 1) + xx = margin + yy = 1 - margin + for chr, clen in s: + yy -= yinterval + xlen = clen / ratio + cc = chr + if "_" in chr: + ca, cb = chr.split("_") + cc = ca[0].upper() + cb + + if switch and cc in switch: + cc = "\n".join((cc, f"({switch[cc]})")) + + extent = (xx, yy, xlen, yinterval - inner) + adjusted = adjust_extent(extent, root_extent) + root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray)) + ax = fig.add_axes(adjusted) + + nbins, _ = get_nbins(clen, shift) + + stackplot(ax, binfiles, nbins, palette, chr, window, shift) + root.text( + xx - 0.04, yy + 0.5 * (yinterval - inner), cc, ha="center", va="center" + ) + + # Legends + yy -= yinterval + xx = margin + for b, p in zip(bedfiles, palette): + b = b.rsplit(".", 1)[0].replace("_", " ") + b = Registration.get(b, b) + + root.add_patch(Rectangle((xx, yy), inner, inner, color=p, lw=0)) + xx += 2 * inner + root.text(xx, yy, b, size=13) + xx += len(b) * 0.015 + inner + + normalize_axes(root) + + +def stack(args): + """ + %prog stack fastafile + + Create landscape plots that show the amounts of genic sequences, and repetitive + sequences along the chromosomes. 
+ """ + p = OptionParser(stack.__doc__) + p.add_argument("--top", default=10, type=int, help="Draw the first N chromosomes") + p.add_argument( + "--stacks", + default="Exons,Introns,DNA_transposons,Retrotransposons", + help="Features to plot in stackplot", + ) + p.add_argument("--switch", help="Change chr names based on two-column file") + add_window_options(p) + opts, args, iopts = p.set_image_options(args, figsize="8x8") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + top = opts.top + window, shift, subtract, merge = check_window_options(opts) + switch = opts.switch + if switch: + switch = DictFile(opts.switch) + + stacks = opts.stacks.split(",") + + fig = plt.figure(1, (iopts.w, iopts.h)) + root_extent = (0, 0, 1, 1) + root = fig.add_axes(root_extent) + + draw_stacks( + fig, + root, + root_extent, + stacks, + fastafile, + window, + shift, + top, + merge, + subtract, + switch, + ) + + pf = fastafile.rsplit(".", 1)[0] + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + return image_name + + +if __name__ == "__main__": + main() diff --git a/jcvi/graphics/mummerplot.py b/jcvi/graphics/mummerplot.py new file mode 100644 index 00000000..4d54cde6 --- /dev/null +++ b/jcvi/graphics/mummerplot.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Wrapper for mummerplot. Selecting a subset of queries and references to plot +main features in the dot plot. +""" +import os.path as op +import sys + +from ..apps.base import OptionParser, logger, sh +from ..formats.base import SetFile +from ..formats.coords import Coords, filter +from ..formats.sizes import Sizes + + +def writeXfile(ids, sizes_dict, filename): + fw = open(filename, "w") + for q in ids: + print("\t".join(str(x) for x in (q, sizes_dict[q], "+")), file=fw) + + logger.debug("%d ids written to `%s`.", len(ids), filename) + fw.close() + + +def main(args): + """ + %prog deltafile + + Plot one query. 
Extract the references that have major matches to this + query. Control "major" by option --refcov. + """ + p = OptionParser(main.__doc__) + p.add_argument("--refids", help="Use subset of contigs in the ref") + p.add_argument( + "--refcov", + default=0.01, + type=float, + help="Minimum reference coverage", + ) + p.add_argument( + "--all", + default=False, + action="store_true", + help="Plot one pdf file per ref in refidsfile", + ) + p.add_argument( + "--color", + default="similarity", + choices=("similarity", "direction", "none"), + help="Color the dots based on", + ) + p.add_argument( + "--nolayout", + default=False, + action="store_true", + help="Do not rearrange contigs", + ) + p.set_align(pctid=0, hitlen=0) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (deltafile,) = args + reffasta, queryfasta = open(deltafile).readline().split() + color = opts.color + layout = not opts.nolayout + prefix = op.basename(deltafile).split(".")[0] + qsizes = Sizes(queryfasta).mapping + rsizes = Sizes(reffasta).mapping + + refs = SetFile(opts.refids) if opts.refids else set(rsizes.keys()) + refcov = opts.refcov + pctid = opts.pctid + hitlen = opts.hitlen + deltafile = filter( + [deltafile, "--pctid={0}".format(pctid), "--hitlen={0}".format(hitlen)] + ) + + if opts.all: + for r in refs: + pdffile = plot_some_queries( + [r], + qsizes, + rsizes, + deltafile, + refcov, + prefix=prefix, + color=color, + layout=layout, + ) + if pdffile: + sh("mv {0} {1}.pdf".format(pdffile, r)) + else: + plot_some_queries( + refs, + qsizes, + rsizes, + deltafile, + refcov, + prefix=prefix, + color=color, + layout=layout, + ) + + +def plot_some_queries( + refs, + qsizes, + rsizes, + deltafile, + refcov, + prefix="out", + color="similarity", + layout=True, +): + + Qfile, Rfile = "Qfile", "Rfile" + coords = Coords(deltafile) + queries = set() + for c in coords: + if c.refcov < refcov: + continue + if c.ref not in refs: + continue + queries.add(c.query) + + if not 
queries or not refs: + logger.debug("Empty - %s vs. %s", queries, refs) + return None + + if not layout: + queries = sorted(queries) + refs = sorted(refs) + + writeXfile(queries, qsizes, Qfile) + writeXfile(refs, rsizes, Rfile) + + cmd = "mummerplot {0}".format(deltafile) + cmd += " -Rfile {0} -Qfile {1}".format(Rfile, Qfile) + cmd += " --postscript -p {0}".format(prefix) + if layout: + cmd += " --layout" + if color == "similarity": + cmd += " --color" + elif color == "none": + cmd += " --nocolor" + sh(cmd) + + cmd = "ps2pdf {0}.ps {0}.pdf".format(prefix) + sh(cmd) + + return prefix + ".pdf" + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/jcvi/graphics/synteny.py b/jcvi/graphics/synteny.py new file mode 100644 index 00000000..94c78e53 --- /dev/null +++ b/jcvi/graphics/synteny.py @@ -0,0 +1,736 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +%prog mcscan.txt all.bed layout.csv + +Illustrate MCscan multiple collinearity alignments. Use layout.csv to indicate +the positions of tracks. For example: + +#x, y, rotation, ha, va, color, ratio +0.5, 0.6, 0, left, center, g +0.25, 0.7, 45, center, center, m + +With the row ordering corresponding to the column ordering in the MCscan output. 
+ +For "ha" (horizontal alignment), accepted values are: left|right|leftalign|rightalign|center|"" +For "va" (vertical alignment), accepted values are: top|bottom|center|""(empty) +""" + +import sys + +from typing import List, Optional + +import numpy as np + +from matplotlib import transforms +from matplotlib.path import Path + +from ..apps.base import OptionParser, logger +from ..compara.synteny import BlockFile +from ..formats.base import DictFile +from ..formats.bed import Bed +from ..utils.cbook import human_size +from ..utils.validator import validate_in_choices, validate_in_range + +from .base import ( + AbstractLayout, + PathPatch, + markup, + plt, + savefig, +) +from .glyph import ( + BasePalette, + Glyph, + OrientationPalette, + OrthoGroupPalette, + RoundLabel, +) +from .tree import draw_tree, read_trees + + +HorizontalAlignments = ("left", "right", "leftalign", "rightalign", "center", "") +VerticalAlignments = ("top", "bottom", "center", "") +CANVAS_SIZE = 0.65 + + +class LayoutLine(object): + """ + Parse a line in the layout file. 
The line is in the following format: + + *0.5, 0.6, 0, left, center, g, 1, chr1 + """ + + def __init__(self, row, delimiter=","): + self.hidden = row[0] == "*" + if self.hidden: + row = row[1:] + args = row.rstrip().split(delimiter) + args = [x.strip() for x in args] + self.x = float(args[0]) + validate_in_range(self.x, 0, 1, "XPosition(x) column") + self.y = float(args[1]) + validate_in_range(self.y, 0, 1, "YPosition(y) column") + self.rotation = int(args[2]) + self.ha = args[3] + validate_in_choices( + self.ha, HorizontalAlignments, "HorizontaAlignment(ha) column" + ) + self.va = args[4] + validate_in_choices(self.va, VerticalAlignments, "VerticalAlignment(va) column") + self.color = args[5] + self.ratio = 1 + if len(args) > 6: + self.ratio = float(args[6]) + if len(args) > 7: + self.label = args[7].strip() + else: + self.label = None + if len(args) > 8: + self.label_fontsize = float(args[8]) + else: + self.label_fontsize = 10 + + +class Layout(AbstractLayout): + """ + Parse the layout file. + """ + + def __init__(self, filename, delimiter=",", seed: Optional[int] = None): + super().__init__(filename) + fp = open(filename, encoding="utf-8") + self.edges = [] + for row in fp: + if row[0] == "#": + continue + if row[0] == "e": + args = row.rstrip().split(delimiter) + args = [x.strip() for x in args] + a, b = args[1:3] + if len(args) >= 4 and args[3]: + blockcolor = args[3] + else: + blockcolor = None + if len(args) >= 5 and args[4]: + samearc = args[4] + else: + samearc = None + a, b = int(a), int(b) + assert args[0] == "e" + self.edges.append((a, b, blockcolor, samearc)) + else: + self.append(LayoutLine(row, delimiter=delimiter)) + + self.assign_colors(seed=seed) + + +class Shade(object): + """ + Draw a shade between two tracks. 
+ """ + + Styles = ("curve", "line") + + def __init__( + self, + ax, + a, + b, + ymid_pad: float = 0.0, + highlight=False, + style="curve", + ec="k", + fc="k", + alpha=0.2, + lw=1, + zorder=1, + ): + """Create syntenic wedges between tracks. + + Args: + ax: matplotlib Axes + a (tuple of floats): ((start_x, start_y), (end_x, end_y)) + b (tuple of floats): ((start_x, start_y), (end_x, end_y)) + ymid_pad (float): Adjustment to y-mid position of Bezier controls, curve style only + highlight (bool, optional): Plot this shade if color is specified. Defaults to False. + style (str, optional): Style. Defaults to "curve", must be one of + ("curve", "line") + ec (str, optional): Edge color. Defaults to "k". + fc (str, optional): Face color. Defaults to "k". + alpha (float, optional): Transparency. Defaults to 0.2. + lw (int, optional): Line width. Defaults to 1. + zorder (int, optional): Z-order. Defaults to 1. + """ + fc = fc or "gainsboro" # Default block color is grayish + assert style in self.Styles, f"style must be one of {self.Styles}" + a1, a2 = a + b1, b2 = b + ax1, ay1 = a1 + ax2, ay2 = a2 + bx1, by1 = b1 + bx2, by2 = b2 + if ax1 is None or ax2 is None or bx1 is None or bx2 is None: + return + M, C4, L, CP = Path.MOVETO, Path.CURVE4, Path.LINETO, Path.CLOSEPOLY + if style == "curve": + ymid1 = (ay1 + by1) / 2 + ymid_pad + ymid2 = (ay2 + by2) / 2 + ymid_pad + pathdata = [ + (M, a1), + (C4, (ax1, ymid1)), + (C4, (bx1, ymid1)), + (C4, b1), + (L, b2), + (C4, (bx2, ymid2)), + (C4, (ax2, ymid2)), + (C4, a2), + (CP, a1), + ] + else: + pathdata = [(M, a1), (L, b1), (L, b2), (L, a2), (CP, a1)] + codes, verts = zip(*pathdata) + path = Path(verts, codes) + if highlight: + ec = fc = highlight + + pp = PathPatch(path, ec=ec, fc=fc, alpha=alpha, lw=lw, zorder=zorder) + ax.add_patch(pp) + + +class Region(object): + """ + Draw a region of synteny. 
+ """ + + def __init__( + self, + ax, + ext, + layout, + bed, + scale, + switch=None, + chr_label=True, + loc_label=True, + gene_labels: Optional[set] = None, + genelabelsize=0, + genelabelrotation=25, + pad=0.05, + vpad=0.015, + extra_features=None, + glyphstyle="box", + glyphcolor: BasePalette = OrientationPalette(), + ): + x, y = layout.x, layout.y + ratio = layout.ratio + scale /= ratio + self.y = y + lr = layout.rotation + tr = transforms.Affine2D().rotate_deg_around(x, y, lr) + ax.transAxes + inv = ax.transAxes.inverted() + + start, end, si, ei, chrom, orientation, span = ext + flank = span / scale / 2 + xstart, xend = x - flank, x + flank + self.xstart, self.xend = xstart, xend + + cv = lambda t: xstart + abs(t - startbp) / scale + hidden = layout.hidden + + # Chromosome + if not hidden: + ax.plot((xstart, xend), (y, y), color="gray", transform=tr, lw=2, zorder=1) + + self.genes = genes = bed[si : ei + 1] + startbp, endbp = start.start, end.end + if orientation == "-": + startbp, endbp = endbp, startbp + + if switch: + chrom = switch.get(chrom, chrom) + if layout.label: + chrom = layout.label + + label = "-".join( + ( + human_size(startbp, target="Mb", precision=2)[:-2], + human_size(endbp, target="Mb", precision=2), + ) + ) + + height = 0.012 + self.gg = {} + # Genes + for g in genes: + gstart, gend = g.start, g.end + strand = g.strand + if strand == "-": + gstart, gend = gend, gstart + if orientation == "-": + strand = "+" if strand == "-" else "-" + + x1, x2, a, b = self.get_coordinates(gstart, gend, y, cv, tr, inv) + gene_name = g.accn + self.gg[gene_name] = (a, b) + + color, zorder = ( + glyphcolor.get_color_and_zorder(strand) + if isinstance(glyphcolor, OrientationPalette) + else glyphcolor.get_color_and_zorder(gene_name) + ) + + if hidden: + continue + gp = Glyph( + ax, + x1, + x2, + y, + height, + gradient=False, + fc=color, + style=glyphstyle, + zorder=zorder, + ) + gp.set_transform(tr) + if genelabelsize and (not gene_labels or gene_name in 
gene_labels): + if genelabelrotation == 0: + text_x = x1 if x1 > x2 else x2 + text_y = y + else: + text_x = (x1 + x2) / 2 + text_y = y + height / 2 + genelabelsize * vpad / 3 + ax.text( + text_x, + text_y, + markup(gene_name), + size=genelabelsize, + rotation=genelabelrotation, + ha="left", + va="center", + color="lightslategray", + ) + + # Extra features (like repeats) + if extra_features: + for g in extra_features: + gstart, gend = g.start, g.end + x1, x2, a, b = self.get_coordinates(gstart, gend, y, cv, tr, inv) + gp = Glyph( + ax, + x1, + x2, + y, + height * 3 / 4, + gradient=False, + fc="#ff7f00", + style=glyphstyle, + zorder=2, + ) + gp.set_transform(tr) + + ha, va = layout.ha, layout.va + + hpad = 0.02 + if ha == "left": + xx = xstart - hpad + ha = "right" + elif ha == "leftalign": + xx = 0.5 - CANVAS_SIZE / 2 - hpad + ha = "right" + elif ha == "right": + xx = xend + hpad + ha = "left" + elif ha == "rightalign": + xx = 0.5 + CANVAS_SIZE / 2 + hpad + ha = "left" + else: + xx = x + ha = "center" + + # Tentative solution to labels stick into glyph + magic = 40.0 + cc = abs(lr) / magic if abs(lr) > magic else 1 + if va == "top": + yy = y + cc * pad + elif va == "bottom": + yy = y - cc * pad + else: + yy = y + + l = np.array((xx, yy)) + trans_angle = ax.transAxes.transform_angles(np.array((lr,)), l.reshape((1, 2)))[ + 0 + ] + lx, ly = l + if not hidden: + bbox = dict(boxstyle="round", fc="w", ec="w", alpha=0.5) + kwargs = dict( + ha=ha, va="center", rotation=trans_angle, bbox=bbox, zorder=10 + ) + + chr_label = markup(chrom) if chr_label else None + loc_label = label if loc_label else None + if chr_label: + if loc_label: + ax.text( + lx, + ly + vpad, + chr_label, + size=layout.label_fontsize, + color=layout.color, + **kwargs, + ) + ax.text( + lx, + ly - vpad, + loc_label, + color="lightslategrey", + size=layout.label_fontsize, + **kwargs, + ) + else: + ax.text(lx, ly, chr_label, color=layout.color, **kwargs) + + def get_coordinates(self, gstart, gend, y, cv, tr, 
inv): + """ + Get coordinates of a gene. + """ + x1, x2 = cv(gstart), cv(gend) + a, b = tr.transform((x1, y)), tr.transform((x2, y)) + a, b = inv.transform(a), inv.transform(b) + return x1, x2, a, b + + +def ymid_offset(samearc: Optional[str], pad: float = 0.05): + """ + Adjustment to ymid, this is useful to adjust the appearance of the Bezier + curves between the tracks. + """ + if samearc == "above": + return 2 * pad + if samearc == "above2": + return 4 * pad + if samearc == "below": + return -2 * pad + if samearc == "below2": + return -4 * pad + return 0 + + +class Synteny(object): + """ + Draw the synteny plot. + """ + + def __init__( + self, + fig, + root, + datafile, + bedfile, + layoutfile, + switch=None, + tree=None, + extra_features=None, + chr_label: bool = True, + loc_label: bool = True, + gene_labels: Optional[set] = None, + genelabelsize: int = 0, + genelabelrotation: int = 25, + pad: float = 0.05, + vpad: float = 0.015, + scalebar: bool = False, + shadestyle: str = "curve", + glyphstyle: str = "arrow", + glyphcolor: str = "orientation", + seed: Optional[int] = None, + prune_features=True, + ): + _, h = fig.get_figwidth(), fig.get_figheight() + bed = Bed(bedfile) + order = bed.order + bf = BlockFile(datafile) + self.layout = lo = Layout(layoutfile, seed=seed) + switch = DictFile(switch, delimiter="\t") if switch else None + if extra_features: + extra_features = Bed(extra_features) + + exts = [] + extras = [] + for i in range(bf.ncols): + ext = bf.get_extent(i, order) + exts.append(ext) + if extra_features: + start, end, _, _, chrom, _, span = ext + start, end = start.start, end.end # start, end coordinates + ef = list(extra_features.extract(chrom, start, end)) + + # Pruning removes minor features with < 0.1% of the region + if prune_features: + ef_pruned = [x for x in ef if x.span >= span / 1000] + logger.info( + "Extracted %d features (%d after pruning)", + len(ef), + len(ef_pruned), + ) + extras.append(ef_pruned) + else: + logger.info("Extracted %d 
features", len(ef)) + extras.append(ef) + + maxspan = max(exts, key=lambda x: x[-1])[-1] + scale = maxspan / CANVAS_SIZE + + self.gg = gg = {} + self.rr = [] + ymids = [] + glyphcolor = ( + OrientationPalette() + if glyphcolor == "orientation" + else OrthoGroupPalette(bf.grouper()) + ) + for i in range(bf.ncols): + ext = exts[i] + ef = extras[i] if extras else None + r = Region( + root, + ext, + lo[i], + bed, + scale, + switch, + gene_labels=gene_labels, + genelabelsize=genelabelsize, + genelabelrotation=genelabelrotation, + chr_label=chr_label, + loc_label=loc_label, + vpad=vpad, + extra_features=ef, + glyphstyle=glyphstyle, + glyphcolor=glyphcolor, + ) + self.rr.append(r) + # Use tid and accn to store gene positions + gg.update(dict(((i, k), v) for k, v in r.gg.items())) + ymids.append(r.y) + + for i, j, blockcolor, samearc in lo.edges: + ymid_pad = ymid_offset(samearc, pad) + for ga, gb, h in bf.iter_pairs(i, j): + a, b = gg[(i, ga)], gg[(j, gb)] + Shade( + root, a, b, ymid_pad, fc=blockcolor, lw=0, alpha=1, style=shadestyle + ) + + for ga, gb, h in bf.iter_pairs(i, j, highlight=True): + a, b = gg[(i, ga)], gg[(j, gb)] + Shade( + root, + a, + b, + ymid_pad, + alpha=1, + highlight=h, + zorder=2, + style=shadestyle, + ) + + if scalebar: + logger.info("Build scalebar (scale=%.3f)", scale) + # Find the best length of the scalebar + ar = [1, 2, 5] + candidates = ( + [1000 * x for x in ar] + + [10000 * x for x in ar] + + [100000 * x for x in ar] + ) + # Find the one that's close to an optimal canvas size + dists = [(abs(x / scale - 0.12), x) for x in candidates] + dist, candidate = min(dists) + dist = candidate / scale + x, y, yp = 0.22, 0.92, 0.005 + a, b = x - dist / 2, x + dist / 2 + lsg = "lightslategrey" + root.plot([a, a], [y - yp, y + yp], "-", lw=2, color=lsg) + root.plot([b, b], [y - yp, y + yp], "-", lw=2, color=lsg) + root.plot([a, b], [y, y], "-", lw=2, color=lsg) + root.text( + x, + y + 0.02, + human_size(candidate, precision=0), + ha="center", + 
va="center", + ) + + if tree: + trees = read_trees(tree) + ntrees = len(trees) + logger.debug("A total of %d trees imported.", ntrees) + xiv = 1.0 / ntrees + yiv = 0.3 + xstart = 0 + ystart = min(ymids) - 0.4 + for i in range(ntrees): + ax = fig.add_axes([xstart, ystart, xiv, yiv]) + label, outgroup, color, tx = trees[i] + draw_tree( + ax, + tx, + outgroup=outgroup, + rmargin=0.4, + leaffont=11, + treecolor=color, + supportcolor=color, + leafcolor=color, + ) + xstart += xiv + RoundLabel(ax, 0.5, 0.3, label, fill=True, fc="lavender", color=color) + + +def draw_gene_legend( + ax, + x1: float, + x2: float, + ytop: float, + d: float = 0.04, + text: bool = False, + repeat: bool = False, + glyphstyle: str = "box", +): + """ + Draw a legend for gene glyphs. + """ + forward, backward = OrientationPalette.forward, OrientationPalette.backward + ax.plot([x1, x1 + d], [ytop, ytop], ":", color=forward, lw=2) + ax.plot([x1 + d], [ytop], ">", color=forward, mec=forward) + ax.plot([x2, x2 + d], [ytop, ytop], ":", color=backward, lw=2) + ax.plot([x2], [ytop], "<", color=backward, mec="g") + if text: + ax.text(x1 + d / 2, ytop + d / 2, "gene (+)", ha="center") + ax.text(x2 + d / 2, ytop + d / 2, "gene (-)", ha="center") + if repeat: + xr = (x1 + x2 + d) / 2 + Glyph( + ax, + xr - d / 2, + xr + d / 2, + ytop, + 0.012 * 3 / 4, + gradient=False, + fc="#ff7f00", + style=glyphstyle, + zorder=2, + ) + ax.text(xr, ytop + d / 2, "repeat", ha="center") + + +def main(args: List[str]): + p = OptionParser(__doc__) + p.add_argument("--switch", help="Rename the seqid with two-column file") + p.add_argument("--tree", help="Display trees on the bottom of the figure") + p.add_argument("--extra", help="Extra features in BED format") + p.add_argument( + "--genelabels", + help='Show only these gene labels, separated by comma. Example: "At1g12340,At5g54690"', + ) + p.add_argument( + "--genelabelsize", + default=0, + type=int, + help="Show gene labels at this font size, useful for debugging. 
" + + "However, plot may appear visually crowded. " + + "Reasonably good values are 2 to 6 [Default: disabled]", + ) + p.add_argument( + "--genelabelrotation", + default=25, + type=int, + help="Rotate gene labels at this angle (anti-clockwise), useful for debugging.", + ) + p.add_argument( + "--scalebar", + default=False, + action="store_true", + help="Add scale bar to the plot", + ) + p.add_argument( + "--glyphstyle", + default="box", + choices=Glyph.Styles, + help="Style of feature glyphs", + ) + p.add_argument( + "--glyphcolor", + default="orientation", + choices=Glyph.Palette, + help="Glyph coloring based on", + ) + p.add_argument( + "--shadestyle", + default="curve", + choices=Shade.Styles, + help="Style of syntenic wedges", + ) + p.add_argument( + "--outputprefix", + default="", + help="Prefix for the output file", + ) + p.add_argument( + "--noprune", + default=False, + action="store_true", + help="If set, do not exclude small features from annotation track (<1%% of region)", + ) + opts, args, iopts = p.set_image_options(args, figsize="8x7") + + if len(args) != 3: + sys.exit(not p.print_help()) + + datafile, bedfile, layoutfile = args + switch = opts.switch + tree = opts.tree + gene_labels = None if not opts.genelabels else set(opts.genelabels.split(",")) + prune_features = not opts.noprune + + pf = datafile.rsplit(".", 1)[0] + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + Synteny( + fig, + root, + datafile, + bedfile, + layoutfile, + switch=switch, + tree=tree, + extra_features=opts.extra, + gene_labels=gene_labels, + genelabelsize=opts.genelabelsize, + genelabelrotation=opts.genelabelrotation, + scalebar=opts.scalebar, + shadestyle=opts.shadestyle, + glyphstyle=opts.glyphstyle, + glyphcolor=opts.glyphcolor, + seed=iopts.seed, + prune_features=prune_features, + ) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + outputprefix = opts.outputprefix + if outputprefix: + pf = outputprefix + image_name = pf + 
"." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + return image_name + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/jcvi/graphics/table.py b/jcvi/graphics/table.py new file mode 100644 index 00000000..73f9fc92 --- /dev/null +++ b/jcvi/graphics/table.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# +# table.py +# graphics +# +# Created by Haibao Tang on 05/25/20 +# Copyright © 2020 Haibao Tang. All rights reserved. +# +import csv +import sys + +from ..apps.base import OptionParser + +from .base import ( + Rectangle, + load_image, + markup, + normalize_axes, + plt, + savefig, +) + + +class CsvTable(list): + def __init__(self, csvfile="table.csv"): + super().__init__() + with open(csvfile) as csvfile: + reader = csv.reader(csvfile, skipinitialspace=True) + self.header = [markup(x) for x in next(reader)] + self.append(self.header) + for row in reader: + is_image_file = row[0].startswith("file://") + if is_image_file: + images = [] + for filenames in row: + images.append( + [ + load_image(filename=f.replace("file://", "")) + for f in filenames.split("|") + ] + ) + self.append(images) + else: + self.append(row) + print(self.header) + + def column_widths(self, total=1): + # Get the maximum width for each column + max_widths = [0] * self.columns + for row in self: + for j, cell in enumerate(row): + if isinstance(cell, list): + continue + max_widths[j] = max(max_widths[j], len(cell)) + total_width = sum(max_widths) + return [x * total / total_width for x in max_widths] + + @property + def rows(self): + return len(self) + + @property + def columns(self): + return len(self.header) + + +def draw_multiple_images_in_rectangle(ax, images, rect, box_width, yinflation=1): + """Draw multiple images in given rectangle. Used by draw_table(). 
+ + Args: + ax (matplotlib axes): matplotlib axes + images (List[image]): List of images + rect (Tuple[float]): (left, bottom, width, height) + box_width (float): Width of the image square + yinflation (float): inflation along the y-axis + """ + n_images = len(images) + left, bottom, width, height = rect + box_start = (width - n_images * box_width) / 2 + left += box_start + bottom += (height - box_width * yinflation) / 2 + for image in images: + extent = (left, left + box_width, bottom, bottom + box_width * yinflation) + ax.imshow(image, extent=extent, aspect="auto") + left += box_width + + +def draw_table(ax, csv_table, extent=(0, 1, 0, 1), stripe_color="beige", yinflation=1): + """Draw table on canvas. + + Args: + ax (matplotlib axes): matplotlib axes + csv_table (CsvTable): Parsed CSV table + extent (tuple, optional): (left, right, bottom, top). Defaults to (0, 1, 0, 1). + stripe_color (str, optional): Stripe color of the table. Defaults to + "beige". + yinflation (float, optional): Inflate on y since imshow aspect ratio + sometimes create warped images. Defaults to 1. 
+ """ + left, right, bottom, top = extent + width = right - left + height = top - bottom + rows = csv_table.rows + column_widths = csv_table.column_widths(width) + print(column_widths) + + yinterval = height / rows + for i, row in enumerate(csv_table): + should_stripe = i % 2 == 0 + contain_images = isinstance(row[0], list) + xstart = left + if contain_images: + box_width = min( + min(column_widths[j] / len(x) for j, x in enumerate(row)), yinterval + ) + for j, cell in enumerate(row): + xinterval = column_widths[j] + xmid = xstart + xinterval / 2 + ymid = top - (i + 0.5) * yinterval + if contain_images: + # There may be multiple images, center them + rect = (xstart, top - (i + 1) * yinterval, xinterval, yinterval) + draw_multiple_images_in_rectangle( + ax, cell, rect, box_width, yinflation=yinflation + ) + should_stripe = False + else: + ax.text( + xmid, + ymid, + cell, + ha="center", + va="center", + ) + + xstart += column_widths[j] + + if not should_stripe: + continue + + # Draw the stripes, extend a little longer horizontally + xpad = 0.01 + ax.add_patch( + Rectangle( + (left - xpad, top - (i + 1) * yinterval), + width + 2 * xpad, + yinterval, + fc=stripe_color, + ec=stripe_color, + ) + ) + + +def main(args): + """ + %prog table.csv + + Render a table on canvas. Input is a CSV file. + """ + p = OptionParser(main.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="7x7") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (csvfile,) = args + pf = csvfile.rsplit(".", 1)[0] + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + csv_table = CsvTable(csvfile) + + draw_table(root, csv_table) + + normalize_axes(root) + + image_name = pf + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/jcvi/graphics/tree.py b/jcvi/graphics/tree.py new file mode 100644 index 00000000..8d96dc77 --- /dev/null +++ b/jcvi/graphics/tree.py @@ -0,0 +1,688 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +import sys + +from collections import defaultdict +from itertools import groupby + +from ete3 import Tree + +from ..apps.base import OptionParser, glob, logger +from ..formats.base import LineFile +from ..formats.sizes import Sizes + +from .base import ( + FancyBboxPatch, + Rectangle, + linear_shade, + markup, + normalize_axes, + plt, + savefig, + set3_n, +) +from .glyph import ExonGlyph, TextCircle, get_setups + + +class LeafInfoLine: + def __init__(self, row, delimiter=","): + args = [x.strip() for x in row.split(delimiter)] + self.name = args[0] + self.color = args[1] + self.new_name = None + if len(args) > 2: + self.new_name = args[2] + + +class LeafInfoFile(LineFile): + def __init__(self, filename, delimiter=","): + super().__init__(filename) + self.cache = {} + with open(filename) as fp: + for row in fp: + if row[0] == "#": + continue + line = LeafInfoLine(row, delimiter=delimiter) + self.cache[line.name] = line + + +class WGDInfoLine: + def __init__(self, row, delimiter=",", defaultcolor="#7fc97f"): + args = [x.strip() for x in row.split(delimiter)] + self.node_name = args[0] + self.divergence = float(args[1]) / 100 + self.name = args[2] + self.color = args[3] or defaultcolor + self.style = args[4] + + +class WGDInfoFile(LineFile): + def __init__(self, filename, delimiter=","): + super().__init__(filename) + self.cache = defaultdict(list) + with open(filename) as fp: + for row in fp: + if row[0] == "#": + continue + line = WGDInfoLine(row, delimiter=delimiter) + self.cache[line.node_name].append(line) + + +def truncate_name(name, rule=None): + """ + shorten taxa names for tree display + + Options of rule. 
This only affects tree display. + - headn (eg. head3 truncates first 3 chars) + - oheadn (eg. ohead3 retains only the first 3 chars) + - tailn (eg. tail3 truncates last 3 chars) + - otailn (eg. otail3 retains only the last 3 chars) + n = 1 ~ 99 + """ + import re + + if rule is None: + return name + + k = re.search("(?<=^head)[0-9]{1,2}$", rule) + if k: + k = k.group(0) + tname = name[int(k) :] + else: + k = re.search("(?<=^ohead)[0-9]{1,2}$", rule) + if k: + k = k.group(0) + tname = name[: int(k)] + else: + k = re.search("(?<=^tail)[0-9]{1,2}$", rule) + if k: + k = k.group(0) + tname = name[: -int(k)] + else: + k = re.search("(?<=^otail)[0-9]{1,2}$", rule) + if k: + k = k.group(0) + tname = name[-int(k) :] + else: + print(truncate_name.__doc__, file=sys.stderr) + raise ValueError("Wrong rule for truncation!") + return tname + + +def draw_wgd_xy(ax, xx, yy, wgdline): + """Draw WGD at (xx, yy) position + + Args: + ax (axis): Matplotlib axes + xx (float): x position + yy (float): y position + wgdline (WGDInfo): WGDInfoLines that contains the styling information + """ + TextCircle( + ax, + xx, + yy, + wgdline.name, + fc=wgdline.color, + radius=0.0225, + color="k", + fontweight="bold", + ) + + +def draw_wgd(ax, y, rescale, name, wgdcache): + """Draw WGD given a name and the WGDInfo cache. 
+ + Args: + ax (matplotlib.axes): matplotlib axes + y (float): y position + rescale (function): Rescale function to generate x position + name (str): Name of the line (usually the taxon/internal name) + wgdcache (Dict): Dictionary containing WGDInfoLines + """ + if not wgdcache or name not in wgdcache: + return + for line in wgdcache[name]: + draw_wgd_xy(ax, rescale(line.divergence), y, line) + + +def draw_tree( + ax, + t, + hpd=None, + margin=0.1, + rmargin=0.2, + ymargin=0.1, + tip=0.01, + treecolor="k", + supportcolor="k", + internal=True, + outgroup=None, + dashedoutgroup=False, + reroot=True, + gffdir=None, + sizes=None, + trunc_name=None, + SH=None, + scutoff=0, + leafcolor="k", + leaffont=12, + leafinfo=None, + wgdinfo=None, + geoscale=False, + groups=[], +): + """ + main function for drawing phylogenetic tree + """ + + if reroot: + if outgroup: + R = t.get_common_ancestor(*outgroup) + else: + # Calculate the midpoint node + R = t.get_midpoint_outgroup() + + if R is not t: + t.set_outgroup(R) + + # By default, the distance to outgroup and non-outgroup is the same + # we re-adjust the distances so that the outgroups will appear + # farthest from everything else + if dashedoutgroup: + a, b = t.children + # Avoid even split + total = a.dist + b.dist + newR = t.get_common_ancestor(*outgroup) + a.dist = 0.9 * total + b.dist = total - a.dist + + farthest, max_dist = t.get_farthest_leaf() + print("max_dist = {}".format(max_dist), file=sys.stderr) + + xstart = margin + ystart = 2 * ymargin + # scale the tree + scale = (1 - margin - rmargin) / max_dist + + def rescale(dist): + return xstart + scale * dist + + def rescale_divergence(divergence): + return rescale(max_dist - divergence) + + num_leaves = len(t.get_leaf_names()) + yinterval = (1 - ystart) / num_leaves + ytop = ystart + (num_leaves - 0.5) * yinterval + + # get exons structures, if any + structures = {} + if gffdir: + gffiles = glob("{0}/*.gff*".format(gffdir)) + setups, ratio = get_setups(gffiles, 
canvas=rmargin / 2, noUTR=True) + structures = dict((a, (b, c)) for a, b, c in setups) + + if sizes: + sizes = Sizes(sizes).mapping + + coords = {} + i = 0 + color_groups = [] # Used to plot groups to the right of the tree + for n in t.traverse("postorder"): + dist = n.get_distance(t) + xx = rescale(dist) + + if n.is_leaf(): + yy = ystart + i * yinterval + i += 1 + + if trunc_name: + name = truncate_name(n.name, rule=trunc_name) + else: + name = n.name + + if leafinfo and n.name in leafinfo: + line = leafinfo[n.name] + lc = line.color + sname = line.new_name + else: + lc = leafcolor + sname = None + lc = lc or "k" + sname = sname or name.replace("_", "-") + # if color is given as "R,G,B" + if "," in lc: + lc = [float(x) for x in lc.split(",")] + + ax.text( + xx + tip, + yy, + markup(sname), + va="center", + fontstyle="italic", + size=leaffont, + color=lc, + ) + color_groups.append((lc, yy, xx)) + + gname = n.name.split("_")[0] + if gname in structures: + mrnabed, cdsbeds = structures[gname] + ExonGlyph( + ax, + 1 - rmargin / 2, + yy, + mrnabed, + cdsbeds, + align="right", + ratio=ratio, + ) + if sizes and gname in sizes: + size = sizes[gname] + size = size / 3 - 1 # base pair converted to amino acid + size = "{0}aa".format(size) + ax.text(1 - rmargin / 2 + tip, yy, size, size=leaffont) + + else: + linestyle = "--" if (dashedoutgroup and n is t) else "-" + children = [coords[x] for x in n.get_children()] + children_x, children_y = zip(*children) + min_y, max_y = min(children_y), max(children_y) + # plot the vertical bar + ax.plot((xx, xx), (min_y, max_y), linestyle, color=treecolor) + # plot the horizontal bar + for cx, cy in children: + ax.plot((xx, cx), (cy, cy), linestyle, color=treecolor) + yy = sum(children_y) * 1.0 / len(children_y) + # plot HPD if exists + if hpd and n.name in hpd: + a, b = hpd[n.name] + ax.plot( + (rescale_divergence(a), rescale_divergence(b)), + (yy, yy), + "-", + color="darkslategray", + alpha=0.4, + lw=2, + ) + support = n.support + if 
support > 1: + support = support / 100.0 + if not n.is_root() and supportcolor: + if support > scutoff / 100.0: + ax.text( + xx, + yy + 0.005, + "{0:d}".format(int(abs(support * 100))), + ha="right", + size=leaffont, + color=supportcolor, + ) + if internal and n.name: + TextCircle(ax, xx, yy, n.name, size=9) + else: # Just a dot + TextCircle(ax, xx, yy, None, radius=0.002) + + coords[n] = (xx, yy) + # WGD info + draw_wgd(ax, yy, rescale_divergence, n.name, wgdinfo) + + # scale bar + if geoscale: + draw_geoscale( + ax, ytop, margin=margin, rmargin=rmargin, yy=ymargin, max_dist=max_dist + ) + else: + br = 0.1 + x1 = xstart + 0.1 + x2 = x1 + br * scale + yy = ymargin + ax.plot([x1, x1], [yy - tip, yy + tip], "-", color=treecolor) + ax.plot([x2, x2], [yy - tip, yy + tip], "-", color=treecolor) + ax.plot([x1, x2], [yy, yy], "-", color=treecolor) + ax.text( + (x1 + x2) / 2, + yy - tip, + "{0:g}".format(br), + va="top", + ha="center", + size=leaffont, + color=treecolor, + ) + + # Groupings on the right, often to used to show groups such as phylogenetic + # clades + if groups: + color_groups.sort() + group_extents = [] + for color, group in groupby(color_groups, key=lambda x: x[0]): + group = list(group) + _, min_yy, xx = min(group) + _, max_yy, xx = max(group) + group_extents.append((min_yy, max_yy, xx, color)) + group_extents.sort(reverse=True) + + for group_name, (min_yy, max_yy, xx, color) in zip(groups, group_extents): + group_color = linear_shade(color, fraction=0.85) + ax.add_patch( + FancyBboxPatch( + (xx, min_yy - yinterval / 2), + rmargin - 0.01, + max_yy - min_yy + yinterval, + boxstyle="round,pad=-0.002,rounding_size=0.005", + fc=group_color, + ec=group_color, + ) + ) + # Add the group label + horizontal = (max_yy - min_yy) < 0.2 + mid_yy = (min_yy + max_yy) / 2 + label_rightend = 0.98 + if horizontal: + ax.text( + label_rightend, + mid_yy, + markup(group_name), + color="darkslategray", + ha="right", + va="center", + ) + else: + ax.text( + label_rightend, + 
mid_yy, + markup(group_name), + color="darkslategray", + ha="right", + va="center", + rotation=-90, + ) + + if SH is not None: + xs = x1 + ys = (ymargin + yy) / 2.0 + ax.text( + xs, + ys, + "SH test against ref tree: {0}".format(SH), + ha="left", + size=leaffont, + color="g", + ) + + +def read_trees(tree): + from urllib.parse import parse_qs + from jcvi.formats.base import read_block + + trees = [] + + fp = open(tree) + for header, tx in read_block(fp, "#"): + header = parse_qs(header[1:]) + label = header["label"][0].strip('"') + outgroup = header["outgroup"] + (color,) = header.get("color", ["k"]) + trees.append((label, outgroup, color, "".join(tx))) + + return trees + + +def draw_geoscale( + ax, ytop, margin=0.1, rmargin=0.2, yy=0.1, max_dist=3.0, contrast_epochs=True +): + """ + Draw geological epoch on million year ago (mya) scale. + max_dist = 3.0 => max is 300 mya + """ + import math + + a, b = margin, 1 - rmargin # Correspond to 300mya and 0mya + minx, maxx = 0, int(max_dist * 100) + + def cv(x): + return b - (x - b) / (maxx - minx) * (b - a) + + ax.plot((a, b), (yy, yy), "k-") + tick = 0.0125 + scale_start = int(math.ceil(maxx / 25) * 25) + for mya in range(scale_start - 25, 0, -25): + p = cv(mya) + ax.plot((p, p), (yy, yy - tick), "k-") + ax.text(p, yy - 2.5 * tick, str(mya), ha="center", va="center") + + ax.text( + (a + b) / 2, + yy - 5 * tick, + "Time before present (million years)", + ha="center", + va="center", + ) + + # Source: + # https://en.wikipedia.org/wiki/Geological_period + Geo = ( + ("Neogene", 2.588, 23.03), + ("Paleogene", 23.03, 66.0), + ("Cretaceous", 66.0, 145.5), + ("Jurassic", 145.5, 201.3), + ("Triassic", 201.3, 252.17), + ("Permian", 252.17, 298.9), + ("Carboniferous", 298.9, 358.9), + ) + h = 0.05 + for (era, start, end), color in zip(Geo, set3_n(len(Geo))): + if maxx - start < 10: # not visible enough + continue + start, end = cv(start), cv(end) + end = max(a, end) + p = Rectangle((end, yy + tick / 2), abs(start - end), h, lw=1, 
ec="w", fc=color) + ax.text( + (start + end) / 2, + yy + (tick + h) / 2, + era, + ha="center", + va="center", + size=8, + ) + ax.add_patch(p) + + # We highlight recent epochs for better visualization, we just highlight + # Neogene and Cretaceous as these are more relevant for most phylogeny + if contrast_epochs: + for era, start, end in Geo: + if not era in ("Neogene", "Cretaceous"): + continue + + # Make a beige patch + start, end = cv(start), cv(end) + ax.add_patch( + Rectangle( + (end, yy + tick + h), + abs(start - end), + ytop - yy - tick - h, + fc="beige", + ec="beige", + ) + ) + + +def parse_tree(infile): + """Parse newick formatted tree file and returns a tuple consisted of a + Tree object, and a HPD dictionary if 95%HPD is found in the newick string, + otherwise None + + Args: + infile (str): Path to the tree file + """ + import re + + with open(infile) as fp: + treedata = fp.read() + hpd_re = re.compile(r"( \[&95%HPD=[^[]*\])") + + def repl(match): + repl.count += 1 + name = "N{}".format(repl.count) + lb, ub = re.findall(r"HPD=\{(.*), (.*)\}", match.group(0))[0] + repl.hpd[name] = (float(lb), float(ub)) + return name + + repl.count = 0 + repl.hpd = {} + + treedata, changed = re.subn(hpd_re, repl, treedata) + if repl.hpd: + print(repl.hpd, file=sys.stderr) + + return (Tree(treedata, format=1), repl.hpd) if changed else (Tree(treedata), None) + + +def main(args): + """ + %prog newicktree + + Plot Newick formatted tree. The gene structure can be plotted along if + --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is + on, also show the number of amino acids. + """ + p = OptionParser(main.__doc__) + p.add_argument( + "--outgroup", + help="Outgroup for rerooting the tree. 
" + + "Use comma to separate multiple taxa.", + ) + p.add_argument( + "--noreroot", + default=False, + action="store_true", + help="Don't reroot the input tree", + ) + p.add_argument( + "--rmargin", default=0.2, type=float, help="Set blank rmargin to the right" + ) + p.add_argument( + "--gffdir", default=None, help="The directory that contain GFF files" + ) + p.add_argument("--sizes", default=None, help="The FASTA file or the sizes file") + p.add_argument("--SH", default=None, type=str, help="SH test p-value") + + group = p.add_argument_group("Node style") + group.add_argument("--leafcolor", default="k", help="Font color for the OTUs") + group.add_argument("--leaffont", default=12, help="Font size for the OTUs") + group.add_argument( + "--leafinfo", help="CSV file specifying the leaves: name,color,new_name" + ) + group.add_argument( + "--scutoff", + default=0, + type=int, + help="cutoff for displaying node support, 0-100", + ) + group.add_argument( + "--no_support", + dest="support", + default=True, + action="store_false", + help="Do not print node support values", + ) + group.add_argument( + "--no_internal", + dest="internal", + default=True, + action="store_false", + help="Do not show internal nodes", + ) + + group = p.add_argument_group("Edge style") + group.add_argument( + "--dashedoutgroup", + default=False, + action="store_true", + help="Gray out the edges connecting outgroup and non-outgroup", + ) + + group = p.add_argument_group("Additional annotations") + group.add_argument( + "--geoscale", + default=False, + action="store_true", + help="Plot geological scale", + ) + group.add_argument( + "--wgdinfo", help="CSV specifying the position and style of WGD events" + ) + group.add_argument( + "--groups", + help="Group names from top to bottom, to the right of the tree. " + "Each distinct color in --leafinfo is considered part of the same group. " + "Separate the names with comma, such as 'eudicots,,monocots,'. 
" + "Empty names will be ignored for that specific group. ", + ) + + opts, args, iopts = p.set_image_options(args, figsize="10x7") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (datafile,) = args + outgroup = None + reroot = not opts.noreroot + if opts.outgroup: + outgroup = opts.outgroup.split(",") + + hpd = None + if datafile == "demo": + t = Tree( + """(((Os02g0681100:0.1151,Sb04g031800:0.11220)1.0:0.0537, + (Os04g0578800:0.04318,Sb06g026210:0.04798)-1.0:0.08870)1.0:0.06985, + ((Os03g0124100:0.08845,Sb01g048930:0.09055)1.0:0.05332, + (Os10g0534700:0.06592,Sb01g030630:0.04824)-1.0:0.07886):0.09389);""" + ) + else: + logger.debug("Load tree file `%s`", datafile) + t, hpd = parse_tree(datafile) + + pf = datafile.rsplit(".", 1)[0] + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + supportcolor = "k" if opts.support else None + margin, rmargin = 0.1, opts.rmargin # Left and right margin + leafinfo = LeafInfoFile(opts.leafinfo).cache if opts.leafinfo else None + wgdinfo = WGDInfoFile(opts.wgdinfo).cache if opts.wgdinfo else None + + draw_tree( + root, + t, + hpd=hpd, + margin=margin, + rmargin=rmargin, + ymargin=margin, + supportcolor=supportcolor, + internal=opts.internal, + outgroup=outgroup, + dashedoutgroup=opts.dashedoutgroup, + reroot=reroot, + gffdir=opts.gffdir, + sizes=opts.sizes, + SH=opts.SH, + scutoff=opts.scutoff, + leafcolor=opts.leafcolor, + leaffont=opts.leaffont, + leafinfo=leafinfo, + wgdinfo=wgdinfo, + geoscale=opts.geoscale, + groups=opts.groups.split(",") if opts.groups else [], + ) + + normalize_axes(root) + + image_name = pf + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/jcvi/graphics/wheel.py b/jcvi/graphics/wheel.py new file mode 100644 index 00000000..13fc3dc7 --- /dev/null +++ b/jcvi/graphics/wheel.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Wheel plot that shows continuous data in radial axes. +""" +import sys + +from math import degrees +from collections import OrderedDict +from itertools import groupby + +import numpy as np + +from ..apps.base import ActionDispatcher, OptionParser + +from .base import normalize_axes, plt, savefig + + +def main(): + + actions = (("wheel", "wheel plot that shows continuous data in radial axes"),) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def closed_plot(ax, theta, r, *args, **kwargs): + theta = list(theta) + [theta[0]] + r = list(r) + [r[0]] + ax.plot(theta, r, *args, **kwargs) + + +def sector(ax, theta_min, theta_max, theta_pad, r, R=30, *args, **kwargs): + theta = np.linspace(theta_min - theta_pad, theta_max + theta_pad, num=100) + r = len(theta) * [r] + theta = list(theta) + [0] + r = list(r) + [-R] + closed_plot(ax, theta, r, *args, **kwargs) + + +def parse_data(datafile, score_column="score"): + data = {} + fp = open(datafile) + for row in fp: + atoms = row.split(",") + if len(atoms) == 4: # First column is SampleID + atoms = atoms[1:] + label, score, percentile = atoms + label = label.strip() + label = label.strip('"') + score = float(score.strip()) + percentile = float(percentile.strip()) + if score_column == "score": + data[label] = score + else: + data[label] = percentile + return data + + +def parse_groups(groupsfile): + groups = OrderedDict() + fp = open(groupsfile) + for row in fp: + group, label = row.split(",") + group = group.strip() + label = label.strip() + groups[label] = group + return groups + + +def wheel(args): + """ + %prog wheel datafile.csv groups.csv + + Wheel plot that shows continous 
data in radial axes. + """ + p = OptionParser(wheel.__doc__) + p.add_argument( + "--column", + default="score", + choices=("score", "percentile"), + help="Which column to extract from `datafile.csv`", + ) + opts, args, iopts = p.set_image_options(args, figsize="5x5", format="png") + + if len(args) != 2: + sys.exit(not p.print_help()) + + datafile, groupsfile = args + column = opts.column + linecolor = "#d6d6d6" + df = parse_data(datafile, score_column=opts.column) + groups = parse_groups(groupsfile) + labels = [g for g in groups if g in df] + print(labels) + df = [df[g] for g in labels] + print(df) + groups = [groups[g] for g in labels] + print(groups) + + pf = datafile.rsplit(".", 1)[0] + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + categories = len(df) + # ax = plt.subplot(111, projection='polar') + ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True) + + brewer = [ + "#FF3B30", + "#DD43A0", + "#5856D6", + "#007AFE", + "#56BDEC", + "#4CD8BA", + "#4CD864", + "#B0F457", + "#FEF221", + "#FFCC01", + "#FF9500", + "#FF3B30", + ] + + # Baseline + theta = np.linspace(1.5 * np.pi, 3.5 * np.pi, endpoint=False, num=categories) + _theta = np.linspace(1.5 * np.pi, 3.5 * np.pi) + R = max(max(df), 10) + xlim = (-R, R) if column == "score" else (-100, 100) + plim = (-R / 2, R) if column == "score" else (0, 100) + ci = (-0.5, 2) if column == "score" else (10, 90) + + # Grid + if column == "score": + for t in theta: + ax.plot([t, t], plim, color=linecolor) + ax.axis("off") + + # Contours + for t in plim: + ax.plot(_theta, [t] * len(_theta), color=linecolor) + + # Sectors (groupings) + collapsed_groups = [] + gg = [] + for group, c in groupby(enumerate(groups), lambda x: x[1]): + c = [x[0] for x in list(c)] + collapsed_groups.append(group) + gg.append(c) + + show_sector = False + if show_sector: + theta_interval = 2 * np.pi / categories + theta_pad = theta_interval / 2 * 0.9 + for color, group in zip(brewer, gg): + tmin, tmax = min(group), max(group) 
+ sector( + ax, + theta[tmin], + theta[tmax], + theta_pad, + R * 0.95, + ls="-", + color=color, + lw=2, + ) + + # Data + r = df + closed_plot(ax, theta, r, color="lightslategray", alpha=0.25) + for color, group in zip(brewer, gg): + hidden_data = [(theta[x], r[x]) for x in group if (ci[0] <= r[x] <= ci[1])] + shown_data = [(theta[x], r[x]) for x in group if (r[x] < ci[0] or r[x] > ci[1])] + for alpha, data in zip((1, 1), (hidden_data, shown_data)): + if not data: + continue + color_theta, color_r = zip(*data) + ax.plot(color_theta, color_r, "o", color=color, alpha=alpha) + + # Print out data + diseaseNames, risks = labels, df + print( + "var theta = [{}]".format(",".join("{:.1f}".format(degrees(x)) for x in theta)) + ) + print("var risks = [{}]".format(",".join(str(x) for x in risks))) + print( + "var diseaseNames = [{}]".format( + ",".join(['"{}"'.format(x) for x in diseaseNames]) + ) + ) + + # Labels + from math import cos, sin + + r = 0.5 + for i, label in enumerate(labels): + tl = theta[i] + x, y = 0.5 + r * cos(tl), 0.5 + r * sin(tl) + d = degrees(tl) + if 90 < d % 360 < 270: # On the left quardrants + d -= 180 + root.text( + x, y, label, size=4, rotation=d, ha="center", va="center", color=linecolor + ) + print(x, y, label) + + # Add baseline + baseline = 0 if column == "score" else 50 + _r = len(_theta) * [baseline] + closed_plot(ax, _theta, _r, "k:", lw=1, ms=4) + + # Add confidence interval + if column == "percentile": + barcolor = "#eeeeee" + ax.bar([0], [ci[1] - ci[0]], width=2 * np.pi, bottom=ci[0], fc=barcolor) + + ax.set_rmin(xlim[0]) + ax.set_rmax(xlim[1]) + + normalize_axes(root) + + image_name = pf + "-" + column + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +if __name__ == "__main__": + main() diff --git a/jcvi/projects/__init__.py b/jcvi/projects/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/jcvi/projects/__main__.py b/jcvi/projects/__main__.py new file mode 100644 index 00000000..9e4c5a75 --- /dev/null +++ b/jcvi/projects/__main__.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +Compilation of project specific scripts, used to execute specific analysis routines and generate publication-ready figures +""" + +from ..apps.base import dmain + + +if __name__ == "__main__": + dmain(__file__) diff --git a/jcvi/projects/age.py b/jcvi/projects/age.py new file mode 100644 index 00000000..92c42b25 --- /dev/null +++ b/jcvi/projects/age.py @@ -0,0 +1,738 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Scripts related to age prediction model. +""" +import json +import os +import os.path as op +import sys + +import numpy as np +import pandas as pd +import seaborn as sns + +from jinja2 import Template + +from ..apps.base import ActionDispatcher, OptionParser, iglob, logger +from ..graphics.base import panel_labels, plt, savefig + + +def main(): + + actions = ( + ("compile", "extract telomere length and ccn"), + ("traits", "make HTML page that reports eye and skin color"), + # Age paper plots + ("qc", "plot distributions of basic statistics of a sample"), + ("correlation", "plot correlation of age vs. postgenomic features"), + ("heritability", "plot composite on heritability estimates"), + ("regression", "plot chronological vs. predicted age"), + ("ccn", "plot several ccn plots including chr1,chrX,chrY,chrM"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +traits_template = """ + + + ART traits + + + + + + + + + + + + + + + {% for sample in samples %} + + + + + + + + {% endfor %} + +
Sample IDSkinEyes
{{ sample.sample_id }} +
+
+ + +
+
+ + +
+ + +""" + + +def lab2rgb(L, A, B): + # Borrowed from: + # + y = (L + 16) / 116 + x = A / 500 + y + z = y - B / 200 + + x = 0.95047 * (x * x * x if (x * x * x > 0.008856) else (x - 16 / 116) / 7.787) + y = 1.00000 * (y * y * y if (y * y * y > 0.008856) else (y - 16 / 116) / 7.787) + z = 1.08883 * (z * z * z if (z * z * z > 0.008856) else (z - 16 / 116) / 7.787) + + r = x * 3.2406 + y * -1.5372 + z * -0.4986 + g = x * -0.9689 + y * 1.8758 + z * 0.0415 + b = x * 0.0557 + y * -0.2040 + z * 1.0570 + + r = (1.055 * r ** (1 / 2.4) - 0.055) if (r > 0.0031308) else 12.92 * r + g = (1.055 * g ** (1 / 2.4) - 0.055) if (g > 0.0031308) else 12.92 * g + b = (1.055 * b ** (1 / 2.4) - 0.055) if (b > 0.0031308) else 12.92 * b + + return max(0, min(1, r)) * 255, max(0, min(1, g)) * 255, max(0, min(1, b)) * 255 + + +def make_rgb(L, A, B): + r, g, b = lab2rgb(L, A, B) + r = int(round(r)) + g = int(round(g)) + b = int(round(b)) + return "rgb({}, {}, {})".format(r, g, b) + + +def traits(args): + """ + %prog traits directory + + Make HTML page that reports eye and skin color. 
+ """ + p = OptionParser(traits.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + samples = [] + for folder in args: + targets = iglob(folder, "*-traits.json") + if not targets: + continue + filename = targets[0] + js = json.load(open(filename)) + js["skin_rgb"] = make_rgb( + js["traits"]["skin-color"]["L"], + js["traits"]["skin-color"]["A"], + js["traits"]["skin-color"]["B"], + ) + js["eye_rgb"] = make_rgb( + js["traits"]["eye-color"]["L"], + js["traits"]["eye-color"]["A"], + js["traits"]["eye-color"]["B"], + ) + samples.append(js) + + template = Template(traits_template) + fw = open("report.html", "w") + print(template.render(samples=samples), file=fw) + logger.debug("Report written to `%s`", fw.name) + fw.close() + + +def plot_fit_line(ax, x, y): + from numpy.polynomial.polynomial import polyfit + + t = np.arange(100) + xy = [(a, b) for (a, b) in zip(x, y) if np.isfinite(a) and np.isfinite(b)] + x, y = zip(*xy) + b, m = polyfit(x, y, 1) + print("y = {} + {} * x".format(b, m)) + ax.plot(t, b + m * t, "-", lw=3, color="k") + + +def composite_ccn(df, size=(12, 8)): + """Plot composite ccn figure""" + fig = plt.figure(1, size) + ax1 = plt.subplot2grid((2, 2), (0, 0)) + ax2 = plt.subplot2grid((2, 2), (0, 1)) + ax3 = plt.subplot2grid((2, 2), (1, 0)) + ax4 = plt.subplot2grid((2, 2), (1, 1)) + mf = df[df["hli_calc_gender"] == "Male"] + + age_label = "Chronological age (yr)" + ax1.scatter( + mf["hli_calc_age_sample_taken"], + mf["ccn.chrX"], + s=10, + marker=".", + color="lightslategray", + ) + ax1.set_ylim(0.8, 1.1) + plot_fit_line(ax1, mf["hli_calc_age_sample_taken"], mf["ccn.chrX"]) + ax1.set_ylabel("ChrX copy number") + ax1.set_title("ChrX copy number in Male") + + ax2.scatter( + mf["hli_calc_age_sample_taken"], + mf["ccn.chrY"], + s=10, + marker=".", + color="lightslategray", + ) + plot_fit_line(ax2, mf["hli_calc_age_sample_taken"], mf["ccn.chrY"]) + ax2.set_ylim(0.8, 1.1) + ax2.set_ylabel("ChrY copy number") + 
ax2.set_title("ChrY copy number in Male") + + ax3.scatter( + df["hli_calc_age_sample_taken"], + df["ccn.chr1"], + s=10, + marker=".", + color="lightslategray", + ) + plot_fit_line(ax3, df["hli_calc_age_sample_taken"], df["ccn.chr1"]) + ax3.set_ylim(1.8, 2.1) + ax3.set_ylabel("Chr1 copy number") + ax3.set_title("Chr1 copy number") + + ax4.scatter( + df["hli_calc_age_sample_taken"], + df["ccn.chrM"], + s=10, + marker=".", + color="lightslategray", + ) + plot_fit_line(ax4, df["hli_calc_age_sample_taken"], df["ccn.chrM"]) + ax4.set_ylim(0, 400) + ax4.set_ylabel("Mitochondria copy number") + ax4.set_title("Mitochondria copy number") + + from matplotlib.lines import Line2D + + for ax in (ax1, ax2, ax3, ax4): + ax.set_xlabel(age_label) + + plt.tight_layout() + root = fig.add_axes((0, 0, 1, 1)) + labels = ((0.02, 0.98, "A"), (0.52, 0.98, "B"), (0.02, 0.5, "C"), (0.52, 0.5, "D")) + panel_labels(root, labels) + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + +def ccn(args): + """ + %prog ccn combined.tsv + + Plot several ccn plots including chr1,chrX,chrY,chrM + """ + p = OptionParser(ccn.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="12x8") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (tsvfile,) = args + df = pd.read_csv(tsvfile, sep="\t") + composite_ccn(df, size=(iopts.w, iopts.h)) + outfile = tsvfile.rsplit(".", 1)[0] + ".ccn.pdf" + savefig(outfile) + + +def regression(args): + """ + %prog regression postgenomic-s.tsv + + Plot chronological vs. predicted age. 
+ """ + p = OptionParser(regression.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="8x8") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (tsvfile,) = args + df = pd.read_csv(tsvfile, sep="\t") + chrono = "Chronological age (yr)" + pred = "Predicted age (yr)" + resdf = pd.DataFrame( + {chrono: df["hli_calc_age_sample_taken"], pred: df["Predicted Age"]} + ) + g = sns.jointplot( + chrono, pred, resdf, joint_kws={"s": 6}, xlim=(0, 100), ylim=(0, 80) + ) + g.fig.set_figwidth(iopts.w) + g.fig.set_figheight(iopts.h) + outfile = tsvfile.rsplit(".", 1)[0] + ".regression.pdf" + savefig(outfile) + + +def composite_correlation(df, size=(12, 8)): + """Plot composite correlation figure""" + fig = plt.figure(1, size) + ax1 = plt.subplot2grid((2, 2), (0, 0)) + ax2 = plt.subplot2grid((2, 2), (0, 1)) + ax3 = plt.subplot2grid((2, 2), (1, 0)) + ax4 = plt.subplot2grid((2, 2), (1, 1)) + chemistry = ["V1", "V2", "V2.5", float("nan")] + colors = sns.color_palette("Set2", 8) + color_map = dict(zip(chemistry, colors)) + + age_label = "Chronological age (yr)" + ax1.scatter( + df["hli_calc_age_sample_taken"], + df["teloLength"], + s=10, + marker=".", + color=df["Chemistry"].map(color_map), + ) + ax1.set_ylim(0, 15) + ax1.set_ylabel("Telomere length (Kb)") + + ax2.scatter( + df["hli_calc_age_sample_taken"], + df["ccn.chrX"], + s=10, + marker=".", + color=df["Chemistry"].map(color_map), + ) + ax2.set_ylim(1.8, 2.1) + ax2.set_ylabel("ChrX copy number") + + ax4.scatter( + df["hli_calc_age_sample_taken"], + df["ccn.chrY"], + s=10, + marker=".", + color=df["Chemistry"].map(color_map), + ) + ax4.set_ylim(0.8, 1.1) + ax4.set_ylabel("ChrY copy number") + + ax3.scatter( + df["hli_calc_age_sample_taken"], + df["TRA.PPM"], + s=10, + marker=".", + color=df["Chemistry"].map(color_map), + ) + ax3.set_ylim(0, 250) + ax3.set_ylabel("$TCR-\\alpha$ deletions (count per million reads)") + + from matplotlib.lines import Line2D + + legend_elements = [ + Line2D( + [0], + [0], + 
marker=".", + color="w", + label=chem, + markerfacecolor=color, + markersize=16, + ) + for (chem, color) in zip(chemistry, colors)[:3] + ] + for ax in (ax1, ax2, ax3, ax4): + ax.set_xlabel(age_label) + ax.legend(handles=legend_elements, loc="upper right") + + plt.tight_layout() + root = fig.add_axes((0, 0, 1, 1)) + labels = ((0.02, 0.98, "A"), (0.52, 0.98, "B"), (0.02, 0.5, "C"), (0.52, 0.5, "D")) + panel_labels(root, labels) + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + +def correlation(args): + """ + %prog correlation postgenomic-s.tsv + + Plot correlation of age vs. postgenomic features. + """ + p = OptionParser(correlation.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="12x8") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (tsvfile,) = args + df = pd.read_csv(tsvfile, sep="\t") + composite_correlation(df, size=(iopts.w, iopts.h)) + outfile = tsvfile.rsplit(".", 1)[0] + ".correlation.pdf" + savefig(outfile) + + +def composite_qc(df_orig, size=(16, 12)): + """Plot composite QC figures""" + df = df_orig.rename( + columns={ + "hli_calc_age_sample_taken": "Age", + "hli_calc_gender": "Gender", + "eth7_max": "Ethnicity", + "MeanCoverage": "Mean coverage", + "Chemistry": "Sequencing chemistry", + "Release Client": "Cohort", + } + ) + + fig = plt.figure(1, size) + ax1 = plt.subplot2grid((2, 7), (0, 0), rowspan=1, colspan=2) + ax2 = plt.subplot2grid((2, 7), (0, 2), rowspan=1, colspan=2) + ax3 = plt.subplot2grid((2, 7), (0, 4), rowspan=1, colspan=3) + ax4 = plt.subplot2grid((2, 7), (1, 0), rowspan=1, colspan=2) + ax5 = plt.subplot2grid((2, 7), (1, 2), rowspan=1, colspan=2) + ax6 = plt.subplot2grid((2, 7), (1, 4), rowspan=1, colspan=3) + + sns.distplot(df["Age"].dropna(), kde=False, ax=ax1) + sns.countplot(x="Gender", data=df, ax=ax2) + sns.countplot( + x="Ethnicity", data=df, ax=ax3, order=df["Ethnicity"].value_counts().index + ) + sns.distplot(df["Mean coverage"].dropna(), kde=False, ax=ax4) + ax4.set_xlim(0, 100) + 
sns.countplot(x="Sequencing chemistry", data=df, ax=ax5) + sns.countplot(x="Cohort", data=df, ax=ax6, order=df["Cohort"].value_counts().index) + # Anonymize the cohorts + cohorts = ax6.get_xticklabels() + newCohorts = [] + for i, c in enumerate(cohorts): + if c.get_text() == "Spector": + c = "TwinsUK" + elif c.get_text() != "Health Nucleus": + c = "C{}".format(i + 1) + newCohorts.append(c) + ax6.set_xticklabels(newCohorts) + + for ax in (ax6,): + ax.set_xticklabels(ax.get_xticklabels(), ha="right", rotation=30) + + for ax in (ax1, ax2, ax3, ax4, ax5, ax6): + ax.set_title(ax.get_xlabel()) + ax.set_xlabel("") + + plt.tight_layout() + + root = fig.add_axes((0, 0, 1, 1)) + labels = ( + (0.02, 0.96, "A"), + (0.3, 0.96, "B"), + (0.6, 0.96, "C"), + (0.02, 0.52, "D"), + (0.3, 0.52, "E"), + (0.6, 0.52, "F"), + ) + panel_labels(root, labels) + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + +def qc(args): + """ + %prog qc postgenomic-s.tsv + + Plot basic statistics of a given sample: + Age, Gender, Ethnicity, Cohort, Chemistry + """ + p = OptionParser(heritability.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="10x6") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (tsvfile,) = args + df = pd.read_csv(tsvfile, sep="\t") + composite_qc(df, size=(iopts.w, iopts.h)) + outfile = tsvfile.rsplit(".", 1)[0] + ".qc.pdf" + savefig(outfile) + + +def extract_trait(df, id_field, trait_field): + traits = {} + # Get the gender information for filtering DZ twins + for i, row in df.iterrows(): + sample_id = str(row[id_field]) + traits[sample_id] = row[trait_field] + return traits + + +def filter_same_gender(pairs, gender): + notPresent = 0 + diffGender = 0 + # Need to screen same gender dizygotic twins + for a, b in pairs: + if not (a in gender and b in gender): + notPresent += 1 + continue + if gender[a] != gender[b]: + diffGender += 1 + continue + yield a, b, gender[a] + print(notPresent, "not found") + print(diffGender, "different gender") 
+ + +def extract_twin_values(triples, traits, gender=None): + """Calculate the heritability of certain traits in triplets. + + Parameters + ========== + triples: (a, b, "Female/Male") triples. The sample IDs are then used to query + the traits dictionary. + traits: sample_id => value dictionary + gender: + + Returns + ======= + tuples of size 2, that contain paired trait values of the twins + """ + # Construct the pairs of trait values + traitValuesAbsent = 0 + nanValues = 0 + genderSkipped = 0 + twinValues = [] + for a, b, t in triples: + if gender is not None and t != gender: + genderSkipped += 1 + continue + if not (a in traits and b in traits): + traitValuesAbsent += 1 + continue + if np.isnan(traits[a]) or np.isnan(traits[b]): + nanValues += 1 + continue + twinValues.append((traits[a], traits[b])) + + print( + "A total of {} pairs extracted ({} absent; {} nan; {} genderSkipped)".format( + len(twinValues), traitValuesAbsent, nanValues, genderSkipped + ) + ) + return twinValues + + +def plot_paired_values( + ax, + mzValues, + dzValues, + label=None, + gender=None, + palette=sns.color_palette("PRGn", 10), +): + from scipy.stats import pearsonr + + mzx, mzy = zip(*mzValues) + dzx, dzy = zip(*dzValues) + (mzline,) = ax.plot(mzx, mzy, ".", color=palette[0], alpha=0.75) + (dzline,) = ax.plot(dzx, dzy, ".", color=palette[-1], alpha=0.75) + ax.set_xlabel(label + r" in twin \#1") + ax.set_ylabel(label + r" in twin \#2") + ax.legend( + (mzline, dzline), + ( + "Monozygotic twins ($N$={}{})".format( + len(mzValues), ((" " + gender) if gender else "") + ), + "Dizygotic twins ($N$={}{})".format( + len(dzValues), (" " + gender) if gender else "" + ), + ), + loc="upper left", + ) + rho_mz, p_mz = pearsonr(mzx, mzy) + rho_dz, p_dz = pearsonr(dzx, dzy) + heritability = 2 * (rho_mz - rho_dz) + ax.set_title( + "{} ($\\rho_{{MZ}}$={:.2f}, $\\rho_{{DZ}}$={:.2f}, $heritability$={:.2f})".format( + label, rho_mz, rho_dz, heritability + ) + ) + + +def plot_abs_diff(ax, mzValues, 
dzValues, label=None, palette="PRGn"): + # Let's visualize the feature differences using boxplot + mzDelta = [abs(x - y) for (x, y) in mzValues] + dzDelta = [abs(x - y) for (x, y) in dzValues] + x = ["MZ twins"] * len(mzDelta) + ["DZ twins"] * len(dzDelta) + y = mzDelta + dzDelta + sns.boxplot(x, y, palette=palette, ax=ax) + ax.set_ylabel("Absolute difference in {}".format(label)) + + +def filter_low_values(data, cutoff): + newData = [(a, b) for a, b in data if a > cutoff and b > cutoff] + print("Removed {} outliers (<= {})".format(len(data) - len(newData), cutoff)) + return newData + + +def composite(df, sameGenderMZ, sameGenderDZ, size=(16, 24)): + """Embed both absdiff figures and heritability figures.""" + fig = plt.figure(1, size) + + ax1a = plt.subplot2grid((6, 4), (0, 0), rowspan=2, colspan=1) + ax2a = plt.subplot2grid((6, 4), (0, 1), rowspan=2, colspan=1) + ax3a = plt.subplot2grid((6, 4), (0, 2), rowspan=2, colspan=1) + ax4a = plt.subplot2grid((6, 4), (0, 3), rowspan=2, colspan=1) + ax1b = plt.subplot2grid((6, 4), (2, 0), rowspan=2, colspan=2) + ax2b = plt.subplot2grid((6, 4), (2, 2), rowspan=2, colspan=2) + ax3b = plt.subplot2grid((6, 4), (4, 0), rowspan=2, colspan=2) + ax4b = plt.subplot2grid((6, 4), (4, 2), rowspan=2, colspan=2) + + # Telomeres + telomeres = extract_trait(df, "Sample name", "telomeres.Length") + mzTelomeres = extract_twin_values(sameGenderMZ, telomeres) + dzTelomeres = extract_twin_values(sameGenderDZ, telomeres) + plot_paired_values(ax1b, mzTelomeres, dzTelomeres, label="Telomere length") + plot_abs_diff(ax1a, mzTelomeres, dzTelomeres, label="Telomere length") + + # CCNX + CCNX = extract_trait(df, "Sample name", "ccn.chrX") + mzCCNX = extract_twin_values(sameGenderMZ, CCNX, gender="Female") + dzCCNX = extract_twin_values(sameGenderDZ, CCNX, gender="Female") + dzCCNX = filter_low_values(dzCCNX, 1.75) + plot_paired_values( + ax2b, mzCCNX, dzCCNX, gender="Female only", label="ChrX copy number" + ) + plot_abs_diff(ax2a, mzCCNX, dzCCNX, 
label="ChrX copy number") + + # CCNY + CCNY = extract_trait(df, "Sample name", "ccn.chrY") + mzCCNY = extract_twin_values(sameGenderMZ, CCNY, gender="Male") + dzCCNY = extract_twin_values(sameGenderDZ, CCNY, gender="Male") + dzCCNY = filter_low_values(dzCCNY, 0.75) + + plot_paired_values( + ax3b, mzCCNY, dzCCNY, gender="Male only", label="ChrY copy number" + ) + plot_abs_diff(ax3a, mzCCNY, dzCCNY, label="ChrY copy number") + + # CCNY + TRA = extract_trait(df, "Sample name", "TRA.PPM") + mzTRA = extract_twin_values(sameGenderMZ, TRA) + dzTRA = extract_twin_values(sameGenderDZ, TRA) + plot_paired_values(ax4b, mzTRA, dzTRA, label="TCR-$\\alpha$ deletions") + plot_abs_diff(ax4a, mzTRA, dzTRA, label="TCR-$\\alpha$ deletions") + + plt.tight_layout() + + root = fig.add_axes((0, 0, 1, 1)) + # ABCD absdiff, EFGH heritability + labels = ( + (0.03, 0.99, "A"), + (0.27, 0.99, "B"), + (0.53, 0.99, "C"), + (0.77, 0.99, "D"), + (0.03, 0.67, "E"), + (0.53, 0.67, "F"), + (0.03, 0.34, "G"), + (0.53, 0.34, "H"), + ) + panel_labels(root, labels) + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + +def heritability(args): + """ + %prog pg.tsv MZ-twins.csv DZ-twins.csv + + Plot composite figures ABCD on absolute difference of 4 traits, + EFGH on heritability of 4 traits. 
The 4 traits are: + telomere length, ccn.chrX, ccn.chrY, TRA.PPM + """ + p = OptionParser(heritability.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="12x18") + + if len(args) != 3: + sys.exit(not p.print_help()) + + combined, mz, dz = args + + # Prepare twins data + def get_pairs(filename): + with open(filename) as fp: + for row in fp: + yield row.strip().split(",") + + MZ = list(get_pairs(mz)) + DZ = list(get_pairs(dz)) + + print(len(MZ), "monozygotic twins") + print(len(DZ), "dizygotic twins") + + df = pd.read_csv(combined, sep="\t", index_col=0) + df["Sample name"] = np.array(df["Sample name"], dtype=np.str) + gender = extract_trait(df, "Sample name", "hli_calc_gender") + sameGenderMZ = list(filter_same_gender(MZ, gender)) + sameGenderDZ = list(filter_same_gender(DZ, gender)) + + composite(df, sameGenderMZ, sameGenderDZ, size=(iopts.w, iopts.h)) + savefig("heritability.pdf") + + +def compile(args): + """ + %prog compile directory + + Extract telomere length and ccn. 
+ """ + p = OptionParser(compile.__doc__) + p.set_outfile(outfile="age.tsv") + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + dfs = [] + for folder in args: + ofolder = os.listdir(folder) + + # telomeres + subdir = [x for x in ofolder if x.startswith("telomeres")][0] + subdir = op.join(folder, subdir) + filename = op.join(subdir, "tel_lengths.txt") + df = pd.read_csv(filename, sep="\t") + d1 = df.ix[0].to_dict() + + # ccn + subdir = [x for x in ofolder if x.startswith("ccn")][0] + subdir = op.join(folder, subdir) + filename = iglob(subdir, "*.ccn.json")[0] + js = json.load(open(filename)) + d1.update(js) + df = pd.DataFrame(d1, index=[0]) + dfs.append(df) + + df = pd.concat(dfs, ignore_index=True) + df.to_csv(opts.outfile, sep="\t", index=False) + + +if __name__ == "__main__": + main() diff --git a/jcvi/projects/allmaps.py b/jcvi/projects/allmaps.py new file mode 100644 index 00000000..edf714c3 --- /dev/null +++ b/jcvi/projects/allmaps.py @@ -0,0 +1,532 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Scripts for the ALLMAPS manuscript +""" +import sys +import numpy as np + +from ..apps.base import OptionParser, ActionDispatcher +from ..assembly.allmaps import AGP, GapEstimator, Map, normalize_lms_axis, spearmanr +from ..formats.bed import Bed +from ..graphics.base import ( + latex, + normalize_axes, + panel_labels, + plt, + savefig, + set_ticklabels_helvetica, + set2, +) +from ..graphics.chromosome import HorizontalChromosome +from ..utils.cbook import percentage + + +def main(): + + actions = ( + ("lms", "ALLMAPS cartoon to illustrate LMS metric"), + ("estimategaps", "illustrate ALLMAPS gap estimation algorithm"), + ("simulation", "plot ALLMAPS accuracy across a range of simulated data"), + ("comparebed", "compare the scaffold links indicated in two bed files"), + ("resamplestats", "prepare resample results table"), + ("resample", "plot ALLMAPS performance across resampled real data"), + ) + p = 
ActionDispatcher(actions) + p.dispatch(globals()) + + +def resample(args): + """ + %prog resample yellow-catfish-resample.txt medicago-resample.txt + + Plot ALLMAPS performance across resampled real data. + """ + p = OptionParser(resample.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="8x4", dpi=300) + + if len(args) != 2: + sys.exit(not p.print_help()) + + dataA, dataB = args + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + A = fig.add_axes([0.1, 0.18, 0.32, 0.64]) + B = fig.add_axes([0.6, 0.18, 0.32, 0.64]) + dataA = import_data(dataA) + dataB = import_data(dataB) + xlabel = "Fraction of markers" + ylabels = ("Anchor rate", "Runtime (m)") + legend = ("anchor rate", "runtime") + subplot_twinx(A, dataA, xlabel, ylabels, title="Yellow catfish", legend=legend) + subplot_twinx(B, dataB, xlabel, ylabels, title="Medicago", legend=legend) + + labels = ((0.04, 0.92, "A"), (0.54, 0.92, "B")) + panel_labels(root, labels) + + normalize_axes(root) + image_name = "resample." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def resamplestats(args): + """ + %prog resamplestats prefix run.log + + Prepare resample results table. Ten subsets of original data were generated + and ALLMAPS were iterated through them, creating `run.log` which contains the + timing results. The anchor rate can be found in `prefix.0.{1-10}.summary.txt`. 
+ """ + p = OptionParser(resamplestats.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + pf, runlog = args + fp = open(runlog) + Real = "real" + times = [] + for row in fp: + # real 10m31.513s + if not row.startswith(Real): + continue + tag, time = row.split() + assert tag == Real + m, s = time.split("m") + s = s.rstrip("s") + m, s = float(m), float(s) + time = m + s / 60 + times.append(time) + + N = len(times) + + rates = [] + for i in range(-N + 1, 1, 1): + summaryfile = "{0}.{1}.summary.txt".format(pf, 2**i) + fp = open(summaryfile) + lines = fp.readlines() + # Total bases 580,791,244 (80.8%) 138,298,666 (19.2%) + pct = float(lines[-2].split()[3].strip("()%")) + rates.append(pct / 100.0) + + assert len(rates) == N + + print("ratio\tanchor-rate\ttime(m)") + for j, i in enumerate(range(-N + 1, 1, 1)): + print("{0}\t{1:.3f}\t{2:.3f}".format(i, rates[j], times[j])) + + +def query_links(abed, bbed): + abedlinks = abed.links + bbedlinks = bbed.links + # Reverse complement bbedlinks + bxbedlinks = bbedlinks[:] + for (a, ai), (b, bi) in bbedlinks: + ai = {"+": "-", "?": "-", "-": "+"}[ai] + bi = {"+": "-", "?": "-", "-": "+"}[bi] + bxbedlinks.append(((b, bi), (a, ai))) + + atotal = len(abedlinks) + print("Total links in {0}: {1}".format(abed.filename, atotal), file=sys.stderr) + recovered = set(abedlinks) & set(bxbedlinks) + print("Recovered {0}".format(percentage(len(recovered), atotal)), file=sys.stderr) + print(set(abedlinks) - set(bxbedlinks), file=sys.stderr) + + +def comparebed(args): + """ + %prog comparebed AP.chr.bed infer.bed + + Compare the scaffold links indicated in two bed files. 
+ """ + p = OptionParser(comparebed.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + abed, bbed = args + abed = Bed(abed) + bbed = Bed(bbed) + query_links(abed, bbed) + query_links(bbed, abed) + + +def estimategaps(args): + """ + %prog estimategaps JM-4 chr1 JMMale-1 + + Illustrate ALLMAPS gap estimation algorithm. + """ + p = OptionParser(estimategaps.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300) + + if len(args) != 3: + sys.exit(not p.print_help()) + + pf, seqid, mlg = args + bedfile = pf + ".lifted.bed" + agpfile = pf + ".agp" + + function = lambda x: x.cm + cc = Map(bedfile, scaffold_info=True, function=function) + agp = AGP(agpfile) + + g = GapEstimator(cc, agp, seqid, mlg, function=function) + pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize + spl, spld = g.spl, g.spld + g.compute_all_gaps(verbose=False) + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + # Panel A + xstart, ystart = 0.15, 0.65 + w, h = 0.7, 0.3 + t = np.linspace(0, chrsize, 1000) + ax = fig.add_axes([xstart, ystart, w, h]) + mx, my = zip(*g.scatter_data) + rho = spearmanr(mx, my) + + dsg = "g" + ax.vlines(pp, 0, mlgsize, colors="beige") + ax.plot(mx, my, ".", color=set2[3]) + ax.plot(t, spl(t), "-", color=dsg) + ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes) + normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize, ylabel="Genetic distance (cM)") + if rho < 0: + ax.invert_yaxis() + + # Panel B + ystart -= 0.28 + h = 0.25 + ax = fig.add_axes([xstart, ystart, w, h]) + ax.vlines(pp, 0, mlgsize, colors="beige") + ax.plot(t, spld(t), "-", lw=2, color=dsg) + ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5) + normalize_lms_axis( + ax, + xlim=chrsize, + ylim=25 * 1e-6, + xfactor=1e-6, + xlabel="Physical position (Mb)", + yfactor=1000000, + ylabel="Recomb. 
rate\n(cM / Mb)", + ) + ax.xaxis.grid(False) + + # Panel C (specific to JMMale-1) + a, b = "scaffold_1076", "scaffold_861" + sizes = dict( + (x.component_id, (x.object_beg, x.object_end, x.component_span, x.orientation)) + for x in g.agp + if not x.is_gap + ) + a_beg, a_end, asize, ao = sizes[a] + b_beg, b_end, bsize, bo = sizes[b] + gapsize = g.get_gapsize(a) + total_size = asize + gapsize + bsize + ratio = 0.6 / total_size + y = 0.16 + pad = 0.03 + pb_ratio = w / chrsize + + # Zoom + lsg = "lightslategray" + root.plot((0.15 + pb_ratio * a_beg, 0.2), (ystart, ystart - 0.14), ":", color=lsg) + root.plot((0.15 + pb_ratio * b_end, 0.3), (ystart, ystart - 0.08), ":", color=lsg) + ends = [] + for tag, size, marker, beg in zip( + (a, b), (asize, bsize), (49213, 81277), (0.2, 0.2 + (asize + gapsize) * ratio) + ): + end = beg + size * ratio + marker = beg + marker * ratio + ends.append((beg, end, marker)) + root.plot((marker,), (y,), "o", color=lsg) + root.text((beg + end) / 2, y + pad, latex(tag), ha="center", va="center") + HorizontalChromosome(root, beg, end, y, height=0.025, fc="gainsboro") + + begs, ends, markers = zip(*ends) + fontprop = dict(color=lsg, ha="center", va="center") + ypos = y + pad * 2 + root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg) + root.text( + sum(markers) / 2, + ypos + pad, + "Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)", + **fontprop + ) + + ypos = y - pad + xx = markers[0], ends[0] + root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg) + root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop) + xx = markers[1], begs[1] + root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg) + root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop) + + root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg) + root.text( + sum(markers) / 2, + ypos - 3 * pad, + r"$\textit{Estimated gap size: 96,433bp}$", + color="r", + ha="center", + va="center", + ) + + labels = ((0.05, 0.95, "A"), (0.05, 0.6, "B"), (0.05, 0.27, "C")) + panel_labels(root, 
labels) + normalize_axes(root) + + pf = "estimategaps" + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def lms(args): + """ + %prog lms + + ALLMAPS cartoon to illustrate LMS metric. + """ + from random import randint + from jcvi.graphics.chromosome import HorizontalChromosome + + p = OptionParser(lms.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300) + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + # Panel A + w, h = 0.7, 0.35 + ax = fig.add_axes([0.15, 0.6, w, h]) + + xdata = [x + randint(-3, 3) for x in range(10, 110, 10)] + ydata = [x + randint(-3, 3) for x in range(10, 110, 10)] + ydata[3:7] = ydata[3:7][::-1] + xydata = zip(xdata, ydata) + lis = xydata[:3] + [xydata[4]] + xydata[7:] + lds = xydata[3:7] + xlis, ylis = zip(*lis) + xlds, ylds = zip(*lds) + ax.plot( + xlis, + ylis, + "r-", + lw=12, + alpha=0.3, + solid_capstyle="round", + solid_joinstyle="round", + ) + ax.plot( + xlds, + ylds, + "g-", + lw=12, + alpha=0.3, + solid_capstyle="round", + solid_joinstyle="round", + ) + ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12) + HorizontalChromosome(root, 0.15, 0.15 + w, 0.57, height=0.02, lw=2) + root.text(0.15 + w / 2, 0.55, "Chromosome location (bp)", ha="center", va="top") + + ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center") + ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center") + ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center") + normalize_lms_axis(ax, xlim=110, ylim=110) + + # Panel B + w = 0.37 + p = (0, 45, 75, 110) + ax = fig.add_axes([0.1, 0.12, w, h]) + xdata = [x for x in range(10, 110, 10)] + ydata = ydata_orig = [x for x in range(10, 110, 10)] + ydata = ydata[:4] + ydata[7:] + ydata[4:7][::-1] + xydata = zip(xdata, ydata) + lis = xydata[:7] + xlis, ylis = zip(*lis) + ax.plot( + xlis, + ylis, + "r-", + lw=12, + alpha=0.3, + solid_capstyle="round", + solid_joinstyle="round", + ) + 
ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12) + ax.vlines(p, 0, 110, colors="beige", lw=3) + normalize_lms_axis(ax, xlim=110, ylim=110) + patch = [0.1 + w * x / 110.0 for x in p] + HorizontalChromosome(root, 0.1, 0.1 + w, 0.09, patch=patch, height=0.02, lw=2) + scaffolds = ("a", "b", "c") + for i, s in enumerate(scaffolds): + xx = (patch[i] + patch[i + 1]) / 2 + root.text(xx, 0.09, s, va="center", ha="center") + root.text(0.1 + w / 2, 0.04, "LMS($a||b||c$) = 7", ha="center") + + # Panel C + ax = fig.add_axes([0.6, 0.12, w, h]) + patch = [0.6 + w * x / 110.0 for x in p] + ydata = ydata_orig + ax.plot( + xdata, + ydata, + "r-", + lw=12, + alpha=0.3, + solid_capstyle="round", + solid_joinstyle="round", + ) + ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12) + ax.vlines(p, [0], [110], colors="beige", lw=3) + normalize_lms_axis(ax, xlim=110, ylim=110) + HorizontalChromosome(root, 0.6, 0.6 + w, 0.09, patch=patch, height=0.02, lw=2) + scaffolds = ("a", "-c", "b") + for i, s in enumerate(scaffolds): + xx = (patch[i] + patch[i + 1]) / 2 + root.text(xx, 0.09, s, va="center", ha="center") + root.text(0.6 + w / 2, 0.04, "LMS($a||-c||b$) = 10", ha="center") + + labels = ((0.05, 0.95, "A"), (0.05, 0.48, "B"), (0.55, 0.48, "C")) + panel_labels(root, labels) + + normalize_axes(root) + + pf = "lms" + image_name = pf + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def import_data(datafile): + data = [] + fp = open(datafile) + fp.readline() + for row in fp: + atoms = row.split() + atoms = [float(x) for x in atoms] + data.append(atoms) + return data + + +def subplot_twinx( + ax, + data, + xlabel, + ylabels, + title=None, + legend=None, + loc="upper left", +): + columned_data = zip(*data) + x, yy = columned_data[0], columned_data[1:] + assert len(ylabels) == 2 + assert len(yy) == 2 + lines = [] + ax2 = ax.twinx() + for a, y, m, yl in zip((ax, ax2), yy, "ox", ylabels): + (line,) = a.plot(x, y, "k:", marker=m, mec="k", mfc="w", ms=4) + lines.append(line) + a.set_ylabel(yl) + if legend: + assert len(legend) == 2 + ax.legend(lines, legend, loc=loc) + ax.set_xlabel(xlabel) + if title: + ax.set_title(title) + + ax.set_ylim(0, 1.1) + xticklabels = [ + r"$\frac{{1}}{" + str(int(2 ** -float(x))) + "}$" for x in ax.get_xticks() + ] + xticklabels[-1] = r"$1$" + yticklabels = [float(x) for x in ax.get_yticks()] + ax.set_xticklabels(xticklabels) + ax.set_yticklabels(yticklabels, family="Helvetica") + + yb = ax2.get_ybound()[1] + yb = yb // 5 * 5 # make integer interval + ax2.set_yticks(np.arange(0, 1.1 * yb, yb / 5)) + ax2.set_ylim(0, 1.1 * yb) + yticklabels = [int(x) for x in ax2.get_yticks()] + ax2.set_xticklabels(xticklabels) + ax2.set_yticklabels(yticklabels, family="Helvetica") + ax2.grid(False) + + +def subplot( + ax, data, xlabel, ylabel, xlim=None, ylim=1.1, xcast=float, ycast=float, legend=None +): + columned_data = zip(*data) + x, yy = columned_data[0], columned_data[1:] + lines = [] + for y, m in zip(yy, "o^x"): + (line,) = ax.plot(x, y, "k:", marker=m, mec="k", mfc="w", ms=4) + lines.append(line) + if legend: + assert len(lines) == len(legend) + ax.legend(lines, legend, loc="best") + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + if xlim: + ax.set_xlim(0, xlim) + if ylim: + ax.set_ylim(0, ylim) + set_ticklabels_helvetica(ax, xcast=xcast, ycast=ycast) + + +def 
simulation(args): + """ + %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt + + Plot ALLMAPS accuracy across a range of simulated datasets. + """ + p = OptionParser(simulation.__doc__) + opts, args, iopts = p.set_image_options(args, dpi=300) + + if len(args) != 4: + sys.exit(not p.print_help()) + + dataA, dataB, dataC, dataD = args + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + A = fig.add_axes([0.12, 0.62, 0.35, 0.35]) + B = fig.add_axes([0.62, 0.62, 0.35, 0.35]) + C = fig.add_axes([0.12, 0.12, 0.35, 0.35]) + D = fig.add_axes([0.62, 0.12, 0.35, 0.35]) + dataA = import_data(dataA) + dataB = import_data(dataB) + dataC = import_data(dataC) + dataD = import_data(dataD) + subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=0.5) + subplot( + B, + dataB, + "Translocation error rate", + "Accuracy", + xlim=0.5, + legend=("intra-chromosomal", "inter-chromosomal", r"75\% intra + 25\% inter"), + ) + subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int) + subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int) + + labels = ( + (0.03, 0.97, "A"), + (0.53, 0.97, "B"), + (0.03, 0.47, "C"), + (0.53, 0.47, "D"), + ) + panel_labels(root, labels) + + normalize_axes(root) + image_name = "simulation." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +if __name__ == "__main__": + main() diff --git a/jcvi/projects/bites.py b/jcvi/projects/bites.py new file mode 100644 index 00000000..176e9686 --- /dev/null +++ b/jcvi/projects/bites.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Scripts for the Brapa bites paper + +Tang et al. (2012) Altered Patterns of Fractionation and Exon Deletions in +Brassica rapa Support a Two-Step Model of Paleohexaploidy. Genetics. 
def main():
    """Dispatch the sub-command named on the command line."""
    actions = (
        ("excision", "show intra-chromosomal recombination"),
        ("bites", "show the bites calling pipeline"),
        ("scenario", "show step-wise genome merger events in brapa"),
    )
    p = ActionDispatcher(actions)
    p.dispatch(globals())


def excision(args):
    """
    %prog excision

    Illustrate the mechanism of illegitimate recombination.
    """
    # Use this command's own usage text; the bare module-level __doc__ is the
    # paper description, not a usage string.
    p = OptionParser(excision.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (5, 5))
    root = fig.add_axes([0, 0, 1, 1])

    plt.plot((0.2, 0.8), (0.6, 0.6), "r-", lw=3)
    plt.plot((0.4, 0.6), (0.6, 0.6), "b>-", mfc="g", mec="w", ms=12, lw=3)
    plt.plot((0.3, 0.7), (0.5, 0.5), "r-", lw=3)
    plt.plot((0.5,), (0.5,), "b>-", mfc="g", mec="w", ms=12, lw=3)

    # Circle excision
    plt.plot((0.5,), (0.45,), "b>-", mfc="g", mec="w", ms=12, lw=3)
    circle = CirclePolygon((0.5, 0.4), 0.05, fill=False, lw=3, ec="b")
    root.add_patch(circle)

    arrow_dist = 0.07
    ar_xpos, ar_ypos = 0.5, 0.52
    root.annotate(
        " ", (ar_xpos, ar_ypos), (ar_xpos, ar_ypos + arrow_dist), arrowprops=arrowprops
    )

    RoundLabel(root, 0.2, 0.64, "Gene")
    RoundLabel(root, 0.3, 0.54, "Excision")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)


def bites(args):
    """
    %prog bites

    Illustrate the pipeline for automated bite discovery.
    """
    p = OptionParser(bites.__doc__)
    # Parse the arguments handed over by the dispatcher rather than
    # re-reading sys.argv (which still contains the action name).
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (6, 6))
    root = fig.add_axes([0, 0, 1, 1])

    # HSP pairs
    hsps = (
        ((50, 150), (60, 180)),
        ((190, 250), (160, 235)),
        ((300, 360), (270, 330)),
        ((430, 470), (450, 490)),
        ((570, 620), (493, 543)),
        ((540, 555), (370, 385)),  # non-collinear hsps
    )

    titlepos = (0.9, 0.65, 0.4)
    titles = ("Compare orthologous region", "Find collinear HSPs", "Scan paired gaps")
    ytip = 0.01
    mrange = 650.0

    def m(x):
        # Map a sequence coordinate in [0, mrange] onto the canvas x range.
        return x / mrange * 0.7 + 0.1

    for i, (ya, title) in enumerate(zip(titlepos, titles)):
        yb = ya - 0.1
        plt.plot((0.1, 0.8), (ya, ya), "-", color="gray", lw=2, zorder=1)
        plt.plot((0.1, 0.8), (yb, yb), "-", color="gray", lw=2, zorder=1)
        RoundLabel(root, 0.5, ya + 4 * ytip, title)
        root.text(0.9, ya, "A. thaliana", ha="center", va="center")
        root.text(0.9, yb, "B. rapa", ha="center", va="center")
        # Only the first row shows the non-collinear HSP (the last entry).
        myhsps = hsps
        if i >= 1:
            myhsps = hsps[:-1]
        for (a, b), (c, d) in myhsps:
            a, b, c, d = [m(x) for x in (a, b, c, d)]
            r1 = Rectangle((a, ya - ytip), b - a, 2 * ytip, fc="r", lw=0, zorder=2)
            r2 = Rectangle((c, yb - ytip), d - c, 2 * ytip, fc="r", lw=0, zorder=2)
            r3 = Rectangle((a, ya - ytip), b - a, 2 * ytip, fill=False, zorder=3)
            r4 = Rectangle((c, yb - ytip), d - c, 2 * ytip, fill=False, zorder=3)
            r5 = Polygon(
                ((a, ya - ytip), (c, yb + ytip), (d, yb + ytip), (b, ya - ytip)),
                fc="r",
                alpha=0.2,
            )
            rr = (r1, r2, r3, r4, r5)
            if i == 2:
                # The last row omits the shaded connector polygon.
                rr = rr[:-1]
            for r in rr:
                root.add_patch(r)

    # Gap pairs
    # `myhsps` still holds the collinear HSPs from the final loop iteration.
    hspa, hspb = zip(*myhsps)
    gapa, gapb = [], []
    for (a, b), (c, d) in pairwise(hspa):
        gapa.append((b + 1, c - 1))
    for (a, b), (c, d) in pairwise(hspb):
        gapb.append((b + 1, c - 1))
    gaps = zip(gapa, gapb)
    tpos = titlepos[-1]

    yy = tpos - 0.05
    for i, ((a, b), (c, d)) in enumerate(gaps):
        i += 1
        a, b, c, d = [m(x) for x in (a, b, c, d)]
        xx = (a + b + c + d) / 4
        TextCircle(root, xx, yy, str(i))

    # Bites
    ystart = 0.24
    ytip = 0.05
    # Named `bite_calls` so the tuple does not shadow this function.
    bite_calls = (
        ("Bite(40=>-15)", True),
        ("Bite(50=>35)", False),
        ("Bite(70=>120)", False),
        ("Bite(100=>3)", True),
    )
    for i, (bite, selected) in enumerate(bite_calls):
        xx = 0.15 if (i % 2 == 0) else 0.55
        # Two entries per row: floor division keeps both columns of a row at
        # the same height (true division would stagger them on Python 3).
        yy = ystart - i // 2 * ytip
        i += 1
        TextCircle(root, xx, yy, str(i))
        color = "k" if selected else "gray"
        root.text(xx + ytip, yy, bite, size=10, color=color, va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
class EndPoint(object):
    """One read-end cluster parsed from a label such as ``LE-1200-r7``.

    The label is split on "-" into three fields: the side ("LE" or "RE" in
    the labels written by `insertion`), the position, and the read count
    prefixed with "r".

    Raises:
        IndexError: if the label has fewer than three "-"-separated fields.
        ValueError: if the position or read count is not an integer.
    """

    def __init__(self, label):
        args = label.split("-")
        self.label = label
        self.leftright = args[0]
        self.position = int(args[1])
        self.reads = int(args[2].strip("r"))

    def __repr__(self):
        return "{0}({1!r})".format(type(self).__name__, self.label)


def main():
    """Dispatch the sub-command named on the command line."""
    actions = (
        ("deletion", "find IES based on mapping MAC reads"),
        ("insertion", "find IES excision points based on mapping MIC reads"),
        ("insertionpairs", "pair up the candidate insertions"),
        ("variation", "associate IES in parents and progeny"),
    )
    p = ActionDispatcher(actions)
    p.dispatch(globals())


def variation(args):
    """
    %prog variation P1.bed P2.bed F1.bed

    Associate IES in parents and progeny.
    """
    p = OptionParser(variation.__doc__)
    p.add_argument(
        "--diversity",
        choices=("breakpoint", "variant"),
        default="variant",
        help="Plot diversity",
    )
    opts, args, iopts = p.set_image_options(args, figsize="6x6")

    if len(args) != 3:
        sys.exit(not p.print_help())

    # Sample prefix = file basename up to the first "-".
    pfs = [op.basename(x).split("-")[0] for x in args]
    P1, P2, F1 = pfs
    newbedfile = "-".join(pfs) + ".bed"
    if need_update(args, newbedfile):
        # Pool all three samples into one BED, tagging each feature name
        # with its sample prefix.
        newbed = Bed()
        for pf, filename in zip(pfs, args):
            bed = Bed(filename)
            for b in bed:
                b.accn = "-".join((pf, b.accn))
                b.score = None
                newbed.append(b)
        newbed.print_to_file(newbedfile, sorted=True)

    neworder = Bed(newbedfile).order
    mergedbedfile = mergeBed(newbedfile, nms=True)
    bed = Bed(mergedbedfile)
    valid = 0
    total_counts = Counter()
    F1_counts = []
    bp_diff = []
    novelbedfile = "novel.bed"
    # Merged sites not seen in all three samples are written to novel.bed;
    # the context manager makes sure that file is flushed and closed.
    with open(novelbedfile, "w") as fw:
        for b in bed:
            accns = b.accn.split(",")
            pfs_accns = [x.split("-")[0] for x in accns]
            pfs_counts = Counter(pfs_accns)
            if len(pfs_counts) != 3:
                print(b, file=fw)
                continue

            valid += 1
            total_counts += pfs_counts
            F1_counts.append(pfs_counts[F1])

            # Collect breakpoint positions between P1 and F1
            P1_accns = [x for x in accns if x.split("-")[0] == P1]
            F1_accns = [x for x in accns if x.split("-")[0] == F1]
            if len(P1_accns) != 1:
                continue

            ri, ref = neworder[P1_accns[0]]
            F1_beds = [neworder[x][-1] for x in F1_accns]
            bp_diff.extend(x.start - ref.start for x in F1_beds)
            bp_diff.extend(x.end - ref.end for x in F1_beds)

    print(
        "A total of {} sites show consistent deletions across samples.".format(
            percentage(valid, len(bed))
        ),
        file=sys.stderr,
    )
    for pf, count in total_counts.items():
        print(
            "{:>9}: {:.2f} deletions/site".format(pf, count * 1.0 / valid),
            file=sys.stderr,
        )

    F1_counts = Counter(F1_counts)

    # Plot the IES variant number diversity
    from jcvi.graphics.base import plt, savefig, set_ticklabels_helvetica

    plt.figure(1, (iopts.w, iopts.h))
    if opts.diversity == "variant":
        left, height = zip(*sorted(F1_counts.items()))
        for x_pos, h in zip(left, height):
            print("{0:>9} variants: {1}".format(x_pos, h), file=sys.stderr)
            plt.text(
                x_pos,
                h + 5,
                str(h),
                color="darkslategray",
                size=8,
                ha="center",
                va="bottom",
                rotation=90,
            )

        plt.bar(left, height, align="center")
        plt.xlabel("Identified number of IES per site")
        plt.ylabel("Counts")
        plt.title("IES variation in progeny pool")
        ax = plt.gca()
        set_ticklabels_helvetica(ax)
        savefig(F1 + ".counts.pdf")

    # Plot the IES breakpoint position diversity
    else:
        bp_diff = Counter(bp_diff)
        # Fold negative and positive offsets together for the histogram.
        bp_diff_abs = Counter()
        for k, v in bp_diff.items():
            bp_diff_abs[abs(k)] += v
        plt.figure(1, (iopts.w, iopts.h))
        left, height = zip(*sorted(bp_diff_abs.items()))
        # Only the first 21 bars are annotated (the x range shown is 0-20).
        for x_pos, h in list(zip(left, height))[:21]:
            plt.text(
                x_pos,
                h + 50,
                str(h),
                color="darkslategray",
                size=8,
                ha="center",
                va="bottom",
                rotation=90,
            )

        plt.bar(left, height, align="center")
        plt.xlabel("Progeny breakpoint relative to SB210")
        plt.ylabel("Counts")
        plt.xlim(-0.5, 20.5)
        ax = plt.gca()
        set_ticklabels_helvetica(ax)
        savefig(F1 + ".breaks.pdf")
        # Serialize the data to a file
        with open("Breakpoint-offset-histogram.csv", "w") as fw:
            for k, v in sorted(bp_diff.items()):
                print("{0},{1}".format(k, v), file=fw)

        total = sum(height)
        zeros = bp_diff[0]
        within_20 = sum(v for i, v in bp_diff.items() if -20 <= i <= 20)
        print("No deviation: {0}".format(percentage(zeros, total)), file=sys.stderr)
        print(" Within 20bp: {0}".format(percentage(within_20, total)), file=sys.stderr)


def insertionpairs(args):
    """
    %prog insertionpairs endpoints.bed

    Pair up the candidate endpoints. A candidate exision point would contain
    both left-end (LE) and right-end (RE) within a given distance.

    -----------|   |------------
        -------|   |--------
      ---------|   |----------
            (RE)   (LE)
    """
    p = OptionParser(insertionpairs.__doc__)
    p.add_argument(
        "--extend",
        default=10,
        type=int,
        help="Allow insertion sites to match up within distance",
    )
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (bedfile,) = args
    mergedbedfile = mergeBed(bedfile, d=opts.extend, nms=True)
    bed = Bed(mergedbedfile)
    fw = must_open(opts.outfile, "w")

    def reads(endpoint):
        return endpoint.reads

    for b in bed:
        names = b.accn.split(",")
        ends = [EndPoint(x) for x in names]
        REs = [x for x in ends if x.leftright == "RE"]
        LEs = [x for x in ends if x.leftright == "LE"]
        # A site needs at least one end of each kind to be a candidate.
        if not (REs and LEs):
            continue
        # Best-supported end of each kind; max() keeps the first one on ties,
        # the same pick as a stable sort by descending read count.
        mRE, mLE = max(REs, key=reads), max(LEs, key=reads)
        pRE, pLE = mRE.position, mLE.position
        if pLE < pRE:
            b.start, b.end = pLE - 1, pRE
        else:
            b.start, b.end = pRE - 1, pLE
        b.accn = "{0}|{1}".format(mRE.label, mLE.label)
        b.score = pLE - pRE - 1
        print(b, file=fw)


def insertion(args):
    """
    %prog insertion mic.mac.bed

    Find IES based on mapping MIC reads to MAC genome. Output a bedfile with
    'lesions' (stack of broken reads) in the MAC genome.
    """
    p = OptionParser(insertion.__doc__)
    p.add_argument(
        "--mindepth", default=6, type=int, help="Minimum depth to call an insertion"
    )
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (bedfile,) = args
    mindepth = opts.mindepth
    bed = Bed(bedfile)
    fw = must_open(opts.outfile, "w")
    for seqid, feats in bed.sub_beds():
        # Materialise once: the features are traversed twice below, which
        # would silently lose the right ends if `feats` were an iterator.
        feats = list(feats)
        left_ends = Counter(x.start for x in feats)
        right_ends = Counter(x.end for x in feats)
        # Keep positions where at least `mindepth` reads start or end.
        selected = [
            (seqid, pos, "LE-{0}".format(pos), count)
            for pos, count in left_ends.items()
            if count >= mindepth
        ]
        selected += [
            (seqid, pos, "RE-{0}".format(pos), count)
            for pos, count in right_ends.items()
            if count >= mindepth
        ]
        selected.sort()
        for chrom, pos, label, count in selected:
            label = "{0}-r{1}".format(label, count)
            print("\t".join((chrom, str(pos - 1), str(pos), label)), file=fw)
+ """ + p = OptionParser(deletion.__doc__) + p.add_argument( + "--mindepth", default=3, type=int, help="Minimum depth to call a deletion" + ) + p.add_argument( + "--minspan", default=30, type=int, help="Minimum span to call a deletion" + ) + p.add_argument( + "--split", + default=False, + action="store_true", + help="Break at cigar N into separate parts", + ) + p.set_tmpdir() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bedfile, gapsbedfile = args + if bedfile.endswith(".bam"): + bamfile = bedfile + bedfile = bamfile.replace(".sorted.", ".").replace(".bam", ".bed") + if need_update(bamfile, bedfile): + cmd = "bamToBed -i {0}".format(bamfile) + if opts.split: + cmd += " -split" + cmd += " | cut -f1-4" + sh(cmd, outfile=bedfile) + + sort_tmpdir = "--tmpdir={0}".format(opts.tmpdir) + if bedfile.endswith(".sorted.bed"): + pf = bedfile.rsplit(".", 2)[0] + sortedbedfile = bedfile + else: + pf = bedfile.rsplit(".", 1)[0] + sortedbedfile = pf + ".sorted.bed" + if need_update(bedfile, sortedbedfile): + sort([bedfile, "-u", "--accn", sort_tmpdir]) + + # Find reads that contain multiple matches + ibedfile = pf + ".d.bed" + if need_update(sortedbedfile, ibedfile): + bed = Bed(sortedbedfile, sorted=False) + fw = open(ibedfile, "w") + logger.debug("Write deletions to `{0}`.".format(ibedfile)) + for accn, bb in groupby(bed, key=lambda x: x.accn): + bb = list(bb) + branges = [(x.seqid, x.start, x.end) for x in bb] + iranges = range_interleave(branges) + for seqid, start, end in iranges: + if end - start + 1 < opts.minspan: + continue + print( + "\t".join(str(x) for x in (seqid, start - 1, end, accn + "-d")), + file=fw, + ) + fw.close() + + # Uniqify the insertions and count occurrences + countbedfile = pf + ".uniq.bed" + if need_update(ibedfile, countbedfile): + bed = Bed(ibedfile) + fw = open(countbedfile, "w") + logger.debug("Write counts to `{0}`.".format(countbedfile)) + registry = Counter((x.seqid, x.start, x.end) for x in bed) + 
ies_id = 1 + for (seqid, start, end), count in registry.items(): + ies_name = "{0:05d}-r{1}".format(ies_id, count) + if count < opts.mindepth: + continue + print("\t".join(str(x) for x in (seqid, start - 1, end, ies_name)), file=fw) + ies_id += 1 + fw.close() + sort([countbedfile, "-i", sort_tmpdir]) + + # Remove deletions that contain some read depth + depthbedfile = pf + ".depth.bed" + if need_update((sortedbedfile, countbedfile), depthbedfile): + depth([sortedbedfile, countbedfile, "--outfile={0}".format(depthbedfile)]) + + validbedfile = pf + ".valid.bed" + if need_update(depthbedfile, validbedfile): + fw = open(validbedfile, "w") + logger.debug("Filter valid deletions to `{0}`.".format(validbedfile)) + bed = Bed(depthbedfile) + all_scores = [float(b.score) for b in bed] + lb, ub = outlier_cutoff(all_scores) + logger.debug("Bounds for depths: LB={:.2f} (ignored) UB={:.2f}".format(lb, ub)) + for b in bed: + if float(b.score) > ub: + continue + print(b, file=fw) + fw.close() + + # Remove deletions that contain sequencing gaps on its flanks + selectedbedfile = pf + ".selected.bed" + if need_update(validbedfile, selectedbedfile): + flanksbedfile = pf + ".flanks.bed" + fw = open(flanksbedfile, "w") + bed = Bed(validbedfile) + flank = 100 + logger.debug("Write deletion flanks to `{0}`.".format(flanksbedfile)) + for b in bed: + start, end = b.start, b.end + b.start, b.end = start, min(start + flank - 1, end) + print(b, file=fw) + b.start, b.end = max(start, end - flank + 1), end + print(b, file=fw) + fw.close() + + intersectidsfile = pf + ".intersect.ids" + cmd = "intersectBed -a {0} -b {1}".format(flanksbedfile, gapsbedfile) + cmd += " | cut -f4 | sort -u" + sh(cmd, outfile=intersectidsfile) + some( + [ + validbedfile, + intersectidsfile, + "-v", + "--outfile={}".format(selectedbedfile), + ] + ) + + # Find best-scoring non-overlapping set + iesbedfile = pf + ".ies.bed" + if need_update(selectedbedfile, iesbedfile): + bed = Bed(selectedbedfile) + fw = open(iesbedfile, 
"w") + logger.debug("Write IES to `{0}`.".format(iesbedfile)) + branges = [ + Range(x.seqid, x.start, x.end, int(x.accn.rsplit("r")[-1]), i) + for i, x in enumerate(bed) + ] + iranges, iscore = range_chain(branges) + logger.debug("Best chain score: {} ({} IES)".format(iscore, len(iranges))) + ies_id = 1 + for seqid, start, end, score, id in iranges: + ies_name = "IES-{0:05d}-r{1}".format(ies_id, score) + span = end - start + 1 + print( + "\t".join(str(x) for x in (seqid, start - 1, end, ies_name, span)), + file=fw, + ) + ies_id += 1 + fw.close() + + +if __name__ == "__main__": + main() diff --git a/jcvi/projects/jcvi.py b/jcvi/projects/jcvi.py new file mode 100644 index 00000000..e89986da --- /dev/null +++ b/jcvi/projects/jcvi.py @@ -0,0 +1,335 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Functions in this script produce figures in the JCVI manuscript. +""" + +import sys + +import networkx as nx + +from ..apps.base import ActionDispatcher, OptionParser, logger +from ..assembly.geneticmap import draw_geneticmap_heatmap +from ..assembly.hic import draw_hic_heatmap +from ..assembly.kmer import draw_ks_histogram +from ..compara.pedigree import Pedigree, calculate_inbreeding +from ..compara.synteny import check_beds +from ..graphics.base import ( + cm, + load_image, + normalize_axes, + panel_labels, + plt, + savefig, + set1, + setup_theme, +) +from ..graphics.chromosome import draw_chromosomes +from ..graphics.dotplot import dotplot +from ..graphics.karyotype import Karyotype +from ..graphics.landscape import draw_heatmaps, draw_multi_depth, draw_stacks +from ..graphics.synteny import Synteny, draw_gene_legend + + +def synteny(args): + """ + %prog synteny grape.peach.anchors seqids layout blocks grape_peach.bed blocks.layout + + Plot synteny composite figure, including: + A. Synteny dotplot + B. 
def diversity(args):
    """
    %prog diversity pedigree.ped VAR?_srtd.wgs.regions.bed.gz

    Plot diversity composite figure, including:
    A. Pedigree
    B. Depth distribution across genomes
    """
    p = OptionParser(diversity.__doc__)
    _, args, iopts = p.set_image_options(args, figsize="14x7")

    if len(args) < 2:
        sys.exit(not p.print_help())

    # First argument is the pedigree; every remaining one is a depth BED.
    pedfile, bedfiles = args[0], args[1:]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes((0, 0, 1, 1))

    ax1_root = fig.add_axes((0, 0, 0.25, 1))
    ax2_root = fig.add_axes((0.25, 0, 0.75, 1))

    # Panel A
    logger.info("Plotting pedigree")
    ped = Pedigree(pedfile)
    pngfile = f"{pedfile}.png"
    inb = calculate_inbreeding(ped, ploidy=4, N=10000)

    G = ped.to_graph(inb, title="Pedigree of Variety1")
    A = nx.nx_agraph.to_agraph(G)
    dpi = 300
    A.draw(pngfile, prog="dot", args=f"-Gdpi={dpi}")
    logger.info("Pedigree graph written to `%s`", pngfile)

    # Show the image as is
    ax1_root.imshow(load_image(pngfile))
    ax1_root.set_axis_off()

    # Panel B
    logger.info("Plotting depth distribution across genomes")
    npanels = len(bedfiles)
    yinterval = 1.0 / npanels
    ypos = 1 - yinterval
    panel_roots, panel_axes = [], []
    # One horizontal strip per BED file, stacked from the top down.
    for _ in range(npanels):
        panel_root = fig.add_axes((0.25, ypos, 0.75, yinterval))
        panel_ax = fig.add_axes(
            (0.25 + 0.1 * 0.75, ypos + 0.2 * yinterval, 0.8 * 0.75, 0.65 * yinterval)
        )
        panel_roots.append(panel_root)
        panel_axes.append(panel_ax)
        ypos -= yinterval

    draw_multi_depth(
        ax2_root,
        panel_roots,
        panel_axes,
        bedfiles,
        chrinfo_file="chrinfo.txt",
        titleinfo_file="titleinfo.txt",
        maxdepth=100,
        logscale=False,
    )

    labels = (
        (0.02, 0.95, "A"),
        (0.25 + 0.25 * 0.1, 0.95, "B"),
    )
    panel_labels(root, labels)
    normalize_axes(root, ax2_root)

    image_name = "diversity.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)


def landscape(args):
    """
    %prog landscape features.bed athaliana.sizes TAIR10_chr_all.fas Chr2

    Plot landscape composite figure, including:
    A. Example genomic features painted on Arabidopsis genome
    B. Landscape of genomic features across the genome
    """
    p = OptionParser(landscape.__doc__)
    _, args, iopts = p.set_image_options(args, figsize="12x8")

    if len(args) != 4:
        sys.exit(not p.print_help())

    bedfile, sizesfile, fastafile, ch = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes((0, 0, 1, 1))
    aspect_ratio = iopts.w / iopts.h
    ax1_root = fig.add_axes((0, 1 / 4, 0.4, 0.5 * aspect_ratio))
    ax2_root_extent = (0.4, 0.5, 0.6, 0.47)
    ax2_root = fig.add_axes(ax2_root_extent)
    ax3_root_extent = (0.41, 0, 0.6, 0.47)
    ax3_root = fig.add_axes(ax3_root_extent)

    # Panel A
    logger.info("Plotting example genomic features painted on Arabidopsis genome")
    draw_chromosomes(
        ax1_root,
        bedfile,
        sizesfile,
        iopts=iopts,
        mergedist=0,
        winsize=50000,
        gauge=True,
        legend=True,
        empty=False,
        title="*Arabidopsis* genome features",
    )

    # Panel B
    logger.info("Plotting landscape of genomic features across the genome")
    stacks = ["Repeats", "Exons"]
    heatmaps = ["Copia", "Gypsy", "Helitron", "hAT", "Exons"]
    window = 250000
    shift = 50000
    draw_stacks(
        fig,
        ax2_root,
        ax2_root_extent,
        stacks,
        fastafile,
        window,
        shift,
        top=5,
    )

    # Panel C
    # Honour the chromosome named on the command line; it used to be
    # hard-coded to "Chr2", so the fourth argument was silently ignored.
    draw_heatmaps(
        fig,
        ax3_root,
        ax3_root_extent,
        fastafile,
        ch,
        stacks,
        heatmaps,
        window,
        shift,
        cmap=cm.viridis,
    )

    ax2_root.set_axis_off()
    ax3_root.set_axis_off()

    labels = ((0.02, 0.95, "A"), (0.42, 0.95, "B"), (0.42, 0.48, "C"))
    panel_labels(root, labels)
    normalize_axes(root, ax1_root)

    image_name = "landscape.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)


def genomebuild(args):
    """
    %prog genomebuild reads.histo geneticmap.matrix hic.resolution_500000.npy hic.resolution_500000.json

    Plot genome build composite figure, including:
    A. Read kmer histogram
    B. Genetic map concordance
    C. Hi-C contact map concordance
    """
    p = OptionParser(genomebuild.__doc__)
    _, args, iopts = p.set_image_options(args, figsize="21x7")

    if len(args) != 4:
        sys.exit(not p.print_help())

    reads_histo, mstmap, hic_matrix, hic_json = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes((0, 0, 1, 1))
    # Three equal-width columns, each with a root axes and an inset canvas.
    ax1_root = fig.add_axes((0, 0, 1 / 3, 1))
    ax2_root = fig.add_axes((1 / 3, 0, 1 / 3, 1))
    ax3_root = fig.add_axes((2 / 3, 0, 1 / 3, 1))
    ax1 = fig.add_axes((1 / 3 * 0.1, 0.1, 1 / 3 * 0.8, 0.8))
    ax2 = fig.add_axes((1 / 3 * 1.1, 0.1, 1 / 3 * 0.8, 0.8))
    ax3 = fig.add_axes((1 / 3 * 2.1, 0.1, 1 / 3 * 0.8, 0.8))

    # Panel A
    logger.info("Plotting read kmer histogram")
    _ = draw_ks_histogram(
        ax1,
        reads_histo,
        method="nbinom",
        coverage=0,
        vmin=2,
        vmax=200,
        species="*S. species* ‘Variety 1’",
        K=21,
        maxiter=100,
        peaks=False,
    )

    # Panel B
    logger.info("Plotting genetic map concordance")
    draw_geneticmap_heatmap(ax2_root, ax2, mstmap, 1000)

    # Panel C
    logger.info("Plotting Hi-C contact map concordance")
    draw_hic_heatmap(
        ax3_root,
        ax3,
        hic_matrix,
        hic_json,
        contig=None,
        groups_file="groups",
        title="*S. species* Hi-C contact map",
        vmin=1,
        vmax=6,
        plot_breaks=True,
    )

    labels = (
        (1 / 3 * 0.1, 0.95, "A"),
        (1 / 3 * 1.1, 0.95, "B"),
        (1 / 3 * 2.1, 0.95, "C"),
    )
    panel_labels(root, labels)
    normalize_axes(root, ax1_root, ax2_root, ax3_root)

    image_name = "genomebuild.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)


def main():
    """Dispatch the sub-command named on the command line."""
    actions = (
        ("synteny", "Plot synteny composite figure"),
        ("diversity", "Plot diversity composite figure"),
        ("genomebuild", "Plot genome build composite figure"),
        ("landscape", "Plot landscape composite figure"),
    )
    p = ActionDispatcher(actions)
    p.dispatch(globals())


if __name__ == "__main__":
    main()
side-by-side"), + # Oropetium paper (Vanburen et al., 2015 Nature) + ("oropetium", "plot oropetium micro-synteny (requires data)"), + # Pomegranate paper (Qin et al., 2017 Plant Journal) + ("pomegranate", "plot pomegranate macro- and micro-synteny (requires data)"), + ("birch", "plot birch macro-synteny (requires data)"), + ("litchi", "plot litchi micro-synteny (requires data)"), + ("utricularia", "plot utricularia micro-synteny (requires data)"), + ( + "waterlilyGOM", + "waterlily phylogeny and related infographics (requires data)", + ), + ("grabseeds", "GRABSEEDS PCA plot"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def rgb_to_hex(r: float, g: float, b: float): + """ + Convert RGB to hex. + """ + r, g, b = int(round(r)), int(round(g)), int(round(b)) + return f"#{r:02x}{g:02x}{b:02x}" + + +def grabseeds(args): + """ + %prog FINAL_DATA_FOR_ANOVA_HERITABILITY_ANALYSIS_SEED_COLOR_SHAPE_SIZE.csv + + Plot the PCA plot from GRABSEEDS. + """ + p = OptionParser(grabseeds.__doc__) + _, args, iopts = p.set_image_options(args, figsize="8x6") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (csvfile,) = args + df = pd.read_csv(csvfile).dropna() + features = [ + x + for x in df.columns + if x.startswith("Avg") + if x not in ("AvgOfL", "AvgOfa", "AvgOfb") + ] + x = df.loc[:, features].values + x = StandardScaler().fit_transform(x) + pca = PCA(n_components=2) + principal_components = pca.fit_transform(x) + logger.info("Explained variance: %s", pca.explained_variance_ratio_) + pc1_var, pc2_var = pca.explained_variance_ratio_ + + pc_df = pd.DataFrame(data=principal_components, columns=["PC1", "PC2"]) + final_df = pd.concat([pc_df, df[features]], axis=1).dropna() + final_df["Color"] = final_df.apply( + lambda x: rgb_to_hex(x["AvgOfRed"], x["AvgOfGreen"], x["AvgOfGreen"]), axis=1 + ) + final_df["ScatterSize"] = final_df["AvgOfArea"] / 500 + + fig = plt.figure(1, (iopts.w, iopts.h)) + ax = fig.add_subplot(1, 1, 1) + ax.set_xlabel(f"Principal Component 
1 ({pc1_var * 100:.0f}\%)", fontsize=15) + ax.set_ylabel(f"Principal Component 2 ({pc2_var * 100:.0f}\%)", fontsize=15) + ax.set_title("Sorghum kernels, PCA Plot", fontsize=20) + ax.scatter("PC1", "PC2", s="ScatterSize", c="Color", data=final_df) + set_helvetica_axis(ax) + + image_name = "grabseeds_pca." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def waterlilyGOM(args): + """ + %prog mcmctree.tre table.csv + + Customized figure to plot phylogeny and related infographics. + """ + from ..graphics.tree import ( + LeafInfoFile, + WGDInfoFile, + draw_tree, + parse_tree, + draw_wgd_xy, + ) + from ..graphics.table import CsvTable, draw_table + + p = OptionParser(waterlilyGOM.__doc__) + _, args, iopts = p.set_image_options(args, figsize="12x9") + + if len(args) != 2: + sys.exit(not p.print_help()) + + (datafile, csvfile) = args + outgroup = ["ginkgo"] + + logger.debug("Load tree file `%s`", datafile) + t, hpd = parse_tree(datafile) + + pf = datafile.rsplit(".", 1)[0] + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + + margin, rmargin = 0.15, 0.19 # Left and right margin + leafinfo = LeafInfoFile("leafinfo.csv").cache + wgdinfo = WGDInfoFile("wgdinfo.csv").cache + groups = "Monocots,Eudicots,ANA-grade,Gymnosperms" + + draw_tree( + root, + t, + hpd=hpd, + margin=margin, + rmargin=rmargin, + supportcolor=None, + internal=False, + outgroup=outgroup, + leafinfo=leafinfo, + wgdinfo=wgdinfo, + geoscale=True, + groups=groups.split(","), + ) + + # Bottom right show legends for the WGD circles + pad = 0.02 + ypad = 0.04 + xstart = 1 - rmargin + pad + ystart = 0.2 + waterlily_wgdline = wgdinfo["waterlily"][0] + ypos = ystart - 2 * ypad + draw_wgd_xy(root, xstart, ypos, waterlily_wgdline) + root.text( + xstart + pad, + ypos, + "Nymphaealean WGD", + color=waterlily_wgdline.color, + va="center", + ) + other_wgdline = wgdinfo["banana"][0] + ypos = ystart - 3 * ypad + draw_wgd_xy(root, xstart, ypos, other_wgdline) + root.text( + 
xstart + pad, + ypos, + "Other known WGDs", + color=other_wgdline.color, + va="center", + ) + + # Top left draw the comparison table + csv_table = CsvTable(csvfile) + draw_table( + root, + csv_table, + extent=(0.02, 0.44, 0.55, 0.985), + stripe_color="lavender", + yinflation=iopts.w / iopts.h, + ) + + normalize_axes(root) + + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def pomegranate(args): + """ + %prog pomegranate seqids karyotype.layout mcscan.out all.bed synteny.layout + + Build a figure that calls graphics.karyotype to illustrate the high ploidy + of WGD history of pomegranate genome. The script calls both graphics.karyotype + and graphics.synteny. + """ + p = OptionParser(pomegranate.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="9x7") + + if len(args) != 5: + sys.exit(not p.print_help()) + + seqidsfile, klayout, datafile, bedfile, slayout = args + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + + Karyotype(root, seqidsfile, klayout) + Synteny(fig, root, datafile, bedfile, slayout) + + # legend showing the orientation of the genes + draw_gene_legend(root, 0.42, 0.52, 0.48) + + labels = ((0.04, 0.96, "A"), (0.04, 0.52, "B")) + panel_labels(root, labels) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "pomegranate-karyotype" + image_name = pf + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def utricularia(args): + from ..graphics.synteny import main as synteny_main + + p = OptionParser(synteny_main.__doc__) + p.add_argument("--switch", help="Rename the seqid with two-column file") + opts, args, iopts = p.set_image_options(args, figsize="8x7") + + if len(args) != 3: + sys.exit(not p.print_help()) + + datafile, bedfile, layoutfile = args + switch = opts.switch + + pf = datafile.rsplit(".", 1)[0] + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + s = Synteny( + fig, root, datafile, bedfile, layoutfile, loc_label=False, switch=switch + ) + light = "lightslategrey" + RoundRect(root, (0.02, 0.69), 0.96, 0.24, fill=False, lw=2, ec=light) + RoundRect(root, (0.02, 0.09), 0.96, 0.48, fill=False, lw=2, ec=light) + za, zb = s.layout[1].ratio, s.layout[-1].ratio # zoom level + if za != 1: + root.text( + 0.96, + 0.89, + "{}x zoom".format(za).replace(".0x", "x"), + color=light, + ha="right", + va="center", + size=14, + ) + if zb != 1: + root.text( + 0.96, + 0.12, + "{}x zoom".format(zb).replace(".0x", "x"), + color=light, + ha="right", + va="center", + size=14, + ) + + # legend showing the orientation of the genes + draw_gene_legend(root, 0.22, 0.3, 0.64, text=True) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + image_name = pf + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def join_nodes( + root, coords, a, b, x, slope=2.4, fc="lightslategray", rectangle=True, circle=True +): + # Join node a and b to make an internal node + ax, ay = coords[a] + bx, by = coords[b] + if ay < by: + ax, ay, bx, by = bx, by, ax, ay + if rectangle: + nx, ny = x, (ay + by) / 2 + root.plot((nx, ax), (ay, ay), lw=2, color=fc) + root.plot((nx, bx), (by, by), lw=2, color=fc) + root.plot((nx, nx), (ay, by), lw=2, color=fc) + else: + dx = (abs(ay - by) / slope - abs(ax - bx)) / 2 + nx = max(ax, bx) + dx + ny = by + (nx - bx) * slope + root.plot((nx, ax), (ny, ay), lw=2, color=fc) + root.plot((nx, bx), (ny, by), lw=2, color=fc) + if circle: + DoubleSquare(root, nx, ny, fc=fc) + return nx, ny + + +def branch_length(ax, start, end, text, ha="left", va="bottom", color="r"): + xs, ys = start + xe, ye = end + text = r"$\mathsf{" + text + "}$" + ax.text((xs + xe) / 2, (ys + ye) / 2, text, ha=ha, va=va, color=color) + + +def birch(args): + """ + %prog birch seqids layout + + Plot birch macro-synteny, with an embedded phylogenetic tree to the right. 
+ """ + p = OptionParser(birch.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="8x6") + + if len(args) != 2: + sys.exit(not p.print_help()) + + seqids, layout = args + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + + K = Karyotype(root, seqids, layout) + L = K.layout + + xs = 0.79 + dt = dict(rectangle=False, circle=False) + # Embed a phylogenetic tree to the right + coords = {} + coords["Amborella"] = (xs, L[0].y) + coords["Vitis"] = (xs, L[1].y) + coords["Prunus"] = (xs, L[2].y) + coords["Betula"] = (xs, L[3].y) + coords["Populus"] = (xs, L[4].y) + coords["Arabidopsis"] = (xs, L[5].y) + coords["fabids"] = join_nodes(root, coords, "Prunus", "Betula", xs, **dt) + coords["malvids"] = join_nodes(root, coords, "Populus", "Arabidopsis", xs, **dt) + coords["rosids"] = join_nodes(root, coords, "fabids", "malvids", xs, **dt) + coords["eudicots"] = join_nodes(root, coords, "rosids", "Vitis", xs, **dt) + coords["angiosperm"] = join_nodes(root, coords, "eudicots", "Amborella", xs, **dt) + + # Show branch length + branch_length(root, coords["Amborella"], coords["angiosperm"], ">160.0") + branch_length(root, coords["eudicots"], coords["angiosperm"], ">78.2", va="top") + branch_length(root, coords["Vitis"], coords["eudicots"], "138.5") + branch_length(root, coords["rosids"], coords["eudicots"], "19.8", va="top") + branch_length( + root, coords["Prunus"], coords["fabids"], "104.2", ha="right", va="top" + ) + branch_length(root, coords["Arabidopsis"], coords["malvids"], "110.2", va="top") + branch_length( + root, coords["fabids"], coords["rosids"], "19.8", ha="right", va="top" + ) + branch_length(root, coords["malvids"], coords["rosids"], "8.5", va="top") + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "birch" + image_name = pf + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def mtdotplots(args): + """ + %prog mtdotplots Mt3.5 Mt4.0 medicago.medicago.lifted.1x1.anchors + + Plot Mt3.5 and Mt4.0 side-by-side. This is essentially combined from two + graphics.dotplot() function calls as panel A and B. + """ + from ..graphics.dotplot import check_beds, dotplot + + p = OptionParser(mtdotplots.__doc__) + p.set_beds() + opts, args, iopts = p.set_image_options(args, figsize="16x8", dpi=90) + + if len(args) != 3: + sys.exit(not p.print_help()) + + a, b, ac = args + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + r1 = fig.add_axes([0, 0, 0.5, 1]) + r2 = fig.add_axes([0.5, 0, 0.5, 1]) + a1 = fig.add_axes([0.05, 0.1, 0.4, 0.8]) + a2 = fig.add_axes([0.55, 0.1, 0.4, 0.8]) + + anchorfile = op.join(a, ac) + qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) + dotplot( + anchorfile, qbed, sbed, fig, r1, a1, is_self=is_self, genomenames="Mt3.5_Mt3.5" + ) + + opts.qbed = opts.sbed = None + anchorfile = op.join(b, ac) + qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) + dotplot( + anchorfile, qbed, sbed, fig, r2, a2, is_self=is_self, genomenames="Mt4.0_Mt4.0" + ) + + root.text(0.03, 0.95, "A", ha="center", va="center", size=36) + root.text(0.53, 0.95, "B", ha="center", va="center", size=36) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "mtdotplots" + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def oropetium(args): + """ + %prog oropetium mcscan.out all.bed layout switch.ids + + Build a composite figure that calls graphis.synteny. 
+ """ + p = OptionParser(oropetium.__doc__) + p.add_argument("--extra", help="Extra features in BED format") + opts, args, iopts = p.set_image_options(args, figsize="9x6") + + if len(args) != 4: + sys.exit(not p.print_help()) + + datafile, bedfile, slayout, switch = args + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + Synteny( + fig, root, datafile, bedfile, slayout, switch=switch, extra_features=opts.extra + ) + + # legend showing the orientation of the genes + draw_gene_legend(root, 0.4, 0.57, 0.74, text=True, repeat=True) + + # On the left panel, make a species tree + fc = "lightslategrey" + + coords = {} + xs, xp = 0.16, 0.03 + coords["oropetium"] = (xs, 0.7) + coords["setaria"] = (xs, 0.6) + coords["sorghum"] = (xs, 0.5) + coords["rice"] = (xs, 0.4) + coords["brachypodium"] = (xs, 0.3) + xs -= xp + coords["Panicoideae"] = join_nodes(root, coords, "setaria", "sorghum", xs) + xs -= xp + coords["BEP"] = join_nodes(root, coords, "rice", "brachypodium", xs) + coords["PACMAD"] = join_nodes(root, coords, "oropetium", "Panicoideae", xs) + xs -= xp + coords["Poaceae"] = join_nodes(root, coords, "BEP", "PACMAD", xs) + + # Names of the internal nodes + for tag in ("BEP", "Poaceae"): + nx, ny = coords[tag] + nx, ny = nx - 0.005, ny - 0.02 + root.text(nx, ny, tag, rotation=90, ha="right", va="top", color=fc) + for tag in ("PACMAD",): + nx, ny = coords[tag] + nx, ny = nx - 0.005, ny + 0.02 + root.text(nx, ny, tag, rotation=90, ha="right", va="bottom", color=fc) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "oropetium" + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def litchi(args): + """ + %prog litchi mcscan.out all.bed layout switch.ids + + Build a composite figure that calls graphis.synteny. 
+ """ + p = OptionParser(litchi.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="9x6") + + if len(args) != 4: + sys.exit(not p.print_help()) + + datafile, bedfile, slayout, switch = args + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + Synteny(fig, root, datafile, bedfile, slayout, switch=switch) + + # legend showing the orientation of the genes + draw_gene_legend(root, 0.4, 0.7, 0.82) + + # On the left panel, make a species tree + fc = "lightslategrey" + + coords = {} + xs, xp = 0.16, 0.03 + coords["lychee"] = (xs, 0.37) + coords["clementine"] = (xs, 0.5) + coords["cacao"] = (xs, 0.6) + coords["strawberry"] = (xs, 0.7) + coords["grape"] = (xs, 0.8) + xs -= xp + coords["Sapindales"] = join_nodes(root, coords, "clementine", "lychee", xs) + xs -= xp + coords["Rosid-II"] = join_nodes(root, coords, "cacao", "Sapindales", xs) + xs -= xp + coords["Rosid"] = join_nodes(root, coords, "strawberry", "Rosid-II", xs) + xs -= xp + coords["crown"] = join_nodes(root, coords, "grape", "Rosid", xs, circle=False) + + # Names of the internal nodes + for tag in ("Rosid", "Rosid-II", "Sapindales"): + nx, ny = coords[tag] + nx, ny = nx - 0.01, ny - 0.02 + root.text(nx, ny, tag, rotation=90, ha="right", va="top", color=fc) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "litchi" + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def amborella(args): + """ + %prog amborella seqids karyotype.layout mcscan.out all.bed synteny.layout + + Build a composite figure that calls graphics.karyotype and graphics.synteny. 
+ """ + p = OptionParser(amborella.__doc__) + p.add_argument("--tree", help="Display trees on the bottom of the figure") + p.add_argument("--switch", help="Rename the seqid with two-column file") + opts, args, iopts = p.set_image_options(args, figsize="8x7") + + if len(args) != 5: + sys.exit(not p.print_help()) + + seqidsfile, klayout, datafile, bedfile, slayout = args + switch = opts.switch + tree = opts.tree + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + + Karyotype(root, seqidsfile, klayout) + Synteny(fig, root, datafile, bedfile, slayout, switch=switch, tree=tree) + + # legend showing the orientation of the genes + draw_gene_legend(root, 0.5, 0.68, 0.5) + + # annotate the WGD events + fc = "lightslategrey" + x = 0.05 + radius = 0.012 + TextCircle(root, x, 0.86, r"$\gamma$", radius=radius) + TextCircle(root, x, 0.95, r"$\epsilon$", radius=radius) + root.plot([x, x], [0.83, 0.9], ":", color=fc, lw=2) + pts = plot_cap((x, 0.95), np.radians(range(-70, 250)), 0.02) + x, y = zip(*pts) + root.plot(x, y, ":", color=fc, lw=2) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "amborella" + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def cotton(args): + """ + %prog cotton seqids karyotype.layout mcscan.out all.bed synteny.layout + + Build a composite figure that calls graphics.karyotype and graphic.synteny. 
+ """ + p = OptionParser(cotton.__doc__) + p.add_argument("--depthfile", help="Use depth info in this file") + p.add_argument("--switch", help="Rename the seqid with two-column file") + opts, args, iopts = p.set_image_options(args, figsize="8x7") + + if len(args) != 5: + sys.exit(not p.print_help()) + + seqidsfile, klayout, datafile, bedfile, slayout = args + switch = opts.switch + depthfile = opts.depthfile + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + + kt = Karyotype(root, seqidsfile, klayout) + Synteny(fig, root, datafile, bedfile, slayout, switch=switch) + + light = "lightslategrey" + # Show the dup depth along the cotton chromosomes + if depthfile: + ymin, ymax = 0.9, 0.95 + root.text(0.11, 0.96, "Cotton duplication level", color="gray", size=10) + root.plot([0.1, 0.95], [ymin, ymin], color="gray") + root.text(0.96, 0.9, "1x", color="gray", va="center") + root.plot([0.1, 0.95], [ymax, ymax], color="gray") + root.text(0.96, 0.95, "6x", color="gray", va="center") + + fp = open(depthfile) + track = kt.tracks[0] # Cotton + depths = [] + for row in fp: + a, b, depth = row.split() + depth = int(depth) + try: + p = track.get_coords(a) + depths.append((p, depth)) + except KeyError: + pass + + depths.sort(key=lambda x: (x[0], -x[1])) + xx, yy = zip(*depths) + yy = [ymin + 0.01 * (x - 1) for x in yy] + root.plot(xx, yy, "-", color=light) + + # legend showing the orientation of the genes + draw_gene_legend(root, 0.5, 0.68, 0.5) + + # Zoom + xpos = 0.835 + ytop = 0.9 + xmin, xmax = 0.18, 0.82 + ymin, ymax = ytop, 0.55 + lc = "k" + kwargs = dict(lw=3, color=lc, mec=lc, mfc="w", zorder=3) + root.plot((xpos, xpos), (ymax, 0.63), ":o", **kwargs) + root.plot((xpos, xmin), (ymax, ymin), ":o", **kwargs) + root.plot((xpos, xmax), (ymax, ymin), ":o", **kwargs) + RoundRect(root, (0.06, 0.17), 0.92, 0.35, fill=False, lw=2, ec=light) + + # Panels + root.text(0.05, 0.95, "a", size=20, fontweight="bold") + root.text(0.1, 0.45, "b", size=20, 
fontweight="bold") + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "cotton" + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def plot_diagram(ax, x, y, label="S", title="syntenic", gradient=True): + """ + Part of the diagrams that are re-used. (x, y) marks the center of the + diagram. Label determines the modification to the "S" graph. + """ + trackgap = 0.06 + tracklen = 0.12 + xa, xb = x - tracklen, x + tracklen + ya, yb = y + trackgap, y - trackgap + hsps = (((60, 150), (50, 130)), ((190, 225), (200, 240)), ((330, 280), (360, 310))) + + for yy in (ya, yb): + ax.plot((xa, xb), (yy, yy), "-", color="gray", lw=2, zorder=1) + + ytip = 0.015 + mrange = 400 + m = lambda t: xa + t * 1.0 / mrange * tracklen * 2 + + for i, ((a, b), (c, d)) in enumerate(hsps): + fb = False + if label == "FB" and i == 1: + c, d = 270, 280 + fb = True + if label == "G" and i == 0: + c, d = 120, 65 + + a, b, c, d = [m(t) for t in (a, b, c, d)] + color = "g" if i == 1 else "r" + GeneGlyph(ax, a, b, ya, 2 * ytip, fc=color, gradient=gradient, zorder=10) + + if i == 1 and label in ("F", "G", "FN"): + pass + else: + if fb: + GeneGlyph( + ax, c, d, yb, 2 * ytip, fc="w", tip=0, gradient=gradient, zorder=10 + ) + else: + GeneGlyph(ax, c, d, yb, 2 * ytip, fc="r", gradient=gradient, zorder=10) + + r = Polygon( + ((a, ya - ytip), (c, yb + ytip), (d, yb + ytip), (b, ya - ytip)), + fc="r", + alpha=0.2, + ) + + if i == 1 and label not in ("S", "FB"): + pass + elif i == 0 and label == "G": + pass + else: + ax.add_patch(r) + + if label == "FN": + ax.text(x + 0.005, yb, "NNNNN", ha="center", size=7) + + title = "{0}: {1}".format(label, title) + ax.text(x, ya + 5 * ytip, title, size=8, ha="center") + + +def epoch(args): + """ + %prog epoch + + Illustrate the methods used in Maggie's epoch paper, in particular, how to + classifiy S/G/F/FB/FN for the genes. 
+ """ + p = OptionParser(epoch.__doc__) + p.parse_args(args) + + fig = plt.figure(1, (6, 4)) + root = fig.add_axes((0, 0, 1, 1)) + + # Separators + linestyle = dict(lw=2, color="b", alpha=0.2, zorder=2) + root.plot((0, 1), (0.5, 0.5), "--", **linestyle) + for i in (1.0 / 3, 2.0 / 3): + root.plot((i, i), (0.5, 1), "--", **linestyle) + for i in (1.0 / 6, 3.0 / 6, 5.0 / 6): + root.plot((i, i), (0, 0.5), "--", **linestyle) + + # Diagrams + plot_diagram(root, 1.0 / 6, 3.0 / 4, "S", "syntenic") + plot_diagram(root, 3.0 / 6, 3.0 / 4, "F", "missing, with both flankers") + plot_diagram(root, 5.0 / 6, 3.0 / 4, "G", "missing, with one flanker") + plot_diagram(root, 2.0 / 6, 1.0 / 4, "FB", "has non-coding matches") + plot_diagram(root, 4.0 / 6, 1.0 / 4, "FN", "syntenic region has gap") + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + figname = fname() + ".pdf" + savefig(figname, dpi=300) + + +if __name__ == "__main__": + main() diff --git a/jcvi/projects/napus.py b/jcvi/projects/napus.py new file mode 100644 index 00000000..8e767e6c --- /dev/null +++ b/jcvi/projects/napus.py @@ -0,0 +1,858 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Scripts for the Brassica napus genome manuscript (Chalhoub et al. Science 2014). 
+""" +import os.path as op +import sys + +import numpy as np + +from ..apps.base import ActionDispatcher, OptionParser, logger +from ..formats.base import LineFile +from ..graphics.base import ( + FancyArrowPatch, + Rectangle, + adjust_spines, + mpl, + normalize_axes, + panel_labels, + plt, + savefig, +) +from ..graphics.coverage import Coverage, Sizes, XYtrack, setup_gauge_ax +from ..graphics.glyph import TextCircle +from ..graphics.karyotype import Karyotype +from ..graphics.synteny import Synteny + + +template_cov = """# y, xstart, xend, rotation, color, label, va, bed +.56, {0}, {1}, 0, darkslategray, , top, AN.bed +.48, {2}, {3}, 0, darkslategray, , top, CN.bed +# edges +e, 0, 1, AN.CN.1x1.lifted.simple +""" +template_f3a = r"""# y, xstart, xend, rotation, color, label, va, bed +.65, {0}, {1}, 0, gainsboro, \noindent\textit{{B. napus}} A$\mathsf{{_n}}$2\\(cv Darmor-\textit{{bzh}}), top, AN.bed +.55, {2}, {3}, 0, gainsboro, \textit{{B. rapa}} A$\mathsf{{_r}}$2, top, brapa.bed +.45, {4}, {5}, 0, gainsboro, \textit{{B. oleracea}} C$\mathsf{{_o}}$2, top, boleracea.bed +.35, {6}, {7}, 0, gainsboro, \noindent\textit{{B. 
napus}} C$\mathsf{{_n}}$2\\(cv Darmor-\textit{{bzh}}), top, CN.bed +# edges +e, 0, 1, AN.brapa.1x1.lifted.simple +e, 1, 2, brapa.boleracea.1x1.lifted.simple +e, 3, 2, CN.boleracea.1x1.lifted.simple""" + +gap = 0.03 + + +class F4ALayoutLine(object): + def __init__(self, row, delimiter=","): + args = row.rstrip().split(delimiter) + args = [x.strip() for x in args] + self.region = args[0] + self.seqid, se = self.region.split(":") + start, end = se.split("-") + self.start, self.end = int(start), int(end) + self.center = (self.start + self.end) / 2 + self.span = self.end - self.start + 1 + self.box_region = args[1] + self.y = float(args[2]) + self.i = int(args[3]) + + +class F4ALayout(LineFile): + def __init__(self, filename, delimiter=","): + super().__init__(filename) + fp = open(filename) + self.edges = [] + for row in fp: + if row[0] == "#": + continue + self.append(F4ALayoutLine(row, delimiter=delimiter)) + + +def main(): + actions = ( + ("ploidy", "plot napus macro-synteny (requires data)"), + ("expr", "plot expression values between homeologs (requires data)"), + ("cov", "plot coverage graphs between homeologs (requires data)"), + ("deletion", "plot histogram for napus deletions (requires data)"), + ("fig3", "plot Figure-3"), + ("fig4", "plot Figure-4 (not in main text)"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def calc_ratio(chrs, sizes): + chr_sizes = [[sizes[x] for x in z] for z in chrs] + chr_sum_sizes = [sum(x) for x in chr_sizes] + ratio = 0.8 / max(chr_sum_sizes) + return chr_sizes, chr_sum_sizes, ratio + + +def center_panel(chr, chr_size, ratio, gap=gap, shift=0): + # Center two panels + w = (ratio * chr_size + (len(chr) - 1) * gap) / 2 + return 0.5 - w + shift, 0.5 + w + shift + + +def make_seqids(chrs, seqidsfile="seqids"): + fw = open(seqidsfile, "w") + for chr in chrs: + print(",".join(chr), file=fw) + fw.close() + logger.debug("File `{0}` written.".format(seqidsfile)) + return seqidsfile + + +def make_layout(chrs, chr_sizes, 
ratio, template, klayout="layout", shift=0): + coords = [] + for chr, chr_size in zip(chrs, chr_sizes): + coords.extend(center_panel(chr, chr_size, ratio, shift=shift)) + + fw = open(klayout, "w") + print(template.format(*coords), file=fw) + fw.close() + logger.debug("File `{0}` written.".format(klayout)) + + return klayout + + +def cov(args): + """ + %prog cov chrA01 chrC01 chr.sizes data AN.CN.1x1.lifted.anchors.simple + + Plot coverage graphs between homeologs, the middle panel show the + homeologous gene pairs. Allow multiple chromosomes to multiple chromosomes. + """ + p = OptionParser(cov.__doc__) + p.add_argument( + "--order", + default="swede,kale,h165,yudal,aviso,abu,bristol,bzh", + help="The order to plot the tracks, comma-separated", + ) + p.add_argument( + "--reverse", + default=False, + action="store_true", + help="Plot the order in reverse", + ) + p.add_argument( + "--gauge_step", default=5000000, type=int, help="Step size for the base scale" + ) + p.add_argument( + "--hlsuffix", + default="regions.forhaibao", + help="Suffix for the filename to be used to highlight regions", + ) + opts, args, iopts = p.set_image_options(args, figsize="11x8") + + if len(args) != 4: + sys.exit(not p.print_help()) + + chr1, chr2, sizesfile, datadir = args + chr1 = chr1.split(",") + chr2 = chr2.split(",") + + order = opts.order + hlsuffix = opts.hlsuffix + if order: + order = order.split(",") + if opts.reverse: + order.reverse() + sizes = Sizes(sizesfile).mapping + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + chrs = (chr1, chr2) + chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes) + chr_size1, chr_size2 = chr_sum_sizes + chr_sizes1, chr_sizes2 = chr_sizes + + w1_start, w1_end = center_panel(chr1, chr_size1, ratio) + w2_start, w2_end = center_panel(chr2, chr_size2, ratio) + w1s = w1_start + w2s = w2_start + + dsg = "gray" + i = 0 + for c1, s1 in zip(chr1, chr_sizes1): + w1 = ratio * s1 + plot_label = i == 0 + i += 1 + canvas1 = 
(w1s, 0.6, w1, 0.3) + Coverage( + fig, + root, + canvas1, + c1, + (0, s1), + datadir, + order=order, + gauge="top", + plot_label=plot_label, + gauge_step=opts.gauge_step, + palette=dsg, + cap=40, + hlsuffix=hlsuffix, + ) + w1s += w1 + gap + + i = 0 + for c2, s2 in zip(chr2, chr_sizes2): + w2 = ratio * s2 + plot_label = i == 0 + i += 1 + canvas2 = (w2s, 0.15, w2, 0.3) + Coverage( + fig, + root, + canvas2, + c2, + (0, s2), + datadir, + order=order, + gauge="bottom", + plot_label=plot_label, + gauge_step=opts.gauge_step, + palette=dsg, + cap=40, + hlsuffix=hlsuffix, + ) + w2s += w2 + gap + + # Synteny panel + seqidsfile = make_seqids(chrs) + klayout = make_layout(chrs, chr_sum_sizes, ratio, template_cov) + Karyotype(root, seqidsfile, klayout, gap=gap, generank=False, sizes=sizes) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + chr2 = "_".join(chr2) + if opts.reverse: + chr2 += ".reverse" + image_name = chr2 + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def conversion_track(order, filename, col, label, ax, color, ypos=0, asterisk=False): + ids = [] + fp = open(filename) + for row in fp: + if asterisk and row[0] != "*": + continue + if (not asterisk) and row[0] == "*": + continue + if asterisk: + row = row[1:] + atoms = row.split() + gid = atoms[col].rsplit(".", 1)[0] + gid = gid.replace("T", "G") + ids.append(gid) + + beds = [order[x][1] for x in ids if x in order] + pts = [x.start for x in beds if x.seqid == label] + if len(pts): + logger.debug("A total of {0} converted loci imported.".format(len(pts))) + else: + logger.error("Array empty. 
Skipped scatterplot.") + return + + ax.vlines(pts, [-1], [ypos], color=color) + ax.set_axis_off() + + +def make_affix_axis(fig, t, yoffset, height=0.001): + x, y = t.xstart, t.y + yoffset + w = t.xend - t.xstart + ax = fig.add_axes([x, y, w, height]) + return ax + + +def fig3(args): + """ + %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data + + Napus Figure 3 displays alignments between quartet chromosomes, inset + with read histograms. + """ + from jcvi.formats.bed import Bed + + p = OptionParser(fig3.__doc__) + p.add_argument( + "--gauge_step", + default=10000000, + type=int, + help="Step size for the base scale", + ) + opts, args, iopts = p.set_image_options(args, figsize="12x9") + + if len(args) != 4: + sys.exit(not p.print_help()) + + chrs, sizes, bedfile, datadir = args + gauge_step = opts.gauge_step + diverge = iopts.diverge + rr, gg = diverge + chrs = [[x] for x in chrs.split(",")] + sizes = Sizes(sizes).mapping + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes) + + # Synteny panel + seqidsfile = make_seqids(chrs) + klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=0.05) + height = 0.07 + r = height / 4 + K = Karyotype( + root, + seqidsfile, + klayout, + gap=gap, + height=height, + lw=2, + generank=False, + sizes=sizes, + heightpad=r, + plot_label=False, + ) + + # Chromosome labels + for kl in K.layout: + if kl.empty: + continue + lx, ly = kl.xstart, kl.y + if lx < 0.11: + lx += 0.1 + ly += 0.06 + label = kl.label + root.text(lx - 0.015, ly, label, fontsize=15, ha="right", va="center") + + # Inset with datafiles + datafiles = ( + "chrA02.bzh.forxmgr", + "parent.A02.per10kb.forxmgr", + "parent.C2.per10kb.forxmgr", + "chrC02.bzh.forxmgr", + ) + datafiles = [op.join(datadir, x) for x in datafiles] + tracks = K.tracks + hlfile = op.join(datadir, "bzh.regions.forhaibao") + xy_axes = [] + for t, datafile in zip(tracks, datafiles): + ax = 
make_affix_axis(fig, t, -r, height=2 * r) + xy_axes.append(ax) + chr = t.seqids[0] + xy = XYtrack(ax, datafile, color="lightslategray") + start, end = 0, t.total + xy.interpolate(end) + xy.cap(ymax=40) + xy.import_hlfile(hlfile, chr, diverge=diverge) + xy.draw() + ax.set_xlim(start, end) + gauge_ax = make_affix_axis(fig, t, -r) + adjust_spines(gauge_ax, ["bottom"]) + setup_gauge_ax(gauge_ax, start, end, gauge_step) + + # Converted gene tracks + ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2) + ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2) + + order = Bed(bedfile).order + for asterisk in (False, True): + conversion_track( + order, + "data/Genes.Converted.seuil.0.6.AtoC.txt", + 0, + "A02", + ax_Ar, + rr, + asterisk=asterisk, + ) + conversion_track( + order, + "data/Genes.Converted.seuil.0.6.AtoC.txt", + 1, + "C2", + ax_Co, + gg, + asterisk=asterisk, + ) + conversion_track( + order, + "data/Genes.Converted.seuil.0.6.CtoA.txt", + 0, + "A02", + ax_Ar, + gg, + ypos=1, + asterisk=asterisk, + ) + conversion_track( + order, + "data/Genes.Converted.seuil.0.6.CtoA.txt", + 1, + "C2", + ax_Co, + rr, + ypos=1, + asterisk=asterisk, + ) + + Ar, Co = xy_axes[1:3] + annotations = ( + (Ar, "Bra028920 Bra028897", "center", "1DAn2+"), + (Ar, "Bra020081 Bra020171", "right", "2DAn2+"), + (Ar, "Bra020218 Bra020286", "left", "3DAn2+"), + (Ar, "Bra008143 Bra008167", "left", "4DAn2-"), + (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"), + (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"), + (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"), + (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"), + (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"), + (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"), + ) + + for ax, genes, ha, label in annotations: + g1, g2 = genes.split() + x1, x2 = order[g1][1].start, order[g2][1].start + if ha == "center": + x = (x1 + x2) / 2 * 0.8 + elif ha == "left": + x = x2 + else: + x = x1 + label = r"\textit{{{0}}}".format(label) + color = rr if "+" in 
label else gg + ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center") + + ax_Ar.set_xlim(0, tracks[1].total) + ax_Ar.set_ylim(-1, 1) + ax_Co.set_xlim(0, tracks[2].total) + ax_Co.set_ylim(-1, 1) + + # Plot coverage in resequencing lines + gstep = 5000000 + order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",") + labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"} + hlsuffix = "regions.forhaibao" + chr1, chr2 = "chrA02", "chrC02" + t1, t2 = tracks[0], tracks[-1] + s1, s2 = sizes[chr1], sizes[chr2] + + canvas1 = (t1.xstart, 0.75, t1.xend - t1.xstart, 0.2) + c = Coverage( + fig, + root, + canvas1, + chr1, + (0, s1), + datadir, + order=order, + gauge=None, + plot_chr_label=False, + gauge_step=gstep, + palette="gray", + cap=40, + hlsuffix=hlsuffix, + labels_dict=labels_dict, + diverge=diverge, + ) + yys = c.yys + x1, x2 = 0.37, 0.72 + tip = 0.02 + annotations = ( + (x1, yys[2] + 0.3 * tip, tip, tip / 2, "FLC"), + (x1, yys[3] + 0.6 * tip, tip, tip / 2, "FLC"), + (x1, yys[5] + 0.6 * tip, tip, tip / 2, "FLC"), + (x2, yys[0] + 0.9 * tip, -1.2 * tip, 0, "GSL"), + (x2, yys[4] + 0.9 * tip, -1.2 * tip, 0, "GSL"), + (x2, yys[6] + 0.9 * tip, -1.2 * tip, 0, "GSL"), + ) + + arrowprops = dict(facecolor="black", shrink=0.05, frac=0.5, width=1, headwidth=4) + for x, y, dx, dy, label in annotations: + label = r"\textit{{{0}}}".format(label) + root.annotate( + label, + xy=(x, y), + xytext=(x + dx, y + dy), + arrowprops=arrowprops, + color=rr, + fontsize=9, + ha="center", + va="center", + ) + + canvas2 = (t2.xstart, 0.05, t2.xend - t2.xstart, 0.2) + Coverage( + fig, + root, + canvas2, + chr2, + (0, s2), + datadir, + order=order, + gauge=None, + plot_chr_label=False, + gauge_step=gstep, + palette="gray", + cap=40, + hlsuffix=hlsuffix, + labels_dict=labels_dict, + diverge=diverge, + ) + + pad = 0.03 + labels = ( + (0.1, 0.67, "A"), + (t1.xstart - 3 * pad, 0.95 + pad, "B"), + (t2.xstart - 3 * pad, 0.25 + pad, "C"), + ) + panel_labels(root, labels) + 
normalize_axes(root) + + image_name = "napus-fig3." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def fig4(args): + """ + %prog fig4 layout data + + Napus Figure 4A displays an example deleted region for quartet chromosomes, + showing read alignments from high GL and low GL lines. + """ + p = OptionParser(fig4.__doc__) + p.add_argument( + "--gauge_step", default=200000, type=int, help="Step size for the base scale" + ) + opts, args, iopts = p.set_image_options(args, figsize="9x7") + + if len(args) != 2: + sys.exit(not p.print_help()) + + layout, datadir = args + layout = F4ALayout(layout) + + gs = opts.gauge_step + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + block, napusbed, slayout = "r28.txt", "all.bed", "r28.layout" + s = Synteny(fig, root, block, napusbed, slayout, chr_label=False) + synteny_exts = [(x.xstart, x.xend) for x in s.rr] + + h = 0.1 + order = "bzh,yudal".split(",") + labels = ( + r"\textit{B. napus} A$\mathsf{_n}$2", + r"\textit{B. rapa} A$\mathsf{_r}$2", + r"\textit{B. oleracea} C$\mathsf{_o}$2", + r"\textit{B. 
napus} C$\mathsf{_n}$2", + ) + for t in layout: + xstart, xend = synteny_exts[2 * t.i] + canvas = [xstart, t.y, xend - xstart, h] + root.text(xstart - h, t.y + h / 2, labels[t.i], ha="center", va="center") + ch, ab = t.box_region.split(":") + a, b = ab.split("-") + vlines = [int(x) for x in (a, b)] + Coverage( + fig, + root, + canvas, + t.seqid, + (t.start, t.end), + datadir, + order=order, + gauge="top", + plot_chr_label=False, + gauge_step=gs, + palette="gray", + cap=40, + hlsuffix="regions.forhaibao", + vlines=vlines, + ) + + # Highlight GSL biosynthesis genes + a, b = (3, "Bra029311"), (5, "Bo2g161590") + for gid in (a, b): + start, end = s.gg[gid] + xstart, ystart = start + xend, yend = end + x = (xstart + xend) / 2 + arrow = FancyArrowPatch( + posA=(x, ystart - 0.04), + posB=(x, ystart - 0.005), + arrowstyle="fancy,head_width=6,head_length=8", + lw=3, + fc="k", + ec="k", + zorder=20, + ) + root.add_patch(arrow) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + image_name = "napus-fig4." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def deletion(args): + """ + %prog deletion [deletion-genes|deletion-bases] C2-deletions boleracea.bed + + Plot histogram for napus deletions. Can plot deletion-genes or + deletion-bases. The three largest segmental deletions will be highlighted + along with a drawing of the C2 chromosome. 
+ """ + import math + from jcvi.formats.bed import Bed + from jcvi.graphics.chromosome import HorizontalChromosome + from jcvi.graphics.base import kb_formatter + + p = OptionParser(deletion.__doc__) + opts, args, iopts = p.set_image_options(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + deletion_genes, deletions, bed = args + dg = [int(x) for x in open(deletion_genes)] + dsg, lsg = "darkslategray", "lightslategray" + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) + minval = 2 if deletion_genes == "deleted-genes" else 2048 + bins = np.logspace(math.log(minval, 10), math.log(max(dg), 10), 16) + ax.hist(dg, bins=bins, fc=lsg, alpha=0.75) + ax.set_xscale("log", basex=2) + if deletion_genes == "deleted-genes": + ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter("%d")) + ax.set_xlabel("No. of deleted genes in each segment") + else: + ax.xaxis.set_major_formatter(kb_formatter) + ax.set_xlabel("No. of deleted bases in each segment") + ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter("%d")) + ax.set_ylabel("No. 
of segments") + ax.patch.set_alpha(0.1) + + # Draw chromosome C2 + na, nb = 0.45, 0.85 + root.text((na + nb) / 2, 0.54, "ChrC02", ha="center") + HorizontalChromosome(root, na, nb, 0.5, height=0.025, fc=lsg) + + order = Bed(bed).order + fp = open(deletions) + scale = lambda x: na + x * (nb - na) / 52886895 + for i, row in enumerate(fp): + i += 1 + num, genes = row.split() + genes = genes.split("|") + ia, a = order[genes[0]] + mi, mx = a.start, a.end + mi, mx = scale(mi), scale(mx) + root.add_patch(Rectangle((mi, 0.475), mx - mi, 0.05, fc="red", ec="red")) + if i == 1: # offset between two adjacent regions for aesthetics + mi -= 0.015 + elif i == 2: + mi += 0.015 + TextCircle(root, mi, 0.44, str(i), fc="red") + + for i, mi in zip(range(1, 4), (0.83, 0.78, 0.73)): + TextCircle(root, mi, 0.2, str(i), fc="red") + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + image_name = deletion_genes + ".pdf" + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def ploidy(args): + """ + %prog ploidy seqids layout + + Build a figure that calls graphics.karyotype to illustrate the high ploidy + of B. napus genome. 
+ """ + p = OptionParser(ploidy.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="8x7") + + if len(args) != 2: + sys.exit(not p.print_help()) + + seqidsfile, klayout = args + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + + Karyotype(root, seqidsfile, klayout) + + fc = "darkslategrey" + radius = 0.012 + ot = -0.05 # use this to adjust vertical position of the left panel + TextCircle(root, 0.1, 0.9 + ot, r"$\gamma$", radius=radius, fc=fc) + root.text(0.1, 0.88 + ot, r"$\times3$", ha="center", va="top", color=fc) + TextCircle(root, 0.08, 0.79 + ot, r"$\alpha$", radius=radius, fc=fc) + TextCircle(root, 0.12, 0.79 + ot, r"$\beta$", radius=radius, fc=fc) + root.text( + 0.1, 0.77 + ot, r"$\times3\times2\times2$", ha="center", va="top", color=fc + ) + root.text( + 0.1, + 0.67 + ot, + r"Brassica triplication", + ha="center", + va="top", + color=fc, + size=11, + ) + root.text( + 0.1, + 0.65 + ot, + r"$\times3\times2\times2\times3$", + ha="center", + va="top", + color=fc, + ) + root.text( + 0.1, 0.42 + ot, r"Allo-tetraploidy", ha="center", va="top", color=fc, size=11 + ) + root.text( + 0.1, + 0.4 + ot, + r"$\times3\times2\times2\times3\times2$", + ha="center", + va="top", + color=fc, + ) + + bb = dict(boxstyle="round,pad=.5", fc="w", ec="0.5", alpha=0.5) + root.text( + 0.5, + 0.2 + ot, + r"\noindent\textit{Brassica napus}\\(A$\mathsf{_n}$C$\mathsf{_n}$ genome)", + ha="center", + size=16, + color="k", + bbox=bb, + ) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "napus" + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def expr(args): + """ + %prog expr block exp layout napus.bed + + Plot a composite figure showing synteny and the expression level between + homeologs in two tissues - total 4 lists of values. block file contains the + gene pairs between AN and CN. 
+ """ + from jcvi.graphics.base import red_purple as default_cm + + p = OptionParser(expr.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="8x5") + + if len(args) != 4: + sys.exit(not p.print_help()) + + block, exp, layout, napusbed = args + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + s = Synteny(fig, root, block, napusbed, layout) + + # Import the expression values + # Columns are: leaf-A, leaf-C, root-A, root-C + fp = open(exp) + data = {} + for row in fp: + gid, lf, rt = row.split() + lf, rt = float(lf), float(rt) + data[gid] = (lf, rt) + + rA, rB = s.rr + gA = [x.accn for x in rA.genes] + gC = [x.accn for x in rB.genes] + + A = [data.get(x, (0, 0)) for x in gA] + C = [data.get(x, (0, 0)) for x in gC] + A = np.array(A) + C = np.array(C) + A = np.transpose(A) + C = np.transpose(C) + + d, h = 0.01, 0.1 + lsg = "lightslategrey" + coords = s.gg # Coordinates of the genes + axes = [] + for j, (y, gg) in enumerate(((0.79, gA), (0.24, gC))): + r = s.rr[j] + x = r.xstart + w = r.xend - r.xstart + ax = fig.add_axes([x, y, w, h]) + axes.append(ax) + root.add_patch( + Rectangle((x - h, y - d), w + h + d, h + 2 * d, fill=False, ec=lsg, lw=1) + ) + root.text(x - d, y + 3 * h / 4, "root", ha="right", va="center") + root.text(x - d, y + h / 4, "leaf", ha="right", va="center") + ty = y - 2 * d if y > 0.5 else y + h + 2 * d + nrows = len(gg) + for i, g in enumerate(gg): + start, end = coords[(j, g)] + sx, sy = start + ex, ey = end + assert sy == ey + sy = sy + 2 * d if sy > 0.5 else sy - 2 * d + root.plot( + ((sx + ex) / 2, x + w * (i + 0.5) / nrows), + (sy, ty), + lw=1, + ls=":", + color="k", + alpha=0.2, + ) + + axA, axC = axes + axA.pcolormesh(A, cmap=default_cm) + axC.pcolormesh(C, cmap=default_cm) + axA.set_xlim(0, len(gA)) + axC.set_xlim(0, len(gC)) + + x, y, w, h = 0.35, 0.1, 0.3, 0.05 + ax_colorbar = fig.add_axes([x, y, w, h]) + fig.colorbar(p, cax=ax_colorbar, orientation="horizontal") + root.text(x - d, y + h / 2, 
"RPKM", ha="right", va="center") + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + for x in (axA, axC, root): + x.set_axis_off() + + image_name = "napusf4b." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +if __name__ == "__main__": + main() diff --git a/jcvi/projects/pineapple.py b/jcvi/projects/pineapple.py new file mode 100644 index 00000000..b092a47d --- /dev/null +++ b/jcvi/projects/pineapple.py @@ -0,0 +1,411 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Scripts for the pineapple genome paper. +""" +import sys + +from ..annotation.ahrd import read_interpro +from ..apps.base import ActionDispatcher, OptionParser, logger +from ..formats.base import DictFile, LineFile, SetFile, get_number, must_open +from ..formats.bed import Bed +from ..formats.sizes import Sizes +from ..graphics.base import Rectangle, panel_labels, plt, savefig +from ..graphics.chromosome import Chromosome +from ..graphics.glyph import TextCircle +from ..graphics.karyotype import Karyotype +from ..graphics.synteny import Synteny, draw_gene_legend + + +class RegionsLine(object): + def __init__(self, line): + args = line.split() + self.karyotype = args[0][0] + self.group = args[0][1] + self.chromosome = int(args[1]) + self.start = int(args[5]) + self.end = int(args[8]) + self.span = abs(self.start - self.end) + + +class RegionsFile(LineFile): + def __init__(self, filename): + super().__init__(filename) + fp = open(filename) + next(fp) + for row in fp: + self.append(RegionsLine(row)) + + @property + def karyotypes(self): + return sorted(set(x.karyotype for x in self)) + + def get_karyotype(self, k): + return [x for x in self if x.karyotype == k] + + +def main(): + + actions = ( + # main figures in text + ("ancestral", "karoytype evolution of pineapple (requires data)"), + ("ploidy", "plot pineapple macro-synteny (requires data)"), + # build pseudomolecule + ("agp", "make agp file"), + ("breakpoints", "make breakpoints"), + ("check", "check agreement"), + # build 
gene info table + ("geneinfo", "build gene info table"), + ("flanking", "extract flanking genes for given SI loci"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def flanking(args): + """ + %prog flanking SI.ids liftover.bed master.txt master-removed.txt + + Extract flanking genes for given SI loci. + """ + p = OptionParser(flanking.__doc__) + p.add_argument("-N", default=50, type=int, help="How many genes on both directions") + opts, args = p.parse_args(args) + + if len(args) != 4: + sys.exit(not p.print_help()) + + SI, liftover, master, te = args + N = opts.N + SI = SetFile(SI, column=0, delimiter=".") + liftover = Bed(liftover) + order = liftover.order + neighbors = set() + for s in SI: + si, s = order[s] + LB = max(si - N, 0) + RB = min(si + N, len(liftover)) + for j in range(LB, RB + 1): + a = liftover[j] + if a.seqid != s.seqid: + continue + neighbors.add(a.accn) + + dmain = DictFile(master, keypos=0, valuepos=None, delimiter="\t") + dte = DictFile(te, keypos=0, valuepos=None, delimiter="\t") + header = next(open(master)) + print("\t".join(("SI/Neighbor", "Gene/TE", header.strip()))) + for a in liftover: + s = a.accn + if s not in neighbors: + continue + + tag = "SI" if s in SI else "neighbor" + if s in dmain: + d = dmain[s] + print("\t".join([tag, "gene"] + d)) + elif s in dte: + d = dte[s] + print("\t".join([tag, "TE"] + d)) + + +def join_nodes_vertical(root, coords, a, b, y, lw=2): + # Join node a and b to make an internal node + ax, ay = coords[a] + bx, by = coords[b] + nx, ny = (ax + bx) / 2, y + root.plot((ax, ax), (ay, ny), "k-", lw=lw) + root.plot((bx, bx), (ay, ny), "k-", lw=lw) + root.plot((ax, bx), (ny, ny), "k-", lw=lw) + return nx, ny + + +def ancestral(args): + """ + %prog ancestral ancestral.txt assembly.fasta + + Karyotype evolution of pineapple. The figure is inspired by Amphioxus paper + Figure 3 and Tetradon paper Figure 9. 
+ """ + p = OptionParser(ancestral.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="8x7") + + if len(args) != 2: + sys.exit(not p.print_help()) + + regionsfile, sizesfile = args + regions = RegionsFile(regionsfile) + sizes = Sizes(sizesfile).mapping + sizes = dict((k, v) for (k, v) in sizes.iteritems() if k[:2] == "LG") + maxsize = max(sizes.values()) + ratio = 0.5 / maxsize + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + + from jcvi.graphics.base import set2 + + a, b, c, d, e, f, g = set2[:7] + set2 = (c, g, b, e, d, a, f) + + # Upper panel is the evolution of segments + # All segments belong to one of seven karyotypes 1 to 7 + karyotypes = regions.karyotypes + xgap = 1.0 / (1 + len(karyotypes)) + ygap = 0.05 + mgap = xgap / 4.5 + gwidth = mgap * 0.75 + tip = 0.02 + coords = {} + for i, k in enumerate(regions.karyotypes): + x = (i + 1) * xgap + y = 0.9 + root.text(x, y + tip, "Anc" + k, ha="center") + root.plot((x, x), (y, y - ygap), "k-", lw=2) + y -= 2 * ygap + coords["a"] = (x - 1.5 * mgap, y) + coords["b"] = (x - 0.5 * mgap, y) + coords["c"] = (x + 0.5 * mgap, y) + coords["d"] = (x + 1.5 * mgap, y) + coords["ab"] = join_nodes_vertical(root, coords, "a", "b", y + ygap / 2) + coords["cd"] = join_nodes_vertical(root, coords, "c", "d", y + ygap / 2) + coords["abcd"] = join_nodes_vertical(root, coords, "ab", "cd", y + ygap) + for n in "abcd": + nx, ny = coords[n] + root.text(nx, ny - tip, n, ha="center") + coords[n] = (nx, ny - ygap / 2) + + kdata = regions.get_karyotype(k) + for kd in kdata: + g = kd.group + gx, gy = coords[g] + gsize = ratio * kd.span + gy -= gsize + p = Rectangle((gx - gwidth / 2, gy), gwidth, gsize, lw=0, color=set2[i]) + root.add_patch(p) + root.text( + gx, gy + gsize / 2, kd.chromosome, ha="center", va="center", color="w" + ) + coords[g] = (gx, gy - tip) + + # Bottom panel shows the location of segments on chromosomes + # TODO: redundant code, similar to graphics.chromosome + ystart = 0.54 + 
chr_number = len(sizes) + xstart, xend = xgap - 2 * mgap, 1 - xgap + 2 * mgap + xinterval = (xend - xstart - gwidth) / (chr_number - 1) + chrpos = {} + for a, (chr, clen) in enumerate(sorted(sizes.items())): + chr = get_number(chr) + xx = xstart + a * xinterval + gwidth / 2 + chrpos[chr] = xx + root.text(xx, ystart + 0.01, chr, ha="center") + Chromosome(root, xx, ystart, ystart - clen * ratio, width=gwidth) + + # Start painting + for r in regions: + xx = chrpos[r.chromosome] + yystart = ystart - r.start * ratio + yyend = ystart - r.end * ratio + p = Rectangle( + (xx - gwidth / 2, yystart), + gwidth, + yyend - yystart, + color=set2[int(r.karyotype) - 1], + lw=0, + ) + root.add_patch(p) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "pineapple-karyotype" + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def geneinfo(args): + """ + %prog geneinfo pineapple.20141004.bed liftover.bed pineapple.20150413.bed \ + note.txt interproscan.txt + + Build gene info table from various sources. The three beds contain + information on the original scaffolds, linkage groups, and final selected + loci (after removal of TEs and split loci). The final two text files contain + AHRD and domain data. 
+ """ + p = OptionParser(geneinfo.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 5: + sys.exit(not p.print_help()) + + scfbed, liftoverbed, lgbed, note, ipr = args + note = DictFile(note, delimiter="\t") + scfbed = Bed(scfbed) + lgorder = Bed(lgbed).order + liftover = Bed(liftoverbed).order + header = ( + "Accession Scaffold-position LG-position " + "Description Interpro-domain Interpro-description " + "GO-term KEGG".split() + ) + ipr = read_interpro(ipr) + + fw_clean = must_open("master.txt", "w") + fw_removed = must_open("master-removed.txt", "w") + + for fw in (fw_clean, fw_removed): + print("\t".join(header), file=fw) + + for b in scfbed: + accession = b.accn + scaffold_position = b.tag + if accession in liftover: + lg_position = liftover[accession][-1].tag + else: + lg_position = "split" + fw = fw_clean if accession in lgorder else fw_removed + description = note[accession] + interpro = interpro_description = go = kegg = "" + if accession in ipr: + interpro, interpro_description, go, kegg = ipr[accession] + print( + "\t".join( + ( + accession, + scaffold_position, + lg_position, + description, + interpro, + interpro_description, + go, + kegg, + ) + ), + file=fw, + ) + fw.close() + + +def ploidy(args): + """ + %prog ploidy seqids karyotype.layout mcscan.out all.bed synteny.layout + + Build a figure that calls graphics.karyotype to illustrate the high ploidy + of WGD history of pineapple genome. The script calls both graphics.karyotype + and graphic.synteny. 
+ """ + p = OptionParser(ploidy.__doc__) + p.add_argument("--switch", help="Rename the seqid with two-column file") + opts, args, iopts = p.set_image_options(args, figsize="9x7") + + if len(args) != 5: + sys.exit(not p.print_help()) + + seqidsfile, klayout, datafile, bedfile, slayout = args + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + + Karyotype(root, seqidsfile, klayout) + Synteny(fig, root, datafile, bedfile, slayout, switch=opts.switch) + + # legend showing the orientation of the genes + draw_gene_legend(root, 0.27, 0.37, 0.52) + + # annotate the WGD events + fc = "lightslategrey" + x = 0.09 + radius = 0.012 + TextCircle(root, x, 0.825, r"$\tau$", radius=radius, fc=fc) + TextCircle(root, x, 0.8, r"$\sigma$", radius=radius, fc=fc) + TextCircle(root, x, 0.72, r"$\rho$", radius=radius, fc=fc) + for ypos in (0.825, 0.8, 0.72): + root.text(0.12, ypos, r"$\times2$", color=fc, ha="center", va="center") + root.plot([x, x], [0.85, 0.775], ":", color=fc, lw=2) + root.plot([x, x], [0.75, 0.675], ":", color=fc, lw=2) + + labels = ((0.04, 0.96, "A"), (0.04, 0.54, "B")) + panel_labels(root, labels) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "pineapple-karyotype" + image_name = pf + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +scaffold = "scaffold_" + + +def check(args): + fp = open("assembly-order.txt") + next(fp) + d = {} + for row in fp: + atoms = row.split() + scaf, tag, linkage, no = atoms[:4] + d[scaf] = tag + + fp = open("chimeric-scaffolds.txt") + next(fp) + for row in fp: + old, new, tag, start, end = row.strip().split("\t") + if new not in d: + print(new, "not in sheet1") + continue + if d[new] != tag: + print("{0} => {1} in sheet1 but {2} in sheet2".format(new, d[new], tag)) + + +def agp(args): + fp = open("assembly-order.txt") + next(fp) + sizes = Sizes("SCAFFOLD-SPLIT.fasta").mapping + for row in fp: + atoms = row.split() + assert len(atoms) in (4, 5) + if len(atoms) == 4: + atoms.append("?") + scaf, tag, linkage, no, strand = atoms + strand = strand.lower() + strand = {"f": "+", "r": "-", "?": "?"}[strand] + scaf = "scaffold_" + scaf + scaf_size = sizes[scaf] + linkage = "LG{0:02d}".format(ord(linkage.lower()) - ord("a") + 1) + print("\t".join(str(x) for x in (scaf, 0, scaf_size, linkage, 1000, strand))) + + +def breakpoints(args): + fp = open("chimeric-scaffolds.txt") + next(fp) + scaffolds = set() + nbreaks = 0 + for row in fp: + atoms = row.strip().split("\t") + if len(atoms) == 3: + continue + old, new, tag, start, end = atoms + old = scaffold + old + start, end = int(start), int(end) + if start >= end: + logger.warning("%s %d >= %d", old, start, end) + start, end = end, start + print("\t".join(str(x) for x in (old, start - 1, end))) + nbreaks += 1 + scaffolds.add(old) + print( + "{0} breakpoints in total, {1} scaffolds broken".format( + nbreaks, len(scaffolds) + ), + file=sys.stderr, + ) + + +if __name__ == "__main__": + main() diff --git a/jcvi/projects/str.py b/jcvi/projects/str.py new file mode 100644 index 00000000..f31e2e12 --- /dev/null +++ b/jcvi/projects/str.py @@ -0,0 +1,2271 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Related scripts for the HLI-STR (TREDPARSE) paper. 
+""" +import os.path as op +import os +import csv +import sys +import json +import numpy as np +import pandas as pd + +from collections import defaultdict +from itertools import product +from random import sample + +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +from natsort import natsorted +from pyfaidx import Fasta + +try: + import vcf +except ImportError: + pass + +from ..apps.base import ActionDispatcher, OptionParser, cleanup, iglob, logger, mkdir +from ..apps.base import datafile, sh +from ..apps.bwa import align +from ..apps.grid import Parallel +from ..assembly.sim import eagle, wgsim +from ..formats.base import is_number, must_open +from ..formats.sam import get_minibam_bed, index +from ..graphics.base import ( + FancyArrow, + normalize_axes, + panel_labels, + plt, + savefig, + set_helvetica_axis, +) +from ..utils.cbook import percentage +from ..utils.table import tabulate +from ..variation.str import TREDsRepo, af_to_counts, read_treds + + +# Huntington risk allele +infected_thr = 40 +ref_thr = 19 +SIMULATED_HAPLOID = r"Simulated haploid $\mathit{h}$" +SIMULATED_DIPLOID = r"Simulated diploid $\mathit{20/h}$" +lsg = "lightslategray" + +# List of TRED loci excluded from plots +ignore = ("AR",) + + +class TREDPARSEvcf(object): + def __init__(self, vcffile): + samplekey = op.basename(vcffile).split(".")[0] + reader = vcf.Reader(open(vcffile, "rb")) + res = "-1/-1" + for rec in reader: + sample = rec.samples[0] + res = sample["GB"] + ci = sample["CI"] + break + print(samplekey, res, ci) + + +class TrioOrDuo: + def __init__(self, parents, child, family): + self.parents = dict((x, family[x]) for x in parents) + self.child = dict((x, family[x]) for x in child) + self.is_trio = len(self.parents) == 2 + + def __len__(self): + return len(self.parents) + len(self.child) + + def __key(self): + return tuple(sorted(self.parents.values()) + self.child.values()) + + def __hash__(self): + return hash(self.__key()) + + def __eq__(self, 
other): + return self.__key() == other.__key() + + def __str__(self): + return str(self.parents) + "=>" + str(self.child) + + __repr__ = __str__ + + def check_mendelian(self, df, tred, tolerance=0, x_linked=False, verbose=False): + child_key = self.child.values()[0] + c = get_alleles(df, child_key, tred) + if c is None: + return 0 + if self.is_trio: + parent_keys = self.parents.values() + p1 = get_alleles(df, parent_keys[0], tred) + p2 = get_alleles(df, parent_keys[1], tred) + if (p1 is None) or (p2 is None): + return 0 + possible_progenies = get_progenies( + p1, p2, x_linked=x_linked, tolerance=tolerance + ) + mendelian_error = not (c in possible_progenies) + if verbose: + print( + parent_keys[0], + parent_keys[1], + child_key, + p1, + p2, + c, + not mendelian_error, + ) + else: + parent_key = self.parents.values()[0] + p1 = get_alleles(df, parent_key, tred) + if p1 is None: + return 0 + _p1 = expand_alleles(p1, tolerance=tolerance) + mendelian_error = len(set(_p1) & set(c)) == 0 + if mendelian_error and x_linked: + # Do not count case where - progeny is male, parent is male + if (c[0] == c[1]) and (p1[0] == p1[1]): + mendelian_error = 0 + if verbose: + print(parent_key, child_key, p1, c, not mendelian_error) + return mendelian_error + + +def expand_alleles(p, tolerance=0): + """ + Returns expanded allele set given the tolerance. + """ + _p = set() + for x in p: + _p |= set(range(x - tolerance, x + tolerance + 1)) + return _p + + +def get_progenies(p1, p2, x_linked=False, tolerance=0): + """ + Returns possible progenies in a trio. 
+ """ + _p1 = expand_alleles(p1, tolerance=tolerance) + _p2 = expand_alleles(p2, tolerance=tolerance) + possible_progenies = set(tuple(sorted(x)) for x in product(_p1, _p2)) + if x_linked: # Add all hemizygotes + possible_progenies |= set((x, x) for x in (set(_p1) | set(_p2))) + return possible_progenies + + +def get_alleles(df, sample, tred): + try: + s = df.ix[sample] + a = int(s[tred + ".1"]) + b = int(s[tred + ".2"]) + except: + return None + if a == -1 or b == -1: + return None + return a, b + + +def main(): + + actions = ( + # Prepare data + ("simulate", "simulate bams with varying inserts with dwgsim"), + ("mergebam", "merge sets of BAMs to make diploid"), + ("mini", "prepare mini-BAMs that contain only the STR loci"), + ("alts", "build alternative loci based on simulation data"), + # Compile results + ("batchlobstr", "run lobSTR on a list of BAMs"), + ("compilevcf", "compile vcf outputs into lists"), + # Plotting + ("evidences", "plot distribution of evidences"), + ("likelihood", "plot likelihood surface"), + ("likelihood2", "plot likelihood surface and marginals"), + ("likelihood3", "plot likelihood surface and marginals for two settings"), + ("compare", "compare callers on fake HD patients"), + ("compare2", "compare TREDPARSE and lobSTR on fake HD patients"), + ("power", "compare TREDPARSE on fake HD patients adding evidence"), + ("tredparse", "compare TREDPARSE on fake HD patients adding coverage"), + ("allelefreq", "plot the allele frequencies of some STRs"), + ("allelefreqall", "plot all 30 STR allele frequencies"), + ("depth", "plot read depths across all TREDs"), + # Diagram + ("diagram", "plot the predictive power of various evidences"), + # Extra analysis for reviews + ("mendelian", "calculate Mendelian errors based on trios and duos"), + ("mendelian2", "second iteration of Mendelian error calculation"), + ("mendelian_errors", "plot Mendelian errors calculated by mendelian"), + ("mendelian_errors2", "plot Mendelian errors calculated by 
mendelian2"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def mendelian_errors2(args): + """ + %prog mendelian_errors2 Trios.summary.csv + + Plot Mendelian errors as calculated by mendelian(). File + `Trios.summary.csv` looks like: + + Name,Motif,Inheritance,N_Correct,N_Error,N_missing,ErrorRate [N_Error / (N_Correct + N_Error))] + DM1,CTG,AD,790,12,0,1.5% + DM2,CCTG,AD,757,45,0,5.6% + DRPLA,CAG,AD,791,11,0,1.4% + """ + p = OptionParser(mendelian_errors2.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="7x7", format="png") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (csvfile,) = args + fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + ymin = -0.2 + df = pd.read_csv(csvfile) + data = [] + for i, d in df.iterrows(): + tred = d["Name"] + motif = d["Motif"] + if tred in ignore: + logger.debug("Ignore {}".format(d["TRED"])) + continue + + if len(motif) > 6: + if "/" in motif: # CTG/CAG + motif = motif.split("/")[0] + else: + motif = motif[:6] + ".." 
+ xtred = "{} {}".format(tred, motif) + accuracy = d[-1] + data.append((xtred, accuracy)) + + key = lambda x: float(x.rstrip("%")) + data.sort(key=lambda x: key(x[-1])) + print(data) + treds, accuracies = zip(*data) + ntreds = len(treds) + ticks = range(ntreds) + accuracies = [key(x) for x in accuracies] + + for tick, accuracy in zip(ticks, accuracies): + ax.plot([tick, tick], [ymin, accuracy], "-", lw=2, color="lightslategray") + + (trios,) = ax.plot(accuracies, "o", mfc="w", mec="b") + ax.set_title("Mendelian errors based on STR calls in trios in HLI samples") + ntrios = "Mendelian errors in 802 trios" + ax.legend([trios], [ntrios], loc="best") + + ax.set_xticks(ticks) + ax.set_xticklabels(treds, rotation=45, ha="right", size=8) + ax.set_yticklabels([int(x) for x in ax.get_yticks()], family="Helvetica") + ax.set_ylabel(r"Mendelian errors (\%)") + ax.set_ylim(ymin, 100) + + normalize_axes(root) + + image_name = "mendelian_errors2." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def mendelian2(args): + """ + %prog mendelian2 + XC_kinship_TRIO_annotationed_age_sex_PaternalMaternalAgeWhenChildWasBorn.txt + hli.20170805.tsv + + Second iteration of Mendelian error calculation. This includes all the read + counts and gender information to correct error estimate of X-linked loci. 
+ """ + p = OptionParser(mendelian2.__doc__) + p.add_argument( + "--treds", default=None, help="Extract specific treds, use comma to separate" + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + triofile, hlitsv = args + repo = TREDsRepo() + treds = opts.treds.split(",") if opts.treds else repo.names + triodata = pd.read_csv(triofile, sep="\t") + samplekey = lambda x: x.split("_")[1] + trios = [] + for i, row in triodata.iterrows(): + proband = row["proband"] + parents = row["parents"] + proband_sex = row["proband_sex"] + parents_sex = row["parent1_sex,parent2_sex"] + proband = samplekey(proband) + p1, p2 = parents.split(",") + p1, p2 = samplekey(p1), samplekey(p2) + p1_sex, p2_sex = parents_sex.split(",") + if p1_sex == "Male": + p1, p2 = p2, p1 + p1_sex, p2_sex = p2_sex, p1_sex + trios.append((proband, proband_sex, p1, p1_sex, p2, p2_sex)) + + header = "{0}_ID {0}_Sex {0}_Calls" + header += " {0}_Full {0}_Partial {0}_Repeat {0}_Paired" + tredsdata = pd.read_csv(hlitsv, sep="\t", low_memory=False) + tsvfiles = [] + summary = open("Trios.summary.csv", "w") + summary_header = ( + "Name,Motif,Inheritance,N_Correct,N_Error,N_missing," + "ErrorRate [N_Error / (N_Correct + N_Error))]" + ) + print(summary_header, file=summary) + print(summary_header) + for tred in treds: + if tred in ("FXS", "AR"): + continue + tr = repo[tred] + tsvfile = "{}.details.tsv".format(tred) + fw = open(tsvfile, "w") + td = {} + for _, row in tredsdata.iterrows(): + s = str(row["SampleKey"]) + inferredGender = row["inferredGender"] + try: + calls = row[tred + ".calls"] + fdp = int(row[tred + ".FDP"]) + pdp = int(row[tred + ".PDP"]) + rdp = int(row[tred + ".RDP"]) + pedp = int(row[tred + ".PEDP"]) + td[s] = [str(x) for x in (inferredGender, calls, fdp, pdp, rdp, pedp)] + except ValueError: + logger.error("Invalid row: {}".format(row)) + continue + + h = " ".join((header.format("P1"), header.format("P2"), header.format("Kid"))) + 
print("\t".join(["MendelianError"] + h.split()), file=fw) + tredcall = lambda x: td.get(x, ["", "-1|-1", "", "", "", ""])[:] + counts = defaultdict(int) + is_xlinked = repo[tred].is_xlinked + shorten = lambda x: str(int(x[-4:])) # Simplify SampleKey + for proband, proband_sex, p1, p1_sex, p2, p2_sex in trios: + tp1 = tredcall(p1) + tp2 = tredcall(p2) + tpp = tredcall(proband) + m = mendelian_check(tp1, tp2, tpp, is_xlinked=is_xlinked) + counts[m] += 1 + if is_xlinked: + for p, p_sex in ((tp1, p1_sex), (tp2, p2_sex), (tpp, proband_sex)): + if p[1].startswith("-"): + p[1] = "n.a." + cells = [shorten(p1), p1_sex] + tp1[1:] + cells += [shorten(p2), p2_sex] + tp2[1:] + cells += [shorten(proband), proband_sex] + tpp[1:] + print("\t".join([m] + cells), file=fw) + fw.close() + tsvfiles.append(tsvfile) + + error_rate = counts["Error"] * 100.0 / (counts["Correct"] + counts["Error"]) + line = ",".join( + str(x) + for x in ( + tred, + tr.motif, + tr.inheritance, + counts["Correct"], + counts["Error"], + counts["Missing"], + "{:.1f}%".format(error_rate), + ) + ) + print(line, file=summary) + print(line) + + # Combine into a master spreadsheet + import xlwt + + wb = xlwt.Workbook() + converter = lambda x: int(x) if is_number(x, cast=int) else x + header = xlwt.easyxf("font: bold on, name Helvetica; align: horiz center") + hc = "font: name Helvetica; align: horiz center;" + horiz_center = xlwt.Style.easyxf(hc) + correct = xlwt.Style.easyxf(hc + "pattern: pattern solid, fore_colour light_green;") + error = xlwt.Style.easyxf(hc + "pattern: pattern solid, fore_colour rose;") + missing = xlwt.Style.easyxf( + hc + "pattern: pattern solid, fore_colour light_yellow;" + ) + for tsvfile in tsvfiles: + sheet = op.basename(tsvfile).split(".", 1)[0] + ws = wb.add_sheet(sheet) + fp = open(tsvfile, "rb") + reader = csv.reader(fp, delimiter="\t") + for r, row in enumerate(reader): + style = header if r == 0 else horiz_center + for c, col in enumerate(row): + if c == 0 and r: + style = 
{"Correct": correct, "Error": error, "Missing": missing}[ + col + ] + ws.write(r, c, converter(col), style) + ws.set_panes_frozen(True) + ws.set_horz_split_pos(1) + + wb.save("Trios.xls") + summary.close() + + +def mendelian_check(tp1, tp2, tpp, is_xlinked=False): + """ + Compare TRED calls for Parent1, Parent2 and Proband. + """ + call_to_ints = lambda x: tuple(int(_) for _ in x.split("|") if _ != ".") + tp1_sex, tp1_call = tp1[:2] + tp2_sex, tp2_call = tp2[:2] + tpp_sex, tpp_call = tpp[:2] + # tp1_evidence = sum(int(x) for x in tp1[2:]) + # tp2_evidence = sum(int(x) for x in tp2[2:]) + # tpp_evidence = sum(int(x) for x in tpp[2:]) + tp1_call = call_to_ints(tp1_call) + tp2_call = call_to_ints(tp2_call) + tpp_call = call_to_ints(tpp_call) + possible_progenies = set(tuple(sorted(x)) for x in product(tp1_call, tp2_call)) + if is_xlinked and tpp_sex == "Male": + possible_progenies = set(tuple((x,)) for x in tp1_call) + if -1 in tp1_call or -1 in tp2_call or -1 in tpp_call: + tag = "Missing" + else: + tag = "Correct" if tpp_call in possible_progenies else "Error" + return tag + + +def in_region(rname, rstart, target_chr, target_start, target_end): + """ + Quick check if a point is within the target region. + """ + return (rname == target_chr) and (target_start <= rstart <= target_end) + + +def alts(args): + """ + %prog alts HD + + Build alternative loci based on simulation data. 
+ """ + import pysam + from more_itertools import pairwise + from jcvi.utils.grouper import Grouper + + p = OptionParser(alts.__doc__) + p.set_outfile(outfile="TREDs.alts.csv") + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + treds = args + repo = TREDsRepo() + if "all" in treds: + treds = repo.names + + pad_left, pad_right = 1000, 10000 + READLEN = 150 + fw = must_open(opts.outfile, "w") + print("TRED,alts,alts.hg19", file=fw) # Header + for tred in treds: + ref_regions = [] + + # Simulate a depth 1000 BAM with 300 repeats + for ref in ("hg38", "hg19"): + + # This is the region that involves the TRED locus + repo = TREDsRepo(ref=ref) + t = repo[tred] + chr, start, end = t.chr, t.repeat_start, t.repeat_end + start -= pad_left + end += pad_right + + tred_ref = "{}_{}".format(tred, ref) + if not op.isdir(tred_ref): + simulate( + [ + tred_ref, + "300", + "300", + "--depth=1000", + "--ref={}".format(ref), + "--tred={}".format(tred), + ] + ) + bamfile = op.join(tred_ref, "300.bam") + + # Parse the BAM file, retrieve all regions + bamfile = pysam.AlignmentFile(bamfile, "rb") + nreads = altreads = 0 + alt_points = set() + for read in bamfile.fetch(): + fname, fstart = ( + bamfile.getrname(read.reference_id), + read.reference_start, + ) + rname, rstart = ( + bamfile.getrname(read.next_reference_id), + read.next_reference_start, + ) + f_in_region = in_region(fname, fstart, chr, start, end) + r_in_region = in_region(rname, rstart, chr, start, end) + if (not f_in_region) and r_in_region: + alt_points.add((fname, fstart)) + altreads += 1 + if (not r_in_region) and f_in_region: + alt_points.add((rname, rstart)) + altreads += 1 + nreads += 1 + + logger.debug( + "A total of {} reads ({} alts) processed".format(nreads, altreads) + ) + alt_points = natsorted(alt_points) + + # Chain these points together into regions + g = Grouper() + for a in alt_points: + g.join(a) + for a, b in pairwise(alt_points): + achr, apos = a + bchr, bpos = b + if 
achr != bchr: + continue + if (bpos - apos) > READLEN: + continue + g.join(a, b) + + # All regions that contain ALT + alt_sum = 0 + regions = [] + for c in g: + chr_min, pos_min = min(c) + chr_max, pos_max = max(c) + assert chr_min, chr_max + pos_min -= READLEN + pos_max += READLEN + regions.append((chr_min, pos_min, pos_max)) + alt_sum += pos_max - pos_min + + regions = "|".join( + [ + "{}:{}-{}".format(c, start, end) + for c, start, end in natsorted(regions) + ] + ) + ref_regions.append(regions) + + line = ",".join([tred] + ref_regions) + print(line, file=sys.stderr) + print(line, file=fw) + logger.debug("Alternative region sum: {} bp".format(alt_sum)) + + fw.close() + + +def depth(args): + """ + %prog depth DP.tsv + + Plot read depths across all TREDs. + """ + import seaborn as sns + + p = OptionParser(depth.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="14x14") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (tsvfile,) = args + fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots( + ncols=2, nrows=2, figsize=(iopts.w, iopts.h) + ) + plt.tight_layout(pad=6) + + data = pd.read_csv(tsvfile, sep="\t", low_memory=False) + + ids, treds = read_treds() + for dp, ax, title in zip( + ("FDP", "PDP", "RDP", "PEDP"), + (ax1, ax2, ax3, ax4), + ("Spanning reads", "Partial reads", "Repeat-only reads", "Paired-end reads"), + ): + logger.debug("Build {}".format(title)) + # Construct related data structure + xd = [] # (tred, dp) + mdp = [] # (tred, median_dp) + for tred, motif in zip(treds["abbreviation"], treds["motif"]): + if tred in ignore: + logger.debug("Ignore {}".format(tred)) + continue + if len(motif) > 4: + if "/" in motif: # CTG/CAG + motif = motif.split("/")[0] + else: + motif = motif[:4] + ".." + xtred = "{} {}".format(tred, motif) + md = [x for x in data[tred + "." 
+ dp] if x >= 0] + subsample = 10000 if dp == "RDP" else 1000 + md = sample(md, subsample) + pmd = [x for x in md if x > 0] + median = np.median(pmd) if pmd else 0 + mdp.append((xtred, median)) + for d in md: + xd.append((xtred, d)) + + # Determine order + mdp.sort(key=lambda x: x[1]) + order, mdp = zip(*mdp) + + # OK, now plot + xt, xd = zip(*xd) + sns.boxplot(xt, xd, ax=ax, order=order, fliersize=2) + xticklabels = ax.get_xticklabels() + ax.set_xticklabels(xticklabels, rotation=45, ha="right") + ax.set_title("Number of {} per locus".format(title), size=18) + ylim = 30 if dp == "RDP" else 100 + ax.set_ylim(0, ylim) + + yticklabels = [int(x) for x in ax.get_yticks()] + ax.set_yticklabels(yticklabels, family="Helvetica", size=14) + + root = fig.add_axes([0, 0, 1, 1]) + pad = 0.04 + panel_labels( + root, + ( + (pad, 1 - pad, "A"), + (1 / 2.0 + pad / 2, 1 - pad, "B"), + (pad, 0.5 - pad / 2, "C"), + (1 / 2.0 + pad / 2, 0.5 - pad / 2, "D"), + ), + ) + normalize_axes(root) + + image_name = "depth." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def mendelian_errors(args): + """ + %prog mendelian_errors STR-Mendelian-errors.csv + + Plot Mendelian errors as calculated by mendelian(). File + `STR-Mendelian-errors.csv` looks like: + + ,Duos - Mendelian errors,Trios - Mendelian errors + SCA36,1.40%,0.60% + ULD,0.30%,1.50% + BPES,0.00%,1.80% + + One TRED disease per line, followed by duo errors and trio errors. 
+ """ + p = OptionParser(mendelian_errors.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="6x6") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (csvfile,) = args + fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + ymin = -0.2 + df = pd.read_csv(csvfile) + data = [] + for i, d in df.iterrows(): + if d["TRED"].split()[0] in ignore: + logger.debug("Ignore {}".format(d["TRED"])) + continue + data.append(d) + treds, duos, trios = zip(*data) + ntreds = len(treds) + ticks = range(ntreds) + treds = [x.split()[0] for x in treds] + duos = [float(x.rstrip("%")) for x in duos] + trios = [float(x.rstrip("%")) for x in trios] + + for tick, duo, trio in zip(ticks, duos, trios): + m = max(duo, trio) + ax.plot([tick, tick], [ymin, m], "-", lw=2, color="lightslategray") + + (duos,) = ax.plot(duos, "o", mfc="w", mec="g") + (trios,) = ax.plot(trios, "o", mfc="w", mec="b") + ax.set_title("Mendelian errors based on trios and duos in HLI samples") + nduos = "Mendelian errors in 362 duos" + ntrios = "Mendelian errors in 339 trios" + ax.legend([trios, duos], [ntrios, nduos], loc="best") + + ax.set_xticks(ticks) + ax.set_xticklabels(treds, rotation=45, ha="right", size=8) + yticklabels = [int(x) for x in ax.get_yticks()] + ax.set_yticklabels(yticklabels, family="Helvetica") + ax.set_ylabel(r"Mendelian errors (\%)") + ax.set_ylim(ymin, 20) + + normalize_axes(root) + + image_name = "mendelian_errors." 
def extract_trios(family):
    """
    Yield every trio/duo found in one family.

    `family` maps a relationship label to an individual, for example:
    {
        "ChildSelf": "176531498",
        "DzTwin": "176531497",
        "Parent": "176449143"
    }

    Labels are matched case-insensitively by substring. Two kinds of
    groupings are produced, in this order:
      1. parents -> each sibling/twin, followed by the "ChildSelf" entry
      2. "ChildSelf" (+ spouse, if any) -> each child
    """
    proband = ["ChildSelf"]
    spouses, parents, siblings, children = [], [], [], []

    # Single pass over the labels; one label may land in several buckets
    for relation in family.keys():
        lowered = relation.lower()
        is_grand = "grand" in lowered
        if "spouse" in lowered:
            spouses.append(relation)
        if "parent" in lowered and not is_grand:
            parents.append(relation)
        if "sibling" in lowered or "twin" in lowered:
            siblings.append(relation)
        if "child" in lowered and not is_grand and "self" not in lowered:
            children.append(relation)

    assert len(spouses) <= 1

    for sibling in siblings + proband:
        yield TrioOrDuo(parents, [sibling], family)
    for child in children:
        yield TrioOrDuo(proband + spouses, [child], family)


def read_tred_tsv(tsvfile):
    """
    Load the tab-separated TRED table, indexed by its first column.

    The "SampleKey" column is requested as `str`.
    """
    return pd.read_csv(tsvfile, sep="\t", index_col=0, dtype={"SampleKey": str})
+ """ + p = OptionParser(mendelian.__doc__) + p.add_argument("--tolerance", default=0, type=int, help="Tolernace for differences") + p.set_verbose() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + triosjson, tredtsv = args + verbose = opts.verbose + tolerance = opts.tolerance + + js = json.load(open(triosjson)) + allterms = set() + duos = set() + trios = set() + for v in js: + allterms |= set(v.keys()) + for trio_or_duo in extract_trios(v): + assert len(trio_or_duo) in (2, 3) + if len(trio_or_duo) == 2: + duos.add(trio_or_duo) + else: + trios.add(trio_or_duo) + # print "\n".join(allterms) + print("A total of {} families imported".format(len(js))) + + # Read in all data + df = read_tred_tsv(tredtsv) + + ids, treds = read_treds() + table = {} + for tred, inheritance in zip(treds["abbreviation"], treds["inheritance"]): + x_linked = inheritance[0] == "X" # X-linked + name = tred + if x_linked: + name += " (X-linked)" + print("[TRED] {}".format(name)) + + n_total = len(duos) + n_error = 0 + for duo in duos: + n_error += duo.check_mendelian( + df, tred, tolerance=tolerance, x_linked=x_linked, verbose=verbose + ) + tag = "Duos - Mendelian errors" + print("{}: {}".format(tag, percentage(n_error, n_total))) + duo_error = percentage(n_error, n_total, mode=2) + table[(name, tag)] = "{0:.1f}%".format(duo_error) + + n_total = len(trios) + n_error = 0 + for trio in trios: + n_error += trio.check_mendelian( + df, tred, tolerance=tolerance, x_linked=x_linked, verbose=verbose + ) + tag = "Trios - Mendelian errors" + print("{}: {}".format(tag, percentage(n_error, n_total))) + trio_error = percentage(n_error, n_total, mode=2) + table[(name, tag)] = "{0:.1f}%".format(trio_error) + + # Summarize + print(tabulate(table)) + + +def make_STR_bed(filename="STR.bed", pad=0, treds=None): + tredsfile = datafile("TREDs.meta.csv") + tf = pd.read_csv(tredsfile) + + tds = list(tf["abbreviation"]) + regions = list(tf["repeat_location"]) + fw = 
must_open(filename, "w") + extract_Y = False + for td, region in zip(tds, regions): + if treds and (td not in treds): + continue + c, startend = region.split(":") + extract_Y = extract_Y or (c == "chrY") + start, end = startend.split("-") + start, end = int(start), int(end) + print("\t".join(str(x) for x in (c, start - pad, end + pad, td)), file=fw) + + if not extract_Y: + return filename + + UNIQY = datafile("chrY.hg38.unique_ccn.gc") + fp = open(UNIQY) + nregions = 0 + for i, row in enumerate(fp): + # Some regions still have mapped reads, exclude a few + if i in (1, 4, 6, 7, 10, 11, 13, 16, 18, 19): + continue + if nregions >= 5: + break + c, start, end, gc = row.split() + start, end = int(start), int(end) + print( + "\t".join( + str(x) + for x in ( + c, + start - pad, + end + pad, + "chrY.unique_ccn.{}".format(nregions), + ) + ), + file=fw, + ) + nregions += 1 + + fw.close() + return filename + + +def mini(args): + """ + %prog mini bamfile minibamfile + + Prepare mini-BAMs that contain only the STR loci. 
def parse_log(logfile):
    """
    Parse a log containing `DEBUG:IntegratedCaller:` rows.

    Returns a 4-tuple `(likelihood, CI_h1, CI_h2, MLE)`:
      - likelihood: dict mapping `(i, j)` to the float found in the last
        whitespace-separated token of each `***` row
      - CI_h1, CI_h2: two-element lists of ints read from the `CI(h1)` /
        `CI(h2)` rows (text after the first `=`, split on `-`)
      - MLE: the first two ints after the `=` in the `ML estimate:` row

    Any of CI_h1 / CI_h2 / MLE whose row never appears is returned as None.
    """
    surface = {}
    # Pre-bind the summary values: without this a log missing one of the rows
    # fails with UnboundLocalError at the return statement.
    CI_h1 = CI_h2 = MLE = None

    # Context manager so the handle is released even if a row fails to parse
    with open(logfile) as fp:
        for row in fp:
            if row.startswith("DEBUG:IntegratedCaller:***"):
                atoms = row.split()
                i = int(atoms[1].strip("(,"))
                j = int(atoms[2].strip(")"))
                surface[(i, j)] = float(atoms[-1])
            elif row.startswith("DEBUG:IntegratedCaller:CI(h1)"):
                CI_h1 = [int(x.strip()) for x in row.split("=")[1].split("-")]
            elif row.startswith("DEBUG:IntegratedCaller:CI(h2)"):
                CI_h2 = [int(x.strip()) for x in row.split("=")[1].split("-")]
            elif row.startswith("DEBUG:IntegratedCaller:ML estimate:"):
                fields = row.split(":")[3].split("=")[1].split()[:2]
                MLE = [int(x.strip("[],")) for x in fields]

    return surface, CI_h1, CI_h2, MLE


def likelihood(args):
    """
    %prog likelihood

    Plot likelihood surface. Look for two files in the current folder:
    - 100_100.log, haploid model
    - 100_20.log, diploid model
    """
    p = OptionParser(likelihood.__doc__)
    opts, args, iopts = p.set_image_options(
        args, figsize="10x5", style="white", cmap="coolwarm"
    )

    if len(args) != 0:
        sys.exit(not p.print_help())

    fig, (ax1, ax2) = plt.subplots(ncols=2, nrows=1, figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=4)

    # Haploid model: only the first index of each (i, j) key is plotted
    LL, CI_h1, _, _ = parse_log("100_100.log")
    data = sorted((k[0], v) for k, v in LL.items())
    x, y = zip(*data)
    x = np.array(x)
    (curve,) = ax1.plot(x, y, "-", color=lsg, lw=2)
    ax1.set_title("Simulated haploid ($h^{truth}=100$)")

    h_hat, max_LL = max(data, key=lambda pt: pt[-1])
    # Lowest haploid log-likelihood; reused below as the background fill value
    # of the diploid surface.
    _, min_LL = min(data, key=lambda pt: pt[-1])
    ymin, ymax = ax1.get_ylim()
    ax1.set_ylim([ymin, ymax + 30])

    LL_label = "log(Likelihood)"
    ax1.plot([h_hat, h_hat], [ymin, max_LL], ":", color=lsg, lw=2)
    # NOTE(review): the annotation is hard-coded to 93 rather than derived
    # from h_hat -- confirm this is intentional for the published figure.
    ax1.text(h_hat, max_LL + 10, r"$\hat{h}=93$", color=lsg)
    ax1.set_xlabel(r"$h$")
    ax1.set_ylabel(LL_label)

    a, b = CI_h1
    ci = ax1.fill_between(
        x, [ymin] * len(x), y, where=(x >= a) & (x <= b), color=lsg, alpha=0.5
    )
    ax1.legend([curve, ci], ["Likelihood curve", r"95$\%$ CI"], loc="best")

    # Diploid model: symmetric 301x301 surface, unobserved cells set to min_LL
    LL, _, _, _ = parse_log("100_20.log")
    data = np.ones((301, 301)) * min_LL
    for (a, b), v in LL.items():
        data[a, b] = v
        data[b, a] = v

    data = mask_upper_triangle(data)
    # ax_imshow() requires the truth values as well; the previous call omitted
    # them and raised TypeError. The truths follow the log name (100_20),
    # sorted ascending the same way plot_panel() derives them.
    # NOTE(review): the estimates (20, 104) are hard-coded -- confirm.
    ax_imshow(ax2, data, opts.cmap, LL_label, 20, 104, h1_truth=20, h2_truth=100)

    root = fig.add_axes([0, 0, 1, 1])
    pad = 0.04
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2.0, 1 - pad, "B")))
    normalize_axes(root)

    image_name = "likelihood." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)


def mask_upper_triangle(data):
    """
    Return `data` as a masked array with the upper triangle hidden.

    The main diagonal is masked as well (`np.triu_indices_from` default
    offset), so only the strictly-lower triangle stays visible.
    """
    mask = np.zeros_like(data, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    return np.ma.array(data, mask=mask)
plt.Circle((h1_hat, h2_hat), r, ec="w", fill=False) + ax.add_artist(circle) + + annotation = r"$\hat{h_1}=%d, \hat{h_2}=%d$" % (h1_hat, h2_hat) + ax.text(200, 100, annotation, color=lsg, ha="center", va="center") + + ax.set_xlabel(r"$h_1$") + ax.set_ylabel(r"$h_2$") + title = "Simulated diploid ($h_{1}^{truth}=%d, h_{2}^{truth}=%d$)" % ( + h1_truth, + h2_truth, + ) + ax.set_title(title) + + +def likelihood2(args): + """ + %prog likelihood2 100_20.json + + Plot the likelihood surface and marginal distributions. + """ + from matplotlib import gridspec + + p = OptionParser(likelihood2.__doc__) + opts, args, iopts = p.set_image_options( + args, figsize="10x5", style="white", cmap="coolwarm" + ) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (jsonfile,) = args + fig = plt.figure(figsize=(iopts.w, iopts.h)) + gs = gridspec.GridSpec(2, 2) + ax1 = fig.add_subplot(gs[:, 0]) + ax2 = fig.add_subplot(gs[0, 1]) + ax3 = fig.add_subplot(gs[1, 1]) + plt.tight_layout(pad=3) + pf = plot_panel(jsonfile, ax1, ax2, ax3, opts.cmap) + + root = fig.add_axes([0, 0, 1, 1]) + normalize_axes(root) + + image_name = "likelihood2.{}.".format(pf) + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def likelihood3(args): + """ + %prog likelihood3 140_20.json 140_70.json + + Plot the likelihood surface and marginal distributions for two settings. 
def plot_panel(jsonfile, ax1, ax2, ax3, cmap, tred="HD"):
    """
    Draw one likelihood panel from a JSON result file.

    Reads `tredCalls` from `jsonfile` and plots, for locus `tred`:
      - ax1: the joint `P_h1h2` surface (via ax_imshow)
      - ax2 / ax3: the `P_h1` and `P_h2` marginals (via ax_plot)

    The truth values are taken from the file name: the part of the basename
    before the first "." is split on "_" and sorted, e.g. `100_20.json`
    gives (20, 100).

    Returns that file-name prefix.
    """
    # Close the handle deterministically instead of relying on garbage
    # collection of an anonymous file object.
    with open(jsonfile) as fp:
        calls = json.load(fp)["tredCalls"]

    # "a,b" keys index a symmetric 301x301 grid
    data = np.zeros((301, 301))
    for key, prob in calls[tred + ".P_h1h2"].items():
        a, b = (int(x) for x in key.split(","))
        data[a, b] = prob
        data[b, a] = prob

    label = "Probability density"
    data = mask_upper_triangle(data)
    h1_hat, h2_hat = calls[tred + ".1"], calls[tred + ".2"]
    pf = op.basename(jsonfile).split(".")[0]
    h1_truth, h2_truth = sorted([int(x) for x in pf.split("_")])
    ax_imshow(
        ax1,
        data,
        cmap,
        label,
        h1_hat,
        h2_hat,
        h1_truth,
        h2_truth,
        draw_circle=False,
        ticks=False,
    )

    # CI is stored as "lo-hi|lo-hi", one interval per allele
    CI_h1, CI_h2 = calls[tred + ".CI"].split("|")
    CI_h1 = [int(x) for x in CI_h1.split("-")]
    CI_h2 = [int(x) for x in CI_h2.split("-")]
    P_h1 = calls[tred + ".P_h1"]
    P_h2 = calls[tred + ".P_h2"]

    ax_plot(ax2, P_h1, h1_hat, CI_h1, "h_1", label, ticks=False)
    ax_plot(ax3, P_h2, h2_hat, CI_h2, "h_2", label, ticks=False)

    return pf
predictive power of various evidences. + """ + p = OptionParser(diagram.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="8x4", format="png") + + if len(args) != 0: + sys.exit(not p.print_help()) + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + # Gauge on top, this is log-scale + yy = 0.7 + yinterval = 0.1 + height = 0.05 + yp = yy - yinterval - height + canvas = 0.95 + xstart = 0.025 + convert = lambda x: xstart + x * canvas / 600 + # Symbols + root.text( + 0.5, 0.9, r"$L$: Read length, $F$: Flank size, $V$: Pair distance", ha="center" + ) + root.text(0.5, 0.85, r"ex. $L=150bp, F=9bp, V=500bp$", ha="center") + root.text( + xstart + canvas, + yy - height, + "STR repeat length", + ha="center", + color=lsg, + size=10, + ) + + # Mark the key events + pad = 0.02 + arrowlen = canvas * 1.05 + arrowprops = dict( + length_includes_head=True, + width=0.01, + fc=lsg, + lw=0, + head_length=arrowlen * 0.12, + head_width=0.04, + ) + p = FancyArrow(xstart, yy, arrowlen, 0, shape="right", **arrowprops) + root.add_patch(p) + + ppad = 30 + keyevents = ( + (0, 0, -1, r"$0$"), + (150 - 18, 150 - 18 - ppad, 0, r"$L - 2F$"), + (150 - 9, 150 - 9, 1, r"$L - F$"), + (150, 150 + ppad, 2, r"$L$"), + (500 - 9, 500 - 9, 3, r"$V - F$"), + ) + for event, pos, i, label in keyevents: + _event = convert(event) + _pos = convert(pos) + root.plot((_event, _event), (yy - height / 4, yy + height / 4), "-", color="k") + root.text(_pos, yy + pad, label, rotation=45, va="bottom", size=8) + if i < 0: + continue + ystart = yp - i * yinterval + root.plot((_event, _event), (ystart, yy - height / 4), ":", color=lsg) + + # Range on bottom. These are simple 4 rectangles, with the range indicating + # the predictive range. 
+ CLOSED, OPEN = range(2) + ranges = ( + (0, 150 - 18, CLOSED, "Spanning reads"), + (9, 150 - 9, OPEN, "Partial reads"), + (150, 500 - 9, CLOSED, "Repeat reads"), + (0, 500 - 9, CLOSED, "Paired-end reads"), + ) + for start, end, starttag, label in ranges: + _start = convert(start) + _end = convert(end) + data = ( + [[0.0, 1.0], [0.0, 1.0]] if starttag == OPEN else [[1.0, 0.0], [1.0, 0.0]] + ) + root.imshow( + data, + interpolation="bicubic", + cmap=plt.cm.Greens, + extent=[_start, _end, yp, yp + height], + ) + root.text(_end + pad, yp + height / 2, label, va="center") + yp -= yinterval + + normalize_axes(root) + + image_name = "diagram." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def plot_allelefreq(ax, df, locus, color="lightslategray"): + tred = df.ix[locus] + cnt = af_to_counts(tred["allele_freq"]) + + cntx, cnty = zip(*cnt.items()) + + motif = tred["motif"] + cutoff_prerisk = tred["cutoff_prerisk"] + cutoff_risk = tred["cutoff_risk"] + npredisease = sum(v for (k, v) in cnt.items() if cutoff_prerisk <= k < cutoff_risk) + npatients = sum(v for (k, v) in cnt.items() if k >= cutoff_risk) + + ax.bar(cntx, cnty, fc=color) + + ymin, ymax = ax.get_ylim() + xmax = (cutoff_risk / 10 + 1) * 10 if cutoff_risk > 50 else 50 + pad = xmax * 0.03 + if cutoff_prerisk < cutoff_risk and npredisease: + ax.axvline(x=cutoff_prerisk, color="k", lw=2) + ax.text( + cutoff_prerisk + pad, + 0.5 * ymax, + r"Pre-disease ($\geq${}$\times${}) - {} alleles".format( + cutoff_prerisk, motif, npredisease + ), + rotation=90, + color="k", + ha="center", + va="center", + ) + ax.axvline(x=cutoff_risk, color="r", lw=2) + + if locus == "AR": + npatients = sum(v for (k, v) in cnt.items() if k <= cutoff_risk) + ax.text( + cutoff_risk - pad, + 0.5 * ymax, + r"Disease ($\leq${}$\times${}) - {} alleles".format( + cutoff_risk, motif, npatients + ), + rotation=90, + color="r", + ha="center", + va="center", + ) + else: + ax.text( + cutoff_risk + pad, + 0.5 * ymax, + r"Disease 
def allelefreqall(args):
    """
    %prog allelefreqall HN_Platinum_Gold.20180525.tsv.report.txt

    Plot all 30 STR allele frequencies.
    """
    p = OptionParser(allelefreqall.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (reportfile,) = args
    treds, df = read_treds(reportfile)
    # Prepare pages of 6 distributions each (5 pages for 30 loci)
    treds = sorted(treds)
    count = 6
    pdfs = []
    # Step through the loci in strides of `count`. The previous
    # `range(len(treds) / count + 1)` handed a float to range() and raised
    # TypeError under Python 3.
    for page, start in enumerate(range(0, len(treds), count)):
        page_treds = treds[start : start + count]
        allelefreq(
            [
                ",".join(page_treds),
                "--usereport",
                reportfile,
                "--nopanels",
                "--figsize",
                "12x16",
            ]
        )
        # allelefreq() always writes allelefreq.pdf; rename it per page so the
        # next iteration does not overwrite it.
        outpdf = "allelefreq.{}.pdf".format(page)
        sh("mv allelefreq.pdf {}".format(outpdf))
        pdfs.append(outpdf)

    from jcvi.formats.pdf import cat

    pf = op.basename(reportfile).split(".")[0]
    finalpdf = pf + ".allelefreq.pdf"
    logger.debug("Merging pdfs into `{}`".format(finalpdf))
    cat(pdfs + ["-o", finalpdf, "--cleanup"])
def make_fasta(seq, fastafile, id):
    """
    Write `seq` to `fastafile` as a single FASTA record.

    The record uses `id` as its identifier and an empty description.
    """
    rec = SeqRecord(Seq(seq), description="", id=id)
    # Context manager: the handle is closed even if SeqIO.write raises
    with open(fastafile, "w") as fw:
        SeqIO.write([rec], fw, "fasta")
+ """ + p = OptionParser(simulate.__doc__) + p.add_argument( + "--method", choices=("wgsim", "eagle"), default="eagle", help="Read simulator" + ) + p.add_argument( + "--ref", + default="hg38", + choices=("hg38", "hg19"), + help="Reference genome version", + ) + p.add_argument("--tred", default="HD", help="TRED locus") + add_simulate_options(p) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + rundir, startunits, endunits = args + ref = opts.ref + ref_fasta = "/mnt/ref/{}.upper.fa".format(ref) + startunits, endunits = int(startunits), int(endunits) + basecwd = os.getcwd() + mkdir(rundir) + os.chdir(rundir) + cwd = os.getcwd() + + # TRED region (e.g. Huntington) + pad_left, pad_right = 1000, 10000 + repo = TREDsRepo(ref=ref) + tred = repo[opts.tred] + chr, start, end = tred.chr, tred.repeat_start, tred.repeat_end + + logger.debug("Simulating {}".format(tred)) + fasta = Fasta(ref_fasta) + seq_left = fasta[chr][start - pad_left : start - 1] + seq_right = fasta[chr][end : end + pad_right] + motif = tred.repeat + + simulate_method = wgsim if opts.method == "wgsim" else eagle + # Write fake sequence + for units in range(startunits, endunits + 1): + pf = str(units) + mkdir(pf) + os.chdir(pf) + seq = str(seq_left) + motif * units + str(seq_right) + fastafile = pf + ".fasta" + make_fasta(seq, fastafile, id=chr.upper()) + + # Simulate reads on it + simulate_method( + [ + fastafile, + "--depth={}".format(opts.depth), + "--readlen={}".format(opts.readlen), + "--distance={}".format(opts.distance), + "--outfile={}".format(pf), + ] + ) + + read1 = pf + ".bwa.read1.fastq" + read2 = pf + ".bwa.read2.fastq" + samfile, _ = align([ref_fasta, read1, read2]) + indexed_samfile = index([samfile]) + + sh("mv {} ../{}.bam".format(indexed_samfile, pf)) + sh("mv {}.bai ../{}.bam.bai".format(indexed_samfile, pf)) + + os.chdir(cwd) + cleanup(pf) + + os.chdir(basecwd) + + +def mergebam(args): + """ + %prog mergebam dir1 homo_outdir + or + %prog mergebam 
dir1 dir2/20.bam het_outdir + + Merge sets of BAMs to make diploid. Two modes: + - Homozygous mode: pair-up the bams in the two folders and merge + - Heterozygous mode: pair the bams in first folder with a particular bam + """ + p = OptionParser(mergebam.__doc__) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) not in (2, 3): + sys.exit(not p.print_help()) + + if len(args) == 2: + idir1, outdir = args + dir1 = [idir1] if idir1.endswith(".bam") else iglob(idir1, "*.bam") + logger.debug("Homozygous mode") + dir2 = [""] * len(dir1) + elif len(args) == 3: + idir1, idir2, outdir = args + dir1 = [idir1] if idir1.endswith(".bam") else iglob(idir1, "*.bam") + dir2 = [idir2] if idir2.endswith(".bam") else iglob(idir2, "*.bam") + assert len(dir2) == 1, "Second pile must contain a single bam" + dir2 = [idir2] * len(dir1) + + assert len(dir1) == len(dir2), "Two piles must contain same number of bams" + cmd = "samtools merge {} {} {} && samtools index {}" + cmds = [] + mkdir(outdir) + for a, b in zip(dir1, dir2): + ia = op.basename(a).split(".")[0] + ib = op.basename(b).split(".")[0] if b else ia + outfile = op.join(outdir, "{}_{}.bam".format(ia, ib)) + cmds.append(cmd.format(outfile, a, b, outfile)) + + p = Parallel(cmds, cpus=opts.cpus) + p.run() + + +def batchlobstr(args): + """ + %prog batchlobstr bamlist + + Run lobSTR on a list of BAMs. 
def compilevcf(args):
    """
    %prog compilevcf dir

    Compile vcf outputs into lists.
    """
    from jcvi.variation.str import LobSTRvcf

    p = OptionParser(compilevcf.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (folder,) = args
    vcf_files = iglob(folder, "*.vcf,*.vcf.gz")
    for vcf_file in vcf_files:
        try:
            # Separate name so the option parser `p` is not shadowed
            vcf = LobSTRvcf(columnidsfile=None)
            vcf.parse(vcf_file, filtered=False)
            # Materialize before indexing: a Python 3 dict view is not
            # subscriptable, so `items()[0]` raised TypeError, which the
            # handler below swallowed, silently skipping the file.
            # NOTE(review): assumes LobSTRvcf.items() is dict-like -- confirm.
            calls = list(vcf.items())
            if calls:
                _, v = calls[0]
                res = v.replace(",", "/")
            else:
                res = "-1/-1"  # no call found in this file
            # One output line per file: <basename prefix> <allele1>/<allele2>
            num = op.basename(vcf_file).split(".")[0]
            print(num, res)
        except (TypeError, AttributeError):
            # NOTE(review): the fallback object is built and then discarded,
            # so nothing is printed for this file. Kept as in the original;
            # confirm whether TREDPARSE output should be reported here.
            TREDPARSEvcf(vcf_file)
            continue
sep="\t", index_col=0) + dp = {} + tred = "HD" + for sk, row in xf.iterrows(): + sk = str(sk) + a1 = row[tred + ".1"] + a2 = row[tred + ".2"] + fdp = row[tred + ".FDP"] + pdp = row[tred + ".PDP"] + pedp = row[tred + ".PEDP"] + dp[sk] = (a1, a2, fdp, pdp, pedp) + + # Build a consolidated dataframe + ef = pd.DataFrame.from_dict(dp, orient="index") + ef.columns = [ + tred + ".1", + tred + ".2", + tred + ".FDP", + tred + ".PDP", + tred + ".PEDP", + ] + ef.index.name = "SampleKey" + mf = df.merge(ef, how="right", left_index=True, right_index=True) + + # Plot a bunch of figures + outdir = "output" + mkdir(outdir) + xlim = ylim = (0, 100) + draw_jointplot( + outdir + "/A", + "MeanCoverage", + "HD.FDP", + data=mf, + xlim=xlim, + ylim=ylim, + format=format, + ) + draw_jointplot( + outdir + "/B", + "MeanCoverage", + "HD.PDP", + data=mf, + color="g", + xlim=xlim, + ylim=ylim, + format=format, + ) + draw_jointplot( + outdir + "/C", + "MeanCoverage", + "HD.PEDP", + data=mf, + color="m", + xlim=xlim, + ylim=ylim, + format=format, + ) + + xlim = (0, 50) + draw_jointplot( + outdir + "/D", "HD.2", "HD.FDP", data=mf, xlim=xlim, ylim=ylim, format=format + ) + draw_jointplot( + outdir + "/E", + "HD.2", + "HD.PDP", + data=mf, + color="g", + xlim=xlim, + ylim=ylim, + format=format, + ) + draw_jointplot( + outdir + "/F", + "HD.2", + "HD.PEDP", + data=mf, + color="m", + xlim=xlim, + ylim=ylim, + format=format, + ) + + +def draw_jointplot( + figname, x, y, data=None, kind="reg", color=None, xlim=None, ylim=None, format="pdf" +): + """ + Wraps around sns.jointplot + """ + import seaborn as sns + + sns.set_context("talk") + plt.clf() + + register = { + "MeanCoverage": "Sample Mean Coverage", + "HD.FDP": "Depth of full spanning reads", + "HD.PDP": "Depth of partial spanning reads", + "HD.PEDP": "Depth of paired-end reads", + "HD.2": "Repeat size of the longer allele", + } + + g = sns.jointplot(x, y, data=data, kind=kind, color=color, xlim=xlim, ylim=ylim) + 
def long_allele(s, default=19, exclude=None):
    """
    Return the longer of the two alleles encoded in a genotype string.

    Args:
        s (str): Two integers joined by "_" or "/", e.g. "20_40" or "20/40".
        default (int): Value returned when the longer allele is negative.
        exclude (int): When truthy and present among the two alleles, one
            occurrence is dropped before taking the maximum.

    Returns:
        int: The longer allele, or `default` if that allele is negative.

    Raises:
        ValueError: If `s` contains neither "_" nor "/", or does not split
            into exactly two integers.

    >>> long_allele("20_40")
    40
    >>> long_allele("20/40", exclude=40)
    20
    >>> long_allele("-1/-1")
    19
    """
    if "_" in s:
        a, b = s.split("_")
    elif "/" in s:
        a, b = s.split("/")
    else:
        # ValueError is still an Exception, so existing handlers keep working
        raise ValueError("Don't know how to split string {}".format(s))

    res = [int(a), int(b)]
    if exclude and exclude in res:
        res.remove(exclude)
    res = max(res)
    return default if res < 0 else res


def get_lo_hi_from_CI(s, exclude=None):
    """
    Parse the confidence interval from CI.

    The two per-allele intervals are separated by "|"; "/" is accepted as
    well, since that is the form the original docstring example used.

    Args:
        s (str): Two "lo-hi" intervals, e.g. "20-20|40-60".
        exclude (int): When truthy, one occurrence is dropped from the lower
            bounds and from the upper bounds before taking the maxima.

    Returns:
        tuple: (largest lower bound, largest upper bound).

    >>> get_lo_hi_from_CI("20-20|40-60")
    (40, 60)
    >>> get_lo_hi_from_CI("20-20/40-60")
    (40, 60)
    """
    sep = "|" if "|" in s else "/"
    a, b = s.split(sep)
    ai, aj = a.split("-")
    bi, bj = b.split("-")

    los = [int(ai), int(bi)]
    his = [int(aj), int(bj)]
    if exclude and exclude in los:
        los.remove(exclude)
    if exclude and exclude in his:
        his.remove(exclude)
    return max(los), max(his)


def parse_results(datafile, exclude=None):
    """
    Read a whitespace-delimited results file of `truth call [CI]` rows.

    Args:
        datafile (str): Path to the results file.
        exclude (int): Allele size forwarded to `long_allele()` and
            `get_lo_hi_from_CI()`.

    Returns:
        list: One `(truth, call, lo, hi)` tuple per row that has exactly three
        columns, otherwise `(truth, call)`. `lo`/`hi` are widened when needed
        so that the interval always contains `call`.
    """
    data = []
    # Context manager so the handle is released even if a row fails to parse
    with open(datafile) as fp:
        for row in fp:
            atoms = row.split()
            if not atoms:  # tolerate blank (e.g. trailing) lines
                continue
            truth, call = atoms[:2]
            t = long_allele(truth, exclude=exclude)
            c = long_allele(call, exclude=exclude)
            if len(atoms) == 3:
                lo, hi = get_lo_hi_from_CI(atoms[2], exclude=exclude)
                # Make sure the interval brackets the called value
                lo = min(lo, c)
                hi = max(hi, c)
                data.append((t, c, lo, hi))
            else:
                data.append((t, c))
    return data
+ """ + p = OptionParser(compare.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="10x10") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (datafile,) = args + pf = datafile.rsplit(".", 1)[0] + fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots( + ncols=2, nrows=2, figsize=(iopts.w, iopts.h) + ) + plt.tight_layout(pad=3) + + bbox = {"facecolor": "tomato", "alpha": 0.2, "ec": "w"} + pad = 2 + + # Read benchmark data + df = pd.read_csv("Evaluation.csv") + truth = df["Truth"] + axes = (ax1, ax2, ax3, ax4) + progs = ("Manta", "Isaac", "GATK", "lobSTR") + markers = ("bx-", "yo-", "md-", "c+-") + + for ax, prog, marker in zip(axes, progs, markers): + ax.plot(truth, df[prog], marker) + ax.plot(truth, truth, "k--") # to show diagonal + ax.axhline(infected_thr, color="tomato") + ax.text( + max(truth) - pad, + infected_thr + pad, + "Risk threshold", + bbox=bbox, + ha="right", + ) + ax.axhline(ref_thr, color="tomato") + ax.text( + max(truth) - pad, + ref_thr - pad, + "Reference repeat count", + bbox=bbox, + ha="right", + va="top", + ) + ax.set_title(SIMULATED_HAPLOID) + ax.set_xlabel(r"Num of CAG repeats inserted ($\mathit{h}$)") + ax.set_ylabel("Num of CAG repeats called") + ax.legend([prog, "Truth"], loc="best") + + root = fig.add_axes([0, 0, 1, 1]) + pad = 0.03 + panel_labels( + root, + ( + (pad / 2, 1 - pad, "A"), + (1 / 2.0, 1 - pad, "B"), + (pad / 2, 1 / 2.0, "C"), + (1 / 2.0, 1 / 2.0, "D"), + ), + ) + normalize_axes(root) + + image_name = pf + "." 
+ iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def plot_compare( + ax, + title, + tredparse_results, + lobstr_results, + pad=8, + ms=3, + max_insert=300, + color="g", + risk=True, +): + truth = range(1, max_insert + 1) + tx, ty, tl, th = zip(*tredparse_results) + trmsd = compute_rmsd(truth, ty) + if lobstr_results: + lx, ly = zip(*lobstr_results) + lrmsd = compute_rmsd(truth, ly) + + rmsd_tag = "$RMSD_{1:150}$" + if lobstr_results: + ax.plot( + lx, ly, "c+-", ms=ms, label="lobSTR ({}={:.2f})".format(rmsd_tag, lrmsd) + ) + ax.plot( + tx, + ty, + ".-", + color=color, + ms=ms, + label="TREDPARSE ({}={:.2f})".format(rmsd_tag, trmsd), + ) + ax.plot(truth, truth, "k--", label="Truth") + ax.fill_between( + tx, tl, th, facecolor=color, alpha=0.25, label=r"TREDPARSE 95$\%$ CI" + ) + + ax.set_xlabel(r"Num of CAG repeats inserted ($\mathit{h}$)") + ax.set_ylabel("Num of CAG repeats called") + ax.set_title(title) + ax.legend(loc="best") + + bbox = {"facecolor": "tomato", "alpha": 0.2, "ec": "w"} + if risk: + ax.axhline(infected_thr, color="tomato") + ax.text( + max(truth) - pad, + infected_thr + pad, + "Risk cutoff={}".format(infected_thr) + r"$\times$CAGs", + bbox=bbox, + ha="right", + ) + else: + readlength, pairdistance = 150 / 3, 500 / 3 + ax.axhline(readlength, color="tomato") + ax.text( + max(truth) - pad, + readlength + pad, + "Read Length ($L$)", + bbox=bbox, + ha="right", + ) + ax.axhline(pairdistance, color="tomato") + ax.text( + max(truth) - pad, + pairdistance + pad, + "Paired-end distance($V$)", + bbox=bbox, + ha="right", + ) + + +def compare2(args): + """ + %prog compare2 + + Compare performances of various variant callers on simulated STR datasets. 
+ """ + p = OptionParser(compare2.__doc__) + p.add_argument( + "--maxinsert", default=300, type=int, help="Maximum number of repeats" + ) + add_simulate_options(p) + opts, args, iopts = p.set_image_options(args, figsize="10x5") + + if len(args) != 0: + sys.exit(not p.print_help()) + + depth = opts.depth + readlen = opts.readlen + distance = opts.distance + max_insert = opts.maxinsert + fig, (ax1, ax2) = plt.subplots(ncols=2, nrows=1, figsize=(iopts.w, iopts.h)) + plt.tight_layout(pad=2) + + # ax1: lobSTR vs TREDPARSE with haploid model + lobstr_results = parse_results("lobstr_results_homo.txt") + tredparse_results = parse_results("tredparse_results_homo.txt") + title = SIMULATED_HAPLOID + r" ($D=%s\times, L=%dbp, V=%dbp$)" % ( + depth, + readlen, + distance, + ) + plot_compare(ax1, title, tredparse_results, lobstr_results, max_insert=max_insert) + + # ax2: lobSTR vs TREDPARSE with diploid model + lobstr_results = parse_results("lobstr_results_het.txt", exclude=20) + tredparse_results = parse_results("tredparse_results_het.txt", exclude=20) + title = SIMULATED_DIPLOID + r" ($D=%s\times, L=%dbp, V=%dbp$)" % ( + depth, + readlen, + distance, + ) + plot_compare(ax2, title, tredparse_results, lobstr_results, max_insert=max_insert) + + for ax in (ax1, ax2): + ax.set_xlim(0, max_insert) + ax.set_ylim(0, max_insert) + + root = fig.add_axes([0, 0, 1, 1]) + pad = 0.03 + panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2.0, 1 - pad, "B"))) + normalize_axes(root) + + image_name = "tredparse." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def power(args): + """ + %prog power + + Compare performances of various variant callers on simulated STR datasets. + This compares the power of various evidence types. 
+ """ + p = OptionParser(power.__doc__) + p.add_argument( + "--maxinsert", default=300, type=int, help="Maximum number of repeats" + ) + add_simulate_options(p) + opts, args, iopts = p.set_image_options(args, figsize="10x10", format="png") + + if len(args) != 0: + sys.exit(not p.print_help()) + + max_insert = opts.maxinsert + fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots( + ncols=2, nrows=2, figsize=(iopts.w, iopts.h) + ) + plt.tight_layout(pad=3) + + color = "lightslategray" + # ax1: Spanning + tredparse_results = parse_results("tredparse_results_het-spanning.txt") + title = SIMULATED_DIPLOID + " (Sub-model 1: Spanning reads)" + plot_compare( + ax1, + title, + tredparse_results, + None, + color=color, + max_insert=max_insert, + risk=False, + ) + + # ax2: Partial + tredparse_results = parse_results("tredparse_results_het-partial.txt", exclude=20) + title = SIMULATED_DIPLOID + " (Sub-model 2: Partial reads)" + plot_compare( + ax2, + title, + tredparse_results, + None, + color=color, + max_insert=max_insert, + risk=False, + ) + + # ax3: Repeat + tredparse_results = parse_results("tredparse_results_het-repeat.txt", exclude=20) + # HACK (repeat reads won't work under 50) + tredparse_results = [x for x in tredparse_results if x[0] > 50] + title = SIMULATED_DIPLOID + " (Sub-model 3: Repeat-only reads)" + plot_compare( + ax3, + title, + tredparse_results, + None, + color=color, + max_insert=max_insert, + risk=False, + ) + + # ax4: Pair + tredparse_results = parse_results("tredparse_results_het-pair.txt", exclude=20) + title = SIMULATED_DIPLOID + " (Sub-model 4: Paired-end reads)" + plot_compare( + ax4, + title, + tredparse_results, + None, + color=color, + max_insert=max_insert, + risk=False, + ) + + for ax in (ax1, ax2, ax3, ax4): + ax.set_xlim(0, max_insert) + ax.set_ylim(0, max_insert) + + root = fig.add_axes([0, 0, 1, 1]) + pad = 0.03 + panel_labels( + root, + ( + (pad / 2, 1 - pad, "A"), + (1 / 2.0, 1 - pad, "B"), + (pad / 2, 1 / 2.0, "C"), + (1 / 2.0, 1 / 2.0, "D"), 
+ ), + ) + normalize_axes(root) + + image_name = "power." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def tredparse(args): + """ + %prog tredparse + + Compare performances of various variant callers on simulated STR datasets. + Adds coverage comparisons as panel C and D. + """ + p = OptionParser(tredparse.__doc__) + p.add_argument( + "--maxinsert", default=300, type=int, help="Maximum number of repeats" + ) + add_simulate_options(p) + opts, args, iopts = p.set_image_options(args, figsize="10x10") + + if len(args) != 0: + sys.exit(not p.print_help()) + + depth = opts.depth + max_insert = opts.maxinsert + fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots( + ncols=2, nrows=2, figsize=(iopts.w, iopts.h) + ) + plt.tight_layout(pad=3) + + # ax1: lobSTR vs TREDPARSE with haploid model + lobstr_results = parse_results("lobstr_results_homo-20x-150bp-500bp.txt") + tredparse_results = parse_results("tredparse_results_homo-20x-150bp-500bp.txt") + title = SIMULATED_HAPLOID + r" (Depth=$%s\times$)" % depth + plot_compare(ax1, title, tredparse_results, lobstr_results, max_insert=max_insert) + + # ax2: lobSTR vs TREDPARSE with diploid model (depth=20x) + lobstr_results = parse_results("lobstr_results_het-20x-150bp-500bp.txt", exclude=20) + tredparse_results = parse_results( + "tredparse_results_het-20x-150bp-500bp.txt", exclude=20 + ) + title = SIMULATED_DIPLOID + r" (Depth=$%s\times$)" % depth + plot_compare(ax2, title, tredparse_results, lobstr_results, max_insert=max_insert) + + # ax3: lobSTR vs TREDPARSE with diploid model (depth=5x) + lobstr_results = parse_results("lobstr_results_het-5x-150bp-500bp.txt", exclude=20) + tredparse_results = parse_results( + "tredparse_results_het-5x-150bp-500bp.txt", exclude=20 + ) + title = SIMULATED_DIPLOID + r" (Depth=$%s\times$)" % 5 + plot_compare(ax3, title, tredparse_results, lobstr_results, max_insert=max_insert) + + # ax4: lobSTR vs TREDPARSE with diploid model (depth=80x) + lobstr_results = 
parse_results("lobstr_results_het-80x-150bp-500bp.txt", exclude=20) + tredparse_results = parse_results( + "tredparse_results_het-80x-150bp-500bp.txt", exclude=20 + ) + title = SIMULATED_DIPLOID + r" (Depth=$%s\times$)" % 80 + plot_compare(ax4, title, tredparse_results, lobstr_results, max_insert=max_insert) + + for ax in (ax1, ax2, ax3, ax4): + ax.set_xlim(0, max_insert) + ax.set_ylim(0, max_insert) + + root = fig.add_axes([0, 0, 1, 1]) + pad = 0.03 + panel_labels( + root, + ( + (pad / 2, 1 - pad, "A"), + (1 / 2.0, 1 - pad, "B"), + (pad / 2, 1 / 2.0, "C"), + (1 / 2.0, 1 / 2.0, "D"), + ), + ) + normalize_axes(root) + + image_name = "tredparse." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +if __name__ == "__main__": + main() diff --git a/jcvi/projects/sugarcane.py b/jcvi/projects/sugarcane.py new file mode 100644 index 00000000..1fd63131 --- /dev/null +++ b/jcvi/projects/sugarcane.py @@ -0,0 +1,807 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# +# sugarcane.py +# projects +# +# Created by Haibao Tang on 12/02/19 +# Copyright © 2019 Haibao Tang. All rights reserved. +# +""" +Simulate sugarcane genomes and analyze the diversity in the progeny genomes. +""" + +import os.path as op +import sys + +from collections import Counter, defaultdict +from enum import Enum +from itertools import combinations, groupby, product +from random import random, sample +from typing import Dict, List + +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd + +from ..apps.base import ActionDispatcher, OptionParser, logger, mkdir +from ..formats.blast import Blast +from ..graphics.base import adjust_spines, markup, normalize_axes, savefig + +SoColor = "#7436a4" # Purple +SsColor = "#5a8340" # Green + + +class CrossMode(Enum): + """ + How the F1 is generated. 
# Computed using prepare(), corrected with real sizes
ChrSizes = {
    "SO-chr01": 148750011,
    "SO-chr02": 119865146,
    "SO-chr03": 103845728,
    "SO-chr04": 104559946,
    "SO-chr05": 93134056,
    "SO-chr06": 74422021,
    "SO-chr07": 81308893,
    "SO-chr08": 71010813,
    "SO-chr09": 86380266,
    "SO-chr10": 73923121,
    "SS-chr01": 114519418,
    "SS-chr02": 119157314,
    "SS-chr03": 85009228,
    "SS-chr04": 79762909,
    "SS-chr05": 90584537,
    "SS-chr06": 95848354,
    "SS-chr07": 83589369,
    "SS-chr08": 64028871,
}


# Simulate genome composition
class Genome:
    """A named collection of chromosome labels of the form
    `<prefix>-chrNN_<copy letter>`, with helpers to draw gametes and to cross
    two genomes under different transmission modes."""

    def __init__(
        self, name: str, prefix: str, ploidy: int, haploid_chromosome_count: int
    ):
        """
        Simulate a genome with given ploidy and haploid_chromosome_count. Example:

        >>> print(Genome("t", "pf", 2, 3))
        t: pf-chr01_a,pf-chr01_b,pf-chr02_a,pf-chr02_b,pf-chr03_a,pf-chr03_b
        """
        self.name = name
        chromosomes = []
        for i in range(haploid_chromosome_count):
            # One label per homologous copy: _a, _b, _c, ...
            chromosomes += [
                f"{prefix}-chr{i + 1:02d}_{chr(ord('a') + j)}" for j in range(ploidy)
            ]
        self.chromosomes = chromosomes

    def __len__(self):
        return len(self.chromosomes)

    @classmethod
    def make(cls, name: str, chromosomes: List[str]):
        """Build a genome directly from a list of chromosome labels.

        The list is stored as-is (not copied)."""
        genome = Genome(name, "", 0, 0)
        genome.chromosomes = chromosomes
        return genome

    @property
    def gamete(self):
        """Randomly generate a gamete from the current genome.

        Note that `self.chromosomes` is sorted in place as a side effect.
        """
        self.chromosomes.sort()
        gamete_chromosomes = []

        # Check for any chromosome that have 2 identical copies, if so, we will
        # assume disomic inheritance for that chromosome and always keep one
        # and only copy
        duplicate_chromosomes = []
        singleton_chromosomes = []
        for chromosome, chromosomes in groupby(self.chromosomes):
            chromosomes = list(chromosomes)
            ncopies = len(chromosomes)
            duplicate_chromosomes += [chromosome] * (ncopies // 2)
            if ncopies % 2 == 1:
                singleton_chromosomes.append(chromosome)

        # Get one copy of each duplicate chromosome first
        gamete_chromosomes += duplicate_chromosomes

        def prefix(x):
            # "SO-chr01_a" -> "SO-chr01": copies of the same chromosome
            return x.split("_", 1)[0]

        # Randomly assign the rest, singleton chromosomes
        for _, chromosomes in groupby(singleton_chromosomes, key=prefix):
            chromosomes = list(chromosomes)
            halfn = len(chromosomes) // 2
            # Odd number, e.g. 5, equal chance to be 2 or 3
            if len(chromosomes) % 2 != 0 and random() < 0.5:
                halfn += 1
            gamete_chromosomes += sorted(sample(chromosomes, halfn))
        return Genome.make(self.name + " gamete", gamete_chromosomes)

    def mate_nplusn(self, name: str, other_genome: "Genome", verbose: bool = True):
        """Cross one gamete of this genome with one gamete of `other_genome`."""
        if verbose:
            print(
                f"Crossing '{self.name}' x '{other_genome.name}' (n+n)", file=sys.stderr
            )
        f1_chromosomes = sorted(
            self.gamete.chromosomes + other_genome.gamete.chromosomes
        )
        return Genome.make(name, f1_chromosomes)

    def mate_nx2plusn(self, name: str, other_genome: "Genome", verbose: bool = True):
        """Cross a doubled gamete of this genome with a gamete of `other_genome`."""
        if verbose:
            print(
                f"Crossing '{self.name}' x '{other_genome.name}' (2xn+n)",
                file=sys.stderr,
            )
        f1_chromosomes = sorted(
            2 * self.gamete.chromosomes + other_genome.gamete.chromosomes
        )
        return Genome.make(name, f1_chromosomes)

    def mate_2nplusn_FDR(self, name: str, other_genome: "Genome", verbose: bool = True):
        """Cross this genome's full chromosome set with a gamete of `other_genome`."""
        if verbose:
            print(
                f"Crossing '{self.name}' x '{other_genome.name}' (2n+n_FDR)",
                file=sys.stderr,
            )
        f1_chromosomes = sorted(self.chromosomes + other_genome.gamete.chromosomes)
        return Genome.make(name, f1_chromosomes)

    def mate_2nplusn_SDR(self, name: str, other_genome: "Genome", verbose: bool = True):
        """Not implemented; always raises NotImplementedError."""
        if verbose:
            print(
                f"Crossing '{self.name}' x '{other_genome.name}' (2n+n_SDR)",
                file=sys.stderr,
            )
        raise NotImplementedError("2n+n_SDR not yet supported")

    def __str__(self):
        return self.name + ": " + ",".join(self.chromosomes)

    @property
    def summary(self):
        """Per-group chromosome statistics.

        Returns a list of
        `(group, count, unique_count, unique_SO_size, unique_size)` tuples, one
        per run of consecutive chromosomes sharing the text before the first
        "-", followed by a final row whose group is "Total". Sizes are looked
        up in `ChrSizes` and summed over unique chromosome labels only.
        """

        def prefix(x, sep="-"):
            return x.split(sep, 1)[0]

        def size(chromosomes):
            return sum(ChrSizes[prefix(x, sep="_")] for x in chromosomes)

        # Chromosome count
        total_count = 0
        total_unique = 0
        total_size = 0
        total_so_size = 0
        ans = []
        for group, chromosomes in groupby(self.chromosomes, prefix):
            chromosomes = list(chromosomes)
            uniq_chromosomes = set(chromosomes)
            group_count = len(chromosomes)
            group_unique = len(uniq_chromosomes)
            group_so_size = size({x for x in uniq_chromosomes if x[:2] == "SO"})
            group_size = size(uniq_chromosomes)
            total_count += group_count
            total_unique += group_unique
            total_so_size += group_so_size
            total_size += group_size
            ans.append((group, group_count, group_unique, group_so_size, group_size))
        ans.append(("Total", total_count, total_unique, total_so_size, total_size))
        return ans

    def print_summary(self):
        """Print the count and unique count of every row in `summary`."""
        print("[SUMMARY]")
        # Rows are 5-tuples; the two size fields are not printed
        for group, group_count, group_unique, _, _ in self.summary:
            print(f"{group}: count={group_count}, unique={group_unique}")
def simulate_F1(SO: Genome, SS: Genome, mode: CrossMode, verbose: bool = False):
    """Simulate one SO x SS F1 under the given transmission mode.

    Args:
        SO (Genome): Parent whose mating method is invoked.
        SS (Genome): Parent passed as the other genome.
        mode (CrossMode): One of `nx2plusn`, `twoplusnFDR` or `twoplusnSDR`.
        verbose (bool): Forwarded to the mating method; when set, the F1
            summary is printed as well.

    Returns:
        Genome: The simulated F1, named "SOxSS F1".

    Raises:
        ValueError: If `mode` is not one of the three modes handled here
            (previously this surfaced as an unassigned-local error).
    """
    if mode == CrossMode.nx2plusn:
        SO_SS_F1 = SO.mate_nx2plusn("SOxSS F1", SS, verbose=verbose)
    elif mode == CrossMode.twoplusnFDR:
        SO_SS_F1 = SO.mate_2nplusn_FDR("SOxSS F1", SS, verbose=verbose)
    elif mode == CrossMode.twoplusnSDR:
        SO_SS_F1 = SO.mate_2nplusn_SDR("SOxSS F1", SS, verbose=verbose)
    else:
        # e.g. CrossMode.nplusn, which has no F1 branch here
        raise ValueError(f"Unsupported cross mode for F1 simulation: {mode}")
    if verbose:
        SO_SS_F1.print_summary()
    return SO_SS_F1
def plot_summary(ax, samples: list[Genome]) -> GenomeSummary:
    """Plot the distribution of chromosome numbers given simulated samples.

    For every sample the number of unique "SO" and "SS" chromosomes is taken
    from `Genome.summary` (0 when the group is absent) together with the SO
    share of the total size. The two count distributions are drawn as bar
    charts on a reversed x-axis running from 80 to 0, and the summary
    statistics are written inside the axes.

    Args:
        ax (Axes): Matplotlib axes.
        samples (list[Genome]): Summarized genomes.

    Returns:
        GenomeSummary: Summary statistics of simulated genomes.
    """

    def unique_count(summary, tag):
        # First summary row for this group, or 0 when the group is absent
        return next((row[2] for row in summary if row[0] == tag), 0)

    SO_data = []
    SS_data = []
    percent_SO_data = []
    for s in samples:
        summary = s.summary
        SO_data.append(unique_count(summary, "SO"))
        SS_data.append(unique_count(summary, "SS"))
        total_tag, _, _, total_so_size, total_size = summary[-1]
        assert total_tag == "Total"
        percent_SO = total_so_size * 100.0 / total_size
        percent_SO_data.append(percent_SO)
    # Avoid overlapping bars
    SS_counter, SO_counter = Counter(SS_data), Counter(SO_data)
    overlaps = SS_counter.keys() & SO_counter.keys()
    shift = 0.5  # used to offset bars a bit to avoid cluttering
    if overlaps:
        for overlap in overlaps:
            logger.debug("Modify bar offsets at %s due to SS and SO overlaps", overlap)
            SS_counter[overlap - shift] = SS_counter[overlap]
            del SS_counter[overlap]
            SO_counter[overlap + shift] = SO_counter[overlap]
            del SO_counter[overlap]

    def modify_range_end(d: dict, value: int):
        if value not in d:
            return
        # Has data at the range end, but no adjacent data points (i.e. isolated bar)
        if value in d and (value - 1 in d or value + 1 in d):
            return
        logger.debug("Modify bar offsets at %d due to end of range ends", value)
        # Nudge the bar inwards: 0 -> 0.5, 80 -> 79.5. Carry over the count
        # stored at `value` itself (was hard-coded to the count at 80).
        d[value - shift if value else value + shift] = d[value]
        del d[value]

    modify_range_end(SS_counter, 0)
    modify_range_end(SS_counter, 80)
    modify_range_end(SO_counter, 0)
    modify_range_end(SO_counter, 80)

    x, y = zip(*sorted(SS_counter.items()))
    ax.bar(np.array(x), y, color=SsColor, ec=SsColor)
    x, y = zip(*sorted(SO_counter.items()))
    ax.bar(np.array(x), y, color=SoColor, ec=SoColor)
    ax.set_xlim(80, 0)
    ax.set_ylim(0, len(samples) / 2)
    ax.set_yticks([])
    summary = GenomeSummary(SO_data, SS_data, percent_SO_data)

    # Write the stats summary within the plot
    summary_style = dict(
        size=9,
        ha="center",
        va="center",
        transform=ax.transAxes,
    )
    ax.text(0.75, 0.85, markup(summary.SS_summary), color=SsColor, **summary_style)
    ax.text(
        0.75, 0.65, markup(summary.percent_SS_summary), color=SsColor, **summary_style
    )
    ax.text(0.25, 0.85, markup(summary.SO_summary), color=SoColor, **summary_style)
    ax.text(
        0.25, 0.65, markup(summary.percent_SO_summary), color=SoColor, **summary_style
    )

    return summary
+ """ + print(f"Write SO percent to `{filename}`", file=sys.stderr) + with open(filename, "w", encoding="utf-8") as fw: + print("\n".join(str(x) for x in sorted(summary.percent_SO_data)), file=fw) + + +def simulate(args): + """ + %prog simulate [2n+n_FDR|2n+n_SDR|nx2+n] + + Run simulation on female restitution. There are two modes: + - 2n+n_FDR: merger between a somatic and a germline + - 2n+n_SDR: merger between a recombined germline and a germline (not yet supported) + - nx2+n: merger between a doubled germline and a germline + + These two modes would impact the sequence diversity in the progeny + genome in F1, F2, BCn ... the goal of this simulation, is thus to + understand the mode and the spread of such diversity in the hybrid + progenies. + """ + sns.set_style("darkgrid") + + p = OptionParser(simulate.__doc__) + p.add_argument( + "--verbose", + default=False, + action="store_true", + help="Verbose logging during simulation", + ) + p.add_argument("-N", default=10000, type=int, help="Number of simulated samples") + opts, args, iopts = p.set_image_options(args, figsize="6x6") + if len(args) != 1: + sys.exit(not p.print_help()) + + (mode,) = args + mode = CrossMode(mode) + logger.info("Transmission: %s", mode) + + # Construct a composite figure with 6 tracks + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + rows = 6 + ypad = 0.05 + yinterval = (1 - 2 * ypad) / (rows + 1) + yy = 1 - ypad + xpad = 0.2 + xwidth = 0.7 + + # Axes are vertically stacked, and share x-axis + axes = [] + yy_positions = [] # Save yy positions so we can show details to the right laterr + for idx in range(rows): + yy_positions.append(yy) + yy -= yinterval + ax = fig.add_axes([xpad, yy, xwidth, yinterval * 0.85]) + if idx != rows - 1: + plt.setp(ax.get_xticklabels(), visible=False) + axes.append(ax) + ax1, ax2, ax3, ax4, ax5, ax6 = axes + + # Prepare the simulated data + # Simulate two parents + SS = Genome("SS", "SS", 10, 8) + SO = Genome("SO", "SO", 8, 10) + + 
verbose = opts.verbose + N = opts.N + all_F1s = [simulate_F1(SO, SS, mode=mode, verbose=verbose) for _ in range(N)] + all_F2s = [simulate_F2(SO, SS, mode=mode, verbose=verbose) for _ in range(N)] + all_BC1s = [simulate_BCn(1, SO, SS, mode=mode, verbose=verbose) for _ in range(N)] + all_BC2s = [simulate_BCn(2, SO, SS, mode=mode, verbose=verbose) for _ in range(N)] + all_BC3s = [simulate_BCn(3, SO, SS, mode=mode, verbose=verbose) for _ in range(N)] + all_BC4s = [simulate_BCn(4, SO, SS, mode=mode, verbose=verbose) for _ in range(N)] + + # Plotting + all_F1s_summary = plot_summary(ax1, all_F1s) + all_F2s_summary = plot_summary(ax2, all_F2s) + plot_summary(ax3, all_BC1s) + plot_summary(ax4, all_BC2s) + plot_summary(ax5, all_BC3s) + plot_summary(ax6, all_BC4s) + + # Show title to the left + xx = xpad / 2 + for (title, subtitle), yy in zip( + ( + (r"$\mathrm{F_1}$", None), + (r"$\mathrm{F_2}$", None), + (r"$\mathrm{BC_1}$", None), + (r"$\mathrm{BC_2}$", None), + (r"$\mathrm{BC_3}$", None), + (r"$\mathrm{BC_4}$", None), + ), + yy_positions, + ): + if subtitle: + yy -= 0.06 + else: + yy -= 0.07 + root.text( + xx, + yy, + title, + color="darkslategray", + ha="center", + va="center", + fontweight="semibold", + ) + if subtitle: + yy -= 0.02 + root.text( + xx, yy, subtitle, color="lightslategray", ha="center", va="center" + ) + + axes[-1].set_xlabel("Number of unique chromosomes") + adjust_spines(axes[-1], ["bottom"], outward=True) + normalize_axes(root) + + # Title + if mode == CrossMode.nx2plusn: + mode_title = r"$n_1\times2 + n_2$" + elif mode == CrossMode.twoplusnFDR: + mode_title = r"$2n + n$ (FDR)" + elif mode == CrossMode.twoplusnSDR: + mode_title = r"$2n + n$ (SDR)" + root.text(0.5, 0.95, f"Transmission: {mode_title}", ha="center") + + savefig(f"{mode}.pdf", dpi=120) + + outdir = f"simulations_{mode}" + mkdir(outdir) + # Write chromosomes to disk + for genomes, filename in ( + (all_F1s, "all_F1s"), + (all_F2s, "all_F2s"), + (all_BC1s, "all_BC1s"), + (all_BC2s, 
"all_BC2s"), + (all_BC3s, "all_BC3s"), + (all_BC4s, "all_BC4s"), + ): + write_chromosomes(genomes, op.join(outdir, filename)) + + # Write the SO percent in simulated samples so that we can compute P-value + for summary, SO_percent_filename in ( + (all_F1s_summary, "all_F1s_SO_percent"), + (all_F2s_summary, "all_F2s_SO_percent"), + ): + write_SO_percent(summary, op.join(outdir, SO_percent_filename)) + + +def _get_sizes(filename, prefix_length, tag, target_size=None): + """Returns a dictionary of chromome lengths from a given file. + + Args: + filename ([str]): Path to the input file. Input file is 2-column file + with rows `seqid length`. + prefix_length (int): Extract first N characters. + tag (str): Prepend `tag-` to the seqid. + target_size (int): Expected genome size. Defaults to None. + """ + sizes_list = defaultdict(list) + with open(filename, encoding="utf-8") as fp: + for row in fp: + if not row.startswith("Chr"): + continue + name, size = row.split() + idx = int(name[3:prefix_length]) + size = int(size) + name = f"{tag}-chr{idx:02d}" + sizes_list[name].append(size) + + # Get the average length + sizes = dict( + (name, int(round(np.mean(size_list)))) for name, size_list in sizes_list.items() + ) + print(sizes) + if target_size is None: + return sizes + + total_size = sum(sizes.values()) + correction_factor = target_size / total_size + print( + f"{tag} total:{total_size} target:{target_size} correction:{correction_factor:.2f}x" + ) + return dict( + (name, int(round(correction_factor * size))) for name, size in sizes.items() + ) + + +def prepare(args): + """ + %prog SoChrLen.txt SsChrLen.txt + + Calculate lengths from real sugarcane data. 
+ """ + p = OptionParser(prepare.__doc__) + _, args = p.parse_args(args) + if len(args) != 2: + sys.exit(not p.print_help()) + + solist, sslist = args + # The haploid set of LA Purple is 957.2 Mb and haploid set of US56-14-4 is 732.5 Mb + sizes = _get_sizes(solist, 5, "SO", target_size=int(957.2 * 1e6)) + sizes.update(_get_sizes(sslist, 4, "SS", target_size=int(732.5 * 1e6))) + print(sizes) + + +def get_genome_wide_pct(summary: str) -> Dict[tuple, list]: + """Collect genome-wide ungapped percent identity. + Specifically, from file `SS_SR_SO.summary.txt`. + + Args: + summary (str): File that contains per chromosome pct identity info, + collected via `formats.blast.summary()`. + + Returns: + Dict[tuple, list]: Genome pair to list of pct identities. + """ + COLUMNS = "filename, identicals, qry_gapless, qry_gapless_pct, ref_gapless, ref_gapless_pct, qryspan, pct_qryspan, refspan, pct_refspan".split( + ", " + ) + df = pd.read_csv(summary, sep="\t", names=COLUMNS) + data_by_genomes = defaultdict(list) + for _, row in df.iterrows(): + filename = row["filename"] + # e.g. SO_Chr01A.SO_Chr01B.1-1.blast + chr1, chr2 = filename.split(".")[:2] + genome1, chr1 = chr1.split("_") + genome2, chr2 = chr2.split("_") + chr1, chr2 = chr1[:5], chr2[:5] + if ( # Special casing for SS certain chromosomes that are non-collinear with SO/SR + genome1 != "SS" + and genome2 == "SS" + and chr2 not in ("Chr01", "Chr03", "Chr04") + ): + continue + qry_gapless_pct, ref_gapless_pct = ( + row["qry_gapless_pct"], + row["ref_gapless_pct"], + ) + data_by_genomes[(genome1, genome2)] += [qry_gapless_pct, ref_gapless_pct] + return data_by_genomes + + +def get_anchors_pct(filename: str, min_pct: int = 94) -> list: + """Collect CDS-wide ungapped percent identity. + + Args: + filename (str): Input file name, which is a LAST file. + + Returns: + list: List of pct identities from this LAST file. 
+ """ + blast = Blast(filename) + pct = [] + for c in blast: + if c.pctid < min_pct: + continue + identicals = c.hitlen - c.nmismatch - c.ngaps + qstart, qstop = c.qstart, c.qstop + sstart, sstop = c.sstart, c.sstop + qrycovered = qstop - qstart + 1 + refcovered = sstop - sstart + 1 + pct.append(identicals * 100 / qrycovered) + pct.append(identicals * 100 / refcovered) + return pct + + +def divergence(args): + """ + %prog divergence SS_SR_SO.summary.txt + + Plot divergence between and within SS/SR/SO genomes. + """ + sns.set_style("white") + + p = OptionParser(divergence.__doc__) + p.add_argument("--title", default="Gapless", help="Plot title") + p.add_argument( + "--xmin", + default=94, + type=int, + help="Minimum percent identity in the histogram", + ) + opts, args, iopts = p.set_image_options(args, figsize="8x8") + if len(args) != 1: + sys.exit(not p.print_help()) + + (summary,) = args + data_by_genomes = get_genome_wide_pct(summary) + # Print summary statistics + print("Genome-wide ungapped percent identity:") + for (genome1, genome2), pct in sorted(data_by_genomes.items()): + print(genome1, genome2, np.mean(pct), np.std(pct)) + + # Plotting genome-wide divergence + fig = plt.figure(figsize=(iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + SPECIES_CONFIG = { + "SS": {"label": "S. spontaneum", "pos": (0.5, 0.67)}, + "SR": {"label": "S. robustum", "pos": (0.2, 0.3)}, + "SO": {"label": "S. 
officinarum", "pos": (0.8, 0.3)}, + } + # Get median for each genome pair + medians = {} + for g1, g2 in product(SPECIES_CONFIG.keys(), repeat=2): + g1, g2 = sorted((g1, g2)) + d = data_by_genomes[(g1, g2)] + medians[(g1, g2)] = np.median(d) + for g, config in SPECIES_CONFIG.items(): + x, y = config["pos"] + text = f'*{config["label"]}*' + f"\n{medians[(g, g)]:.1f} %" + text = markup(text) + root.text(x, y, text, color="darkslategray", ha="center", va="center") + + # Connect lines + PAD, YPAD = 0.09, 0.045 + for g1, g2 in combinations(SPECIES_CONFIG.keys(), 2): + g1, g2 = sorted((g1, g2)) + x1, y1 = SPECIES_CONFIG[g1]["pos"] + x2, y2 = SPECIES_CONFIG[g2]["pos"] + x1, x2 = (x1 + PAD, x2 - PAD) if x1 < x2 else (x1 - PAD, x2 + PAD) + if y1 != y2: + y1, y2 = (y1 + YPAD, y2 - YPAD) if y1 < y2 else (y1 - YPAD, y2 + YPAD) + xmid, ymid = (x1 + x2) / 2, (y1 + y2) / 2 + text = f"{medians[(g1, g2)]:.1f} %" + text = markup(text) + root.text(xmid, ymid, text, ha="center", va="center", backgroundcolor="w") + root.plot([x1, x2], [y1, y2], "-", lw=4, color="darkslategray") + + # Pct identity histograms + PCT_CONFIG = { + ("SS", "SS"): {"pos": (0.5, 0.82)}, + ("SR", "SR"): {"pos": (0.1, 0.2)}, + ("SO", "SO"): {"pos": (0.9, 0.2)}, + ("SR", "SS"): {"pos": (0.3 - PAD, 0.55)}, + ("SO", "SS"): {"pos": (0.7 + PAD, 0.55)}, + ("SO", "SR"): {"pos": (0.5, 0.18)}, + } + HIST_WIDTH = 0.15 + xmin = opts.xmin + for genome_pair, config in PCT_CONFIG.items(): + x, y = config["pos"] + ax = fig.add_axes( + [x - HIST_WIDTH / 2, y - HIST_WIDTH / 2, HIST_WIDTH, HIST_WIDTH] + ) + d = data_by_genomes[genome_pair] + binwidth = (100 - xmin) / 20 + sns.histplot(d, ax=ax, binwidth=binwidth, kde=False) + ax.set_xlim(xmin, 100) + ax.get_yaxis().set_visible(False) + ax.set_xticks([xmin, 100]) + adjust_spines(ax, ["bottom"], outward=True) + ax.spines["bottom"].set_color("lightslategray") + + title = opts.title + italic_title = markup(f"*{title}*") + root.text( + 0.5, + 0.95, + f"{italic_title} identities between 
and within SS/SR/SO genomes", + size=14, + ha="center", + va="center", + ) + normalize_axes(root) + image_name = f"SO_SR_SS.{title}." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def main(): + + actions = ( + ("prepare", "Calculate lengths from real sugarcane data"), + ("simulate", "Run simulation on female restitution"), + # Plotting scripts to illustrate divergence between and within genomes + ("divergence", "Plot divergence between and within SS/SR/SO genomes"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +if __name__ == "__main__": + main() diff --git a/jcvi/projects/synfind.py b/jcvi/projects/synfind.py new file mode 100644 index 00000000..57d76646 --- /dev/null +++ b/jcvi/projects/synfind.py @@ -0,0 +1,860 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +SynFind analyses and visualization. +""" +import os.path as op +import sys + +from collections import defaultdict +from copy import deepcopy +from itertools import groupby + +from ..apps.base import ActionDispatcher, OptionParser, logger, mkdir, symlink +from ..apps.grid import MakeManager +from ..formats.base import get_number, must_open +from ..formats.bed import Bed +from ..formats.blast import BlastLine +from ..formats.gff import Gff, load +from ..graphics.base import ( + FancyArrow, + plt, + savefig, + panel_labels, + markup, + normalize_axes, + latex, +) +from ..graphics.glyph import CartoonRegion, RoundRect +from ..utils.cbook import SummaryStats, gene_name, percentage +from ..utils.grouper import Grouper + + +def main(): + + actions = ( + ("cartoon", "generate cartoon illustration of SynFind"), + ("ecoli", "gene presence absence analysis in ecoli"), + ("grass", "validate SynFind pan-grass set against James"), + ("coge", "prepare coge datasets"), + # For benchmarking + ("synfind", "prepare input for SynFind"), + ("iadhore", "prepare input for iADHoRe"), + ("mcscanx", "prepare input for MCScanX"), + ("cyntenator", "prepare input for Cyntenator"), + 
("athalianatruth", "prepare truth pairs for At alpha/beta/gamma"), + ("yeasttruth", "prepare truth pairs for 14 yeasts"), + ("grasstruth", "prepare truth pairs for 4 grasses"), + ("benchmark", "compare SynFind, MCScanX, iADHoRe and OrthoFinder"), + ("venn", "display benchmark results as Venn diagram"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def grasstruth(args): + """ + %prog grasstruth james-pan-grass.txt + + Prepare truth pairs for 4 grasses. + """ + p = OptionParser(grasstruth.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (james,) = args + fp = open(james) + pairs = set() + for row in fp: + atoms = row.split() + genes = [] + idx = {} + for i, a in enumerate(atoms): + aa = a.split("||") + for ma in aa: + idx[ma] = i + genes.extend(aa) + genes = [x for x in genes if ":" not in x] + Os = [x for x in genes if x.startswith("Os")] + for o in Os: + for g in genes: + if idx[o] == idx[g]: + continue + pairs.add(tuple(sorted((o, g)))) + + for a, b in sorted(pairs): + print("\t".join((a, b))) + + +def synfind(args): + """ + %prog synfind all.last *.bed + + Prepare input for SynFind. 
+ """ + p = OptionParser(synfind.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + lastfile = args[0] + bedfiles = args[1:] + fp = open(lastfile) + filteredlast = lastfile + ".filtered" + fw = open(filteredlast, "w") + for row in fp: + b = BlastLine(row) + if b.query == b.subject: + continue + print(b, file=fw) + fw.close() + logger.debug("Filtered LAST file written to `{0}`".format(filteredlast)) + + allbed = "all.bed" + fw = open(allbed, "w") + for i, bedfile in enumerate(bedfiles): + prefix = chr(ord("A") + i) + bed = Bed(bedfile) + for b in bed: + b.seqid = prefix + b.seqid + print(b, file=fw) + fw.close() + logger.debug("Bed file written to `{0}`".format(allbed)) + + +def yeasttruth(args): + """ + %prog yeasttruth Pillars.tab *.gff + + Prepare pairs data for 14 yeasts. + """ + p = OptionParser(yeasttruth.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + pillars = args[0] + gffiles = args[1:] + aliases = {} + pivot = {} + for gffile in gffiles: + is_pivot = op.basename(gffile).startswith("Saccharomyces_cerevisiae") + gff = Gff(gffile) + for g in gff: + if g.type != "gene": + continue + for a in g.attributes["Alias"]: + aliases[a] = g.accn + if is_pivot: + pivot[a] = g.accn + logger.debug("Aliases imported: {0}".format(len(aliases))) + logger.debug("Pivot imported: {0}".format(len(pivot))) + fw = open("yeast.aliases", "w") + for k, v in sorted(aliases.items()): + print("\t".join((k, v)), file=fw) + fw.close() + + fp = open(pillars) + pairs = set() + fw = must_open(opts.outfile, "w") + for row in fp: + atoms = [x for x in row.split() if x != "---"] + pps = [pivot[x] for x in atoms if x in pivot] + atoms = [aliases[x] for x in atoms if x in aliases] + for p in pps: + for a in atoms: + if p == a: + continue + pairs.add(tuple(sorted((p, a)))) + + for a, b in sorted(pairs): + print("\t".join((a, b)), file=fw) + fw.close() + + +def venn(args): + """ + 
%prog venn *.benchmark + + Display benchmark results as Venn diagram. + """ + from matplotlib_venn import venn2 + + p = OptionParser(venn.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="9x9") + + if len(args) < 1: + sys.exit(not p.print_help()) + + bcs = args + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + pad = 0.02 + ystart = 1 + ywidth = 1.0 / len(bcs) + tags = ("Bowers", "YGOB", "Schnable") + for bc, tag in zip(bcs, tags): + fp = open(bc) + data = [] + for row in fp: + prog, pcounts, tcounts, shared = row.split() + pcounts = int(pcounts) + tcounts = int(tcounts) + shared = int(shared) + data.append((prog, pcounts, tcounts, shared)) + xstart = 0 + xwidth = 1.0 / len(data) + for prog, pcounts, tcounts, shared in data: + a, b, c = pcounts - shared, tcounts - shared, shared + ax = fig.add_axes( + [ + xstart + pad, + ystart - ywidth + pad, + xwidth - 2 * pad, + ywidth - 2 * pad, + ] + ) + venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax) + message = "Sn={0} Pu={1}".format( + percentage(shared, tcounts, precision=0, mode=-1), + percentage(shared, pcounts, precision=0, mode=-1), + ) + print(message, file=sys.stderr) + ax.text( + 0.5, + 0.92, + latex(message), + ha="center", + va="center", + transform=ax.transAxes, + color="b", + ) + ax.set_axis_off() + xstart += xwidth + ystart -= ywidth + + panel_labels( + root, + ((0.04, 0.96, "A"), (0.04, 0.96 - ywidth, "B"), (0.04, 0.96 - 2 * ywidth, "C")), + ) + panel_labels( + root, + ( + (0.5, 0.98, "A. thaliana duplicates"), + (0.5, 0.98 - ywidth, "14 Yeast genomes"), + (0.5, 0.98 - 2 * ywidth, "4 Grass genomes"), + ), + ) + normalize_axes(root) + savefig("venn.pdf", dpi=opts.dpi) + + +def coge(args): + """ + %prog coge *.gff + + Prepare coge datasets. 
+ """ + p = OptionParser(coge.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + gffs = args + for gff in gffs: + atoms = op.basename(gff).split(".") + gid = atoms[-2] + assert gid.startswith("gid") + gid = get_number(gid) + genomefasta = "genome_{0}.faa.fasta".format(gid) + species = "_".join(atoms[0].split("_")[:2]) + cdsfasta = species + ".cds.fasta" + load( + [ + gff, + genomefasta, + "--id_attribute=Parent", + "--outfile={0}".format(cdsfasta), + ] + ) + + +def calc_sensitivity_specificity(a, truth, tag, fw): + common = a & truth + sensitivity = len(common) * 100.0 / len(truth) + specificity = len(common) * 100.0 / len(a) + logger.debug("{0}: {1} pairs".format(tag, len(a))) + logger.debug( + "{0}: Sensitivity={1:.1f}% Purity={2:.1f}%".format( + tag, sensitivity, specificity + ) + ) + print(tag, len(a), len(truth), len(common), file=fw) + + +def write_pairs(pairs, pairsfile): + fz = open(pairsfile, "w") + for a, b in pairs: + print("\t".join((a, b)), file=fz) + fz.close() + + +def benchmark(args): + """ + %prog benchmark at bedfile + + Compare SynFind, MCScanx, iADHoRe and OrthoFinder against the truth. 
+ """ + p = OptionParser(benchmark.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + pf, bedfile = args + truth = pf + ".truth" + synfind = pf + ".synfind" + mcscanx = pf + ".mcscanx" + iadhore = pf + ".iadhore" + orthofinder = pf + ".orthofinder" + pivots = set([x.accn for x in Bed(bedfile)]) + + fp = open(truth) + truth = set() + for row in fp: + a, b = row.strip().split("\t")[:2] + pivots.add(a) + truth.add(tuple(sorted((a, b)))) + logger.debug("Truth: {0} pairs".format(len(truth))) + + fp = open(synfind) + benchmarkfile = pf + ".benchmark" + fw = must_open(benchmarkfile, "w") + synfind = set() + for row in fp: + atoms = row.strip().split("\t") + query, hit, tag = atoms[:3] + if tag != "S": + continue + synfind.add(tuple(sorted((query, hit)))) + calc_sensitivity_specificity(synfind, truth, "SynFind", fw) + + fp = open(mcscanx) + mcscanx = set() + for row in fp: + if row[0] == "#": + continue + atoms = row.strip().split(":")[1].split() + query, hit = atoms[:2] + mcscanx.add(tuple(sorted((query, hit)))) + calc_sensitivity_specificity(mcscanx, truth, "MCScanX", fw) + + fp = open(iadhore) + iadhore = set() + next(fp) + for row in fp: + atoms = row.strip().split("\t") + query, hit = atoms[3:5] + iadhore.add(tuple(sorted((query, hit)))) + calc_sensitivity_specificity(iadhore, truth, "iADHoRe", fw) + + fp = open(orthofinder) + orthofinder = set() + next(fp) + for row in fp: + row = row.replace('"', "") + atoms = row.replace(",", " ").split() + genes = [x.strip() for x in atoms if not x.startswith("OG")] + genes = [gene_name(x) for x in genes] + pps = [x for x in genes if x in pivots] + for p in pps: + for g in genes: + if p == g: + continue + orthofinder.add(tuple(sorted((p, g)))) + # write_pairs(orthofinder, "orthofinder.pairs") + calc_sensitivity_specificity(orthofinder, truth, "OrthoFinder", fw) + fw.close() + + +def write_lst(bedfile): + pf = op.basename(bedfile).split(".")[0] + mkdir(pf) + bed = Bed(bedfile) + 
stanza = [] + for seqid, bs in bed.sub_beds(): + fname = op.join(pf, "{0}.lst".format(seqid)) + fw = open(fname, "w") + for b in bs: + print("{0}{1}".format(b.accn.replace(" ", ""), b.strand), file=fw) + stanza.append((seqid, fname)) + fw.close() + return pf, stanza + + +def write_txt(bedfile): + pf = op.basename(bedfile).split(".")[0][:20] + txtfile = pf + ".txt" + fw = open(txtfile, "w") + print("#genome", file=fw) + bed = Bed(bedfile) + for b in bed: + print( + " ".join(str(x) for x in (b.accn, b.seqid, b.start, b.end, b.strand)), + file=fw, + ) + fw.close() + return txtfile + + +def cyntenator(args): + """ + %prog cyntenator athaliana.athaliana.last athaliana.bed + + Prepare input for Cyntenator. + """ + p = OptionParser(cyntenator.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + lastfile = args[0] + fp = open(lastfile) + filteredlastfile = lastfile + ".blast" + fw = open(filteredlastfile, "w") + for row in fp: + b = BlastLine(row) + if b.query == b.subject: + continue + print("\t".join((b.query, b.subject, str(b.score))), file=fw) + fw.close() + + bedfiles = args[1:] + fp = open(lastfile) + b = BlastLine(next(fp)) + subject = b.subject + txtfiles = [] + for bedfile in bedfiles: + order = Bed(bedfile).order + if subject in order: + db = op.basename(bedfile).split(".")[0][:20] + logger.debug("Found db: {0}".format(db)) + txtfile = write_txt(bedfile) + txtfiles.append(txtfile) + + db += ".txt" + mm = MakeManager() + for txtfile in txtfiles: + outfile = txtfile + ".alignment" + cmd = 'cyntenator -t "({0} {1})" -h blast {2} > {3}'.format( + txtfile, db, filteredlastfile, outfile + ) + mm.add((txtfile, db, filteredlastfile), outfile, cmd) + mm.write() + + +def iadhore(args): + """ + %prog iadhore athaliana.athaliana.last athaliana.bed + + Wrap around iADHoRe. 
+ """ + p = OptionParser(iadhore.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + lastfile = args[0] + bedfiles = args[1:] + blast_table = "blast_table.txt" + fp = open(lastfile) + seen = set() + for row in fp: + c = BlastLine(row) + a, b = c.query, c.subject + a, b = gene_name(a), gene_name(b) + if a > b: + a, b = b, a + seen.add((a, b)) + + fw = open(blast_table, "w") + for a, b in seen: + print("\t".join((a, b)), file=fw) + fw.close() + logger.debug("A total of {0} pairs written to `{1}`".format(len(seen), blast_table)) + + fw = open("config.txt", "w") + for bedfile in bedfiles: + pf, stanza = write_lst(bedfile) + print("genome={0}".format(pf), file=fw) + for seqid, fname in stanza: + print(" ".join((seqid, fname)), file=fw) + print(file=fw) + + print("blast_table={0}".format(blast_table), file=fw) + print("cluster_type=colinear", file=fw) + print("tandem_gap=10", file=fw) + print("prob_cutoff=0.001", file=fw) + print("gap_size=20", file=fw) + print("cluster_gap=20", file=fw) + print("q_value=0.9", file=fw) + print("anchor_points=4", file=fw) + print("alignment_method=gg2", file=fw) + print("max_gaps_in_alignment=20", file=fw) + print("output_path=i-adhore_out", file=fw) + print("number_of_threads=4", file=fw) + fw.close() + + +def extract_groups(g, pairs, txtfile): + register = defaultdict(list) + fp = open(txtfile) + next(fp) + for row in fp: + if row[0] != ">": + continue + track, atg, myname, pairname = row.split() + pairname = pairname.rstrip("ab").upper() + register[pairname].append(atg.upper()) + + for pairname, genes in register.items(): + tag = pairname[0] + tag = {"A": "alpha", "B": "beta", "C": "gamma", "S": "others"}[tag] + pairs.add(tuple(sorted(genes) + [tag])) + g.join(*genes) + + +def athalianatruth(args): + """ + %prog athalianatruth J_a.txt J_bc.txt + + Prepare pairs data for At alpha/beta/gamma. 
+ """ + p = OptionParser(athalianatruth.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + atxt, bctxt = args + g = Grouper() + pairs = set() + for txt in (atxt, bctxt): + extract_groups(g, pairs, txt) + + fw = open("pairs", "w") + for pair in sorted(pairs): + print("\t".join(pair), file=fw) + fw.close() + + fw = open("groups", "w") + for group in list(g): + print(",".join(group), file=fw) + fw.close() + + +def make_gff(bed, prefix, fw): + bed = Bed(bed) + nfeats = 0 + for b in bed: + seqid = prefix + b.seqid + print("\t".join(str(x) for x in (seqid, b.accn, b.start, b.end)), file=fw) + nfeats += 1 + logger.debug("A total of {0} features converted to `{1}`".format(nfeats, fw.name)) + + +def mcscanx(args): + """ + %prog mcscanx athaliana.athaliana.last athaliana.bed + + Wrap around MCScanX. + """ + p = OptionParser(mcscanx.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + blastfile = args[0] + bedfiles = args[1:] + prefix = "_".join(op.basename(x)[:2] for x in bedfiles) + symlink(blastfile, prefix + ".blast") + allbedfile = prefix + ".gff" + fw = open(allbedfile, "w") + for i, bedfile in enumerate(bedfiles): + prefix = chr(ord("A") + i) + make_gff(bedfile, prefix, fw) + fw.close() + + +def grass(args): + """ + %prog grass coge_master_table.txt james.txt + + Validate SynFind pan-grass set against James. 
This set can be generated: + + https://genomevolution.org/r/fhak + """ + p = OptionParser(grass.__doc__) + p.set_verbose() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + master, james = args + + fp = open(master) + next(fp) + master_store = defaultdict(set) + for row in fp: + atoms = row.split() + s = set() + for x in atoms[1:6]: + m = x.split(",") + s |= set(m) + if "-" in s: + s.remove("-") + + a = atoms[1] + master_store[a] |= set(s) + + fp = open(james) + next(fp) + james_store = {} + tandems = set() + for row in fp: + atoms = row.split() + s = set() + Os = set() + for x in atoms[:-1]: + m = x.split("||") + if m[0].startswith("Os"): + Os |= set(m) + if m[0].startswith("http"): + continue + if m[0].startswith("chr"): + m = ["proxy"] + if "||" in x: + tandems |= set(m) + s |= set(m) + + for x in Os: + james_store[x] = s + + jaccards = [] + corr_jaccards = [] + perfect_matches = 0 + corr_perfect_matches = 0 + for k, v in james_store.items(): + if k not in master_store: + continue + m = master_store[k] + jaccard = len(v & m) * 100 / len(v | m) + jaccards.append(jaccard) + diff = (v ^ m) - tandems + corr_jaccard = 100 - len(diff) * 100 / len(v | m) + corr_jaccards.append(corr_jaccard) + if opts.verbose: + print(k) + print(v) + print(m) + print(diff) + print(jaccard) + if jaccard > 99: + perfect_matches += 1 + if corr_jaccard > 99: + corr_perfect_matches += 1 + + logger.debug("Perfect matches: {0}".format(perfect_matches)) + logger.debug("Perfect matches (corrected): {0}".format(corr_perfect_matches)) + print("Jaccards:", SummaryStats(jaccards)) + print("Corrected Jaccards:", SummaryStats(corr_jaccards)) + + +def ecoli(args): + """ + %prog ecoli coge_master_table.txt query.bed + + Perform gene presence / absence analysis in Ecoli master spreadsheet. 
Ecoli + spresheets can be downloaded below: + + Ecoli K12 MG1655 (K) as query + Regenerate this analysis: https://genomevolution.org/r/fggo + + Ecoli O157:H7 EDL933 (O) as query + Regenerate this analysis: https://genomevolution.org/r/fgt7 + + Shigella flexneri 2a 301 (S) as query + Regenerate this analysis: https://genomevolution.org/r/fgte + + Perform a similar analysis as in: + Jin et al. (2002) Genome sequence of Shigella flexneri 2a: insights + into pathogenicity through comparison with genomes of Escherichia + coli K12 and O157. Nucleic Acid Research. + """ + p = OptionParser(ecoli.__doc__) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + master, querybed = args + fp = open(master) + header = next(fp) + assert header[0] == "#" + qorg = header.strip().split("\t")[1] + qorg = qorg.split(":")[-1].strip() + + store = {} + MISSING = ("proxy", "-") + for row in fp: + a, b, c = row.strip().split("\t")[1:4] + store[a] = b in MISSING and c in MISSING + + bed = Bed(querybed) + tags = [] + for i, b in enumerate(bed): + accn = b.accn + if accn not in store: + logger.warning("missing {0}".format(accn)) + continue + tags.append((store[accn], accn)) + + large = 4 # large segments + II = [] + II_large = [] + for missing, aa in groupby(tags, key=lambda x: x[0]): + aa = list(aa) + if not missing: + continue + glist = list(a for missing, a in aa) + II.append(glist) + size = len(glist) + if size >= large: + II_large.append(glist) + + fw = must_open(opts.outfile, "w") + for a, t in zip((II, II_large), ("", ">=4 ")): + nmissing = sum(len(x) for x in a) + logger.debug( + "A total of {0} {1}-specific {2}islands found with {3} genes.".format( + len(a), qorg, t, nmissing + ) + ) + + for x in II: + print(len(x), ",".join(x), file=fw) + + +def plot_diagram(ax, x, y, A, B, tag, label): + ax.text(x, y + 0.14, "{0}: {1}".format(tag, label), ha="center") + strip = tag != "G" + A.draw(ax, x, y + 0.06, gene_len=0.02, strip=strip) + 
B.draw(ax, x, y, gene_len=0.02, strip=strip) + + +def cartoon(args): + """ + %prog synteny.py + + Generate cartoon illustration of SynFind. + """ + p = OptionParser(cartoon.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="10x7") + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + # Panel A + A = CartoonRegion(41) + A.draw(root, 0.35, 0.85, strip=False, color=False) + x1, x2 = A.x1, A.x2 + lsg = "lightslategray" + pad = 0.01 + xc, yc = 0.35, 0.88 + arrowlen = x2 - xc - pad + arrowprops = dict( + length_includes_head=True, + width=0.01, + fc=lsg, + lw=0, + head_length=arrowlen * 0.15, + head_width=0.03, + ) + p = FancyArrow(xc - pad, yc, -arrowlen, 0, shape="left", **arrowprops) + root.add_patch(p) + p = FancyArrow(xc + pad, yc, arrowlen, 0, shape="right", **arrowprops) + root.add_patch(p) + + yt = yc + 4 * pad + root.text((x1 + xc) / 2, yt, "20 genes upstream", ha="center") + root.text((x2 + xc) / 2, yt, "20 genes downstream", ha="center") + root.plot((xc,), (yc,), "o", mfc="w", mec=lsg, mew=2, lw=2, color=lsg) + root.text(xc, yt, "Query gene", ha="center") + + # Panel B + A.draw(root, 0.35, 0.7, strip=False) + + RoundRect(root, (0.07, 0.49), 0.56, 0.14, fc="y", alpha=0.2) + a = deepcopy(A) + a.evolve(mode="S", target=10) + a.draw(root, 0.35, 0.6) + b = deepcopy(A) + b.evolve(mode="F", target=8) + b.draw(root, 0.35, 0.56) + c = deepcopy(A) + c.evolve(mode="G", target=6) + c.draw(root, 0.35, 0.52) + + for x in (a, b, c): + root.text(0.64, x.y, "Score={0}".format(x.nonwhites), va="center") + + # Panel C + A.truncate_between_flankers() + a.truncate_between_flankers() + b.truncate_between_flankers() + c.truncate_between_flankers(target=6) + + plot_diagram(root, 0.14, 0.2, A, a, "S", "syntenic") + plot_diagram(root, 0.37, 0.2, A, b, "F", "missing, with both flankers") + plot_diagram(root, 0.6, 0.2, A, c, "G", "missing, with one flanker") + + labels = ((0.04, 0.95, "A"), (0.04, 0.75, "B"), (0.04, 0.4, "C")) + 
panel_labels(root, labels) + + # Descriptions + xt = 0.85 + desc = ( + "Extract neighborhood", + "of *window* size", + "Count gene pairs within *window*", + "Find regions above *score* cutoff", + "Identify flankers", + "Annotate syntelog class", + ) + for yt, t in zip((0.88, 0.84, 0.64, 0.6, 0.3, 0.26), desc): + root.text(xt, yt, markup(t), ha="center", va="center") + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "cartoon" + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +if __name__ == "__main__": + main() diff --git a/jcvi/projects/tgbs.py b/jcvi/projects/tgbs.py new file mode 100644 index 00000000..51f64817 --- /dev/null +++ b/jcvi/projects/tgbs.py @@ -0,0 +1,696 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Reference-free tGBS related functions. +""" + +import os +import os.path as op +import sys + +from collections import Counter +from pickle import dump, load + +from ..apps.base import ( + ActionDispatcher, + OptionParser, + iglob, + logger, + mkdir, + need_update, + sh, +) +from ..apps.cdhit import deduplicate +from ..apps.gmap import check_index +from ..apps.grid import MakeManager +from ..formats.base import must_open, write_file +from ..formats.fasta import Fasta, SeqIO +from ..formats.fastq import iter_fastq +from ..formats.sam import get_prefix +from ..graphics.base import normalize_axes, plt, savefig + + +speedupsh = r""" +cd {0} + +find *.native | sed 's/\..*//' | sort -u | \ + awk '{{ printf("split_by_chromosome.pl -s %s -o splitted_%s -native %s.*.native -x 5\n", \ + $0, $0, $0); }}' > split.sh +parallel -j {1} < split.sh + +find splitted_* -name "*.native" | \ + awk '{{ printf("SNP_Discovery-short.pl -native %s -o %s.SNPs_Het.txt -a 2 -ac 0.3 -c 0.8\n", \ + $0, $0); }}' > snps.sh +parallel -j {1} < snps.sh + +find splitted_*.log | \ + awk '{{ gsub("splitted_|.log", "", $0); \ + printf("combine_snps_single_file.pl -d splitted_%s -p \"*.txt\" -o %s.SNPs_Het.txt\n", 
\ + $0, $0); }}' > combine.sh +parallel -j {1} < combine.sh + +cd .. +""" + + +def main(): + + actions = ( + ("snpflow", "run SNP calling pipeline from reads to allele_counts"), + ("count", "count the number of reads in all clusters"), + ("snpplot", "illustrate the SNP sites in CDT"), + ("weblogo", "extract base composition for reads"), + ("novo", "reference-free tGBS pipeline v1"), + ("novo2", "reference-free tGBS pipeline v2"), + ("mstmap", "convert LMDs to MSTMAP input"), + ("query", "random access to loci file"), + ("synteny", "plot mst map against reference genome"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def build_index(locifile): + idxfile = locifile + ".idx" + if need_update(locifile, idxfile): + fp = open(locifile) + fw = open(idxfile, "w") + idx = {} + while True: + pos = fp.tell() + line = fp.readline() + if not line: + break + if not line.startswith("//"): + continue + tag, contig = line.split()[:2] + idx[contig] = pos + dump(idx, fw) + fw.close() + return idx + + idx = load(open(idxfile)) + return idx + + +def query(args): + """ + %prog query out.loci contig + + Random access to loci file. This script helps speeding up debugging. + """ + p = OptionParser(query.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + locifile, contig = args + idx = build_index(locifile) + pos = idx[contig] + logger.debug("Contig {0} found at pos {1}".format(contig, pos)) + fp = open(locifile) + fp.seek(pos) + section = [] + while True: + row = fp.readline() + if row.startswith("//") and row.split()[1] != contig: + break + section.append(row) + print("".join(section)) + + +def synteny(args): + """ + %prog synteny mstmap.out novo.final.fasta reference.fasta + + Plot MSTmap against reference genome. 
+ """ + from jcvi.assembly.geneticmap import bed as geneticmap_bed + from jcvi.apps.align import blat + from jcvi.formats.blast import bed as blast_bed, best + + p = OptionParser(synteny.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + mstmapout, novo, ref = args + pf = mstmapout.split(".")[0] + rf = ref.split(".")[0] + mstmapbed = geneticmap_bed([mstmapout]) + cmd = "cut -d. -f1 {0}".format(mstmapbed) + tmpbed = mstmapbed + ".tmp" + sh(cmd, outfile=tmpbed) + os.rename(tmpbed, pf + ".bed") + + cmd = "cut -f4 {0} | cut -d. -f1 | sort -u".format(mstmapbed) + idsfile = pf + ".ids" + sh(cmd, outfile=idsfile) + fastafile = pf + ".fasta" + cmd = "faSomeRecords {0} {1} {2}".format(novo, idsfile, fastafile) + sh(cmd) + blastfile = blat([ref, fastafile]) + bestblastfile = best([blastfile]) + blastbed = blast_bed([bestblastfile]) + os.rename(blastbed, rf + ".bed") + + anchorsfile = "{0}.{1}.anchors".format(pf, rf) + cmd = "paste {0} {0}".format(idsfile) + sh(cmd, outfile=anchorsfile) + + +def mstmap(args): + """ + %prog mstmap LMD50.snps.genotype.txt + + Convert LMDs to MSTMAP input. 
+ """ + from jcvi.assembly.geneticmap import MSTMatrix + + p = OptionParser(mstmap.__doc__) + p.add_argument( + "--population_type", + default="RIL6", + help="Type of population, possible values are DH and RILd", + ) + p.add_argument( + "--missing_threshold", + default=0.5, + help="Missing threshold, .25 excludes any marker with >25% missing", + ) + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (lmd,) = args + fp = open(lmd) + next(fp) # Header + table = {"0": "-", "1": "A", "2": "B", "3": "X"} + mh = ["locus_name"] + next(fp).split()[4:] + genotypes = [] + for row in fp: + atoms = row.split() + chr, pos, ref, alt = atoms[:4] + locus_name = ".".join((chr, pos)) + codes = [table[x] for x in atoms[4:]] + genotypes.append([locus_name] + codes) + + mm = MSTMatrix(genotypes, mh, opts.population_type, opts.missing_threshold) + mm.write(opts.outfile, header=True) + + +def weblogo(args): + """ + %prog weblogo [fastafile|fastqfile] + + Extract base composition for reads + """ + import numpy as np + from rich.progress import Progress + + p = OptionParser(weblogo.__doc__) + p.add_argument("-N", default=10, type=int, help="Count the first and last N bases") + p.add_argument("--nreads", default=1000000, type=int, help="Parse first N reads") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastqfile,) = args + N = opts.N + nreads = opts.nreads + + pat = "ATCG" + L = np.zeros((4, N), dtype="int32") + R = np.zeros((4, N), dtype="int32") + p = dict((a, i) for (i, a) in enumerate(pat)) + L4, R3 = Counter(), Counter() + + k = 0 + fw_L = open("L.fasta", "w") + fw_R = open("R.fasta", "w") + fastq = fastqfile.endswith(".fastq") + it = iter_fastq(fastqfile) if fastq else SeqIO.parse(must_open(fastqfile), "fasta") + + with Progress() as progress: + progress.add_task("[green] Processing ...", start=False, total=nreads) + for rec in it: + k += 1 + if k > nreads: + break + if rec is 
None: + break + s = str(rec.seq) + for i, a in enumerate(s[:N]): + if a in p: + a = p[a] + L[a][i] += 1 + for j, a in enumerate(s[-N:][::-1]): + if a in p: + a = p[a] + R[a][N - 1 - j] += 1 + l4, r3 = s[:4], s[-3:] + L4[l4] += 1 + R3[r3] += 1 + print(">{0}\n{1}".format(k, s[:N]), file=fw_L) + print(">{0}\n{1}".format(k, s[-N:]), file=fw_R) + + fw_L.close() + fw_R.close() + + cmd = "weblogo -F png -s large -f {0}.fasta -o {0}.png" + cmd += " --color-scheme classic --composition none -U probability" + cmd += " --title {1}" + sh(cmd.format("L", "First_10_bases")) + sh(cmd.format("R", "Last_10_bases")) + + np.savetxt("L.{0}.csv".format(pat), L, delimiter=",", fmt="%d") + np.savetxt("R.{0}.csv".format(pat), R, delimiter=",", fmt="%d") + + fw = open("L4.common", "w") + for p, c in L4.most_common(N): + print("\t".join((p, str(c))), file=fw) + fw.close() + + fw = open("R3.common", "w") + for p, c in R3.most_common(N): + print("\t".join((p, str(c))), file=fw) + fw.close() + + +def count(args): + """ + %prog count cdhit.consensus.fasta + + Scan the headers for the consensus clusters and count the number of reads. 
+ """ + from jcvi.graphics.histogram import stem_leaf_plot + from jcvi.utils.cbook import SummaryStats + + p = OptionParser(count.__doc__) + p.add_argument("--csv", help="Write depth per contig to file") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastafile,) = args + csv = open(opts.csv, "w") if opts.csv else None + + f = Fasta(fastafile, lazy=True) + sizes = [] + for desc, rec in f.iterdescriptions_ordered(): + if desc.startswith("singleton"): + sizes.append(1) + continue + + # consensus_for_cluster_0 with 63 sequences + if "with" in desc: + name, w, size, seqs = desc.split() + if csv: + print("\t".join(str(x) for x in (name, size, len(rec))), file=csv) + assert w == "with" + sizes.append(int(size)) + # MRD85:00603:02472;size=167; + else: + name, size, tail = desc.split(";") + sizes.append(int(size.replace("size=", ""))) + + if csv: + csv.close() + logger.debug("File written to `%s`.", opts.csv) + + s = SummaryStats(sizes) + print(s, file=sys.stderr) + stem_leaf_plot(s.data, 0, 100, 20, title="Cluster size") + + +def novo(args): + """ + %prog novo reads.fastq + + Reference-free tGBS pipeline v1. 
+ """ + from jcvi.assembly.kmer import jellyfish, histogram + from jcvi.assembly.preprocess import diginorm + from jcvi.formats.fasta import filter as fasta_filter, format + from jcvi.apps.cdhit import filter as cdhit_filter + + p = OptionParser(novo.__doc__) + p.add_argument( + "--technology", + choices=("illumina", "454", "iontorrent"), + default="iontorrent", + help="Sequencing platform", + ) + p.set_depth(depth=50) + p.set_align(pctid=96) + p.set_home("cdhit", default="/usr/local/bin/") + p.set_home("fiona", default="/usr/local/bin/") + p.set_home("jellyfish", default="/usr/local/bin/") + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (fastqfile,) = args + cpus = opts.cpus + depth = opts.depth + pf, sf = fastqfile.rsplit(".", 1) + + diginormfile = pf + ".diginorm." + sf + if need_update(fastqfile, diginormfile): + diginorm([fastqfile, "--single", "--depth={0}".format(depth)]) + keepabund = fastqfile + ".keep.abundfilt" + sh("cp -s {0} {1}".format(keepabund, diginormfile)) + + jf = pf + "-K23.histogram" + if need_update(diginormfile, jf): + jellyfish( + [ + diginormfile, + "--prefix={0}".format(pf), + "--cpus={0}".format(cpus), + "--jellyfish_home={0}".format(opts.jellyfish_home), + ] + ) + + genomesize = histogram([jf, pf, "23"]) + fiona = pf + ".fiona.fa" + if need_update(diginormfile, fiona): + cmd = op.join(opts.fiona_home, "fiona") + cmd += " -g {0} -nt {1} --sequencing-technology {2}".format( + genomesize, cpus, opts.technology + ) + cmd += " -vv {0} {1}".format(diginormfile, fiona) + logfile = pf + ".fiona.log" + sh(cmd, outfile=logfile, errfile=logfile) + + dedup = "cdhit" + pctid = opts.pctid + cons = fiona + ".P{0}.{1}.consensus.fasta".format(pctid, dedup) + if need_update(fiona, cons): + deduplicate( + [ + fiona, + "--consensus", + "--reads", + "--pctid={0}".format(pctid), + "--cdhit_home={0}".format(opts.cdhit_home), + ] + ) + + filteredfile = pf + ".filtered.fasta" + if need_update(cons, 
filteredfile): + covfile = pf + ".cov.fasta" + cdhit_filter( + [cons, "--outfile={0}".format(covfile), "--minsize={0}".format(depth / 5)] + ) + fasta_filter([covfile, "50", "--outfile={0}".format(filteredfile)]) + + finalfile = pf + ".final.fasta" + if need_update(filteredfile, finalfile): + format( + [ + filteredfile, + finalfile, + "--sequential=replace", + "--prefix={0}_".format(pf), + ] + ) + + +def scan_read_files(trimmed, patterns): + reads = iglob(trimmed, patterns) + samples = sorted(set(op.basename(x).split(".")[0] for x in reads)) + logger.debug( + "Total {0} read files from {1} samples".format(len(reads), len(samples)) + ) + return reads, samples + + +def novo2(args): + """ + %prog novo2 trimmed projectname + + Reference-free tGBS pipeline v2. + """ + p = OptionParser(novo2.__doc__) + p.set_fastq_names() + p.set_align(pctid=95) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + trimmed, pf = args + pctid = opts.pctid + reads, samples = scan_read_files(trimmed, opts.names) + + # Set up directory structure + clustdir = "uclust" + acdir = "allele_counts" + for d in (clustdir, acdir): + mkdir(d) + + mm = MakeManager() + clustfiles = [] + # Step 0 - clustering within sample + for s in samples: + flist = [x for x in reads if op.basename(x).split(".")[0] == s] + outfile = s + ".P{0}.clustS".format(pctid) + outfile = op.join(clustdir, outfile) + cmd = "python -m jcvi.apps.uclust cluster --cpus=8" + cmd += " {0} {1}".format(s, " ".join(flist)) + cmd += " --outdir={0}".format(clustdir) + cmd += " --pctid={0}".format(pctid) + mm.add(flist, outfile, cmd) + clustfiles.append(outfile) + + # Step 1 - make consensus within sample + allcons = [] + for s, clustfile in zip(samples, clustfiles): + outfile = s + ".P{0}.consensus".format(pctid) + outfile = op.join(clustdir, outfile) + cmd = "python -m jcvi.apps.uclust consensus" + cmd += " {0}".format(clustfile) + mm.add(clustfile, outfile, cmd) + allcons.append(outfile) + + # Step 2 - 
clustering across samples + clustSfile = pf + ".P{0}.clustS".format(pctid) + cmd = "python -m jcvi.apps.uclust mcluster {0}".format(" ".join(allcons)) + cmd += " --prefix={0}".format(pf) + mm.add(allcons, clustSfile, cmd) + + # Step 3 - make consensus across samples + locifile = pf + ".P{0}.loci".format(pctid) + cmd = "python -m jcvi.apps.uclust mconsensus {0}".format(" ".join(allcons)) + cmd += " --prefix={0}".format(pf) + mm.add(allcons + [clustSfile], locifile, cmd) + + mm.write() + + +def snpflow(args): + """ + %prog snpflow trimmed reference.fasta + + Run SNP calling pipeline until allele_counts are generated. This includes + generation of native files, SNP_Het file. Speedup for fragmented genomes + are also supported. + """ + p = OptionParser(snpflow.__doc__) + p.set_fastq_names() + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + trimmed, ref = args + nseqs = len(Fasta(ref)) + supercat = nseqs >= 1000 + if supercat: + logger.debug("Total seqs in ref: {0} (supercat={1})".format(nseqs, supercat)) + + reads, samples = scan_read_files(trimmed, opts.names) + + # Set up directory structure + nativedir, countsdir = "native", "allele_counts" + for d in (nativedir, countsdir): + mkdir(d) + + mm = MakeManager() + # Step 0 - index database + db = op.join(*check_index(ref, supercat=supercat, go=False)) + cmd = "python -m jcvi.apps.gmap index {0}".format(ref) + if supercat: + cmd += " --supercat" + coordsfile = db + ".coords" + supercatfile = ref.rsplit(".", 1)[0] + ".supercat.fasta" + mm.add(ref, (db, coordsfile), cmd) + else: + mm.add(ref, db, cmd) + + # Step 1 - GSNAP alignment and conversion to native file + allnatives = [] + allsamstats = [] + gmapdb = supercatfile if supercat else ref + for f in reads: + prefix = get_prefix(f, ref) + gsnapfile = op.join(nativedir, prefix + ".gsnap") + nativefile = op.join(nativedir, prefix + ".unique.native") + samstatsfile = op.join(nativedir, prefix + ".unique.sam.stats") + 
cmd = "python -m jcvi.apps.gmap align {0} {1}".format(gmapdb, f) + cmd += " --outdir={0} --native --cpus=1".format(nativedir) + mm.add((f, db), nativefile, cmd) + + cmd = "python -m jcvi.apps.gmap bam {0} {1} --cpus=1".format(gsnapfile, gmapdb) + mm.add(nativefile, samstatsfile, cmd) + allnatives.append(nativefile) + allsamstats.append(samstatsfile) + + # Step 2 - call SNP discovery + if supercat: + nativeconverted = nativedir + "-converted" + mkdir(nativeconverted) + allnativesc = [op.join(nativeconverted, op.basename(x)) for x in allnatives] + cmd = "tGBS-Convert_Pseudo_Genome_NATIVE_Coordinates.pl" + cmd += " -i {0}/*.native -o {1}".format(nativedir, nativeconverted) + cmd += " -c {0}".format(coordsfile) + cmds = ["rm -rf {0}".format(nativeconverted), cmd] + mm.add(allnatives + [coordsfile], allnativesc, cmds) + + runfile = "speedup.sh" + write_file(runfile, speedupsh.format(nativeconverted, opts.cpus)) + nativedir = nativeconverted + allsnps = [op.join(nativedir, "{0}.SNPs_Het.txt".format(x)) for x in samples] + mm.add(allnativesc, allsnps, "./{0}".format(runfile)) + else: + for s in samples: + snpfile = op.join(nativedir, "{0}.SNPs_Het.txt".format(s)) + cmd = "SNP_Discovery-short.pl" + cmd += " -native {0}/{1}.*unique.native".format(nativedir, s) + cmd += " -o {0} -a 2 -ac 0.3 -c 0.8".format(snpfile) + flist = [x for x in allnatives if op.basename(x).split(".")[0] == s] + mm.add(flist, snpfile, cmd) + + # Step 3 - generate equal file + allsnps = [op.join(nativedir, "{0}.SNPs_Het.txt".format(x)) for x in samples] + for s in samples: + equalfile = op.join(nativedir, "{0}.equal".format(s)) + cmd = "extract_reference_alleles.pl" + cmd += " --native {0}/{1}.*unique.native".format(nativedir, s) + cmd += " --genotype {0}/{1}.SNPs_Het.txt".format(nativedir, s) + cmd += " --allgenotypes {0}/*.SNPs_Het.txt".format(nativedir) + cmd += " --fasta {0} --output {1}".format(ref, equalfile) + mm.add(allsnps, equalfile, cmd) + + # Step 4 - generate snp matrix + allequals = 
[op.join(nativedir, "{0}.equal".format(x)) for x in samples] + matrix = "snps.matrix.txt" + cmd = "generate_matrix.pl" + cmd += " --tables {0}/*SNPs_Het.txt --equal {0}/*equal".format(nativedir) + cmd += " --fasta {0} --output {1}".format(ref, matrix) + mm.add(allsnps + allequals, matrix, cmd) + + # Step 5 - generate allele counts + allcounts = [] + for s in samples: + allele_counts = op.join(countsdir, "{0}.SNPs_Het.allele_counts".format(s)) + cmd = "count_reads_per_allele.pl -m snps.matrix.txt" + cmd += " -s {0} --native {1}/{0}.*unique.native".format(s, nativedir) + cmd += " -o {0}".format(allele_counts) + mm.add(matrix, allele_counts, cmd) + allcounts.append(allele_counts) + + # Step 6 - generate raw snps + rawsnps = "Genotyping.H3.txt" + cmd = "/home/shared/scripts/delin/SamplesGenotyping.pl --homo 3" + cmd += " -pf allele_counts -f {0} --outfile {1}".format(countsdir, rawsnps) + cmds = ["rm -f {0}".format(rawsnps), cmd] + mm.add(allcounts, rawsnps, cmds) + + # Step 7 - generate alignment report + sam_summary = "sam.summary" + cmd = "/home/shared/scripts/eddyyeh/alignment_stats.pl" + cmd += " -f {0} -o {1}".format(" ".join(allsamstats), sam_summary) + mm.add(allsamstats, sam_summary, cmd) + + native_summary = "native.summary" + cmd = "/home/shared/scripts/eddyyeh/alignment_stats.pl" + cmd += " -n {0} -o {1}".format(" ".join(allnatives), native_summary) + mm.add(allnatives, native_summary, cmd) + + mm.write() + + +def snpplot(args): + """ + %prog counts.cdt + + Illustrate the histogram per SNP site. 
+ """ + p = OptionParser(snpplot.__doc__) + opts, args, iopts = p.set_image_options(args, format="png") + + if len(args) != 1: + sys.exit(not p.print_help()) + + (datafile,) = args + # Read in CDT file + fp = open(datafile) + next(fp) + next(fp) + data = [] + for row in fp: + atoms = row.split()[4:] + nval = len(atoms) + values = [float(x) for x in atoms] + # normalize + values = [x * 1.0 / sum(values) for x in values] + data.append(values) + + pf = datafile.rsplit(".", 1)[0] + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + xmin, xmax = 0.1, 0.9 + ymin, ymax = 0.1, 0.9 + yinterval = (ymax - ymin) / len(data) + colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg") + ystart = ymax + for d in data: + xstart = xmin + for dd, c in zip(d, colors): + xend = xstart + (xmax - xmin) * dd + root.plot((xstart, xend), (ystart, ystart), "-", color=c) + xstart = xend + ystart -= yinterval + + root.text( + 0.05, + 0.5, + "{0} LMD50 SNPs".format(len(data)), + ha="center", + va="center", + rotation=90, + color="lightslategray", + ) + + for x, t, c in zip((0.3, 0.5, 0.7), ("REF", "ALT", "HET"), "rbg"): + root.text(x, 0.95, t, color=c, ha="center", va="center") + normalize_axes(root) + + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +if __name__ == "__main__": + main() diff --git a/jcvi/projects/vanilla.py b/jcvi/projects/vanilla.py new file mode 100644 index 00000000..afbc38a0 --- /dev/null +++ b/jcvi/projects/vanilla.py @@ -0,0 +1,450 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Plotting scripts for the vanilla genome paper. 
+""" +import sys + +from ..apps.base import ActionDispatcher, OptionParser, logger +from ..compara.base import AnchorFile +from ..compara.synteny import check_beds +from ..formats.base import get_number +from ..formats.bed import Bed +from ..graphics.base import normalize_axes, panel_labels, plt, savefig +from ..graphics.chromosome import draw_chromosomes +from ..graphics.glyph import TextCircle +from ..graphics.synteny import Synteny, draw_gene_legend +from ..graphics.tree import LeafInfoFile, WGDInfoFile, draw_tree, parse_tree + + +def main(): + actions = ( + # Chromosome painting since WGD + ("ancestral", "paint 14 chromosomes following alpha WGD (requires data)"), + # main figures in text + ("ploidy", "plot vanilla synteny (requires data)"), + # Composite phylogeny - tree and ks + ("phylogeny", "create a composite figure with tree and ks"), + ("tree", "create a separate figure with tree"), + ("ks", "create a separate figure with ks"), + # Composite synteny - wgd and microsynteny + ("synteny", "create a composite figure with wgd and microsynteny"), + ("wgd", "create separate figures with wgd"), + ("microsynteny", "create separate figures with microsynteny"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def phylogeny(args): + """ + %prog phylogeny treefile ks.layout + + Create a composite figure with (A) tree and (B) ks. 
+ """ + from ..compara.ks import Layout, KsPlot, KsFile + + p = OptionParser(phylogeny.__doc__) + _, args, iopts = p.set_image_options(args, figsize="10x12") + + (datafile, layoutfile) = args + + logger.debug("Load tree file `%s`", datafile) + t, hpd = parse_tree(datafile) + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes((0, 0, 1, 1)) + ax1 = fig.add_axes((0, 0.4, 1, 0.6)) + ax2 = fig.add_axes((0.12, 0.065, 0.8, 0.3)) + + margin, rmargin = 0.1, 0.2 # Left and right margin + leafinfo = LeafInfoFile("leafinfo.csv").cache + wgdinfo = WGDInfoFile("wgdinfo.csv").cache + outgroup = "ginkgo" + + # Panel A + draw_tree( + ax1, + t, + hpd=hpd, + margin=margin, + rmargin=rmargin, + supportcolor=None, + internal=False, + outgroup=outgroup, + reroot=False, + leafinfo=leafinfo, + wgdinfo=wgdinfo, + geoscale=True, + ) + # Panel B + ks_min = 0.0 + ks_max = 3.0 + bins = 60 + fill = False + layout = Layout(layoutfile) + print(layout, file=sys.stderr) + + kp = KsPlot(ax2, ks_max, bins, legendp="upper right") + for lo in layout: + data = KsFile(lo.ksfile) + data = [x.ng_ks for x in data] + data = [x for x in data if ks_min <= x <= ks_max] + kp.add_data( + data, + lo.components, + label=lo.label, + color=lo.color, + marker=lo.marker, + fill=fill, + fitted=False, + kde=True, + ) + + kp.draw(filename=None) + + normalize_axes(root, ax1) + labels = ((0.05, 0.95, "A"), (0.05, 0.4, "B")) + panel_labels(root, labels) + + image_name = "phylogeny.pdf" + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def tree(args): + """ + %prog tree treefile + + Create a tree figure. 
+ """ + p = OptionParser(tree.__doc__) + _, args, iopts = p.set_image_options(args, figsize="10x8") + + (datafile,) = args + logger.debug("Load tree file `%s`", datafile) + t, hpd = parse_tree(datafile) + + fig = plt.figure(1, (iopts.w, iopts.h)) + ax1 = fig.add_axes((0, 0, 1, 1)) + + margin, rmargin = 0.1, 0.2 # Left and right margin + leafinfo = LeafInfoFile("leafinfo.csv").cache + wgdinfo = WGDInfoFile("wgdinfo.csv").cache + outgroup = "ginkgo" + + # Panel A + draw_tree( + ax1, + t, + hpd=hpd, + margin=margin, + rmargin=rmargin, + supportcolor=None, + internal=False, + outgroup=outgroup, + reroot=False, + leafinfo=leafinfo, + wgdinfo=wgdinfo, + geoscale=True, + ) + + normalize_axes(ax1) + image_name = "tree.pdf" + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def ks(args): + """ + %prog ks ks.layout + + Create a ks figure. + """ + from ..compara.ks import Layout, KsPlot, KsFile + + p = OptionParser(ks.__doc__) + _, args, iopts = p.set_image_options(args, figsize="10x4") + + (layoutfile,) = args + + fig = plt.figure(1, (iopts.w, iopts.h)) + ax2 = fig.add_axes((0.12, 0.12, 0.8, 0.8)) + + # Panel B + ks_min = 0.0 + ks_max = 3.0 + bins = 60 + fill = False + layout = Layout(layoutfile) + print(layout, file=sys.stderr) + + kp = KsPlot(ax2, ks_max, bins, legendp="upper right") + for lo in layout: + data = KsFile(lo.ksfile) + data = [x.ng_ks for x in data] + data = [x for x in data if ks_min <= x <= ks_max] + kp.add_data( + data, + lo.components, + label=lo.label, + color=lo.color, + marker=lo.marker, + fill=fill, + fitted=False, + kde=True, + ) + + kp.draw(filename=None) + + image_name = "ks.pdf" + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def synteny(args): + """ + %prog synteny vplanifoliaA_blocks.bed vplanifoliaA.sizes \ + b1.blocks all.bed b1.layout + + Create a composite figure with (A) wgd and (B) microsynteny. 
+ """ + p = OptionParser(synteny.__doc__) + _, args, iopts = p.set_image_options(args, figsize="12x12") + + (bedfile, sizesfile, blocksfile, allbedfile, blockslayout) = args + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + ax1 = fig.add_axes([0, 0.5, 1, 0.5]) + ax2 = fig.add_axes([0.02, 0, 0.98, 0.5]) + + # Panel A + title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$" + draw_chromosomes( + ax1, + bedfile, + sizes=sizesfile, + iopts=iopts, + mergedist=200000, + winsize=50000, + imagemap=False, + gauge=True, + legend=False, + title=title, + ) + + # Panel B + draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout) + + normalize_axes(root, ax1, ax2) + labels = ((0.05, 0.95, "A"), (0.05, 0.5, "B")) + panel_labels(root, labels) + + image_name = "synteny.pdf" + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def wgd(args): + """ + %prog wgd vplanifoliaA_blocks.bed vplanifoliaA.sizes + + Create a wgd figure. + """ + p = OptionParser(synteny.__doc__) + _, args, iopts = p.set_image_options(args, figsize="8x5") + + (bedfile, sizesfile) = args + + fig = plt.figure(1, (iopts.w, iopts.h)) + ax1 = fig.add_axes((0, 0, 1, 1)) + + title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$" + draw_chromosomes( + ax1, + bedfile, + sizes=sizesfile, + iopts=iopts, + mergedist=200000, + winsize=50000, + imagemap=False, + gauge=True, + legend=False, + title=title, + ) + + normalize_axes(ax1) + + image_name = "wgd.pdf" + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def microsynteny(args): + """ + %prog microsynteny b1.blocks all.bed b1.layout + + Create a microsynteny figure. 
+ """ + p = OptionParser(synteny.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="12x6") + + (blocksfile, allbedfile, blockslayout) = args + + fig = plt.figure(1, (iopts.w, iopts.h)) + ax2 = fig.add_axes((0, 0, 1, 1)) + + draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout) + + normalize_axes(ax2) + + image_name = "microsynteny.pdf" + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def ancestral(args): + """ + %prog ancestral vplanifoliaA.vplanifoliaA.anchors > vplanifoliaA_blocks.bed + + Paint 14 chromosomes following alpha WGD. + """ + p = OptionParser(ancestral.__doc__) + p.set_beds() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (anchorsfile,) = args + qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts) + + # We focus on the following chromosome pairs + target_pairs = { + (1, 1), + (1, 6), + (1, 8), + (1, 13), + (2, 4), + (3, 12), + (3, 14), + (5, 6), + (5, 8), + (7, 9), + (7, 11), + (9, 10), + (10, 11), + } + + def get_target(achr, bchr): + if "chr" not in achr and "chr" not in bchr: + return None + achr, bchr = get_number(achr), get_number(bchr) + if achr > bchr: + achr, bchr = bchr, achr + if (achr, bchr) in target_pairs: + return achr, bchr + return None + + def build_bedline(astart, aend, target_pair): + # target_name = "{:02d}-{:02d}".format(*target_pair) + target_name = [str(x) for x in target_pair if x in (1, 2, 3, 5, 7, 10)][0] + return "\t".join( + str(x) for x in (astart.seqid, astart.start, aend.end, target_name) + ) + + # Iterate through the blocks, store any regions that has hits to one of the + # target_pairs + ac = AnchorFile(anchorsfile) + blocks = ac.blocks + outbed = Bed() + for block in blocks: + a, b, _ = zip(*block) + a = [qorder[x] for x in a] + b = [sorder[x] for x in b] + astart, aend = min(a)[1], max(a)[1] + bstart, bend = min(b)[1], max(b)[1] + # Now convert to BED lines with new accn + achr, bchr = astart.seqid, bstart.seqid + target = 
get_target(achr, bchr) + if target is None: + continue + outbed.add(build_bedline(astart, aend, target)) + outbed.add(build_bedline(bstart, bend, target)) + outbed.print_to_file(sorted=True) + + +def ploidy(args): + """ + %prog ploidy b1.blocks all.bed b1.layout + + Build a figure that illustrates the WGD history of the vanilla genome. + """ + p = OptionParser(ploidy.__doc__) + opts, args, iopts = p.set_image_options(args, figsize="12x6") + + if len(args) != 3: + sys.exit(not p.print_help()) + + blocksfile, bedfile, blockslayout = args + + fig = plt.figure(1, (iopts.w, iopts.h)) + root = fig.add_axes([0, 0, 1, 1]) + + draw_ploidy(fig, root, blocksfile, bedfile, blockslayout) + + root.set_xlim(0, 1) + root.set_ylim(0, 1) + root.set_axis_off() + + pf = "vanilla-karyotype" + image_name = pf + "." + iopts.format + savefig(image_name, dpi=iopts.dpi, iopts=iopts) + + +def draw_ploidy(fig, root, blocksfile, bedfile, blockslayout): + switchidsfile = "switch.ids" + Synteny( + fig, + root, + blocksfile, + bedfile, + blockslayout, + scalebar=True, + switch=switchidsfile, + ) + + # Legend showing the orientation of the genes + draw_gene_legend(root, 0.2, 0.3, 0.53) + + # WGD labels + radius = 0.025 + tau_color = "#bebada" + alpha_color = "#bc80bd" + label_color = "k" + pad = 0.05 + for y in (0.74 + 1.5 * pad, 0.26 - 1.5 * pad): + TextCircle( + root, + 0.25, + y, + r"$\alpha^{O}$", + radius=radius, + fc=alpha_color, + color=label_color, + fontweight="bold", + ) + TextCircle( + root, + 0.75, + y, + r"$\alpha^{O}$", + radius=radius, + fc=alpha_color, + color=label_color, + fontweight="bold", + ) + for y in (0.74 + 3 * pad, 0.26 - 3 * pad): + TextCircle( + root, 0.5, y, r"$\tau$", radius=radius, fc=tau_color, color=label_color + ) + + +if __name__ == "__main__": + main() diff --git a/jcvi/utils/__init__.py b/jcvi/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/jcvi/utils/__main__.py b/jcvi/utils/__main__.py new file mode 100644 index 00000000..f627666a 
--- /dev/null +++ b/jcvi/utils/__main__.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +Assortment of utility scripts implementing recipes from Python cookbooks, such as iterators, sorters, range queries, etc. +""" + +from ..apps.base import dmain + + +if __name__ == "__main__": + dmain(__file__) diff --git a/jcvi/utils/aws.py b/jcvi/utils/aws.py new file mode 100644 index 00000000..0546df50 --- /dev/null +++ b/jcvi/utils/aws.py @@ -0,0 +1,810 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +AWS-related methods. +""" +import fnmatch +import getpass +import json +import os +import os.path as op +import sys +import time + +from configparser import NoOptionError, NoSectionError +from datetime import datetime +from multiprocessing import Pool + +import boto3 + +from botocore.exceptions import ClientError, ParamValidationError + +from ..apps.base import ( + ActionDispatcher, + OptionParser, + datafile, + get_config, + logger, + popen, + sh, +) +from ..formats.base import BaseFile, SetFile, timestamp + +from .console import console + + +AWS_CREDS_PATH = "%s/.aws/credentials" % (op.expanduser("~"),) + + +class InstanceSkeleton(BaseFile): + def __init__(self, filename=datafile("instance.json")): + super().__init__(filename) + self.spec = json.load(open(filename)) + + @property + def launch_spec(self): + return self.spec["LaunchSpec"] + + @property + def instance_id(self): + return self.spec["InstanceId"].strip() + + @property + def private_ip_address(self): + return self.spec["PrivateIpAddress"] + + @property + def availability_zone(self): + return self.spec["AvailabilityZone"] + + @property + def volumes(self): + return self.spec["Volumes"] + + @property + def block_device_mappings(self): + return self.launch_spec["BlockDeviceMappings"] + + @property + def ebs_optimized(self): + return self.launch_spec["EbsOptimized"] + + @property + def image_id(self): + return self.launch_spec["ImageId"] + + @property + def instance_type(self): + 
return self.launch_spec["InstanceType"] + + @property + def key_name(self): + return self.launch_spec["KeyName"] + + @property + def security_group_ids(self): + return self.launch_spec["SecurityGroupIds"] + + @property + def subnet_id(self): + return self.launch_spec["SubnetId"] + + @property + def iam_instance_profile(self): + return self.launch_spec["IamInstanceProfile"] + + def save(self): + fw = open(self.filename, "w") + s = json.dumps(self.spec, indent=4, sort_keys=True) + # Clear the trailing spaces + print("\n".join(x.rstrip() for x in s.splitlines()), file=fw) + fw.close() + + def save_instance_id(self, instance_id, private_id_address): + self.spec["InstanceId"] = instance_id + self.spec["PrivateIpAddress"] = private_id_address + self.save() + + def save_image_id(self, image_id): + self.spec["LaunchSpec"]["ImageId"] = image_id + self.save() + + +def main(): + + actions = ( + ("cp", "copy files with support for wildcards"), + ("ls", "list files with support for wildcards"), + ("rm", "remove files with support for wildcards"), + ("role", "change aws role"), + ("start", "start ec2 instance"), + ("stop", "stop ec2 instance"), + ("ip", "describe current instance"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def ip(args): + """ + %prog ip + + Show current IP address from JSON settings. + """ + p = OptionParser(ip.__doc__) + if len(args) != 0: + sys.exit(not p.print_help()) + + s = InstanceSkeleton() + print("IP address:", s.private_ip_address, file=sys.stderr) + print("Instance type:", s.instance_type, file=sys.stderr) + + +def start(args): + """ + %prog start + + Launch ec2 instance through command line. 
+ """ + p = OptionParser(start.__doc__) + p.add_argument( + "--ondemand", + default=False, + action="store_true", + help="Do we want a more expensive on-demand instance", + ) + p.add_argument("--profile", default="mvrad-datasci-role", help="Profile name") + p.add_argument("--price", default=4.0, type=float, help="Spot price") + opts, args = p.parse_args(args) + + if len(args) != 0: + sys.exit(not p.print_help()) + + role(["htang"]) + session = boto3.Session(profile_name=opts.profile) + client = session.client("ec2") + s = InstanceSkeleton() + + # Make sure the instance id is empty + instance_id = s.instance_id + if instance_id != "": + logger.error("Instance exists {}".format(instance_id)) + sys.exit(1) + + launch_spec = s.launch_spec + instance_id = "" + + if opts.ondemand: + # Launch on-demand instance + response = client.run_instances( + BlockDeviceMappings=s.block_device_mappings, + MaxCount=1, + MinCount=1, + ImageId=s.image_id, + InstanceType=s.instance_type, + KeyName=s.key_name, + Placement={"AvailabilityZone": s.availability_zone}, + SecurityGroupIds=s.security_group_ids, + SubnetId=s.subnet_id, + EbsOptimized=s.ebs_optimized, + IamInstanceProfile=s.iam_instance_profile, + ) + instance_id = response["Instances"][0]["InstanceId"] + + else: + # Launch spot instance + response = client.request_spot_instances( + SpotPrice=str(opts.price), + InstanceCount=1, + Type="one-time", + AvailabilityZoneGroup=s.availability_zone, + LaunchSpecification=launch_spec, + ) + + request_id = response["SpotInstanceRequests"][0]["SpotInstanceRequestId"] + print("Request id {}".format(request_id), file=sys.stderr) + + while not instance_id: + response = client.describe_spot_instance_requests( + SpotInstanceRequestIds=[request_id] + ) + if "InstanceId" in response["SpotInstanceRequests"][0]: + instance_id = response["SpotInstanceRequests"][0]["InstanceId"] + else: + logger.debug("Waiting to be fulfilled ...") + time.sleep(10) + + # Check if the instance is running + 
print("Instance id {}".format(instance_id), file=sys.stderr) + status = "" + while status != "running": + logger.debug("Waiting instance to run ...") + time.sleep(3) + response = client.describe_instance_status(InstanceIds=[instance_id]) + if len(response["InstanceStatuses"]) > 0: + status = response["InstanceStatuses"][0]["InstanceState"]["Name"] + + # Tagging + name = "htang-lx-ondemand" if opts.ondemand else "htang-lx-spot" + response = client.create_tags( + Resources=[instance_id], + Tags=[ + {"Key": k, "Value": v} + for k, v in { + "Name": name, + "owner": "htang", + "project": "mv-bioinformatics", + }.items() + ], + ) + + # Attach working volumes + volumes = s.volumes + for volume in volumes: + response = client.attach_volume( + VolumeId=volume["VolumeId"], InstanceId=instance_id, Device=volume["Device"] + ) + + # Save instance id and ip + response = client.describe_instances(InstanceIds=[instance_id]) + ip_address = response["Reservations"][0]["Instances"][0]["PrivateIpAddress"] + print("IP address {}".format(ip_address), file=sys.stderr) + + s.save_instance_id(instance_id, ip_address) + + +def stop(args): + """ + %prog stop + + Stop EC2 instance. 
+ """ + p = OptionParser(stop.__doc__) + p.add_argument("--profile", default="mvrad-datasci-role", help="Profile name") + opts, args = p.parse_args(args) + + if len(args) != 0: + sys.exit(not p.print_help()) + + role(["htang"]) + session = boto3.Session(profile_name=opts.profile) + client = session.client("ec2") + s = InstanceSkeleton() + + # Make sure the instance id is NOT empty + instance_id = s.instance_id + if instance_id == "": + logger.error("Cannot find instance_id {}".format(instance_id)) + sys.exit(1) + + block_device_mappings = [] + for volume in s.volumes: + block_device_mappings.append({"DeviceName": volume["Device"], "NoDevice": ""}) + + new_image_name = "htang-dev-{}-{}".format(timestamp(), int(time.time())) + response = client.create_image( + InstanceId=instance_id, + Name=new_image_name, + BlockDeviceMappings=block_device_mappings, + ) + print(response, file=sys.stderr) + new_image_id = response["ImageId"] + + image_status = "" + while image_status != "available": + logger.debug("Waiting for image to be ready") + time.sleep(10) + response = client.describe_images(ImageIds=[new_image_id]) + image_status = response["Images"][0]["State"] + + # Delete old image, snapshot and shut down instance + old_image_id = s.image_id + response = client.describe_images(ImageIds=[old_image_id]) + old_snapshot_id = response["Images"][0]["BlockDeviceMappings"][0]["Ebs"][ + "SnapshotId" + ] + response = client.deregister_image(ImageId=old_image_id) + print(response, file=sys.stderr) + response = client.delete_snapshot(SnapshotId=old_snapshot_id) + print(response, file=sys.stderr) + response = client.terminate_instances(InstanceIds=[instance_id]) + print(response, file=sys.stderr) + + # Save new image id + s.save_image_id(new_image_id) + s.save_instance_id("", "") + + +def glob_s3(store, keys=None, recursive=False): + store, cards = store.rsplit("/", 1) + contents = ls_s3(store, recursive=recursive) + if keys: + filtered = [x for x in contents if 
op.basename(x).split(".")[0] in keys] + else: + filtered = fnmatch.filter(contents, cards) + + if recursive: + store = "s3://" + store.replace("s3://", "").split("/")[0] + + return filtered + + +def rm_s3(store): + cmd = "aws s3 rm {}".format(store) + sh(cmd) + + +def rm(args): + """ + %prog rm "s3://hli-mv-data-science/htang/str/*.csv" + + Remove a bunch of files. + """ + p = OptionParser(rm.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (store,) = args + contents = glob_s3(store) + for c in contents: + rm_s3(c) + + +def worker(work): + c, target, force = work + if force or not op.exists(target): + pull_from_s3(c, target) + + +def cp(args): + """ + %prog cp "s3://hli-mv-data-science/htang/str/*.csv" . + + Copy files to folder. Accepts list of s3 addresses as input. + """ + p = OptionParser(cp.__doc__) + p.add_argument( + "--force", default=False, action="store_true", help="Force overwrite if exists" + ) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + store, folder = args + force = opts.force + cpus = opts.cpus + if op.exists(store): + contents = [x.strip().split(",") for x in open(store)] + else: + contents = glob_s3(store) + + tasks = [] + for c in contents: + if isinstance(c, str): + oc = op.basename(c) + tc = op.join(folder, oc) + else: + if len(c) == 2: + c, tc = c + else: + (c,) = c + tc = op.basename(c) + tasks.append((c, tc, force)) + + worker_pool = Pool(cpus) + worker_pool.map(worker, tasks) + worker_pool.close() + worker_pool.join() + + +def ls(args): + """ + %prog ls "s3://hli-mv-data-science/htang/str/*.vcf.gz" + + List files with support for wildcards. 
+ """ + p = OptionParser(ls.__doc__) + p.add_argument("--keys", help="List of keys to include") + p.add_argument( + "--recursive", default=False, action="store_true", help="Recursive search" + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (store,) = args + keys = opts.keys + if keys: + keys = SetFile(keys) + print("\n".join(glob_s3(store, keys=keys, recursive=opts.recursive))) + + +def s3ify(address): + if not address.startswith("s3://"): + address = "s3://" + address.lstrip("/") + return address + + +def push_to_s3(s3_store, obj_name): + cmd = "sync" if op.isdir(obj_name) else "cp" + s3address = "{0}/{1}".format(s3_store, obj_name) + s3address = s3ify(s3address) + cmd = "aws s3 {0} {1} {2} --sse".format(cmd, obj_name, s3address) + sh(cmd) + return s3address + + +def pull_from_s3(s3_store, file_name=None, overwrite=True): + is_dir = s3_store.endswith("/") + if is_dir: + s3_store = s3_store.rstrip("/") + file_name = file_name or s3_store.split("/")[-1] + if not op.exists(file_name): + s3_store = s3ify(s3_store) + if overwrite or (not op.exists(file_name)): + cmd = "aws s3 cp {0} {1} --sse".format(s3_store, file_name) + if is_dir: + cmd += " --recursive" + sh(cmd) + return op.abspath(file_name) + + +def sync_from_s3(s3_store, target_dir=None): + s3_store = s3_store.rstrip("/") + s3_store = s3ify(s3_store) + if target_dir is None: + target_dir = op.basename(s3_store) + cmd = "aws s3 sync {}/ {}/".format(s3_store, target_dir) + sh(cmd) + return target_dir + + +def ls_s3(s3_store_obj_name, recursive=False): + s3_store_obj_name = s3ify(s3_store_obj_name) + cmd = "aws s3 ls {0}/".format(s3_store_obj_name) + contents = [] + for row in popen(cmd): + f = row.split()[-1] + f = op.join(s3_store_obj_name, f) + contents.append(f) + + if recursive: + que = [x for x in contents if x.endswith("/")] + while que: + f = que.pop(0).rstrip("/") + contents += ls_s3(f, recursive=True) + + return contents + + +def 
check_exists_s3(s3_store_obj_name: str, warn=False) -> bool: + """ + Check if s3 object exists. + """ + s3_store_obj_name = s3ify(s3_store_obj_name) + cmd = "aws s3 ls {0} | wc -l".format(s3_store_obj_name) + counts = int(popen(cmd).read()) + exists = counts != 0 + if exists and warn: + logger.debug("{} exists. Skipped.".format(s3_store_obj_name)) + return exists + + +def aws_configure(profile, key, value): + sh("aws configure set profile.{0}.{1} {2}".format(profile, key, value)) + + +def role(args): + """ + %prog role htang + + Change aws role. + """ + ( + src_acct, + src_username, + dst_acct, + dst_role, + ) = "205134639408 htang 114692162163 mvrad-datasci-role".split() + + p = OptionParser(role.__doc__) + p.add_argument("--profile", default="mvrad-datasci-role", help="Profile name") + p.add_argument( + "--device", + default="arn:aws:iam::" + src_acct + ":mfa/" + src_username, + metavar="arn:aws:iam::123456788990:mfa/dudeman", + help="The MFA Device ARN. This value can also be " + "provided via the environment variable 'MFA_DEVICE' or" + " the ~/.aws/credentials variable 'aws_mfa_device'.", + ) + p.add_argument( + "--duration", + type=int, + default=3600, + help="The duration, in seconds, that the temporary " + "credentials should remain valid. Minimum value: " + "900 (15 minutes). Maximum: 129600 (36 hours). " + "Defaults to 43200 (12 hours), or 3600 (one " + "hour) when using '--assume-role'. This value " + "can also be provided via the environment " + "variable 'MFA_STS_DURATION'. ", + ) + p.add_argument( + "--assume-role", + "--assume", + default="arn:aws:iam::" + dst_acct + ":role/" + dst_role, + metavar="arn:aws:iam::123456788990:role/RoleName", + help="The ARN of the AWS IAM Role you would like to " + "assume, if specified. 
This value can also be provided" + " via the environment variable 'MFA_ASSUME_ROLE'", + ) + p.add_argument( + "--role-session-name", + help="Friendly session name required when using --assume-role", + default=getpass.getuser(), + ) + p.add_argument( + "--force", + help="Refresh credentials even if currently valid.", + action="store_true", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + # Use a config to check the expiration of session token + config = get_config(AWS_CREDS_PATH) + validate(opts, config) + + +def validate(args, config): + """Validate if the config file is properly structured""" + profile = args.profile + if not args.profile: + if os.environ.get("AWS_PROFILE"): + args.profile = os.environ.get("AWS_PROFILE") + else: + args.profile = "default" + + if args.assume_role: + role_msg = "with assumed role: %s" % (args.assume_role,) + elif config.has_option(args.profile, "assumed_role_arn"): + role_msg = "with assumed role: %s" % ( + config.get(args.profile, "assumed_role_arn") + ) + else: + role_msg = "" + logger.info("Validating credentials for profile: %s %s" % (profile, role_msg)) + reup_message = "Obtaining credentials for a new role or profile." + + try: + key_id = config.get(profile, "aws_access_key_id") + access_key = config.get(profile, "aws_secret_access_key") + except NoSectionError: + log_error_and_exit( + "Credentials session '[%s]' is missing. 
" + "You must add this section to your credentials file " + "along with your long term 'aws_access_key_id' and " + "'aws_secret_access_key'" % (profile,) + ) + except NoOptionError as e: + log_error_and_exit(e) + + # get device from param, env var or config + if not args.device: + if os.environ.get("MFA_DEVICE"): + args.device = os.environ.get("MFA_DEVICE") + elif config.has_option(profile, "aws_mfa_device"): + args.device = config.get(profile, "aws_mfa_device") + else: + log_error_and_exit( + "You must provide --device or MFA_DEVICE or set " + '"aws_mfa_device" in ".aws/credentials"' + ) + + # get assume_role from param or env var + if not args.assume_role: + if os.environ.get("MFA_ASSUME_ROLE"): + args.assume_role = os.environ.get("MFA_ASSUME_ROLE") + elif config.has_option(profile, "assume_role"): + args.assume_role = config.get(profile, "assume_role") + + # get duration from param, env var or set default + if not args.duration: + if os.environ.get("MFA_STS_DURATION"): + args.duration = int(os.environ.get("MFA_STS_DURATION")) + else: + args.duration = 3600 if args.assume_role else 43200 + + # If this is False, only refresh credentials if expired. Otherwise + # always refresh. + force_refresh = False + + # Validate presence of profile-term section + if not config.has_section(profile): + config.add_section(profile) + force_refresh = True + # Validate option integrity of profile section + else: + required_options = [ + "assumed_role", + "aws_access_key_id", + "aws_secret_access_key", + "aws_session_token", + "aws_security_token", + "expiration", + ] + try: + short_term = {} + for option in required_options: + short_term[option] = config.get(profile, option) + except NoOptionError: + logger.warning( + "Your existing credentials are missing or invalid, " + "obtaining new credentials." 
+ ) + force_refresh = True + + try: + current_role = config.get(profile, "assumed_role_arn") + except NoOptionError: + current_role = None + + if args.force: + logger.info("Forcing refresh of credentials.") + force_refresh = True + # There are not credentials for an assumed role, + # but the user is trying to assume one + elif current_role is None and args.assume_role: + logger.info(reup_message) + force_refresh = True + # There are current credentials for a role and + # the role arn being provided is the same. + elif ( + current_role is not None + and args.assume_role + and current_role == args.assume_role + ): + pass + # There are credentials for a current role and the role + # that is attempting to be assumed is different + elif ( + current_role is not None + and args.assume_role + and current_role != args.assume_role + ): + logger.info(reup_message) + force_refresh = True + # There are credentials for a current role and no role arn is + # being supplied + elif current_role is not None and args.assume_role is None: + logger.info(reup_message) + force_refresh = True + + should_refresh = True + + # Unless we're forcing a refresh, check expiration. 
+ if not force_refresh: + exp = datetime.strptime(config.get(profile, "expiration"), "%Y-%m-%d %H:%M:%S") + diff = exp - datetime.utcnow() + if diff.total_seconds() <= 0: + logger.info("Your credentials have expired, renewing.") + else: + should_refresh = False + logger.info( + "Your credentials are still valid for %s seconds" + " they will expire at %s" % (diff.total_seconds(), exp) + ) + + if should_refresh: + get_credentials(profile, args, config) + + +def get_credentials(profile, args, config): + mfa_token = console.input( + "Enter AWS MFA code for device [%s] " + "(renewing for %s seconds): " % (args.device, args.duration) + ) + + boto3.setup_default_session(profile_name="default") + client = boto3.client("sts") + + if args.assume_role: + + logger.info( + "Assuming Role - Profile: %s, Role: %s, Duration: %s", + profile, + args.assume_role, + args.duration, + ) + + try: + print((args.assume_role, args.role_session_name, args.device, mfa_token)) + response = client.assume_role( + RoleArn=args.assume_role, + RoleSessionName=args.role_session_name, + SerialNumber=args.device, + TokenCode=mfa_token, + ) + except ClientError as e: + log_error_and_exit( + "An error occured while calling assume role: {}".format(e) + ) + except ParamValidationError: + log_error_and_exit("Token must be six digits") + + config.set( + profile, + "assumed_role", + "True", + ) + config.set( + profile, + "assumed_role_arn", + args.assume_role, + ) + else: + logger.info( + "Fetching Credentials - Profile: %s, Duration: %s", profile, args.duration + ) + try: + response = client.get_session_token( + DurationSeconds=args.duration, + SerialNumber=args.device, + TokenCode=mfa_token, + ) + except ClientError as e: + log_error_and_exit( + "An error occured while calling assume role: {}".format(e) + ) + except ParamValidationError: + log_error_and_exit("Token must be six digits") + + config.set( + profile, + "assumed_role", + "False", + ) + config.remove_option(profile, "assumed_role_arn") + + # 
aws_session_token and aws_security_token are both added + # to support boto and boto3 + options = [ + ("aws_access_key_id", "AccessKeyId"), + ("aws_secret_access_key", "SecretAccessKey"), + ("aws_session_token", "SessionToken"), + ("aws_security_token", "SessionToken"), + ] + + for option, value in options: + config.set(profile, option, response["Credentials"][value]) + # Save expiration individiually, so it can be manipulated + config.set( + profile, + "expiration", + response["Credentials"]["Expiration"].strftime("%Y-%m-%d %H:%M:%S"), + ) + with open(AWS_CREDS_PATH, "w") as configfile: + config.write(configfile) + + logger.info( + "Success! Your credentials will expire in %s seconds at: %s" + % (args.duration, response["Credentials"]["Expiration"]) + ) + + +def log_error_and_exit(message): + """Log an error message and exit with error""" + logger.error(message) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/jcvi/utils/cbook.py b/jcvi/utils/cbook.py new file mode 100644 index 00000000..20c7c2c5 --- /dev/null +++ b/jcvi/utils/cbook.py @@ -0,0 +1,465 @@ +""" +Useful recipes from various internet sources (thanks) +mostly decorator patterns +""" + +import os.path as op +import re +import sys + +from collections import defaultdict +from typing import Optional + +from ..apps.base import logger + + +def inspect(item, maxchar=80): + """ + Inspect the attributes of an item. + """ + for i in dir(item): + try: + member = str(getattr(item, i)) + if maxchar and len(member) > maxchar: + member = member[:maxchar] + "..." + except: + member = "[ERROR]" + print("{}: {}".format(i, member), file=sys.stderr) + + +def timeit(func): + """ + + """ + import time + + def timed(*args, **kw): + ts = time.time() + result = func(*args, **kw) + te = time.time() + + msg = "{0}{1} {2:.2f}s".format(func.__name__, args, te - ts) + logger.debug(msg) + + return result + + return timed + + +def depends(func): + """ + Decorator to perform check on infile and outfile. 
When infile is not present, issue + warning, and when outfile is present, skip function calls. + """ + from jcvi.apps.base import need_update, listify + + infile = "infile" + outfile = "outfile" + + def wrapper(*args, **kwargs): + assert outfile in kwargs, "You need to specify `outfile=` on function call" + if infile in kwargs: + infilename = listify(kwargs[infile]) + for x in infilename: + assert op.exists(x), "The specified infile `{0}` does not exist".format( + x + ) + + outfilename = kwargs[outfile] + if need_update(infilename, outfilename, warn=True): + return func(*args, **kwargs) + + outfilename = listify(outfilename) + + for x in outfilename: + assert op.exists(x), "Something went wrong, `{0}` not found".format(x) + + return outfilename + + return wrapper + + +""" +Functions that make text formatting easier. +""" + + +class Registry(defaultdict): + def __init__(self, *args, **kwargs): + super().__init__(list, *args, **kwargs) + + def iter_tag(self, tag): + for key, ts in self.items(): + if tag in ts: + yield key + + def get_tag(self, tag): + return list(self.iter_tag(tag)) + + def count(self, tag): + return sum(1 for x in self.iter_tag(tag)) + + def update_from(self, filename): + from jcvi.formats.base import DictFile + + d = DictFile(filename) + for k, v in d.items(): + self[k].append(v) + + +class SummaryStats(object): + def __init__(self, a, dtype=None, title=None): + import numpy as np + + self.data = a = np.array(a, dtype=dtype) + self.min = a.min() + self.max = a.max() + self.size = a.size + self.mean = np.mean(a) + self.sd = np.std(a) + self.median = np.median(a) + self.sum = a.sum() + self.title = title + + a.sort() + self.firstq = a[self.size // 4] + self.thirdq = a[self.size * 3 // 4] + self.p1 = a[int(self.size * 0.025)] + self.p2 = a[int(self.size * 0.975)] + + if dtype == "int": + self.mean = int(self.mean) + self.sd = int(self.sd) + self.median = int(self.median) + + def __str__(self): + s = self.title + ": " if self.title else "" + s += 
"Min={} Max={} N={} Mean={:.2f} SD={:.2f} Median={} Sum={}".format( + self.min, self.max, self.size, self.mean, self.sd, self.median, self.sum + ) + return s + + def todict(self, quartile=False): + d = {"Min": self.min, "Max": self.max, "Mean": self.mean, "Median": self.median} + if quartile: + d.update({"1st Quartile": self.firstq, "3rd Quartile": self.thirdq}) + + return d + + def tofile(self, filename): + fw = open(filename, "w") + for x in self.data: + print(x, file=fw) + fw.close() + logger.debug( + "Array of size {0} written to file `{1}`.".format(self.size, filename) + ) + + +class AutoVivification(dict): + """ + Implementation of perl's autovivification feature. + + Thanks to + """ + + def __getitem__(self, item): + try: + return dict.__getitem__(self, item) + except KeyError: + value = self[item] = type(self)() + return value + + +def enumerate_reversed(sequence): + """ + Perform reverse enumeration, returning an iterator with decrementing + index/position values + + Source: http://stackoverflow.com/questions/529424/traverse-a-list-in-reverse-order-in-python + """ + for index in reversed(range(len(sequence))): + yield index, sequence[index] + + +def percentage(a, b, precision=1, mode: Optional[int] = 0): + """ + >>> percentage(100, 200) + '100 of 200 (50.0%)' + """ + _a, _b = a, b + pct = "{0:.{1}f}%".format(a * 100.0 / b, precision) + a, b = thousands(a), thousands(b) + if mode == 0: + return "{0} of {1} ({2})".format(a, b, pct) + elif mode == 1: + return "{0} ({1})".format(a, pct) + elif mode == 2: + return _a * 100.0 / _b + return pct + + +def thousands(x): + """ + >>> thousands(12345) + '12,345' + """ + import locale + + try: + locale.setlocale(locale.LC_ALL, "en_US.utf8") + except Exception: + locale.setlocale(locale.LC_ALL, "en_US.UTF-8") + finally: + s = "%d" % x + groups = [] + while s and s[-1].isdigit(): + groups.append(s[-3:]) + s = s[:-3] + return s + ",".join(reversed(groups)) + + +SUFFIXES = { + 1000: ["", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb", 
"Zb"], + 1024: ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB"], +} + + +def human_size(size, a_kilobyte_is_1024_bytes=False, precision=1, target=None): + """Convert a file size to human-readable form. + + Keyword arguments: + size -- file size in bytes + a_kilobyte_is_1024_bytes -- if True (default), use multiples of 1024 + if False, use multiples of 1000 + + Returns: string + Credit: + + >>> print(human_size(1000000000000, True)) + 931.3GiB + >>> print(human_size(1000000000000)) + 1.0Tb + >>> print(human_size(300)) + 300.0 + """ + if size < 0: + raise ValueError("number must be non-negative") + + multiple = 1024 if a_kilobyte_is_1024_bytes else 1000 + for suffix in SUFFIXES[multiple]: + + if target: + if suffix == target: + break + size /= float(multiple) + else: + if size >= multiple: + size /= float(multiple) + else: + break + + return "{0:.{1}f}{2}".format(size, precision, suffix) + + +def autoscale(bp: int, optimal: int = 6): + """ + Autoscale the basepair length to a more human readable number. + The optimal is the number of ticks we want to see on the axis. + + >>> autoscale(150000000) + 20000000 + >>> autoscale(97352632) + 10000000 + """ + slen = str(bp) + tlen = slen[0:2] if len(slen) > 1 else slen[0] + precision = len(slen) - 2 # how many zeros we need to pad? + bp_len_scaled = int(tlen) # scale bp_len to range (0, 100) + tick_diffs = [(x, abs(bp_len_scaled / x - optimal)) for x in [1, 2, 5, 10]] + best_stride, _ = min(tick_diffs, key=lambda x: x[1]) + + while precision > 0: + best_stride *= 10 + precision -= 1 + + return best_stride + + +def gene_name(st, exclude=("ev",), sep="."): + """ + Helper functions in the BLAST filtering to get rid alternative splicings. + This is ugly, but different annotation groups are inconsistent with respect + to how the alternative splicings are named. Mostly it can be done by removing + the suffix, except for ones in the exclude list. 
+ """ + if any(st.startswith(x) for x in exclude): + sep = None + st = st.split("|")[0] + + if sep and sep in st: + name, suffix = st.rsplit(sep, 1) + else: + name, suffix = st, "" + + # We only want to remove suffix that are isoforms, longer suffix would + # suggest that it is part of the right gene name + if len(suffix) != 1: + name = st + + return name + + +def seqid_parse(seqid, sep=["-"], stdpf=True): + """ + This function tries to parse seqid (1st col in bed files) + return prefix, numeric id, and suffix, for example: + + >>> seqid_parse('chr1_random', stdpf=False) + ('chr', '1', '_random') + >>> seqid_parse('chr1_random', stdpf=True) + ('C', '1', '_random') + >>> seqid_parse('AmTr_v1.0_scaffold00001', ["-"], stdpf=False) + ('AmTr_v1.0_scaffold', '00001', '') + >>> seqid_parse('AmTr_v1.0_scaffold00001') + ('Sca', '00001', '') + >>> seqid_parse('PDK_30s1055861') + ('C', '1055861', '') + >>> seqid_parse('PDK_30s1055861', stdpf=False) + ('PDK_30s', '1055861', '') + >>> seqid_parse("AC235758.1", stdpf=False) + ('AC', '235758.1', '') + """ + seqid = seqid.split(";")[0] + if "mito" in seqid or "chloro" in seqid: + return seqid, "", "" + + numbers = re.findall(r"\d+\.*\d*", seqid) + + if not numbers: + return seqid, "", "" + + id = numbers[-1] + lastnumi = seqid.rfind(id) + suffixi = lastnumi + len(id) + suffix = seqid[suffixi:] + + if sep is None: + sep = [""] + elif type(sep) == str: + sep = [sep] + + prefix = seqid[:lastnumi] + if not stdpf: + sep = "|".join(sep) + atoms = re.split(sep, prefix) + if len(atoms) == 1: + prefix = atoms[0] + else: + prefix = atoms[-2] + prefix = prefix.replace("Chromosome", "Chr") + else: # use standard prefix + if re.findall("chr", prefix, re.I): + prefix = "Chr" + if re.findall("lg", prefix, re.I): + prefix = "LG" + elif re.findall("sca", prefix, re.I): + prefix = "Sca" + elif re.findall("supercontig", prefix, re.I): + prefix = "SCg" + elif re.findall("ctg|contig", prefix, re.I): + prefix = "Ctg" + elif re.findall("BAC", prefix, 
re.I): + prefix = "BAC" + else: + prefix = "C" + + return prefix, id, suffix + + +def fixChromName(name, orgn="medicago"): + """ + Convert quirky chromosome names encountered in different + release files, which are very project specific, into a more + general format. + + For example, in Medicago + Convert a seqid like + `Mt3.5.1_Chr1` to `chr1` + `Mt3.5_Chr3` to `chr3` + `chr01_pseudomolecule_IMGAG` to `chr1` + + Some examples from Maize + Convert a seqid like + `chromosome:AGPv2:2:1:237068873:1` to `2` + Special cases + `chromosome:AGPv2:mitochondrion:1:569630:1` to `Mt` + `chromosome:AGPv2:chloroplast:1:140384:1` to `Pt` + """ + mtr_pat1 = re.compile(r"Mt[0-9]+\.[0-9]+[.[0-9]+]*_([a-z]+[0-9]+)") + mtr_pat2 = re.compile(r"([A-z0-9]+)_[A-z]+_[A-z]+") + + zmays_pat = re.compile(r"[a-z]+:[A-z0-9]+:([A-z0-9]+):[0-9]+:[0-9]+:[0-9]+") + zmays_sub = {"mitochondrion": "Mt", "chloroplast": "Pt"} + if orgn == "medicago": + for mtr_pat in (mtr_pat1, mtr_pat2): + match = re.search(mtr_pat, name) + if match: + n = match.group(1) + n = n.replace("0", "") + name = re.sub(mtr_pat, n, name) + elif orgn == "maize": + match = re.search(zmays_pat, name) + if match: + n = match.group(1) + name = re.sub(zmays_pat, n, name) + if name in zmays_sub: + name = zmays_sub[name] + + return name + + +def fill(text, delimiter="", width=70): + """ + Wrap text with width per line + """ + texts = [] + for i in range(0, len(text), width): + t = delimiter.join(text[i : i + width]) + texts.append(t) + return "\n".join(texts) + + +def tile(lt, width=70, gap=1): + """ + Pretty print list of items. + """ + from more_itertools import grouper + + max_len = max(len(x) for x in lt) + gap + items_per_line = max(width // max_len, 1) + lt = [x.rjust(max_len) for x in lt] + g = list(grouper(lt, items_per_line, fillvalue="")) + + return "\n".join("".join(x) for x in g) + + +def uniqify(L): + """ + Uniqify a list, maintains order (the first occurrence will be kept). 
+ """ + seen = set() + nL = [] + for a in L: + if a in seen: + continue + nL.append(a) + seen.add(a) + + return nL + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/jcvi/utils/console.py b/jcvi/utils/console.py new file mode 100644 index 00000000..77a52cf2 --- /dev/null +++ b/jcvi/utils/console.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# +# console.py +# utils +# +# Created by Haibao Tang on 01/09/21 +# Copyright © 2021 Haibao Tang. All rights reserved. +# + +""" +We create a singleton console instance at the module level or as an attribute +of your top-level object. +""" + +from rich.console import Console + +console = Console() +printf = console.print diff --git a/jcvi/utils/data/Airswing.ttf b/jcvi/utils/data/Airswing.ttf new file mode 100755 index 0000000000000000000000000000000000000000..bb7d3dc0eb7858cfa7fd766e7ac0be35e2a64f80 GIT binary patch literal 16912 zcmeHud7K!jK>W;_^d9K*Wfmf`FianW^9RR8`OP1l0HZ==;1sUd`00 zTXm~$opaB*=X~$ET_lJo0AeFrdirT6+MfTemq_|Ct`47kT5;yJ?>XmABJoa~pMBY}|gylKW2n2$Ab$FyPuUx?$t1yJw<(0Ul`97F=+T zEIb3}3viy7II+h*WV;#XGD&{#yU!&z6r8><&&um<*2Qm z5{o|ZfB&5jvp*(;i2cnCl_G{e5ewJX^NAV`i;xI&(dalqX3(tk?>bRy^=odh~W zi}l~Y2A6&Yd2}-9d|ILZnikM0peN92`cYa)r-Lq{GeA$IGxcB5Npu$I zVj9t(rX{orbSbR{T}Es4U(#|~3wkoG*Po&lbT;TIv;p)~I!FHnokr(^o=zL}C+Q5@ z1bQZI20e?m=ugl}+6p>C=Yg)GZTjQ1nznEmd+D2D_ zZl||{?x1(*Kc)-lYS5i@js7Eg3tbDki{1%(A-zlgAzei82ECZx1G<~utN(!Z(EC6y zq3iTV=u&z==wA8&=wFq`jq|vy^}r-`Y!q`=sxWejj~^z5@DTx&!nh^i_R~K1yE$ z{TSV;-%Hoi0ni)hF3^wDLH!=Ok-hLfBpm)<#px>ll>R+R8(XT+iO-J>w(joda=y&KD(0k}N`WIpx>vz=#Ye7 z1^ofNrr$|f<7(kKz}9t`ptAy1VDc+RM2NcQ2!kLMub3r zD>VJH^sI=0{!T29`rfUsDGNC7fqml6mifOM6>=WdQr51z9bU*C+SZj z1^Ti`gT5j%`X}hmA`AK#k<)LYSH%p_*F-Dm>!MA+k^U;$L63A z?QREda@V=ssJ6R3_;Y(aHXB!>;P6 zsPq_>c;5RsKdE%uFdTlnv5#)l? 
z>r}j|;`Kd`$~ychb#-<-S96~7P z=lz88g zYd*i4MI~>zfg6u-!R@yy@w__QO>TF<9|&OgJM2DJG=N*sz5YPd7YNQqrBUVgI~>PX z;`!iWL%3H7del&( z8qzL6Wyq{_yrB}$YgZ|xC?Vc(pVJxi#KS0fLrO5*6bwhMnyPd;j<3Y?kq`3oVctUF zunW&OYr$~1F06#K=8zWs;NPpn^U<5V z@S9ZP`Pcz|KEj(8iMU*0B^eDzBEE=MizdR+`U6vyZs+lpcpe)gY3w3wA@(MmF#vp= z9PrWvq_x^Y>^S+SHyd!hgPg#TZs11+xUmj+(GM@A0w0Ee3pL=uC~#ms@LwZvUwqoT zl0bQBpt~$kT^?wz6)3J9=&ci|tq8PM21@G!I_m){>jfH{0~BV%5iSbvl<=1TuGxzS zr{h`z$ZG?a^WB2@mh9#$|HijP4sLCic^5E4PB4zB0MLGSWC%932_yLpwfh2an+PeIsb$ zJgWs9jZ4LG+~n_A0R1&zs(5!@A+uN4~Q#QpfMcM$ws4(lj$V*9N|t5bu2k;qmo15_@F zleXN2CS50O*|5KI>dM(_?_K2556vFwQj==7OHB`pJrhS>f8BQTUsny=TJdgGe@(tj zPGRm-pmmkBC>ARv?~HsVtvEzSdnR2U^3sgFoTv|Vw4>$8*Yo>DaeSa)ZDbuMc_`NM zP-GtZMQB#uYPDgX{xj%;_)5=c9jp7|-WaGNM$fl>G&-Vv`H`T(Y@77LEA5ipyz}ktcD+@EKyk)C43Y{b{CN|nW4V`q-`br9{REkQIrG<`suBAx}hk%$^ zE1{fdI-ck0+Q6A5K@$~fB`B~2BIN+T5m*;`yE|uxB&t)jWGO=(rYJpGLr`hlG_Aq$ z!z|Rj6IWcew1Mra0a7hwY2D&=!?@bp`a(s|8Y!2YxPHENUMAYPN=U0ZqnY!xjfd8F zilJd-;~?t{{de-HJRADxz!*!3&dy4K+FDb|NU*InmrW%t-Q=>i=34z=cvV$oifawB zNo94`irV>)l_gR+w#e!*lSgZHcg0f84G3rGZsPOp1BOk0p;n2J^P6E{RV9k~bI<|Y z5iDa~e^TBkuY^9cbZ%va$mJ^e<|vP`mS$6n0lx(6hf*CXluFGiRV{J2$dVSL@L|){ zIw@$bR?wWO(`lkWq4JA;Yboy6@Fl1c}n~c zS0**5U_>=w93}Om@_M-pe6XRC73p*(Lyb`hd%*8;!aJivIECc*2Yfz%ASI%cMtW_* zHRd|z8ksVP26HS1|DmBm%%Yu`VxOk zPW+|MQwplZPBq9QZlmc91hmEeVk7G)iZQp^_8I6XL#q&Hve`-wj?X4%(uw-%J}~ol zd|<683T10W$+9Sa)Bnka|Lcy1H8uLy_Q9dHw!s0}<#W}u)4NwlO+yiv7!$8o?P~qX zj#zrq*C|Cch`tU^T$X|?a}kRI@-vVnNGmHXA{45G75EFA98?37-rVVO2K+*{^kRg} zDU}q=;bbXArckXUAw!bm#!Hu)zt!UVy0J02xytCVr=>5YFw{vx2K&cCk)H)$Nu!Tb zsY+T&1emczEHsU?dJ(G{4B~hVUlwCkmlmT`EoZ4W;Vhb0!)178zla(G0DkT>)(Gyp zPq-q3+r>u=-z+DqJYSR=Kc~R95BZiev4yF-iU|2p)|*sbRO|C z$ygy0>Jrlh|YEM9Jfo$yW#r?J$U`?l0;)XT3Rpc9xEKociMG{gF z{>n3je~N%z@K@Isd>H6H40NiBhpA_+x=cgrRblqgNQ-b9U;w_We++sV+9p1H`4U7I z+ur1{+iWv~s-=%~taG)&S0f4g5b#-=7FS{-lc{9W@uqMv-Bj;$I#T|0Jqe%H`7m!_ zGVbx(5sJZH%T9)^$LQ@`I%xtgIm);!+AN}>C}5ch(7J4oYCBSm#kCVn3-Y1V_Q;ka z>jNu_rCgVD-Tj+w%HClO!0fOz`{E^0r`P9zY&K(abSYecXHCgx|g0vP6RLCI3Tn1&|H?ZN_+audVH 
zP_R@ChM3RIk?pC*!a5QQ@z$`z9Ukr@6KK&YkM;c42ln%-cX55wLF#*Ir;m+2Jgu%juM9 zAiYDPx71ZOlo5$KWWm}fukKh?ZUX;UONlgWO*NHPcdRZqq<3hVknInu%M)AuTEQ0- zK$zWGKIsg3FmYJ8dxV;vIXJ#aw=os*dkXXO$X~CM2r@ z((#`s#U?lG_gnCAh$u*0$o?B81o>d!|>atz_Dno-Vu$)B9Uk^8&O12>2kz@NT80Ju#S zKsdBUDMb1QA_Zm5_+u%tX`Trnu!M@Wu9u{UJP)(gm6fE>v`VDT&bdUN-{DmhuOLiO zWm)nH+zeYYrl-TE+$^&t>!gE}NUI$nni0eU&b%eeBox)`y-`d;STLu( z$1vpXUU9b-swR$2!>0~7(XSgh-sePDvFj~UQLn#DR zB?GgD?ppPy#S^v4<&1bs>evG&xNbNU^b~zsT~cuF0gy><1HR@g z9sAjExS|nO15}T}83)`+KWj0@Tv;Q;~>rBKz4?BdWtz;m)th?Eh7kU z7J+@}H{?Ka{Jp}Z^-GEve`wEQctfMFn!wLB(_&Q8j8TYC4Uy`o<|sQEW+zAO8)KA6 z!=uBe!|P9Ds9%&#uOE`HHiJ~Lq=w37kV-d{S4)C@I)ap(8h@o9s&_~FrZ`2MTD(W< zV%>Uy4T|75WnhCe<|n?U#&9sz81p3&r&3sgPg$1z=GXwRqP7c-iAb;6Z%K>CJPX16 z=h2}?aFWv(aX%s(b96yI$b6c}U`1g4kttyC``wEf7dT>zcT3%3{AI=k(wT-B1}uQx z8GYV{KJSo|h_xqI8V!8Y9FK*Nd`dP4{a$JgvXqk(#ev}}w4ClWgKq|MOdN^MFN!p5 z!Qqs<*uQW{Z4q5SIvfs zu|5PBt$*yfrSs=4op|=S)9nJt<~b2vI&a=GQ;z7wTji@DM-wr=iN`C=O%6myE$meU zMTFNf$#6!@=<3~zW!0wGCvHfIYh1~b47-sjfj)m-&LA(|3WW0?m0<(n%r2MU z9c>jH5!KpKjrB1#;6u*HCMh15JzZ%F+voJnu|+%D5$bH2bh39grKU{G1piShE6Ind zt~m5hZPPWEdNZ^%tEW3(E{Jr>S@sD8xN@`Tn%Nue$`4kNYt)w)BIF@e=9M;rGU3~rJ<9DQ`lO}cqk3>Zv;%!-Xg|s3){`fFo zb&8RujN6?l2xo8B?aCAno^i2|&T6l3T77q0K=nL1dbJQEZPDh{-xTcu)eSE@J%{=M z&JiEPLPCP-+UIKcr9vLRCdgyOKaW-UJXX&0Sl!NJ1v-ya;ygWr{m}NGvF4D+FE;Wdy#-6(X)H_Bp=~!7GV0n7kUMaI9FLesFOKav591gvn&oDc zn~S9~qkEWrxgYrvV?q@TIXV4`?!}l$XQvWEoOiv1Q>{SCLaZ%vsuiOfdDzNa&chxS z%O(@A-Ba1t@rT3|o?ctcyiU)%p2hugfJ)%48G8x5X2D+;cb-3=wc^1bg zuJL&SM?a3`$B}@~;ihQuAO@WiodDscPYvzULM(DAWd@R23>^lN7?0DEtxGR6k)xSw zHIU=9Wb3GjjYg5on@Y4wxzxn6~wAmfEbOF9CphJVz7c3TfqS$bU+YVwNK)`=~*um4 z2##Ngnu5y^T!!E>1eYPW48dgxE<G&zyzdjueKQon&KH0RmG`L7$C$O9bdu(lP zoU`?DbYQC^9D~+*J??9?u4SflU8CbcQ5`i#3LR@BM)ujR_R8xfHH$?mQ?oph#K>5a zm28&8Ra6Pt~TkT$%%!cJ=%M0;umUWhA*3HXy`aanRJJ>`Dt*= z7RG5S=3Ibt8#uQy(}~ zCN+rL`KtesQaPJunAwiWODHcFL_WuOGjG9B6TnC9Lp%r8hW9GA&G)Z!cI8S%o=Eak zJ(OP{j&i$US+0=7OP|8N+gjVYzAy%OQs45W{kaVL8OG9Aa1wF)W7|mO~87 
zA%^9uF)RmZa)@C$BZk@F8YRdiq3OAhtLhqiOt{elc-8}W)&qFf19;X0c-8}W)&qFf z19;X0c-B*eXFY&tJ!rS53eS1~&w31aW<%5S9>-K5TdsDgb^33uqS@`V8eQP^7G19TR z)RfwaOpFM_Jeta@ko<;q+!_*gPpDJ#Z%t&ook)!beFZ*QE-frA>I`gIVq|!bH(9iI z^Rn|#te!2ISh~Lbl(uCdw^Yt;P7f|02)k{DKd;Su*I;0dw2XbLv5KRq8J`S4pMWkC zRb3{a%LH_pfG!izWdgcPK$i*VG67vCpvy#6mkH=H0bM2xT_*4f4t0z_I0W)Ei9?)$ zff@fa7=z8k9mo0k>|}}&$vI9kVj_*{PmeYExz=(z(pQYMx%EpEGjA>}IxcO#QJhuc zrQ`V%*C(ay)GUEPak8Ow7P<=dU7I(**|hH>c+6n0#_=MUXXD^`1Uvr>&eIT(=Xczn zannfSFw!`TG!7$;!${*W(m0GX4kL}jNaHZlc-2VbFw!`TG+s5*IE*xI7%4Z|fu0m4oFNc9B)x+93y*!yKnMRii+th!M=;FVm{|YmXM8u}{Q$4rZ@i z;u{YO$)OEz!y-Nwi^pD&Hc$K{S*Q7mSiNsB=cw|m)Wmf>CPhtNJ#Fea2u(8&L4GfH zW&rQxoHtKhl4Ub#CifaC^DclJ@hCf1EB6x$erwE1Itw!&EnS#A_5$8Bl`l7BMZ1g< zs`0iSi7I|6PeeH0RwpFYxGPNTlr|P^4Pi|V5EjZDA3|O7H3+=y_=RL<#5wR{98?!| zk?b_H_fzck;m27fZVJORQoz74eAR~t7V6?&=||oX%j76A zV^NFqq}g%aDFnlr@JRrZ4(1n3G?@#|oXR&4jMRlrw36XoG5+B0#W4p1n8mwI-A(1g z!J2`)*nOq2v*EDb;`n_t^IZoH&F}-eVF(&Zb1!mM&o9ydHOPB+F`hx{l%cBhns3lZ zZ!4w9P%L(Gz({Wo_&N?}loF@FXL{0yrS4_(j1)JR753AO6!-k4&yDqC{V>uuX7V_d z>uxdai+_GySH*}tXXcrh13t^tF&1UOSdshCurX$c)q54xTb@0ZNBY_ZV>>*R)xJ+o zK(!|&T@!m#V$`nKR2l0C+;!FDHlQBjWo;h&!zM3b{3x5dhY>ydQhU+xXJPoWF#K5< z{wxfC7KT3y!=HuW&%*F$VfeFf)t`kiAj9xy;i^9i!=Hr>e>RmF01PVOhvDo$TQC&( z=(GH|Z<=b-sa(E{Ur^5KEo1Q^5-np@z=S;MWZr_oaS=_MLAoo8^<`r?lv9Hu-xZZs zwUo1cPEq1~cArz)C7msG#YM@uv=1^Ac@Qe5UHp?mRlv9Zbcf^nNww&9+ySY{PGOH|8TD;vV+i=h4tg@Zj#hq5!fv>rrhUE#H8_o8Km#i}Qlsc?3_>@*yW$-Dj zv&!I8`m9w3pVIwS8GOpkvA6;MxWF-jQo};)_SV zFM)3o@Leg*Mc!{0zG2*h;||=Dz~70uV*}dQW?UP^cbcQ9xftggaPL2Ak-uU1$8G-a z-1Cp$#NSrjhJn-zI@9>tq742QqCdLv?J3tx{c$~ihjILykCS%M=#J4{8!j5%m^kOs z#KH@9Ty(+aT^n|88QqoWF7(dby6eI{TX$?uBu>N!@!PiU7(I1#^TpdX>^lCfE? zF@y!4xG{eT1f zrH%|=@qhB2Pd{e5gZQ>m7B+l8eS%?^C*oV_#rQ^gDeQAOzKLFeZ=p}cH_)f!+vhX! 
h&GSlp>%0nIHm#wxw&iCmT`HY;x?}#Sg;J?Z{{zK84!8gS literal 0 HcmV?d00001 diff --git a/jcvi/utils/data/Collegia.ttf b/jcvi/utils/data/Collegia.ttf new file mode 100755 index 0000000000000000000000000000000000000000..3245a69685dd09449c49a6c3c3c017a56627f6b7 GIT binary patch literal 103940 zcmeFa2fSTXwdg(P?!EWk&OW{O^pH+M2?UZ*6G|Gr2Sq?px}ty-E7w91MHGP$QQ)c+ zFDfdaC<1z=*vnNcSI&O_vDV(%M{=*e?|b=vzxVy#+sT+~?zPvNW%N1bSnDXQl!{Q; zRB6S*2Oav#8P3;~zU~&KEGMj4cj(H%H$Q!|QjU2_>F_~^4vyS$@^3#z{dVr!bokN( zHf(X6vQsJN!AjX*I{uW+r_UTzo2yg@zdH^&{@k-mwzc|FrTYKC^|>vlZ#~7a@k5hJ z_5O`E&f0qN`CD#3`2!o38VD)9;lXVuoU+rF>sYCje@Lm+&TS`dKH+Ei3y$U5KJK5` z#tFyotqb^m0^e)fPC0w$lMhw88LNv4jGc7inWsk1jvS!$<*#w>n3GRCe)II!n`%m5 z`Xt|Tr)=JNy7emG;oN^Y_m@uHe9DQh)z1B_(pS-^{e7pOcGlTv)W3b3((iqq>mND& z%o9%^n*Q>OjQ@15f2R^$0sfnh^B)_W_eYhs(vMPKNx!hvI9_q2Zrm|*t@RUDaHQPI zB4_z? zrfNCg?wnEn@)q&}vvS3~?ZWC`=V#5i8#`#H*^f5vaua_>pB<5LNKujuFa z-aIn)n7&gvEg4m|c$C-jl=A6ctB`()_HR>BeW2PbV`e`6mX&HAeJUMP`{{S9oW2*_ z97$IF^Xe;%|6d!g=&RK*^u+Pu_o`8Sk1FUKb3Tmm zr0DAretVyaS&mQ{{T}+g3j7_Rvif}5jj9bC^UOc1XTaUb{PwV_TCB`(0vzW#ehb_N z)e`+-wZ!uK#_uha#vd(C<`Sd7g34Gfpv}h`ZwUUB(C+od9eNx00C4FMmfxtX<#V+A zXEk75$G9$KJOKDNK;O}r(cgrLegRDYEfV^lz>MlO==3Y|dh4UA%zXRwCE)0TjOP(` z2)MpjFJ}%1s1@MpOua&B@b@(5y!y_@pLMwLC-Cot=5d1_8n=g73A}3- zTuNQ&`Ce%7Zs2P3sJ^5s>On;4%Z(qZuQZ;~|H}M7Nt?6bV|>4l^RwY<)nj>&I@Ew` zp@H!5r<8}I@bi@ANOhq86=Pb>e3UxevIAVc(fGXO`R28kHNI=9sRG~QGIvXo`NRO3 zgUqoT9=uKkke{F>!aR2~N3M~hfXIM)t?@_THR@&KKJK-yr>!O)1Q!NA1V_SGts}HH zWK8&6WJ~zFd8Dl>G>(bP2;a7jLObbGWKiS)%K4XSt(PvU&c<(5k5TrjF6w=%yYU<3vxjm(^->P1zQ(WN$bQOU zH9$F{1{=RpqiTq9Obs`Fsm9d^<%AlgoK$0tU!cjQoL1wMbJRrR=W4E+q(puw=d0<) z&(s1nhjO8s3!g7i^C%aq`IP&q1&vqL619+Wsaiz2Of7D_qLv%w3bh~gm1;@jWwlBz zrQBaFYy4ELR?8{Zs1=k4sFjU>R|l$9ln1H(DGyew8$VHN)f&n})B%kjt3%a+lZOSwTE(s)sAREJU?q1I6zsSazrppH_9Qy#6>H-4m!Q5z_aRU0Wc zsUsTCtIg_2%Hxdkcy$zd=LB^$<%#MT$}Q^H#t+n1wTW_@+T3_fZCA%p?oh{5o}^A_ zd|#ccPNY0VZJ|6>ZEbu{ou;-?o^F(9sO^ns)tPDsa#xv?{bu#5S>J-Xz)v1lA z)p_bP%AM+T%JbD3jqj?rs52?ws?KVBM_r)KrhJ=GUZ~D#d|O?l&ZWFqok#h0wX^Y* 
zdWSln@)GqH%6F=_Hom3ar7obnRK2b7O?8>Nkn(bM5#<%?;>I`BmFn%3R~hBi>K%SoUsuK65W%70NeP<~9^ z*!ZgYxVnk*6YA#1SJWrf2PkhO+)ws1H+qN`0j9h`LjKl=9Q+7Rt}4 ze`!3d?ouD4{H*$T;~{mo`UK_Y)F&xFuWoI8S>2;Yl~}>Jjw?%70b&Hg=(@ zrTnV8kMe8k{>J_4QMHTmG4%lD<7#)~KJ|q9BIVcBgOvZKzSOu^J&CouSA9c0)cAt> zrh1t2Tj~+Yr|@d-QQtPo?_dq@QQuWxZG2umt-eP2jCz#vS@l@sbLxBQamw$jCmMIF z=hW9Jf1v)2@`vil#%J*pzd`vU^-anb)VCUUsTYm%CG`~bAFFRSKBIo3zC-!%>bs3k ztDmZ;DPLC4P`;v`ZQQ9|Ro|n0O?{v8XX?4er_|5Y4=8`3e%QD}{Zc(o`75LRwfa%x zcJ&+e0_AVjisD4WMx_Y_sNxY<2DF3WprThyv z^b_i@M){xWXVm|ue%|=FdPDt!az_1y-^^K!7*tS+a;NmJ4?S2(0&J~A3^wrDxO=i{_9LjF7L z7N^5ucN%xeZSC>62qYrZc48d+w9*@Jo2RAyHn@tLK!)~$L+%Bt|rezo=L8LH>qroJC54YRlkpbGJ6Y0tB6Eg6VF;Zki zkjO~EhsSN;gRwAd2b|UZz&7AaP+=5Kr`zdr%73@h>fz|71(^dE$dti{)5&?-pdW`p zJlGn%F$D18_X|Gw!C~OTZZL_f47!x4 z4Y$+dVI-(HW+3>W6{c*nx!sIgcpEljN+8GO=AYotOE=uYX{U=>(-AC?ZWk&*_|3qF;LezoC`ynIcQ`yoU*JP#$<0om z;KS>J_6|qb=W_)FA2LYp03RXH-NFZha|;s-KAaA}*XCnjG7Zp7VSM)BL-50h`8Yft zuiFb!41D-J9=Fdbcgb9YZ%ljyj5ES9v+%(Lz(?4?haHjddLgO4`frQ zh|KbFyMrx!2v_hi_=BdvM>OhIXfQ;=PmfNgSGd&!&D}0=EW`JDnTLmOB2l2pE1kiC z9uH_1{CYih19!p#HW!MLdqrAAOwk=qugB^3KsdWIK;iJAK)_Tq5b%UVie21Ai&UbZ zn=9c8{xNwW3ZsWvg8_ShfqCg$IyP94wgn4<4@P5t+)kg*@AWfgk2mPG2faQ|Kq$%_ zU>R-`e0V$|P8nj&1mt53fWAC#6_0xaA1;XHhs-V?ly&+Ty)YibxBKAhpcg!&hJ1F~ z^m}|>pBF~-2Eh#2^?9K`_!kyH^%%?uFS=cR@WIS{UYFYo`#D@8E(Ix0c*GSC1-%i$ zhoFMtb8FlN1-#rT!s?(8c+rL!yPct+JxG&29|LhSTjsz_gi}DKA=6TokKZ5g1q3O+ zkl!An8nk)6G6z$RL|6NK5l$Fn^UxCHvqMB5|w?WIcOpP8-b{R&+YZOFznul z-{*9Od>(kjor;8gF=0RtR>tr1VYrgu#_NYgz`DmtAEFacr5;x#;)sBGKOY1_3O*(R zP+-EtVALps!B8L+2n2nBXwVT3g!~bk+yxO0zLBf_{GT_Dsdx=K$Iq8Gm z?0%o!>yF2qG58@E48W@X0F3DY0m3Pe03J4QAw>6v!;xSFr1(SekTVtx2V-^`4+gnQ z_(t?uAduulkOml+ai_y>{uuD7d@%?S*~7i=dD1RTzAILthPK6pJC zc7mp8FvLH|P&NdW3m>F8c5UeDgn0R#LBA@MLkbP% zkND#;7m^miDtJRE6I$_yLQZES67eO%AwMz`j<~pu-a=tmG8|5Zf`Z>@$n9gwemDH% z@H^rVF~E!he&HWx7GW?UcR1+wBO`8iEXZ7azH%xN%KA|;0dPcvPM5z3ZUSLM2H_02 z>BATFJ3>K6z?(|BQs|aQBpgOe8BzNKI-GDo^W%%f;*kVM2}M&eS1J;ZBpqNU67dJa 
zfp7r!bRv9NP8ehh(i|Trk|jTe1FCG`!|jg-5^=#t)QRc~oA?L_KBCd6KN;qF8HfuU z$099!q(UJBA7Qs2c@=yh;w^l5g8rC^k7(Ez40|FVIW72z2YqfdPdS|o=UVuHT;QYR zKop?82y?(40Ux0zKGNU=agRnLk$^wKh>Q=4L^K=}2OB{HBj6(uO)}GICzy*zJ>FzE;9)|wTsm3`;t7Pn5hHfFgEc4Uh#?UC3wdHulwr^r2{}Xle9oOk zGUM?Wq8nvIq5y(^ARzujm`W%JQbWOHG8Iq9qidr#GQ6>7i64pLNqsN_aKyi$>znL>Oc2Nu^SuTmmKx zhY|^QC>~BH60t-q97!e$@mK_^q~qR5Gzb@jqi9QaCI$hSQ8b(mhmrSCDjtj`0*R8oD-F($coT7yVb~Rmx+1}H$x{ULsZ=5v z2`3T>rp`z70xpcp1>5j}E280SHkZn$Qt4Q_l<^i**<{g`NF-CqFnp81*@DZG$y$<+ z=w0S0AGbRRQ$^f~uo~!3Dl~dH6D{UFXsQf;OelpiNyNDZ1CYsN!o^fFnuKuBKCde?At0 zN5cKpVzN{45sQ1#4@s{l-0zM=xiT1Kq%nUw84V_)?nK-j4OJ`NN+96RWK!v9G?mU6 z^N)yLh=|pV2`cCn-I321GQ~_Lm&jIgzDlN$uDH|Okd7o%v2-jZb}60iOf#!!ILf#T zEPCB(m@4W?Mb*ey8hrR7`8YH{MsgUEa3+w9rxRS0_WAPpe5{g5$FYx@tS^#^6|>n? zHWiO&GqrRo4tDeDKr9h~C1VMXJLWDYcRXD0@O7Xea=C0a9)ks# zJrjut9-z1<3CkcOd}5_iIbUJSseHZYujNbG4tJItawP^g-U z1+vlTM0ZDaAdY90O#A)0Y|iJ8k9p#WR6c@zV5Gr9HW|qzJei~?8SU=&cLzejLLr|| zCUS*>$UjO!Fd(jo^q5RY5wBEg#g1aJoGtZ~16{>xq00lya7PvtrC?9Lzfc(9L?IbV z#?pBMi+->CnDpn8YR+^)dA-3zIn~(#!^g{5$ygzrN#!zJllS|}<#MvCm`@dxiDJ>8 z$R(?VVz!t~rHh50JUTHJt>i;VcqtW1WxT$mw8}NQ%k@%^w^%HdOQ~E@iPcOKq8Tkx3{l8P_K8FJ4d_2Bb~h+ z!~R;WR`1A`YlT{&Al9T-o33?qbmUUGRIw^eWP|={p<2iVYdN)gOYV7Pl~7fE^_bwU z7%!HC!9rlDQm&LxH|0Wqp^(YNbG`L!r8C)GjTNW6JLB=eYCc-eX4g!O)feTFj#4$k zMC;*5epN7EtaPQ)g>)fRigk6AGPOdWRtc0cbEYCw@npQGr>m=2=!qCu2?`UuDKu6!4f#_uKaQ9@OtE;=GyV%hsd?OmTyL(YLAK~(9r!-NB1m(w4 zq^l_V%A^noSa-9-ET0_TsObve0F09yEmQ<%fB2CgFmV-DVmOx=s0c!bflFtyxqJam ztw7FtCqwG(>mL{#8Xg%P8=sh*nw~Rv-uwj%7cJgz$8X->~tBBab@zm}56>R-d`+vv+^)^Y?t=-uv#~^}y~gKKP|CKlJb;|N51$e(ll6 z9)IHN|Mui@-#Gq+6Sr*L_RViSwSC98PdZtB=etjz^31c}`~Gu3IQ6t2o_@xeXFdO; zvtM}eoO92!{`e>VzVrOIy!C>&U3k$?Uw-A)i~shHOO(3wvdgcy^0lA+{1?Cc)vtf^ z+u!|O{o#+V|LLl$-~FCz-uu35ue<*J*`#;qGwJ1R z^ztaZe1BFi(Ne0EgDa$$={NNfuB4m2oM7~#8V&ZORpT#>-#32U_(kJYcG6YjutvRc zXJby|!;R}2r!_ieexlSHUjtUZG4e+Gjr2?ErL89MUt0Up5ihNJY2Hf*zj)V+_rCD- z3(vjqg#7ZtXJ5GCg)?3_?MJ`-VV1ALI>*WR)`zU3y*}E-g+TkmygfS5{ywWV)8fYF zxz{8N0u?vZsA6 
zd$ONkFZN9K41dmU;7{0dy^B5Y&$0t}H~WIWV8`!Q>euZ4-NdfzAK3Z(Jv)k@XMgr& zcKH6tUf(_J1%H%%^n2NZy^sCY``K+hoxRov*=hX}d#ew#v-${osW-Ee`Ze}ZA7#(< zG4>aMJie11@&C)0|CcZScjQY%LG`WPrQ8Q^_?*`7*|d+0_Dc5Z%-@RyKc2D)hjT`6Cj!oL(7B0VXBQ5M`w!qOiHTVV>+51+P z?zYws-mrGV-4`#--o0q)M*N}Dvb!HXc*ETfFU^)WZsaP*o{=)*w{1@~$LwUxjvjt@ znXMeU;qFD*yVb@kuaq_q-B2yxeeso7X0K!(&F{O^!*4pF)tgT&YMuZSv;iiT?a~(? zOk*6X<*b~jmaApPxp66dyZcrjx?ve3EpO}>ocE%8G#1*@OA?=ze^eggFhs6Zk?aD| zsIK0J$i*Cc(-}KiSF^7{acR=4W}kY9hNnr$s_kir-W;^$?4@fHH{_C?f~F!b2;HT~ zLrarK`XGAhgJh*yBoA2MXDu-zr#cwa4MGIK6{3Ns0QpYc`aE@xHjY+~?&oE4%Qnar z*#~IXv0&qFnsX?tbZuNYG&~wEhdV~Y)rI=JnaBS6S8M5SFDm!%GsA7ZvP9k$W*D zUevr71@2Y;qXo}i_K+gejFs{0=9ukInR5#>y7->)+hzQ0SufbwoRCXSxMV`RRN#3= z;e<#+s7_!KSOKgDwgTq?mjO2dw*mJ9j{*Lpnai_OUI8pe4-H4HRqI&isKZ*RkB?1^ zjwIqS$Gz5*t=65^=~91Rx!6BoIe2E9z9};tedCOQQn`PiR2&3Vf-C(!eSqa-WE3XU z%ez#PIavn{Tv@>tEM?-#3a+f+$_lQm;K~ZFtl-KDuB_n73a+f+$_lQm;L6G8^>6N}*8s%0x#f z!cl+k$ZBz5uuwVbXgvS)V4-B-%hC9qWr^iOU(aZN_^O)l(*X5ZF2X+00`Z2Xnc5JS%I;!xNch@&{+*&`TzWV6GTW3C8xV7-{&+|FznxmSBUtDty z!$S#lHXhP%B}czYAHEx>G-eFB6LCg@gmxupSAupWXjg)EC1_WIb|q+6f_5cnSAupW zXjg)Er55c<(5__AuEYdHZscfTLNSh}{D^{ysS?s4%`IX4(tLR2mA=@WV||F z?;KBzj!aIBk97`c{=x&n=t1HLOPA^?(k8kf7?#O1+^|cc$bL`~O{?>Aq<+E^P zo{ZZuHP5noH(JJ|X-3U8Xgi2X7q;}F|C?IU2HtHgyxYLL4ZPdHyA8bCz`G5++rYaG zyxYLL4ZPdHyA8bCOoHD>4)LP^xJp9R3~ZBuZ8ES;2DZt-HW}C^1KVU^n+$A|fo(F% zE86WDDlY>DrX7qq0n~vBU^#F&umw05xD>bnxD~h$coZ;nL^nkWvZiev)j0yog56p zD2&qk6k3Mk0%55HilGarv=qc_gXSXVSr9)>rglpo44XD#7#1AHCJZz9VQj)MHenc> zFpNzY#wHA76Na$~!`OsjY{IZP-)E`30+{nvYS^lzoj+?V}QAq(^z!b0& z*Z^z;b^@0JHvzW;yMV_5bG8XJPP;?H_DW|{dnPeBPKP$WkB*q%CP_|mc1}!2Cpwdk zsKM8xBjaNl{x*((Tt(!nuxkH8B2kz*Y5zjFxT3;#O}v?uUvnmTPCuvd^)g}rvEplLm8FjlR_STz`{2BXv< zb`8d=!B{mIs|I7$V5}O9RfDl=Fjfu5s)1@aXOC8zZO-;`jHZ!v;hzX}uA6kOL+5&n z&UNTqht75AT!+qe=v;@+b?97&&UNTqht75AT!+qeIv*M?SL>aQYDLWBWL*f2l{6{q zOe81JRxxM7I@VNIlM_*{=_DaRsB1ardP`>Y8Qz-I?(vi%zWc9JabQmsqg2XV8D(Atn8pp8jgZ 
z>5r9WK9ZOy4di0E`WnMd;kT&IKv%CibvMon-mht&#EvHJg5o}s=|Y+@R*|W43(Dw@hvU-f*%=6r&zK{w7^O0cI;Sp?a_T7|KSfm-uKm?ykd@}OTSAm!8Z@X z@|a>{2HYtgzE?2oo#iq4<%Hifz~Y~J5h`)ITQ!nlo9E=INb^@&YnZiQvj&-x&Q53N zq;ovkIhmYHIw#|=PA#w2mQNmeNb@|j$`p^4!AAWfED(_m~{$1$F*k0^jT>|0#@x6~zI>L-D}cz1dLA~YPE3j~)6^6ft?!QII{L~T z$=Jr)nC{J&`|}y3S2M8(bYADdaka_OvXt^P`9iqL4Jd}~q}fRnz<6S=9+`~SU%GfQ zvQ&(N`3vsQa?U9k=uXM zhgrVG{Jh{xaOFnowAF#&VPH`Mdgz{vb!#kquQRwMW?A;e%Lj#qYGam;{G8F6$e@L% ztNx&0vD}6Xmen6kk0eh+73L?5D~;x<2k^^%eE0EP*5Yx#%Yvl3hku$&*!G&X?bDb` z`-o|SBG{ma$;>V)DWD6O0#*VWfNj7|;Bw$5;C5ga@Hij_P&goMqB#wk)1WyGn$w^; z4Vu%SISrcApg9ei)1WyGn$w^;-9mF3G^Y(T+d=cBvD!-}oNOhMid%8U9ZAy@pB(F~ zYuC1O^_=y=pgPGEK59-7K>7h@m`AD!QPe3o5#xq6;dzprQ*Zx}c&9D!QPe z3nGYd=0SJEH4=KpK!+^El5p_o_(T-Tfph2_Zz_$TW?d97hrck^SDHF__3E`FQwLmk zYj^U*nD(x&7c0Z()VuoDEUp+7?jiGX(x1Y!qa$;zzSeJ zuoXBDxD2=vxDB`;cnoN&4ktV4WJF(?&- zQZXnMgHkal6@yYSgHnm+2xXGySVcLpR&C1280*$q^b)V%@ceHXT^a1Hx<|_0b5|YL zUmcvBSiRrYvEv5kdvfQKcZm#5kJ^s%tjZ5m&Q5PyvSzUVJqJ&(q6_p=1tNZ(SY3p` z&WCriG(&%5P(|-b(8k75EU#e{s7s(kjIW#*iIVH09HX2QzfA%r@>_BbP4?pSX0XTz zD#qbX=}vByP=xpUeZCJerv7bdKt;$C7c|1-3u@ zc4yL6zURDiKVn_qpD*@qJY8QtbC$L}!giYQ=>YooI_6nYzuL{hcHh~2q?!R&_RKdR zJQ$e8gLyfTH-o4NW4_`h8_)%hfhAFecj}j>~O+^GDiP2Aj_Oad)=@9o&q`Wg)j)O&KQ`lKwbk+w}^h_;E zqz4C5%Zj=y)_ZxdP+ULSf5C|zwR~u2ZnAHHe1b_m?+ojW?!|N0op`Prg<|1(<3H)| z;!~znr@mfdyw#Q$RmF>{nu6e_k_NhgXSp+E#MF(Lx)D=1 zV(Lar-H53hF?A!RZp74$n7R>DalA}_uET7tgVs7)t#!~^2d#C`S_iFl&{_wrbacA_Kw!H%+aCv|99b8U&w|0GDZWSwA{TlPljfzqfnrl-%%=1IDK2 z%-ghe!PEs7TO!*#l8zpmoU2z~u=%aYa|<6RP0!nW;(p6_Y}y(Z%Mxr^f-Ot1WeK({!ImZ1vIJX}V9OG0 zS%NJ~uw^s&{4AAMfIZIkq#4g>8Zi7oGbq(M{$nD#teEsp0>(Ab@eEG_i8#Zio zowDW9V&T}sk3IJA&AH;GTTZd8K5X5(!)E^Tv>iK6Lmw5*3(QXuW$(xEOk0Q!5Rzt1j;f_3t zD32n_qloe-qCAQyk0Q#Wi1H|+Jc=mKeDWxwJTqoJgGY`*Fe2krS|KMVro%8EMr>(9 ze`jjNrStkkUkoi>bMiTxwk?=?o3Yw=&+kt)kYze}{MqI+cO-B7zHmoAT!=ia2XrAU`&P_#r=?12ORlr7I zJ8(X51#mNP2k-#!1Rz#R!jr9)W^1dlw5e*ZA10ZzfleKXpxRJ{mp-FtJX|>rBvYdz|h{NIHQ#zQaO_uvil&;1* z;(i^U*58}Cg#@qulb<~As;ka}BcQ>U`l2OfaiacZ_geI;xjmx|hLL;AaXT=kUz#cD 
zm-G#1obkgm&X9XG(ze+W+|#caDU^+KUTJ3ZJ2~F3$L;Ycy@`Imw^=%U@??JRZ~R3c zgYII3Mqj;4buwU|xo+AAJ8-R?e%TaBVq;{#t?(2;FSX|vC&5Y$4Ov4&*3ghOG-M48 zSwlnC(2zAWWDN~jLqpcg&7u3KJPM#~#Awz~lk$b;X}TRWkBPX&W|^<7oWMSdvnG3v zK~r@FQC-9hP1O}dbp=sfL68|lbp=sfK~z@|)fGf_1yNl=R96tf22ou>cw4;Btm1(L ze5fq#mQB7a!L_WFODuvthFbh;PLsiYSzp1XF%F&!OmE%g&a8W_+kbE)9c(R#LP)slR= z(0NAg&=pf7eKVK-eI7uOw}4$^`5JG?*MaAE znf&(S&%fc|Wh3JUE*-4rGXve3ktq-XK|tFPN}g4RbJymR{jYxRoxQ`3hmowMH}!CR0*@`l6+Y-ss8T9D5Bul6jj z+8$`r@oDbN7`DM?@T%$Fd!I7n03mFA1j1;DA&x%(;?-nzY*1H0&g5uk@b2qR9ZPjy z<%`{w4D5IAl;wjnn|E9r8eY40H1M|35X&AQ6wOW1VoX5C=w7w0-{Jj)X*YXjzNQIvZnUU(q^{R zW|lUyw3(&NENy0KGfSIU+RV~smNv7rnWfDvZDwf`B(WF|?`gLP%Oqs~!tf&Z^6zJR zktt+JrWocZ{(UpNf&v$BvWUU#z+datgeXZpGPed3onwSZSnemANB!KuoD;uqvpss~ z_|(Er5(zpy6SJ8k{8$xYEsBH*!GZo<(DlY>T zWB*KooqPGu&2_wC8pNwrrfE=^2Kdr64ZKK{FI~VCuoBn+Yy)-zmjgEew*$L?$AQ*j zovi{l9P{^E1-7FXN3jb3e80eLdU>t=0tu-44aa^r=rC(I&wHd8Nv~GIt;M%4E$pmZ zUP?N9^ooHumaIBsa{gS)=;}rL|ITHNUzE^WF4(jqxl_V@$e{4~AK-HzGyNi77#7+O zNCdm>v9)7x;@!6P3ZyQfT~SKyD05R>+w$9vy@4Yx4V!olgXeGy&tZBGgXb`K4uj_~ zcn*W-FnA7w=P-B34yTq+$;h;f4?49Xj#s zk)eMOXONVduyKpT8zdD@T%guAfXqJ1IY~}RNDRUdBqKbA1hLg+NYZ3{h{5h3AAiTO zy{TAz+q-*8=~4aXWs%dZAF`fm{gOn%&2gT=I5DykyJ~MqwnCb+z7Nuo9L;cMO9xm) z!&+u(*k-DMp-|n#5oO9kR0m@fB8y`6GT!J|=id)jBBSGDHoZ78cK(S2iCE{Uws+Vp z^Ot7Kp?-UAA-~v=?br2d$aYa*MD!f)iUDb~l)_gt!{g$cwMJf&uIW-7Xy2YILgHaL zSQICKIxqn&2M!0e0Otai0yhA+0`~!r0!59OxTvInE?^2+ z32Xqi0Xu=qft!HafnC7kfULJW438jeY}cBJXC&rKkVcxvYvjzypZ#*DZ@=ELvvWgkiv`+Fp+sEpVNv~uNn6>* zm8@P_1Blr$DQPlNE0Ql1ZaGLku5Ag3L|%nwB@8e17?$IJW7gW zPWt6si-J;bQ*f_^K~6|YM&KSds66-H-~&xNb>>htABDLR2uGMQeY+I2x7_ zY>pEBW(QBdS}xlzvE7GQ@c%xl`leG}9!zFNUVLF9nHsPJ2U6+&FMW1C58y$5Q?Au6 z@aa_tNTj0OZ7bWjEs!P-+B?<%Krgj)jk&3ae!1Lc%PiF|?jK7oo5?RtnSFbRXRV=c zTou_ako-?`s_pFNu%InG3vJC#4J^5E#>^19$CL>VYShDW$is5TgW>bA9P+Rn@~|B8 zupIKR9P+Rn@~|B8upDY8<~&Q~6+nbl*uW2qgur?T9i&CgH$z73lu8WOp;xR~cx->Y zW8kz?r^9;6ZkadprFSly_Me-$MR!}zkXP;*cJ7DFEg^Gj?Ku5+{3$qYSvk2Th({R` zYmP{^v}iN3(`KN(9E`mj*EWlpJA;xzC=HWEPmTPm`k=Yn)Z!w1_EBq8ukpp+Vsg;= 
zDgRa9{cg46`(K}zDNgF2ElXynZ~0ucH1pKV`%M~(J-?cH`W0)`+5FR5lZ#@X%}b_y zwr2GYcnEfb4klV=^OIPvF=6lb_2uhR`o#sM>b#j@YjdxbR0PX1#*-B9B87~~uAY%W z4?!gZK@7N!x{Z2@x=CmZ4xRZqFWyLq^QMm2jlsh}8LCl)upMdJD_<@lteTj`s=AENFPCaTFX5L#TwplN-B`%r2;fAv; z+h){$_0GjVnwio1#d+Pe#TGq&%hUR7;lTuN3V+V>VKn!*7&-HjFwtzf2p+`pm7(UWpCZM#{Qcz$bVR8l}wTq8J_dfP+WYj<=n)a}K&?PnMy zI>kHx4bEl#*2Ir}f`EmUd=Gz zhX>Z^Urf$C{N1@@qw{7~tnCo~Wp9{0)8CV`5@az$&WdRD3?Mu4tw^HmeTvl(MJx$K z5`h)BN~}bFkIio9?}6htXFG^fghL@#HH1|azre7nA!rlAs)n#@A*^Z$s~W5F%4y-@1~i?*Ds z^+^lUA?>^Jtgmgo^2)8bJ5E_{WT||7ykpg6_ZavTeegBpPr_B-Gw~@LA^2=rWAXJ{ z5-5miX$PsBI5y%vDORQ_6H&5<$xEYInWk8ordXM#Sed3+nWk8ordXM#SecTR0z3db z0SKZ*z9rj8_%@18jloRjmbBy>wQ2VcR;dh$OU?aeUL0!Z%}Gh=5zdoUsga#vt%i;8 zS!eyHBifGfz2S^2m(JUno!dLoS^ZG9x;)i0re9c|e%q$w)-KoEXP!w6j?bAYH`he) zAaM!aCC|#AXTYl9OK?`DuN($%#rht{e24EYo6Sy3>3Ai2D1w0N29b|E` zueR4@uI4ClFG;|Zh|_#+c$knP9di#zO7cZFk z^Qw6^t+$?}^^QRJ+{!VX-S0W;k*!xQU%EB-$z`ddhf3v3?z!`-kBiPR*Q!q%HoT-A zcz4i5eDyN3`G3gEd3d=QH*`};1Kq$hunO1+YzNK1t(grV{N2vi_=o&$a6!|VFx)CK3JgX!Sw9JPrJH9+wFv6JXMOhBtDO#O zl@viE1LLQ|T01VW*={`j%0uV+BbSaQhO&B2-|(Bn;FQDf?x^46Cx2y+C}`dm|0n$( z_Psmwqoj$0X|X+ZX43NAGs2Pgv4`*|fC`ns{{QQzKH8>Gd_K!l01H4Xnk!=nlBj)C zH5uy24GEh`6}%rGl(-?G8xp!9Az9eKcHn&A3gBko4&VXc2|!5rW>>lbY7_icG5YJ= zBeeg(vojqzbIa9kI=yy$9Xn319jDih(`(1+wd3^KaeD1Iy>^^lJ5H}1r`L|t%j;at zXpbGIm-j;Bw@Ff#*``DuQl{Leko%_om83lR*9A-gD}fEbHee@kIdBtjJFp9Q9FXOo zk=SLJl06S05yso|1QID4@=(Y7r&p{VA1rs<-4iS3jecd>l7%Z)4fXde&ZgHac4sf{ ziX^)8fn;T%^Wf=E7SHaQ=q}`QJ%hdTwvWpn+QH8u9HBpfoCeie*@}|5sj2f#E7S5d z_tJ&4SJk5MBpFN;uDCs|7=*4iy$mybqppuw!K!Kne*cG~mVudp74hs6{gn0e(oT#>Sx z_VS>a^=>G7C@Qwft0la=be; zGIP(zCu)+Mr&)JGG4SP4i&}KrYi@gmL)!kLJk3W7hFLCGt)sDImT>W?KIpp7UFqL) zR^U9#2SdX%Tl5D;#CfDYA6I{a{-VThAc#0>d#yqCdSKw6JFIPA-`otfGa^|J@rayJ zlisB!550JGbD*8A;aV15blAF!uYQMX<)O)od>fWKFVTn0+-3P-vS;QBebiBrK@R8D zNkr0kfJ*<^82^#2@$ZG6kgP;PvsNN)d^M;1=9Qtueq@`RzXN3sl9i!s3r4M@iK1>U z3pct$s}2a+-*x*fms!Khmxb(?+;V53e8UGyS-noLh*hq>x|p2#^2}`po|&+InsJ8I zj{iy~%{ZZ{UZrDFP3gEbqvm+q%)HkW+oCIX+xQDnf?I8_mgei$u=Ufov0(bdTO5IP 
zhXm}zVb8s9y3#TId3{yA^7!LbV-29;1*NZNtU-3$nh5%*-n=|e-U>B|KQ7BhbIHhB zm7HT!T{x@=Mm!q%xsmP_eeJ&9?6$Q24EbKUnRQlqPGfW9PF=G6mPt%X)V&>lnY~VH zdB|etWhLT}*s$?LFcAu#loCfYUg-?e*JhjTp+sMz@37)V%j(VL?bbIgvu>Z2-!D%F zGe^U?a<8#j$A;Y`tII_4H`-fDA2b<`ZFPn}AB~J#`!8JiquWMnqoqY}{B)`${MXOl zH~6J7-i4B;qYYgvYehkb<|s&!?=8<&>VD4o2kF;oSXc=j(>FQ4Vd5!fjHo}+7w><_ zW;<-Vwph-$Zm%Bx#_r>c=L$4`=c;D$T0VuSr)fhL?uJsOEy-9g^A0pp#m<|)dfT>( z@89xyUDJ-4zv@e8-XhmDGWrjVw_@AHJk4I5xg;E?3FpU2chG-$W%Fj4<9-&WdeXv_ z3|twDW{F1gkk8HQ51wOjzO!F~9~S;j&7bu){V*;_r~aV%6sA1W(4w4}6!D%!=_KJ# zuHa0IX^b6w2b=H-pbks`%YnmzEx@_JrN9lqt-yW2qd+sh+|JwzgBZ!&GLsWB;96F& zh6hhw5*FIorOmixJDGGZ)i7I=dGI~OmRf0pJPOHvsbM#2*o_)?qlVq6VK-{njT&~N zhTW)PH)_gr^kOG6E$?-xVQ_c~(sVg$7!t{&5TsaDjX_K7nZFoj3}1rs`!Zp|Y9vgc za>r$nAY0OSALAs18jk`bh(RY?Szl~mPmF6dI5D+isDEiGzqEJZ>b|3vTThLra?xZy zny+{CA2fGmPq)jnFtsJNcGGwI!VmBU)m$vyfnK)omxA8VA3=vCR9Sx%-x~_Z&O?PM zwJ4rJLRy}HgY%|$!tc~ukrr_}Jx>ALYs4L%GMvv6&SyzHUdeqZAqFL!&l1jO3Fotf z^I5|AEa7~Xa6U^opCz2n63%A{=TkBi84WE*;YQJxrmxq2-excL+DwP~pJEdjo~CoA zi;;ukIVhfkRdTRO4pzy*Dmhps2dm^@l^m>+gH>{{O0MN%T*ULJ4tyZ%nAaz2JFM0-EdzZA_1C+GjxE2>X z7#R;E<6&exjEskoaoKHz?9Whn8L;fxQ*MUE{wuPGGUiPzh8#&yz;EKs58nJOy!pkW z18;ut<_B+n@a6|^e(>f8Z+`IR2XB7x<_B;5y)PI?s7tg-c9;J?4X(Mn{LgdYn!C%b zTsV1l$Q-Icik4s+S=_B;lDOlA_2Z+%;~PfCyLtz*$yBy~XlSl;zlFyfv)_vRj#^iB zWVqVdb#_r?uZrwlW5`}r-;Aab?FY1EO*Eks*=xz0#F<+1Ccm}htwz1&ZOeJt?UD76 z5xH<6MhTz}OaRM)!+|Zpxxl5s4Zy9yeZZqYlZx%PQb^D3+~@z(qz><7w2rSbSw;2M z(ZU9?qbvwzgT-KjC&x{>>l#@x)Z0CHe6dm~bn#}`_$W}72F(d*;miD$dFw8F%2>0viwV!GMqq$6D=7|Aj1h{IDrf&kl_R}oIr*X$Z!G~ zP9VbxWH^BgCy-&%H)J2SDvGU*XH(^MR-B`CeclVM_0zrc29KW@>m6EmNMH9S=Gfk9 zd2mzyq~wx?JC0bebZUG<<*GIENIW`D*4in{zwijbtDb?QrO$enHRwqUW@<-umNS<5rPx4mvt{O2qsio`j%w@X z?PDwbTVxrC7uxXa_v%M5;iGDqs=ke9HY%K+8m?J zG1?rX%`w^>qs=kH7)T7-)T_864uc1$(1}y%#3^**6gqJVoj8R~oI)o~p%bUjiBssr zDRkmgOD9fY*`^qZAiAcej8RH#RaQ5mhfRsxP5jvWRo)ZSx=te8^WPi^Y4c5&uv(XD zpJ@8L`1;Zw&o9!O`Q9O^nO^ybDQf0lm~C#(j38uD;0(>d8JdGLGzVvB4$jaVoS``i%P3E| z%~4t$<{PQp2HX!c9p*W>MRULtYg>_Dc@~nC3dy(>pQWib$3=JY!k?xkbsBF!5zjVi 
zygXxcM3;va1~RS#=6CtSUd9O}Py+46imIWs}dfvFS7sM8m8)fYxR#-H-)N{1cTHnZyQai%E zR}81Qu4!|@?0A^51Z+M?Ywm>57)EzyxgHB@`}wxcKK)SPv(u5#+&6wR5VrP)UoY1S{g+kdrI%c> ze*SXtcW`>tr+9xB&zIB+%Jv~T7cqvD`i8m}~Xf(Sgz-Z0iKO`vGsW#YPN&CEqJ zUPJ*t1$Qlc%dWh!KWRieBud%DR2!GG^9}aZ-ZBokM0Uqz5k&SKp_$HEXuN4~WNkSb z*48tv6M{vcaL}B2=kxam^z}W-Xy3bA$TaoBFnZx~!!N&4_Fmhut(GnjT`4+3j^>W0 zaI@WbtQV#FUq@&6Ld0GnjM)$Kv_r5G-86%-@|2|+j5RYNB?2WePABi(F*0I?S^HU+ z2Ayxe^ty9w(W9)fAxpyGj8~6{hCV&>%V6#9n++b(ydP7k4>QOAFY~8*WjK2V@y4%> zuJOetVbX2*yaN(_8btc);8OO@*eA2{y3LH36)S%Vv{^a5RT(lW@RWI-uXSCsayl^? zS4s}++(DB208rkmE%1a*FsHy2bHlk8~A_ZoFsok_i$dN=hF^&aXyg!6iF zCCtq?St$+u_cp#;Y<%}D8{aP{`e&VJF0R`9+)Z(5pQy?$Rr66D3*GIj$P9?(-0AziA%8-`8B$INNJ zxew(9*ODjM+PM~Ay=9+e|EZNx5aOEFDp>M(T6@e=Z`op5oS1f4JgP)<$ztPddxvT- z1KhN?Z3WhTuA(XG?M-Mi>aee37&1sQudO^f;_pLQ>?Y@cYZ zR@*0LYhqa_?5k%qWWYRwgKd8^Sk$5JaE8~pHTB%TV zoe|0ZN6R0ruphb@c`55#%pEX!i&BNbn)|$`z=BHE+E{@%#wE|wZcZT^0Gv6-Z+11Z1J2*u)e;#K6lZ?oXO5? zN9L&Uepl|S&SE0$ijN*Rzg);2&fg*9Z?}yfw5o%!gjPZPp@=0!EXXD%AZ5fg<<)#< ztStgZ$X<+k%FI-iv;WQU#w_GXp_cdD+DQ;+sUVLDl9yq;(+lHdhTrG&td0>tHgneRT5r$#di#p$ zjYyJ1ug$x{6xWS%f}Zj)Ow~#>S=y!jas4&*{9516ZtJiJ;!=1kG3;o6mmsr&w8xs zxwo~`UcvjeTtEv6U(9|nf}$~2TjCF}B@1){lfVjKJ+Kux54a4t5x5PwA9xIC`U64y z0ePc>3{o{$S}?T{RU37X`)fRt&WrzAwqe}y#6k6f;obX#UAe05Y>VZT0o(HFqj2sM zD=(W)7jf_VEa8EfZ_X9>o_tggekr)#J2$O8^QJ@9KGB}}?A)}uI$9;ZhpbiKf7&}v z^Us|MoTDF%4*cK;V-lCpEXb8*4SmJXPfe6HbDY{u?`0I4d1>t{+MPGsXs3Hi%7vLM zBQFzZ)nr4$OrR-uIwdVlvc1Mz2dh4N!JHjC&#?^+InT5Y4*O0!_0?DPgTtZ69?RB~ zJ&!#$=AT(g3p6b`RF^RRsCsi=T6=VRZK&-v-2d6Uv`HR^G4s-#N$s;oyLy6-otM1p zTx+<$KWsbilD9|umafc{fAnHHfAGPT*o!|h#+YoB^hJy@sOAff5f+0r{{fz7qe`r! ziK--Tt8QhOC3yw;+Wl;vQ_r$_M@H?gvv2j=UV16ep|1!IJpJutw|?U@1B72>1*-Qq z`hIhcSsOpI(ILj|pX8VsFRhg~iivK?MsD2DlhdWh2TtkE9F=~-cFK{t7p>ckdDj|? 
z^$C_o2zIQNhd#~scFHE0#MnhsN$#l#rRWDaUq&~i31Z~AR`eX$lD|O_GgJa{VXUO> zaWnHC3&P)ClhDTG#9SS(uscEOps_!Z&?nT6IxL!5upkrd?K!HpwCoEnSQzq^k2?OK zBM(?s?YACh?XNC7;K=U!@%00b?VRrUAMCvgoMcsX=w16f>s{4duYPyeqpSM;s;;W; z>7Ms64+cRH21byl0%Huy!x8jGh!_D)@DWAOTw^pBA#XK7Akmn_AS9ZMNr33h%OrB6 zUQME3VoaQx`(OK<)73LQjNVJ<=KH>17*^M=Q|Iin_gZVOwf5R;uRYnSU3qR5d|h&) zFZ5f+mvBapc$(Fm^jo26=A(*v43odEdeNXyw9}>>2yn(JkloANrNCiM==PJAcp3{kSI; zd3ndl8}>nJKc61TCOC~lUvzbx@twF=QQbtpffiYl4oZ^ZJIIFZmGExmYMFAzt&t|U z?y5if)bf8_{uD;_)-9vzFK-_|{z2o)@!Kc9roJxiD4cjq9ai4~F`AqAf(V9b0Mv;PVlV^W{Y63$kkE*D)iU1_ow|G z+IM7+qz*08JRSr=Vphm!Kpt`Vx?B^}Udob@MXw$vXmrP(;)yofE0L(31M^>dWe@ z>N;@AmLTNn-WZ?3!WTXNyIIu}B(<^qQkc3j^#%7q)(U66*saGg6Il?zZkcuVMU zxqv{DMRb#b1e{93ilFq8wbC_frQcX9{pMQfJ3&dUtd+K}m40!p^qgNp!sJtGQ6~XT zD!1zL$8pg&{3C0%>^f=lgl?*0W4D{&A^OuIRmGpAI96BW!Km+ z`{5;=m@y&l)8d@W_aj7aAgW6Fa(t+)&+M;s3i>*xk{OIPT1JWuo zB)Cqs+^jaTOcv0o^15nED_1w`%sbSml~7YOf^g$%OaGAu<71URJGq4dZ{TwJ6*QHwVWF%CXJ}gftL}Z%Tb1DP)z5Pn>B1FVs%?# z+Y0k)o0=@gw7UCDlZ4SJ4qFY!BjJQHY%3B`PCT7=!?qJO+z5B6ST=9lDJS9-q}FIO zoUl26JFdFjOjjgqS%ztLhgFzlc)OxB%*+p&1vAV(&akp9+sfER!r;t#&RfMo44bBM zoT#H*Hxlip2a%+TSS&v~arxbdILb7QFim$X%TbY3f<`(Dt8!;8hf_yrVi(0M`chsO6wxhj9&H|fJ%Z$41XOq!zBs?~xoDmv) zCpb*yP0Nj^!q6sMaEkeODwcE8Mz@h1s@X-AbbIq)rMqM#l>yfJ5-QT}>sF?%45iY_ zsoJhVHC7yq#wnY&V^;RW98G(g6EmuVYL z(Xs9DP{dFX8IyUVas%I&e8B<-eG(8fr`4a;?*25cUW$5eOBwv%S05J~f(YsL#I9L}WDeZ%=M zFCOf2`s_%rtLy~NyQY~kMpDql&PY=^E!c{hnS^DTap4TQ9&zkQBI9Ob^tZ=ObY;}% z4L9XFU=Yg7mR9VLvCx-QQhgS4fP6!Kx9TGt;M6g35 z*dY<@kO+221Utk(pW|r?Ujf8bCT>Zxg9Iiwn?={f4!nPE_wG61%la=R+7dnSB4fai zj1=2FKSp2=iVq?%IAp(^SZdwkeGu#1#+*s{frqjXY06nBU1j}GA9L2MvzOz!>(0OS z@+;?BN7OFk;BmufA2-yu!dIVv!Qz%zYdoCzTlFaUC?^zoPb4j>7=LW85W7tLlL`Ej ze&94+#XL;lpG@GNOyHkP;Gaz3pG@GNOyHkP;Gaz3pG@GNOyHlCz-cj@6aKyo+l#-m zsZ%ywYM-Y>pW4)EQ>RUxHg($6X;Y_7oi=sa)M--RHNgvC{m@&O6}o^lrlqeg3rjb&l4T&o|tG_i)4sGn(DTpt{t!7v3*~ ze&1WC-Q061WoN$3sdZX^ad0`zJMuskXaakIL%_AbQQ$4WeZYgjBft~DhIxvND`KjG zapg_bB~VDFL*2st>&6*#^0lxhRqQDidwNQxPZmn00-wfUv8PZh7UZ(`DR?>YE&lu& 
zF)TyS@k3(%dn+7bdW&a3W@N)_k-VKrqCb`B%pCj+>oW?7=U1i@Ix-jz73@fz;^+1F z7&=f6JdtAJ%lfKk@=F%PkU!-dp(Zb=t|-eqF6ndGbKc1hRtvJ3DHrQU{b=O(R^L@x z_?^M5x^KtW%lFL8$19cdZX0X0f9ch|3zc1a?yrA+;pOVbw`{+~eZaZJus-eHVw67D zRb($AbMO=YbmBQ<%J^02F~sh!pO*=T#MWrpP-PrfgK=DC99J2~RmO3Zaa?5_R~g4u z#&MN#TxA?r8OK$|ag}jg)#F%v(_{2CSQ#N=N4Vt&-^?EDg7>*4ILa{a-Fh)L!^b=y zu~1O&0h7coa*W04=YX9}l@N+YHfy;-4owOB=>&)Uk{8HO^1Q$$24o2bNC_`+2`_L7 zFK`Jja0xGP2`_L7FK`Jja0xGP2`_NT8zj$Acn*+BaMiC@<_o#Bn8I>0V6~CPDwWfV zaJh?Fov9W~ZP+%l>bc@SzjRAi?&`iN3M+};s~XRpGtgyyc4*g=!}S;UCym~#U)lco znZD}2{w>>nLLHjPP3&88FK%2@t*OlT&$PcWc6t3(L`sR*M*W-dQCjjrSx)m3@`(d4 zp;xE6bQHEA!7HK1Bt%5Q3N#)3>&@ljI$D)eAPeHDlToo6N5J)EH z;`5iX=u7zwd?j*kH`ltUVSwqYoOlXyWHzi9By=>A_>uB;rU^1J08jdyrwi@*#^ScF z$lJn+r7g|*tBM2vC0#2O%cY6jYinx1dy9M7*{f%5yTZN2`Sh;p%C-y6I&1H~i?;4q z78zSq_k_NUEa@YDA}`}tpF$$MJ&YtAhEBss!eJ!gFp_W>NjQuo97YljBMFC*gu_U} zVI-k{-ow)rz5)o5In_8ctbKLD$dc(<)>k`t3j6ykC8|a6AiH>EU8BHfCrYA|n@u#n zce29zosOOQOkaNsE;mbAvL@7En&!u9pJqFvJ}2O#gQd#!@W5oQdvc&QJFsQSI4hl) z7@Sb|=KJT$lYJPL)#TRr+?Lz>qOZ1w`>TTcKJ_-{M(>9O3+h~t$t0Vd$X5d_7@qXX zBE2MPMq44|L*&aiunZgk4g*Jk+kkt32Y`ox$AG5*u|fnNk}FE&3L!y4-8j^ZL)|#k zjYHiy)Qv;kIMj_p-8j^ZL)|#kjYD0D0z#NRP2nqm*pIRmL-sui)smDY{HnltdFZ7A3sM7bBhsVe$0dTljA<%G_>nFydgwzwK`R zd%7=OE~oqYQk6=Ig&o=RkvZ`%#$|YACdv4@MSbmYju8ATsFXE16(%1WdmF`0$eu0rJTL$c^KsgRnK|n#b8QM;;hfn z63;XPj%UEJ=;C?G^OVKax4O2@w_%CZKj&x7`2*u|A+l3R&Eiaj!u)bzAr-^0i zy=yBer@q}2f6+pv8qIW>&eHOWw!Ub*Vun`-UzM4fS#4f4G(25*?C!4}Joup5niwi+ z8|V0m=hW}1KSBhQLX!%g{1ok;@T9~9Qeq;I5)<4qfs~j)N=zUnCXf;nNQnug!~{}e z0x2!A z=ZBPb7`C&*h$Y117+wcCQ!j)`zLt$4t48c(*>{-TH`k>m%N+k9fB};@$d) zck3hGt&g|$5%1PVcV$Bn=_g@1nGJa*&qC+CTv_R^!hBV@sEUnKh54#5Ulr!7!hBVj zuL|>3VZJKNSB3e+PNZqiPWNY|G3U%7o3thb}TXR2IJ z^t?NB`Ma}|<+|jHl=B;U)#d7U=xbG#;BF8sv5~^%1*jni-#ks=X<%(@Z^9YJBI}0W z#SOse~Ad)fFiHjNX#s&C}lRovPqOPjjlm>@9;m z$vq1ks&Mx1!JeV&YX&deyHuZ4ua^@+W`;+IN`o%5CrY6w)n{O^Oz8Gt`ATE1ixDiF z$Kk#Rg=K zn}%e~j07AK<-Wd$yUbTv#;xWSjy92Izgs=fzOHXBW4wLsXb5%U;S+bNzczk9G#a{s 
zS#GyBfkh#DYoR0~(@H}Gd9d;nr5psdNI3|*k#Z19B=-kFEm9uTGi3-O#Hf~JKkdy?8W*0TV{s0?S0cow)?9 zygsZx{pPJ_8Lu(xW9R>p^Ll09Te&KKRNNuZW-PQ%?NIN-brXX+Vt~QY5LR8Y>)&xp?+UwjRYEl%xCLJ&oZFzMBmPIc}iR`gAbt zt=A*bq-o=xRvSmFjic4Z(Q4ypwQ;oCI9hEStu~HU8%L{+qt(XIYU60N@j$DMqt)2A zkC0*AJv6RSB0`0=wmsb2H_Q^&;yJCOTd2M65x8^R^V>ne&8YCQQ%2Hgja%5 z=cUETQE#B6#Hr;;3qLQ+TKFRwfC3j%@adg+z#5iTsT|(>)`j!$T%Dd>z2p44+P`z* zr8~yAG;4dWU1_x&XZLrd`wt%MPpA9m8m~U|_8&j=>V@fB+y~vG?#FR0wJut`>gt7y zE~;O3b^RiGTsZMvl1N4)J5PfHE|PK}>U2cjF|mWR zRaJvWYVb%69;v}2HF%^3kJR9i8az^iM{4j$4IZh%Bff3>G=;AKGXIj=JA1Bt*63JH z8-mR?*!~^cN~Cv>ZoW(rNhB(2wI5#T=Ux5qND`kVDf?b1DIyWsg_N@QQyJXP80{Fa|6E`+-Y<>w#N=yMgP_790ra6w*mJ64*(AX zj{#2sLI&~Xcrwuof%0D$oS>0*8QWfuq1%fct<4fk%KR0AC{Z z!|lRKfyY@6ndF#FEq=uBFuXSR^_jU7A>+rm>I)9HZq`<1PkD;tTP8}PG<&qAqIr;H zi$mmS4_Gqzs=9}>-OJ~cyL*O5A05ebFCQ#rdPb`1+1~uft}TNzs?sx9?UV;<+8zmL zmte2j4@rctcoG?5UqT*z4e)J66WSq?fo0$Ta2PlO+y>kOJODflJO(@k2<;+hH!(6K zNR4{wMx5?Q9v<1LFEf07Lb#{V3ec|r{R+^p0R0NkuK@iD(60df3ec|r{R+^p0R8-J zKu=Tn3Lv5&iEZj-Rg=}iz~~e7ZP?G&p<&1M=grMFkA}h#5^gPY6z7%bG6^p0)3jVO zJ6N_L1|%>-_@->=HaJpQK3Kko(!j_lC8Ja=3|40$Hq%bln9C3Cj;D~?^PP9+_gZg>n zmucoOnoMQ`7Bew9$x(9Y$wbgA;Wdx|l$xxT)e4R}`qE!;5?92>|8d;BDrwYyq5WXG zIOm?PzIZ`=PSQDA?BV$cXXi{R$pWUW#+umWwRP*r>kpug@+L7`OwEd(Ltyhpl~``< zbQ0?!$+%C_j3m}W66+y}^^nARNMb!Cu^y6G4@sp>2n(bhu}>p_mOk(niD zcxv$^qcgAwPn|Q49P`9ii2&#c{|T^&YtEH$vH~C4Ak>Fo)mVni>-;Na9V3OgfJWYS zxbmLZyOnWwd!R&MD3NlXAzTHz$>; z!{)K}9JR?#s0h7_0-_L$!^`Y(=>5V#Nr$0RrihH9{X zNhTFz;BJ(AMiG0GnQ&AO>;k_?jw*>9Ums&2*CnPvo(w$3!cP-io8T={_M&HGvrZ_M?R4fTJE@3|Ip81D62T z1GfTq1NQ?D0gnPt0%D5=eg)Cqa{fh{UJ8H7+%d@E;Q4)+1d|)5>N@o^utaMC!6%2S zoNH#sm)#9qkiPk5Q}UCjeN&eQFdi-v7<- z&H!+NBej)_!-lbB3qXXwN`(rnp`DFS&MqRb8n2->pk9oluAjFU{B4LJN zk;R$Qbnqbam_RbiR86A1WYu7^Vs~V3dDhQ=BRKEOW7gwb4;OzQvUZiqRBJN{;mu#f z=q-rL^dzD%VOd8a)$h@W>C2yVs5Lb0Q%hzq>j>wPE9D=|pT%Z%@hMAU=oy6C4m0z3 zXvJRx&-;fQ*L`1@Fv+}O(nIDm z6rKY-uVW}Qsm%)+?#rH(a6j-6@F?&k;31W; z8(ps~qt4TAF?Jd<>ilq%qW`J<&(=wExvCc^&{!N@D=8g1&{#AF?!N1u_65YEw5RU5 
z=k8B2>r2Y4Z^FDPp{DK1`1pAHOWw!ZD4QSi|5FD(_R)`h^vfqr3#08r2d}&Cx~`c1 zt3E*aIx*QoqK^J6dy!(HSBTHRiy9RRE3n<9Y|||{sz3ya#Hh)6d{Pz*P1L`XH=HSX zI;&~@8GzZsluqAwO3FUkv`ZaVJB<4oMJaIY&4I+AWCmAbg{)psg+mujob&Q?)$uDY zFCRTB{n%~vsKds)v{dp|=wz}ZE1MV?@R!W~z%Y|&G!uScrqQANu6XZYd9t3_86W5^ z&l<(jWNYj2=t`6NE>Pc4HyaI9M~wAMv2Uf03=0ewsG|DD{{39L!}y$fpZQ{jX+QV4 z`W|o5#{eUkhk?g{rvQ;e;$o21OHrdbDS>cK9;gCMU@ve8 zxE44Hyal)qco295cmnXXLJIjSNeMiz?va@7ZcSN<-In7`PwqH7&2(%4;Tk_TzGcg} zKr)j}Bs1#U-nWV6H)j)xTsFyKxC-5LVvp(%%~R*N*d-Av^xB;i$tgQBC6#0sd>e(M zBy9+u0=;t%|M9FONMajVEB|k9ZGL<)7IjjoWq%)!ImJG&B5Eb83INu-sd6 zR^!s)eEiq|M<~M4_t8Je${^X$}4%dI`J?3 zk$jN@p|{CQ$D^Uhz+lcMWtp$ZXRzua=nX7SDT@!L2ja*^{D3|i4h{3^qZMb63PogE zlzzwo5K@I#qP4C*5ZsHbuQj)`bA#lcgNi+h1?OwEcT5kqAF_I5)8Vlq=jjZkr@PdZ z{eNn{rn>Z=uIMhaKmYe~3hPg)Z2Bz8EkQrO!=FL=*%!K5`g!V^T%M>CN4czX%iL5} z2fTjDnX86X3}R7`Sxe%kgsBaIKKHhsyk6Ci`HfR;*rdGQ$pnl8|C!e~NC z`4rEN3`kjA)N#su20fMEC03%yJ!0#3PH5S%E8owW0Ub}~XVDYdQNrWG9BDq~KvrR# z&2k!f4HV<7#2e&OjJ8oaB^{_zuA*5b4_kgk^UR2x3^vITT&*%3B}pM7&MQ}69!(`Kj&!Rn#)|sA{e|`;4qiu$701j#BY9=o>updp>se107Gz~W%vZ*Siwmy+)PNSS54ae(4tO1K7w}%-Bfuws&jS9k z&B>=zdUZ;`?*Hx6DSbtLrb8+do|6AQXH^=1{LmFwfF7I=tg-De9)Rb^pr+?2ZdBWd4*jPQ-SR1qC`TTuys=P)=xuiOf-L5_# zy<^9g?W?Q(iG`O<)wb_m-nwOcurPVm(rn!0A<3>cluy`$L3X|Vd?HrR+4a)f>!?^(IzuW*8IFQ%_i&RjtCwrz{?> zZ+Y>?lNPt?O}IuVNe-^N@d2i=rgkD)WVvh2qW0g}nf?0#i(C*pFVJcj<^ERCQ!{bN z8GUE(wPH8lX2u&0Cvw{_C9Np!`Kwbiav6Uy@E+V3TrjFE9=)0|$V^z!Bg!;2z)s;9=k~;3+^9sCbnnaKz>_ zB^2W`SO*m48q5LYdO_R$tdHnX5#vfHC*S(%n3mJN5uxoy;tlmYNG!O#gj;w9u^-&# z_~)5t{8P-UOZJ^s&yl_799|+T$D6G8^si3N?=${vJX@&lT;6u=4Hq@eYAwi-=EvGk z7iJn;8oRVy8)uKeL)ghZoZ$QsF%7Y6JimM|c43i;QL4wx&+BtxELw2MM`3?Qy(#fOT87`8vQmejA*wyMVqbcbIiko7ytTyR* zPH&GQ1|88vyd!eT%Rw*mBoSVDS6lA>iFoRvsreT*CJ5wb&Qxh!G(UN3DmPv#SBj(g z?q|>6b)fg7%U$UK4Ebv+`s4Q!tKlP7(n@#rOeAD<6xv0c~91p!!41QSye4H=d zUP4n`M4D~HrjUKI^4afy%2cj|UKl;;u1T%7p^D5Y*Vbj%3y~p4qP#_t>HFDSsdarq zu0mPW6_dU)egiKhvv?4^bP#&O8=5esw;(@ouKJ5}kAK}<+S-RFSM80yYIvqJJ3c=* 
zRGK;b^4?^(@dxc25|!^wI*ZA2`wO*L`w&zmz<{`hMI5HUN+45Gd=F*-dCEbowannx*3Bsg`|9PM zkz?Kre-wFAr9nOVZSspZr#}^=om~zR^F5%&^7@rUSqc!ZfP7Z?tT4geh*x{v9_|Jk zjU+nRt)Gll!;B>p#@XgizGHvV&iv56eH)Pad1JA^JJZ+L^R;gpx8MrL*EOvs$Vm7b z7!9n;#TXzu@1mHe5=)dhiMUq>=qH zj`6|JBG{6tP-5NDm4WmQg8|Wm2|hi|=N%L)t5M#Q%*bijBEOGA#X&x2xPJ!TZg%eA z>Jq<7ZCe8#*Q0#>zWaHmJ<_Hk0zn*x^W5E;s7;0DHB}{4Z2fsBK|F=TSOmPK_pu)H zYN*12GdOIkx|dDAZCvL}K*kD$*Z=<^8rJc2%t zpwA=d^9cGpfEb-=^d#SbGmwWzQa|w}aIYIW|>#25Ab5P0?JQKiafz>FfV!7Rz7ev9{9yX^^ZT$fsvxh>8vQ z@yT*k=Wmfq!envZ9V*jVZ$mrnejY(tJXJEP*{!Dt!abs6FFA2F`BL8n0^LwqMpVb~ zEz(Y~>;(BuEg8uqO*M7m)FVgKLA9^_Si2y9g$EW@nni%OQpu_7SBvU%D*gJ`bI)?< zRph*w;SSl^CKkNt4Kc>T)IKWC8*FjCv+Wmu@+Z%I*SpTue_s_8_(~IH9Ux>_=8e=- zKpak}9iFFQGvm^|W!1nA+`RmuqAfb^lT;|DNBIO@@?zj4{N9Nv^Y_wsY>JK|@U4zv znJ$wsqvPn65#gP{E%vEsDbZLW6GhgQ`4r7DhUS^T=$1^~dZ{u9FUPQ3izaqVHfPpz zUnFAsNso{X9Xx{VVUjVX>$7ws`iJ3dvnZm&kvyE?1D`3_2cS zYIoHu-<6&}|NMILdw;o(h98865=$|z_INRMV&YBEK9hF-e__2Y(CRW)1h-}M2;l`f zxwzO?rjDKrY($@Y9eNgod>lQnmI}eozva_YmjA?d5{(}$3wrbudrT~h8g!KXbYelA zOviN#h3Yh9OiXNDot)gdbuyXFB~sb;U!Fon;|t!;ldEsdl3+fYEPwC6Z6G6Kx^QAn z-9zm6m>QDp?E17j@o#U!$;Tt~PYika^k!0jtzOtg-dm`EH6qO4?coID1|)08`gO%Q3ZaYLkuOrIk) z;@P>zo35Db$yTrbiHVViaIo~&)xVt8{TlP9%p1&4ny)dw)_sX~-|Z zG+1*D)6a%it7p9PANX#b8eH;H>sPJYt)AOIW4+O|K5|N=iP~A~3#)Sbcec;ZoTL6} zy8Y=tURkWKw)dSsYTCugrL7~QTbjHS-C8>F|EPaNx61jIwNPEXTzJQ`=!gVI4rw3O zFt(heSHfozq0kGA1Ixex;4p9mxDB`mcmQ}9cno+7kU*whKE3#BIjL3FI-|(4N!}$3 z&7MY|_gSv7<{tLfusT@pAo+1N1b9;;S*=QYt5s=MtI}RWIVBs8ViN+e1ndVc0j>vb z1?~p!2Oa_*1)cErdfSw)ZB{-dwF?nk!nxbBCPdgYFuuC7xu_r>3O^Y32w z*0)|a@bC}sJ=@s-^KbufWAwmXAOFm$`Wd>Tw4wXILU%c{Yf;U}!nn7$qlzf^sXhYL zM*^ykK*15HJ_6N8p!x_@AA#y4P<;fdk3jVis6GPKN1(bKZ7+MJMl_RqBU$WVS^1V_ zXm51NQc_x5MP6o^1?~{x(_G_#4G|O@#+~l~);d&XePVTsOkfpAR+oB2UBcTv(60yj z^{~3sgCD1d)ukR*mwH%T>S1-Mht;JXR+oBcI86Txh35c?)DcT+{21Dv&spdPmDq(Q z2lk3ztlBW;J70x_4TkDzjVtv!=N?O~?)ae}-oC1=yKNz1MvN;GFaCyaWEn3T?Jd+s z+mF3!-`NND-k@HU8}D0afBeJE$^q3rEgidAC%RncwH#tU*84mxivQB0z2d(RTuW8~ 
z8$5e^Sf{uW%%3+adw}py)F0nsrws9S$=)w-7n@10>C8J?uuC+*ZyR;|`0Kr+TH^X+ zU(+?oWK-xWLw(Gn>lkz$gRW!HRdzLvLDwXDidUdZrV24Rvxm}C$p8H7m&VUj_ZWDq79 zgo#XbG6<85=KmaT5I-1`Db%8Q=B#dh8klC|_ee=PIon*?H#fa!WN^>q_JdQG?KN-6 z^$cVRgPFmqPFH&TwWHJLt?Zu|4@bB6T$?@r%0HUwdMpx84P<*q#r=kZ?>~9t|9`yk z`xn1>C2~gIc=Pmoj{o@&#vAuzxfjVT@t}7$uq^%Aj8=)o70Xd%wYc@9ET^c2;cD*` zwOOtN>(F{UIt-0GPz9R6Uf>XLEpQZg3veIsAn*wA1mH(RjuDIJjYmYU9~mHv#Ny!; z6HMMN^B=jUb5H1q-E{{>b!13KWcVi7djDM)>&zlU#6!1vf!|@|o&c2rQTJURR>R0px!KY$igvzd*yqbvhe)QHM?X=rt(wk_JwZn)pU+C3xe{* zWF_Pq9z4}oBOJ5p>9tk#+A4Z&71_0lURy=4t)kaf(QB*dwN>=mDtc`dy|#*8TMhKu zDtc{I_r4cnepT~ih4*=jgQE4xaVWb7Yv?se=c4sm4gRVz{%Yv88hWjUUaO(kYUs5Z zdaZ_DtD)Cw=(QSpO_IV0w)#U;TCY7{V(IUb!E{a6spKgRrq`Z~aabcn((l@j?Ly7kpma@PN4E_X3B2Yk{M{TY&q32Z2X` zCjg)S=Hb72O##lQ_tyjT+CA@*NujS&_mxYkd28?OEu32s}0+m_(ACAe)V!H{Y$*%1#W(jg|wXStd@l|FSJ#Zl6O_20)zL;^k{}5 zHV>0ZqFDJZLUe}D9czNq-!9)_)D2Eif7|{^PSGL^wcaTzOG7Ijo36m7E3oMbY`OxQ zuE3@%u;~hHx&oW7z@{s(=?ZMR0-O3<>z<+T93bkVhk+nl>!i1fenzlK-auYX5tvm3 zrli;0DH8#Sgsal)DtNCVFslg6Dgv{Lz^o!Ls|d_00<+2pts*cbNvB|`Pn!vvP1oj! z>zuYfsJ#3Q&yl-2JIY)v9* zP2^y7?TW09njl<97zDdiv|Q`V6FvW}w|2f+VJ;GzARc$}9%%B{{1WT08~nHa!6E)> z$CBoQHCS?X{=GdqQ(K5eoMdt_@9sW#zH!Myb9{29ucxx(bnwQsO<_#4qB7$JQpzN22F=kdLq+d7Jb1V@8_M45RS0=pb6sbk*WWARD!@ZFo| z3w8<4*27=?Cj;CiT25xOI{s^I-tA`;=nTJ3jzvlTAjyI}zpv|!F8t)oVQWAO*aut; zTnD@kxC?kM@Dbn>z-Iy9ed02@nEYzOle}9uElYDYE1Rx2rOxO?E0Q>MEvAwsaGizi zGx(e@_>1tx%ag0K*EAO==PtZpYW!!nSZ^@&{JA}U<=_t$c5i>>OSkP=YFtu!+d1cK zphK=hhYqe!rOA5S|0mw-)!)4M#arI+Ja}hr+J4~q@h_}>rY)7oYPjurk-@mSR3f6_D~ zrzG=Y3)lx-3|t4i4!8?=FYpoI6ToKypZ}xqe^m2-9P3lIDtU|lnBI+%0K zbC;K4UCe-tVmlF9h~^1@|^=y&Q}+P+Kkl+D^? 
zZ=jt&`5)fSOAej4@=Cv*+P!vaJ5T&~+WAGjr@T9KzW9?nyEY{zLvkE;Rz19OR`9ka zmo_8^>iDt*Z+c()`b0~=;*IQI*D00Oyf~+{%RKQk?fQm)o>f^@#7{4$$DH14BWIPJ zK6@j(IS5(dM{$@)n8Pw~05}XB0hp_ZnfQW_vXUvO7bfGaSqB0`?EOIyDJGGWC3$qP z$WY|F7mNYJQtl*_-f$L0$4L|TWKQ+PoNS<VBO^U`p-w_La##(P^4pJqA!7!6LOWwMVqd5Dk{qK7)S$gdcAT=B=p& z=n=seprA^*LS5qLlJYoZSyPvLJYRyWy9V`(Nv1b&o#ai}a18snNa68yVtK=?&Ch?O z^{AH{>S()_d^n|^B61-I#)!(C=eubiR5-tC+pwVrFLE}7~NRG8{Z zN`}(Rf%B(Ety?)aR2?Xm&09?KhUre0hO5-R)x-Df=_%~H>&D_gcTJjIGpfENU4eu; zH?z9SmrKLyO`feH7Pny8i%ifH@J-tNA?xL0*KqoY46`h=u`DFaG8@Y>8_O~q%Q73w zG8@Y>8_O~q%Q73wG8@Y>8XwK6Vz5>XaW}MoJCrNJKSAIIl?K#aiZ;jpJ`HS%UPg=jgc#8i3T0hGSWXox3Ez0|u z?@w!)hjfQZGOoqxC3A6cX2p(qe%yjL3SV%BV$3FL^v~RS!7(=8yj0?)D<@eNe&kYT zTz4!B&#>F9FM7Jc*KC;}K=ah+o`a5ygc`=w`^7zp(FF5}-lFgywJRttVCa@Pc1Y8Ro1>OQ6 z+2kPIlb1@7iz2=|OQk6q=xuloCR=kf)e9<~^8)ba@cqttGnEL1USJ$p1`Ys+fg`|e zz&*eNz{9{}z*B(4LdgcYPB4}qtlD9u?{7_ctF{$y>QrIsR0*a|6@+|+4po>sRhT+e zm^xLMI#rlDRhT+em^xLMI#n=|D@>g#x&(Z_>QrYjNX=Dpn1*WOMG!>Yp0@GArnhH!)foUo*O$DZ@z%;^j;OQ9(&jCKuoV?6* zCT2M`2iQwb$1I&Sr9XWhCSl%i;(O{|ZEuzoUP;kP5<4$iX)O-+n5Uh@PF%lZO}2#w zJJX~*$e`}yUI_^n|4ra4lixBfF5tbuM}SWNp9OqsWT1vbW=TUW9YESK040X9 z?hsI^Z9jHLHN0(x<%aX;*nqD+uR5en{zPhQplsb@7(YDY*?>3hUA_!+KfnL3<=$Zo z{wbqtru_#iV(@$TzhC3OqNbT*`uHEEGXeeu=Ye-#aA_U`xKHR2SPtS?4EzEAJ9H^g zm+Z%s#kdY#swdg+{|9vOlNd8++ibd&e_*=g3sB~pSZr6HN*UWSUk7c(X8Yo4X;a9j zQZgO$>2t64zl_TGka@Hmpeys=0QX67D1qz}8!Gvq10R;?l#~7Tj0^4+SA)#vJlsJmVwJ_{cLp@{Er>@SNO?8cM?dDrk;%^=Zm9=xhGl8HYsPm+mw-?7Vhs5_Gk>&Mc z-l?18+_gw~k?|x67`?k>H)khvyf3girp~lbXnLIj>)1ZNjH}BX_4R)0Q(a3Q3vJ#( zqYao)!br~GQARhNiP&>N2)X|`a2l$g#+LcZGcka{4!sYmtBpU^`}Ka`8;7E+Wc+wC zQCl7_I5gqkGcTjwi}?fQWvo82$ zg}D_Ab1V03#lqZ*g}D_Ab1N3+RxHe|SeRR}Ft=i1ZpFgfsx3_MRq4#XT3D{%0ush# z3>JbmE8brcKdxU)-snp*{bVSzj-hv5lfb83b86+B&SbzLa5vp~!P#eDaHse8hO@S9 zJL`rU&f32HtT&7(>mt*<>cC5z)o}dck<@S(=lS#&%%ex{zWc~g@2~OhYc9Fzro-1< zbC|zZx3BHa&!qZGsxgy3f7`yLn|gX~4!3_RIaI1v@@yA2PvWbFx}(EY6+gP`|9#w~ 
zIlBU9SK#aloLzylD{yuN&aS}O6*#*BXIJ3t3Y=YmvwdUyX$oHfM7@pqoFUZ7^JJ;UTW)szw@avym~V>DEqT;rNO1(_!&;{|?Qd9ON-puNK$oLR>F8((3 z9p$K`DyRWfQLs;ktE>TdO*`jq;z`g<~fT1LhgG0Mi2vC}xmIM2Ai zIAmN61wLyWGoCg6%J^I3TXe-WBW9O5&M_%_%$Jx)%zMoLWvya#S$}VR*Us4ud$)bH{RaCT_RrcMwg15Wvi%+B zu=7fHuY0|Fqx*XIe)rehXWegy^WkQATln(uP2v9%enlB&=t}P6!Isc;k zlyBu*{9y@)f;{i!vm9KVb@|Ie4pNp}tC`hy>jxo<~&enx^ry7E5kD-SK88=D>Q4| z*5HyepD%2K{5)HAhgDLrPv%Z8Y=sW`U$i^e<04s8w=2k8`?@O{hY`)kRIhLbi}~XI06ma!0jAwa}zm z%X7f6Zf=&ZdFarD_E~p9a6ql)Mj76K5kNT@$?_gvUkCqLaLG-o#tAS&E*>uDz!z*I z9cPam^r4lC2PiDIWKP~}ErP5TxCRZBNCg3oG^Ys>os_{Jvf_a|UI@u)tNdFi$h+Vu z7W(S23Cz@-2VlY$l`4?K;!mMTTN^a1;PBER5L7GE=!Q^=?n@pG=tBG1kS%DhLQmMI z32m!9KwKYA1ZXIXFWdQAFulCKyr6Wqg~pH(oH<~dB(`M3TA=Zeu2rqm?!rQoyzDLN zqFwSPm1FH5iSi&hvaqRqMi*G6t!%45ApUdEUqg)72Uy zfx#i9u?NkmD#$`Ijlv&BVT0JEYv z8O(fSA*Vbdbap`kZ-OJ}mPVbF=X9-A(&WJBR8w$3B?At5)y(H0Vr{O>O9bIis8%X3 z@}?Rr!cP2bjkY1^5G4{_2;-u%o4k3Oq1)oL6A?OhZNw6WWK}+U% zjmHQeI^SsJK|NBgmRB;&^X0|z;`{<*-s5dF4?SLF$fAGpYEIAv0R_EODMBO3F|*P^ z+P;v{tSbY7HoBmFak~K~;mmK}#AFawk zG}se^nTC@vZMfL!acwTyj$xXvVL7fc z9XskoY$x37b}QSB^$^UGQsHPMsgky`Ol24X`hVUMjuhOeVaxlJ;nD(Smn4_W^zvFg9C1{o9sxI}2iba)?s?aX53D87SIaw98|sDMbcJ2oKR-oa(1jJE56v>&VfRz zbJd?SPf>Qpa>>79SauF9Ii};7NfkF7t6hrNV8M22rezeOHoPN`fB`6FwO8q*AWCzi z6*bLp-muiOX40~pF+r0sQp9yYNy@U!@Dw~?nsIt#gUWW(q}>+W8|E-sc*1cwq~v2f z1RsRqrfymU>JwI%;iSxrVWgb)Ibp_(#jv2mR@5>eFEx&ssR*@#Abz1ev}&izv?yvY z8fYn~F|9<*qFSe5hj$sS3B_I70T07#4!uYShiKks#6d!Ml%~RODw2jDl;be29HR=} z;0jt9Q;wZ7Y>O#g7(BR!?TqjvmAHxl1I=B?Dm6-LySZeTQk}~+upF*XQ3Qj|w(WRC zeUeU@@jlxeaX~PM%-O>1AjmMCsOhG%6UkmGR*Xi=a1(jgaNV#Q$tve!9*cmzL@$HJ zNy&f<=k4yOvNBo2)cvZ8nl|lHW;|_Gquj=bg`|dqBuRr8Ml|iEr&%jv(giRSapIO0H&vHm7urub zkz~w74p2W2M&b114s`kgiMmU^y)8S;l?LaoujWFEz#>-_pI^FBU6FtRPH^fC? 
zFbtuhNLRr^`?w&w+vzflVy@3NoOo-7;goWcFnzd~=&RTBRTZ;il@hX(=bQ*~)pT7I zwX8@u_F*^8vj|rg87IRefq0}J)N02~JIT<9WMDQ3XvDht2?_`c3EMGz5HxW}G#C^0 zmW{(~pb*^i$3f2Y8sTo_oE3-XVJ$cUvca72tw=36Q_pYAl+a%8Nu8_ zmv{}A?YNaevO;y0mO<|#8fe!*n6bhg4mH6WJdXHuRT`1@emgw`FDJv$%}lF4JEg7$ z{m9s)yxxk)K*PpFfS7jFZvUo|@z!NI@jOkAC8M-8oJmBiTsor0xJy^Wt-J)v!Bav{ z+f0SsWc!$v8i1Qt%d{=>`JA${dv%tU|U zP6U%{BuppkKvomRza)cB0X;N9MrW8>(-CF^UEzck4@XSL*diJxEQGZak**#mowS@- z7KvvivdMl1pGx$qZEh67W4=R6Ak(Ng;LC9ThQZ>f?4ZSK9reRu^blCrJSw;bOH_?S zGvI9~ZpGk`CF211B8|cm$jXQv>k*EYL2al+q0Dnt%k9@}ZV$>3QnB86{>`KDUX**p zon6F;h;^xQm|7miySE-TK$PeInIGU#AcA{sf zp~m2yI2;q{GPs7&KLuhv-?{MayOo{bEX>A(1j>Mz9voh$)b=O3D~^!fu># z3c>8CYeVlw)QH9qIUHN0Rt zVWc&Kz#K+0g%c4o9PUC0TT$ek9WzvX3uA?@GpJISMGkm$W49!Fqo^AfmP)#jT$pA_ zcNk}P!clGtZbWvvh*V^xvfEp@fEEQm7zvCM$LJPj2}fqYIl$#OgF6 zA&$63;5tzhwHdM7MH@vL#mkDjKF=GQ8VKqdNxg#O;U?gDV^t(SYm$6Gk>J z=)jWVfe74Vg6CKy!dvZRdleSv_2!ou^gOXggsdWz0tvHX9FOpurF#8eI$4w5_wL3lg~U9uZ?^miEaj zQKhIKjgPoFMmKnWEMg(FjRFdYdq5(z_a3bxR17YM{q$?-(kwQ^Mqv344FEwZlx>nJ`@`}3Y z1iTkZxo-E6b$tX2*o-C_{vwG{-mZydTW3sLF{GJ^EgeSqMZ$wL-oj#b%_o1lk`8z0L4vSQJYlE) zIJGoaG~I6cVRU)amZ=SLBg?7PPQpz(aXXsmaSzQ@Qaz?q$z#nzI~BnXt`ID- zzS&GQ9UdQx*20se>WN~9I=v^oD4Iu>dgd3 zJncrKCVN#ntz65^&}4MB*~QYJnAcX)Oc_cvUl?+`2D2rj(1#{BF(L;MZx$vIN(}o{ z-9rLZcJn0YX;iQF`t z3?eHmmb$Jul5*one)u)qZE6eRCE-|(Sw+O|XBb3PDjIG7vSP~8o z9#bhqH=@W$#>A+gZOodTaD+xXk%Y)9WLhjGlOlfC)q}|Pw~;?c)JLh*F=UPgQzGDH zk{ZqP#HW+YFD9%+4;E0r6-)h;N@EeA@FG7EP0?T4NGoCrWoJkB@FE5aR)&iJ7vd@e zSp>t6VHO5aaU;CL&5KAtNSaP0$0Xj)p>5)Qu}F75-eWO?fe1Zn72_dcpBIaUyBM?K zw;{=y-F`iq1k0Hoeu`VKsJJfbj0On3&`xMSbTMasiX;L95#~Hd=Ut#NE#_2`B`nte zmQS33EO)Yk7A2;4-9Eie4ASGQoO)TxWTDL~5w#{qfHX#P3B_VlF}JLqZGTW**`83( zsDrQi-0R-!(?be5Mmo;ziGUktIQtSCmK}S)i2YI?4K}S2g9J z54{?^VVB*8Q#(4*1^y!&f*IEznwa(iwbHaJO$bMtK&>=^T4@5c(gbRy3DimxsFfyA zD@~wQnn0~Ifm)JjQiqkM3DlDP6?5}N7MO6#l?WnWE!E2oyH&BE`k_>Opl5Jff9j4q zu4(ia?pP^zr*?eFxaPO-pR6qXaeIeHgZ_|GUohTDc*Uq1ll{4pxk2Jc%6j=$*2F{7 zR)?SRKoyV#_;#>}pkAu&b~4g3P|**VW6-y?0(z 
zpIzGe((R4P!suY7UQus&aOm>vz-!KV{Tr_Sk*%$BFW9;7HG3vY^9LqNQ>dN`6Ri8?{{HAvMpKcQR zs?ebmzi52fcpvp#=N|bz(2m$mKla9hMnS#e>qE|U&UGr%ewVu8&>_L$iI1JQS?wdnpik2xr5i3xCd@{i zRhU84ldu7
>q|!66NEsFhOx94@%TNbTD3T5KXelCaIZ+dJ`!u`y$|=ib|HdwlPO z>DTtm=N8q~KfHKIlK5-dw~gO5emgX*5)xG{n|*aelto086+}dF3q`7YCn0?D=)tE zz7Krj>g$bL7LHzeO(R1T$V;#L$-|3(b*nPolu_@kUw&)*=Z#}RdxK(VrX=t+6KqoP!c250T9V8rx%>Fohmq3|Ip81D62T z1GfTq1NQ?D0gnPt0+NSFj$lt|H9WxkBsH8D%si}nrr>>V80hmvSlDiLoqDNdxT^K*0LgM~dixlk)*NA-nsR@DBb1KDKdhLrUSS{@s{>er{>ZB z+MB2E``*8uk~%+V?=3p^vlP0C_MWsqX2a>l9d+pQPG`S$XTwa^XPM5%8L@i$>4og- z9-$W{F*Q8WYQpNZF&%n#x*lykB@k?|tu-Ev%_AwQ`eN2+= zdy3Y0k-uK%4=?*S*SMZR%8=XFk06!2Ai;ihVSK%lNH(%d5Vz(N-=;i$M)2UUB#EKN zG?Ov2c^SbaBEORn{0B`2?i(FJGH~f^NFTfu+F;GJ>5K5eATFWhxm zvsGVzwb~D~PpHp5=a<#rRQvm?RepN>HLv-7`IPVU$p{Uc_yT_Sf2DUW($kj&L=Rf> ztCn!`c5g7-X1^lbrKPg}@-N^0?yFz@>RI(Z_4f9SAN}ZKoNHtHXHVX%u~Spy>~4ec zBron5EEVjO`d{sx3w%`7nZVDT872vj2?-&unS{wCWJpLRVe*cVM}&|FA|R-|lmHPT zk6;LI>e_8(!79EfsI}H7Xlq;RTIjmgwk``@+p?~uthK(jQcG>MsI+w}+5dOWy)%&n zv_G@E-FB1TcjwHVJ9p0g&i6Xsch2|qU)F48XT{fc!7%IsJKuB?jVz!Dr~_JnIY2wG z5!epw2KEB`fkS}U1(GXQDpMzNUz3YR&d7X75=Cn6`j9@fMPq!QLwplaeC(vErkT1) zRZTN>6GL1!&B%Jt5IQ}Br3dZ`R)}CVWf1=(8DlKV9Xch}KE3khOnc{n&VWuP^xjoC zKaiR-J#(Ma2Q5#jZ+aEf#XeXIop18iOZt#l&bmUVuTcM2b8G@G2Ce|s0b7Bcz=OaO zzyZLf=R)XN^8HC!DJOrum;o+~7$-Z9h?oH&pl9_|J&I|JqNG@0vLzYsvVbC>4rl@9 z0PVm=U^}oI*bD3j4goTXlHep>BGh@Sxx_(Xewa);W)p;aegSm56Ai8I6oZchvWQk9Lf&Z4(tZ@0{ekOfSAQ$)QG7= z5{A!9;dA+|FDqo?~2AlB6e>pDBhZD|B+9KC0=Qfj4`TgU&=03ORMU zPNNHVuQFYb2oL>dUN`72vfZEdBrdVAoxZCVm;;C{L(VOk+K7a3fl?!xjUD262F1eH zxyp1kqmLL>XGF~2VY{Z$Hjk$Yg3g|3y?lOJ#-vj@6EjT}BUkr(^ynS;X8NM@e|?<3 z==@(&szckgXOSZx?u(rZ3<97#sYbAUs7JO0aCg5UP6+r(>gMIEN7%>BkIC#nGETW( zoZwbp17#KpM{!b;VfRFxw50}=!*GH=2bJ51Tn-nWVZQUIR26gZx>@5j zBzTpK34+)BkAYX6??4=Of5~_?#!@fT`ixm|2JGIa?Iom!>i)o8=ni3ICb7v7QUzPu zQzmjXZUWgTw1ohM9HX6|gy$?TCC_pfevOS_DUY47b(?kbAAjl|Y5RDL zKjHs*<4mQ(`sR(LNSZT@-lL*9U3TrSJDE8J)ln=RA#}irrYSf?Jo%3F(O%TU#v&TZ ztEeooz*BjZX`smdVZ7JXV|X7Dt4UI3Y3}(`>UUulnb=My8jHQ{Kn?J5^1<0|7N=$x zvjNEha>g26;?nD*9#sLl4)Xx~3SXZMqi-0q7~0IB z?$>trUGNaTE^&iN##bHt^)L(f;M(xu;q%}T@!%2h;1TiQ5%J&=@!%2h;1TiQ5%J&= 
zkzkYd@OkivNXQD#j#U!1>xsnY?>!@TGCd>UEFM@z)Pkvosh*(VXGPRP5w%c6Efi4; zMbttOwNOMY6j2LB)B+0)I0%SCWC(Zj8qwh8vAl=|FQUPVXz&vD^%C~=687~H_Vp6> z^%C~=687~H_Vt=aI6~tXAWj`3U%C{Dc(_apaY5`aR+2S86f4!LWo$5Bj5YFssn05z zi(M758m!h8rYP&a!0o1{spOQKL%yIal_O)20`}}NNa0sg7=skXAcZkVVGL3jgA~Rf zg)vBB3{n__6viNhF-UR7AcZkVF=G((KG;5#xc@G$dQF%hpj|)`)EHt$V+f-$#2Jkt zj0PpQfLfp#xD;3hYyh?ayMR5wKHwlAqajSCOJw(pKRkmmmR{mRjD_FXWhTK`2@m4^ z9`2R-*Q@hut1D~gSJf6r$^%*1fwGF|xfe1D360|}`*1~t5iXw@h8Qp00MX=HRsG8g zCRMTco@}m=gh5*8{Z(*AI6qvGQBz^{!5kw$J+EcvtmZ!W`VOZZ+>bXel% zVHKD4_PQzUnjMITL0rymE#ypcnEsRf64LY;+3$mmeRRWz?Drx2eaL@=Tb;D^5y0S^6L*gM_j|IGBs=Z)hD+ zk)SuZ^CE(YPzTZRF#AT+t7HX3ehu3T3E3gOtg&+nZXa};<=U3C-oMxs@{atdu+HNt zwyeCwytJ_sFQd%2tK!(LOH+i&&mmI-!S!$*R8Q25$+EwSuy*W7 zrigqbeZEt(UW z4rl@90PVm=U^}oI*bD3j4goSE68CoISK=Nvb1TWBmZ@u%n=6XweF?oUDRX$VPr`;JYOFE(yL%g71>x zyCnE73BF5$?~>rVB={}~zOw_vM`#=a#O4|X7fGTuDY?$*Xse`R3FULG?e4;vO-KFmyu@!fb8kw^Hui1UJIhd$2YiS3h>O{JHQ6V#BEXz2oFj_bB}Aj8;m2GM3L zdWA4Xgpw0AhC?Nh(WiO+Luu(6snsQuC|%-G1>kiPb;>-d*jjHaF(KZ3o!7`jP#u3p z+~cP?G!aa*z|>$m5lkn7sYEcH2&NOkbRw8e1k;INIuT4Kg6Tvsod~8AHK`5a)np!& zdkfA@epMmve3hZee3adCuy5OhaZ9~tY1%?oXBiDjeGPc8e`im!G?Q$)vVJ81Ckyoj z;N$?D9DtJpaB=`n4#3F)I5_|(2jJuYoE+eJ!P*fT#{l8vT;@*_Dw9GPB=<1<6<;~4 zSJK)=xx|7uBYs56Zsn9j@;tFhTy9;##yhlB54p8?>eLUOuPzxIj926eP`bt%kaC_Q z(@m^uJCBgBzT;?7%hX9c}1yFw)U_wZ;_OigPQzFT^ocbi<_cA=<@PnOt zHf~Y&DmfB2JtWG{gTWXwk#!qVCT}*+iXerQ-H@^yQg%biZb;b;DZ3$MH>B)_l--cB z+aYB)r0mwDJODm66$!S>8M7W5)}t@-UDFy!Vy=f~vIr%EE>Y#m9h$kB4>KuyVnP|rYJ zM$}g3E|u0@qrM7X4N_t2LeV)RHBFABkJ?c3aw=VT4yXp2fQx}EfOWuDU?=b(@C0xG zu>EV<*pS(pCVkx1)X9}FwJds<+EG4!!@-HP#pmwWh+?QYHGw2Z>tw(U$L=Io6veQ3 zrZxF-wA$;GT*ZgV7!7Sg&Viz$*}C9TeqJyu zDQ!gl=t!|^phM0doa|Y1DWj@~-Cz;>sIlIM&PQ8ClKoKjE|}WjMn0zVUekR)7Nu$CI#gY9ZgBAAiA$^%!;7+` zr@nDtbwP_YZ)8?Rc;u=5S2VRGWE9)nCbn~wvD(EN`iOOz$Rsm82^-neFV*IShfyJV zjKr;NB$BN9bIyw6*@@T4s=N>*R&dzFdj!cPv`hG=w98{0)=QQ3s+`el6zw&Aah5BP zxmtlqCtYk0t8jOJMabkSlk~1CXMihYl601Q(J`TE%^Dpkvm%wfl10}DI}Xcbn49y1 
zqt1{d+Ga?MH52#A#;y#iS7gqDKmi=PUEIu{I1Z_~>Q9NEiIpTZx{cGKzp(vL@kmxc z_Z3L{ot1aCkk8@fW|<2`sSUBqRYk``R+d@mAi7iLCZ5A)!DRxk5}p_5)*xuusaYUv zLx=O3aYM9$K4 z+5Zw`$+DnbhMl!VGFy?PwaLh?WMo$|vMU+cm5l64Ms_75yONPz$;hr`WLGk>E7_4< z$;dAD&oEobfpuiRi-|6rB^SEv!#mWAqE|*9vGP1;pyYy4)^BA5L{?!NP@m&oRc*cg zEfz$WxpNhyjXp`nJyeQiP>N+xie*rWWl)M`P>N+x>Qs3z#XKm*JSfFHD8)P|#XKl= zs=SwC9+c|mN#fR}T3|_zTvJ5bIU_~1#n3hbD$ymRt z*aPeX4gzA7*Yqk6F2b`)X1kINmvnYgE?m2U^Fw-@8%g_yr@ikDk147gL|Uih>U8hW zl)A}Ns>=cw zoO!IhPO^s;XV&LU%q}R%7U;QV@(%^s{(=I3c7gDY#DY#*Z@8z!|HanlK{92&q*Or4Kx8416Kg+fUUqz;6dOC-~eFT;6q_D zaow8TQ$3}iV_l29la=%m=#zFyOyi$*N&ZM=n@BbhzWrsD32Hnl6>gMO=B&apGJ#(p zRw7Y%A2co@4^87fXxs;l`=D_jH131OebBfM8uvltK4{zrjr*W+A2cq36SD*{j7*XJ z9g-Yc!s692lCFqWRkOqfA1XOC!|qVHLWsYDj0E=%$sb+&`|Gz&s~bQ0*7dEHdy|LU zEYEc2G!3h)9XiQ7y*6`LhBfbjN28rJzkGAU;KtFFU4e3P$GD#TY-MHDh4N&X-9xOK zTx;Op3U!WZa*g{IT{luSfilW={3C);vx_k&(Z~XdfI6TBm;ZeTC4A2zGZMToKQxR$fvgGXN!xkl<9ykP7o0n4 zNJ`#~`=^c>yC4({W(6w7OdPXf+_}D#+;MlznsMm`(>nu`Y+|?eW-cAz%}5)6d6IYJ zkRihcq_^Jb54Fr1;Tk@=*5?`&E-r5_FMV)QQ_k>=Q`Hq!m9FjAR#jC^7ul%5y!n65 zy3%^V^|YsPKxDv{0Z)cFjoB7-XjUpZu*?|k3U zh4`ElhiS_`u=6ProPqrHrW=PiH1(?#K#e9n4+e0Mq@l6l;JI7J&Kh_3_{S$)GvT$yX^owYJI|XqasH$wlXgtn*R*r;hRN?Y zZ)x7svbtq!%U$QcHs$El=+sS9-@o9u0(Yv#=FP?kJrb~`q8sz`Loag2|H#azU(>yhA&%9^n?^#f};Ij)JUhw3CS1$YR zNuUfLD?Yb-OU3#j$;mV!MXRb(J@!ZP0R$aI1rH*LF)vKGXD((!f zN$(oEcKW(S>qF~LY}mJP-=>M1S8us`%X6O!TzB<#U%T$L&vjlu_WDKF-+zPWhK3tX z?8w|vzvIA;<2#Pu*m&dPH*LM??VHnYK7Pwnw`SkE<<{qK+i}~Qw?}W^vNLh#w4LvN z{^8HRxNF<);@yYu*l=h4ou}^l{ug%Lz3}e$?-_f~@h|=S-VOKNa(}@C-UnWM@Y#n} z?}_g1+_Y1I1diUFKUwZdRjv3wWa13xJi(C|M zajenpbJ($v&8I)$SkV0n$BEtVa%}4U701c?OtWs!?EWohX7R0nYT%xf&ZAvGW|`MG z)_1?jv7!4Yf-0kX563zDVxgMP-HNe_PjXD>4#iyaYea2E_c4xH^rVdTXE+AvNg4M% z%(1TfCmiebnTGCf)1Je3%hX)%Q^q|{pbds|{|dhQo@fSrY~XwiXP)6$(0!caM15@P z{sryH-OqDu?tY!)`TG2HeSU^M&eUfv(x1-aS!&d5_D$4~Z{wF7FYSJr;~ag>T(D82 z=J6CY^hEBwfa~k{-CuAl00VUzhjrlaB<;!i*h~-W`SdlknDqG^?sk%+;HQBz-{dIR zXwY|P;0~|SZUPSt+~E+%X71cT&wtKwX7@`RXLY~G@nZVZz}-%8oC{7G=-ay#%n>|H 
z0`n(0W`n)+!T-Bz8t0Fx*( zW4FX!^Tpn|)pWdp=Fh`fovehn1Nhag_IWSo4-2j8zwc2^!?Z~pVm)NHT`Ix)y4`lO zpOcjX`f3l?ykfTpklE(A-S(=1E;gR%Yf@lw*;bzR0*;G$?NBQ@ zzeHaZM7ejURb1V{_d2;^HF~d&RuJ6uyxfEY1Bl5{57_&XDm>oofot&?(A6Ewxqo^*wnGAt7B*{4~ox#e;m`GKRpNN0)?BA#> zi;gKPuZdqCZ}o*zhISFwVNgwB(Ayz{WwaY@@>#?3i=bK_`(!-rQm)wuk+eY=i!~|9 zHB&f0%RVn8)yd~=+_{}&kU=fyU&yExQd<%`Q^P%i_P_k*8m^X4g&>2vFY;|6=`Q|- zDBJmDDd&WoJJeeHZqjSHawX^1@L4-Olkd0d@5&Rla^I!;8exo}n#yPE^c`389bufl zNj#`7;$F*Q*Eqdhk6pK(-p6qxzaf2GW8)=4@0LT#f}_*fvMDMixk-8xS)Vr;_qh+B*if8zsd!x^b%@k<$`rc*%RYdoESDL0 zkkvFH;_C%!Bw9R-1z&{j9u3)+G7KU|#z1@(NU|y>GBsHDb?CQq&_3s?v1*(e&upO) zwwQ?aX~Jx8X3}&%#5@&QIZa)NzLLztFv!pMmyVX7FOX^;AAM>TX>S6T=I~c#N9#h{?kE_3D{`5`t zr20qol=`-MT75@-SA9=CqyC9G)lbw>ro=C(pQ;zt&(urmW%Y`BRlTPEgDJ~#G}7zp zSL#j6t$wY3V|kcA{6f8}-c!F-C)GRZgnC20rQU|*7en%G%;lCszU}Hth;aoBvI=rs zt*(N7)(!0wR&|@Yo$1RiCXaWh zJJnt4i|T&$0F#+L>dWdY>QVJoW<~qSp79S%Yz{K5{J#1Z?7JVRAF3arOaE0pr~XYn zPvVUu>H~F3bz6%3gOZDtOtvIs97J~IA*82FCVgRw`cErW{a*dSGi~~-*Gp_?DE$QfLUA(w$RoB3_STmu;zN+1Roo&Ca zb>6QO3A`#{t$7{T68owUss#*$*2s>wqP1%k6e@W-RCl-j#9H3gDd)^}tVPu{Z*j)5 zPUS9J?EBL${oPCRNKCb>~O|zC+tE?>~+ketJU_EO+XT4$_v)<;;Vi{Z5 zJ#WJsVi}8FELO1_o98WU(TXjZ$oU^&iM^sNIkAPs5*9mHtYEQ$Eo}pP^f$x;7W-GM zU#a4~2W#vdZP^j#<~PLd6{}Y~T*tJP$3i-6Ua@$^-W6+CY+bQ*#m*HgS8QC5wsEz! zs6Racw~AFKwyjvUV%LgQD>kiIv|`VSH7mBPSh8ZriWMt1tXQyOzl!xLwyRjKVz;u5 zf|gjVV)u*1D)y>as}^Tp)6au-sC)D-l0z(2u~W@AUey+=*r#Hhift;Eso14rm5NO& z7OB{yg865_fY_sAjmj99v$6<7tWmK=#S)cVc_+D2EK#vTEp3Nd$R|mUVF_UK|~|yF>=t1yXQ{U7tNUW4=8yHa98c}nx>*(mwrMd z#t}JAEU9T}ip5ZqT)0g`mNY!Pc=4jW`*1yk=%(A3*3}qq)LtA#G`#}%iK1K;t66}2_wyX(h9x2?zZ$II%P8_nOEw-A*#p?>M|nzkmf zOl-k5>XWyzW_jJ*ZJ&LCYtZK&*0iFf^)^@9HljK2sBXH+-c^@dOqkRy}Px-krdFp=RlLx7TUZ)?^vvh#Y z&>Qp%+E1Mp9->$2=kzn$OWSA<{glqp4*DhiJN-o7e}w*xen`*J7W!ZGBK?ZC(pmZ! 
zdWm+^k1*=n={)^2{ea$~cj;5wMc<>>=reko{*_*#!}J^a-}Exo(jqcwG1bu$+DS|4 zL8_-^)Ig22oK{d1Md%@Fp=N5ORkV^0(f6s1zDf_%*XV!HoAf`yv379jamd^ndY-;c z-=MYh1g)cQ(v$QQeT#lg>*;CwC;B#Rqz$x>UZ7`a6K$sNP?($=OV&LYD;LIOHO0hK zahsScYDAk@EjDSt(tfS|hxU8NX2&kaKF0y?7;maK%bV@>dxPF7-f7-hKEE&M*Zgk( zXn(Rl%b)F^=)c{+&|l~Oe8QXOH93l2+77H`qEajb)^=e1rNsJv0@f7oSnoKmS7N=X z57s1qW*n;_u~IB1W#K^Vnb@LOUCbG~KXxsVnPYm1dMbKuynMFjy5n?wry9TG>yK|f z{*B{1{_xlzetzu3W5nZ2Sj(7jxkY`kekk-Sk(;{RKG_4$8Ukx|}Q9==YHF*XRU& zLVu!Dvf~1yuJJ51_qdL_j?x6u>1HrkzeN-V{e?FRJuRs~PfvCGiwh?Te_B#11&fOU zeyymG3Py>v)O1uSO2WMfdNH1Gr>1L98Tf?5(Qw!Z>tVw%-wf@IMx!Dx(%un9p%E7I zeKTj?Hgl#k%h0*5dBlhq`0tt3X&xy%nlDmNJ;msXM7j*1i*nuDW=bU%+bZ%v=MaKU zOAi(nGi`z4LQ?8T^>fpPp)=Ork2cHktYKVUS}!p;v$TB@LlRMP5C}$zfPcJwA|KW9 z5%U=dpE-9kapAE1Ws(O3 z|Gn++oi`R#iyVA)9^P{3bmM}0^GLSH*=`;QWr_`kHqQ{tGtHOfm(la0Vcw^TS<+*F za#Xr5Pzgq5k|-)HF32g=(>w+GwIbDDR2amlxG@YF*%%CWAOmG3K0iiJS7TF*Dq`2s z&aU>%aAp_tBot~ljMMug9Vjr*a!GqQWN15cx3`Ded31OzxLQch%-_9Vm5p((#t4IB#H@jY7DGh!)B-(~B{<;oN?Ijl*9*zW zI3H0tb)pa(A`vv8K4z17BpQu$baaRmk;lc77QX55D!CBz`O4g3L?d_*q7s&J&CHbA z%ITI;JCr3($!xHMpD;SB+nl@kB=S<;YB|~uif(r^jU;mVwWKl0AXy$nH>MD#6h7jh z`ZLVw<_`Q#@B7>sJvnKN<}XwS`zd!bYRf+ZPtV8SK}FeW@s@i!>`O_AJ zB{u5N_Bob8E-0zUmsF7COY%@-WB6N9t%E5qV6r`8kde0$1#r+{+c_37C@K zF7b+ww0+vGtfzoCP1^@dhU_)80WiP=voQqZgHo;V8J>=r<|mrr{x(1Dfz)Z5dtpzs z$Fn}`Vk^&PcuuDvEiB9AlguuVtjp1piwZTUy0p|}#eJKXY7Fv3o;V1W9>L#qLu`P` z+F&+Cqwt{wXd;IfSkoA1p_D(U(*nI1`ZP6NPYr-47NwS)Y3Fbq4qrN{sE*T>i@8-; z;Pa-Bal70ecYzOjmPIz;!|X39o-kdEv2=jHC^O6w8r@?4;?Twl$O%wIdRB!4YsB&` zEs-$hz2PbkN6j6F+bcR6WqV=0x@&(~Sd5jEo_T8VBL}Pt_=6?F2c^ym1uds~1k-W7 zNK`Omf@mm|T_Nr^p=FJ*w$ISb=^!Ht#_T`|dKHN{D$C2n&GNzsm^D#zdQ|C|h?oy% zX|WSlyP4#*cyoG3;Q>>xVA9pgr)8#+dQK=&X4 zmgbD;19}(dYC8D|9ZVWStY`vLS-hY%lhabQKvAOF#C$;;+_gx=h_K{!w6}M(LkGw0tTdEtEYT?nS$u>vv)sT8fL#HW6;X^nVl0b7VQpSVsON_zQ1RNW zAvV8KFv6wnmrl0m|sZ0Zrb7k%T9Li4u1)6z9+u`xcH<&?eF|uzU;6b~3;b zd@R>OSHb|V;<1`sVA?Y z@x2ohPJ!L88FMpR5eDoI;T9bf{A_40}F4-LmzV)B#5&@W2Jd$1tFgcnC_1LMM? 
zc(X{)arhi=CJV=)=V*DGUMhA4!U3@>zom`)S{118*;gMZD>d>(_n~~fYNK*Try=7bU!)St+Q1XdshJ$J27YvQ*Dyq`35@DlC!p_~8f#HbGT8x=dzmZ)T%E(Y|nqS_Sk009uCqFGk=ga67Wkt z!3O)TI2K?Y3m~g___TwL2$+U{3v=FYFV3yv4g!TNbr;}ZnAy0`HTr4 zNv=9|tW`%?cmpb(zN9!1EVLas^jCC;LlwsQ{h?g(R#|-r4dzw_%r8|N zV${0)7Q=NEjY>+H9aZbPwuj10t>XJU`Tw)A{?O5iVU3AWHMaj4B#5rfUL~p)dnIm` zPYa9lgqjO-Bn$M6lysPq0u~c{_=4;cxN{sqo)~Hj*W^d_axr1b{q5m3#*#0&O3llq zp#^1!S%Zb0-|aAZ?&{pD?QFD$5JDa9*}2};q21AWz9$)#EGgQJwy`d@+ZO3+G-UGu ze~z%4WHAY8PeJ?3vz}Fa#?4%c!98JLIC5!C*l>zaSYcP%gRv`5UCs+NJCyQJo_fMO zfIg3Fk4VIx0sSj7sv}ESL7l9XOUAH6hBek~FaRF<60m0YlS8)8*HcHSPP_#nLkh=IJf^Vk42G%o2@4#A7cv4aRr*pu4;lp($uhmNQk&H zntHekVx-IjtPdO3u#BRGFCT|@0@`DLg5~6x2KFLE0ko2E^HeJ4Rwjxd6clDSI#cL) zn%xicKDZy|eR20g%+J@w{p^6aj#h2Fgq5FjlXN~V&t9|6u_uHca0~9=p@k_4Ee!9| zjGvoHaSYO`>twvu{rd+ty|MEulxQP+{$kM+VsgZ{M0bDMPGAD@8XYST=#}K5P{LRe z2K*T*#U+IS6{bXKI&+{v&k>r3V?D4Nqs9H^wm?1NUS)dM;cT!xY`DxFV*Y~t+YMtu ziFv9^S{HaSOSQ+UhZ!Mr`q4wEX1-${>G{TXxWb>BU!Z`;fT?%~RKO}*W00sM5{Cl{ zH$@JIcCoa&tkirbms`e={6eobHcInwp|mU-D9dN#u%>5GX*G)jlu31|_PzRcScaaz zG&X7@d1MG5$2PRk-ZE@`3Q*;Mwp9jST#GSD68E6L+Or5(K4_vCi7yB>Fm4xWZP*EZ@XHRsY z<~Bc3@s&Q9GkWbi1Q$xhw(L?xKnOSz7LJaWcz^F)D1PHQ&(R_6R#AMNNS`b`f&4AH zz9ruXZ*eYcJD;0-p|ND|UPb?nj#II%ykbqmy2o(}GhdeXM&44=-PXz^+*aL*meAhn zyqBbkPA%8|^Apd?MFqzP)v5x+Xa#ZxEF|nUFp_B; z1rR#%s$aVHN?UOJAtTW1-6OCPQTByZDDwv2IPGv|sfE8~|FM)YdRECOfb28mWi5Jw zejuL3E9CxK7R|wyNl#uywyY7h#sR%jt7FL<8HGm+wg)e*q}79jT>@qj8tb6aSry|J@^1Um=N!QHzj?v<__K}ri&aavOGv9*6CHJ+pF}d_NrXH zti4}v%r~p__2qh%UaqgNYES4DWE72dV_dK^CH)p?5gE*eLwGHwIy8;tCox0RP~e0= zJlJRlXMmm$ag;XJY?88+!E0ropVSxthqYv6?gw%T)W(U7UWo39;*eZ?O1otbw&D+h zS-b|~511rF7LsnM7qLzh_EH$`Igg66+Hn`ASHO6{6~VlrA*q+L<;=qBv58Nk2+GK= z#Kp_<8?<@u8Hf~1F2!S-z-%$6Z=l0s6#GNaHUJrp3}d+gMU={gHm|-A2Z47KuK@b_ z?sl*|5O?CCweiu}m)C+ggL4R9-mOfv;ytAQ2N=sM`1De95A^Aa2l(`luus3HJwI1$ zD6OcLK7FqF&1z$R;{cc5oL;qVV_9aIxuz3$69>#i2=J(F%Q^ZN*G^uCwO-yDb)`bzTZe=vS$=KmUUQz910m#qssznISWaJ zzA7nqW_B)iLDPJU0AUX5URoC379P{ z$aw9DJI@Xo0qwJIB=1> 
zML)+qz9k7>z-3*X7*S=NnmC(N$lG@|!}H*M(ksnmHGz3>!`M0EaATO4H1gAD4r}MY z4M;z@oUqXvpSy6qfl6+rGLeWvWU+$4b}8!{ESBtbpQF-}l`HtxYKw4yZ_T@dAdu;@ z_XfMnpYtVe42pwtV^FH3giS$pEjI*hDKAHa41MUgidNem(%Ps90e69s!~SOB3(F(! z#5^v*2YUfZudyv=ZyJ`@_NLk0LYOzv=wKtDSMnGwd}(lc1D`^Vu_^<0UgQC<^5B5? z3SM-A+m_E>^egPjzO1*kE$HL2IkT!Z_Dq2uez%{e#`s;Y1OJtM=pYv3w~7QGI^L%9 zOuWtY>jyMkvGIyzh^gi~eTezKnA(>xtLHrDlm}fr!S>*Y=M#o-!Vtu zLF+*bLV;Ypusy;og+VXAg5xZ?kP!KKqsaT|@2kan?ex0M`iL%Y7e z!*l+mI?;2^sFt3yV=dg}vYroZ&$$d+)##^`Rd~+4W1Ef@nqMpB_*kis+qP+`0r=|4 zIz$xWhvk|BTtWTehdm@=wM5-uemUrxVL%o2vd9H69ioZdH|^O)Qt~s`7JcJY@YW ziT_5cT~Mp2cP5rwVM1B7Uo! z`J>7uo$RkVPRZ4w+!s!<)7H7B z2_3>aZMs@_z=eWcDjGAs&!!$0w+S0$aF^|%08AT4Huz7TV&%zaavO*DjPQ}GIRqzE z`_i^4S55{BEHA$j6ckYz2x5=4L}p#}k44DDHP4_?-yF~roGfHqm~+@#)<^6ZFdWl% zRnY6H4y!VZ#%hqZi#^t|Dg=h(UTb-_j;L|JvO_xOxMj+3wzQ&Kbj$sJ8e?t#`@i$% zcfM0sr>#4*dGn!XlxHfV_*etec#aXs-M5J{2Qpyquke~crXfVNATP3E^qFPaP7G5- z?w>eM=$Ep&Fd82%-W!+$~o5woIf& z<5hO8twaO%oOw&nA#})UySB_HUcx9IEY9ZGy^}!fv0O;2EO3 zLocl^MWVEIG#3XD!7K~O@7#L5ejT)}NC_a8wjTU3&u+|zIx#BbH%pty)Ak{AD2MmJ z#ccQ|V?f83?Nu)%4Yt5qeaWEaM0%Nmz`ENl@529x-$jC*x@%6ynsqhite@#UAU#sf zYm{Y998m`D!soJM$74A2V9?mH8yr?7<6E7uF?H-=M#;`V&)$hM#&|~TqAq6v?|dXf zKCna{Ei$@F!nwKO5?57~*p(U8^=M{D#WXIV_IXuXgCvg@?psBUD_+}K&|91P9NX$j zh2I_IogTL&5)HFFyj%+`5Uy7+K11C$+L22pgvGG*PuBzoIJ!E+R-!4ImoEy9ei_#McIErZYHue6_`i=tpWnmdG{kj zs(qCn$V+i7y|1EoRMHq~MR;VKSy-PBu5vgVoMU?NZ*W&XIE>#KU!HP?f_p$PJ>D{(f0Gmbe& z+G%?*TKr_6IlZ)^v@|!jR2?{p`u)nAZ?3#q^vHwd-&ncxJ})75r?cZ&jAX3@GpfmK zHy9z#LgO$_dY{~|eHgK`nJng+oQ|w=#)0t6OazawylbOhcgeF}FCcQ#8{* zF!x+-Gw6WiA@Ui$C|%N9$~7>TWeq$}vA5kb`uxh_R?e>dzxkzu@+$ywem=24X<+ic_!A6G4`2&rZ z{;)DBwqQU~BlGzam@0Y&Ai6x)`*(FkU$@q|0?U!&W2d81!xnQZVFKFp1+mtMG z7OMtsSHel+Jw7CI4cxJmZOeVpOy0BPq>^~_p8E(5B*%X66e6iEIStjyB2sy7*5(_$ z51B`#Iw{8idyCM}*Km|-5DUt-{VLlY@y7b@e7I-uje)x#u-DKmYaL$JGOSk|YdP~- zYKq?9dXx+m8AF9&%?fAZ_KgB zD`gU8-Lec?Y(N(jp>}BKW*o}-a_UHemX2u{jt#WsX$qB>if?7QM^y}aDZ7&mpTx!yMtBFkc@dd$BM*$W zqksyq=A#g$9HMgC{kZ|k}6ITh1!DIN%R~9Ew89(Bmf87aa*RdkUQcst^ 
zyu+okN!mPE{=ab#HDRCG;s8Z$4h)Rmv?uxSvshoI)&KkX!tVqKN2z#{4EY zS8&L#6D8V1y$t`Xm*L}eKf#soR<@_a0v+l z5_D~|s$ zK)b29Uaf%78K5!nvqqFX37CVjmjG=FxGr#n5}!e)KjX~w%p)E3%bj1Qd{}oR#sl`z zG#qtp6_v4{$~4Jm>u@Xpo;m=I=(jG<(Le1Ru3@j89=@bADIUhg!pLpzyDG8ou9XX`_9l0U;G;U zl>O$uTLAoM1HToON8FcVJjk&>POB~PI;{eq)mZU(f-ig>r}YD>buqk7#*ic`@l3PE z675JnG9KppPW7DgyHa+@)k1FIA@Mv_02VmL(t|9&{Jf_@(FdK0v`PhWnPb^b#hAZK z6@Uezf~Lmrp)O{$;y38z@qs+yhq_i7_Ypi|y6_Avn4(D@e@Imt!?f53^xN2{mMj?B zBdE76)+6qvT)_3>k2J!yin1KpbRFPYoR4$uq*0Cnx)yb3i$?s)=P$@%_%!x2Z6u8n z$H}41p>b#<#9)sD%Z&CAEdm|4vJ8l&_yzw@@$0YGs8$>yFMhle;JVgSe-Xz-_!OMO zE`xXu)FDS*dkZ!OyFrd80S5pV;cb-zS^?h!>;W7FU?<5{0oVi$jx~aN4FEI8vk>qm zUSBa5@GMl26EF(^d_xZcP-mDQfaivf13ZP-J!pW%fc-=xLBq(Wi9-xD>T3YdFzT;R zjB5eFI|j6jc^;kuo=I8+_!MhcJe&M6{N@>e?RcF8?xlj(wDEwg_~j;WrB4G?1JG_d zaHhWx+ktu+cs2ul9-9k5yJJoGo_7Jz&n!OxG-RROYvuvMMB`v1#x($5h4}&f*W&)Q z2Z^$$1MqzINuujO<8{Z0yz>FSgAX+g@EG7Z0LuNq;RnzCvj7hOz+3-%0Q%+M2RH&a zLlkfV&_+N9+ztS~0O|%%H-NeU)D56+0CmTs?s(K4UkI23FaVDLHUnM;ya6~0=*Aww z2*7wiDc~MJ17Iy+E1(nb7T{yRWujmzAP+DL@Bp9{upaO{U?1QJ;0zY}PQW+|P%r0Q zqTEXO=P17(yuE%k(GB3|4L>HDcn_eNC~qv_Fi}4Eoev)7A0?XfAOJj@)J-%w84v`3 ze-hB{B(aUcxI@vH&`u834S+z*7vo#gLs64FJBxVHVEO@Be482Dm^&3EH|5 ze7bQCV5wGAUbwsW6Vf3^!**YH%A7Q(7 zK7#ztlVuGJ-rHBKSXO6vtD3yKymMP?npf2~F3B(S&Tee2Yi_NnZ(Ls2*gDCpPnn`m z^4_zwzQwzEMPsWM$AvN4 zb>2B#y|!+ecWG;D)3nKxjh2Qh8)};Sluf%L znR(vanb%8F@qp@ARnxt7%`NpS8og5|P090qspett)SL49*Xe_w`&Q#^u3J*yg8ra? 
zt<5z?-SV2|W!$qXvIu*24fyk7Cwc|`mf?Lj2Ck~`h8r*5jN_%bc*9K%-gC1G@4sop zN-3WTQ92vvt*FcIzp0UTmg9=cCgHA5Q}CzD^Ly~_oO-+|$BTFAtdKRl)^!7pweqPJ ztJRgbZ%C|OU|NbZK5{F|C59Ei(R*hW?z8~M5G6_JAfJ_pRUYLrs419 zW*(l&McnFo9BrPDw!~Cg19p+U%T3mtaNmBZjL*^2D&Gq=X8K|`q@0I$R zluaIETWaAZLi^(rdjMlS*ykcNxQ1E|Xl56*um{#;D6GkF8UbrD3K5hsh@K{6|0)%+ zlyu~#j)hgoLdu6i3A4zBauehA!Td0mNd|0?FRdRNyYcu&`AdXyfc$MMRLv9t#7 z>ROApb@98po}_QecXh3&4I+~^iY&aND~$IqZNnSGcHpgG&*QCKyYN=Af5rR4cH`Y) zKc;`9PP~DP-$(W{ym@Rt-bZ#2Z!qIGm;Dm9_CM(q-pKSR-bHo}?;AUh>>TwLrwe$G z(+T<`oy6OkzHPk)>{+~_jNf0j74H)JKHgRKBHnbij}G9iPKU)cVw|{EWQ*&BSNP~X z`iPF=jZc4|wwrvfBFQ+Lg-}H`KMc%UbIjj5_yh zb!T?(ouRXhdKiwD`WE*c>ghXFojVfh+!3#Hx2kius&jX59rxX;g(_8{N>!*zsBpi$ zGxYvAu7_2@!*+pIQ|DgM(ooZ~)TQn;*VQygJi~g?46$pwWP6_4I-|a!p~k(urnY$n HJkb9GD73?q literal 0 HcmV?d00001 diff --git a/jcvi/utils/data/Humor-Sans.ttf b/jcvi/utils/data/Humor-Sans.ttf new file mode 100644 index 0000000000000000000000000000000000000000..d642b12b821a16d313b4035b3640eaa54a45b9e8 GIT binary patch literal 25832 zcmd6ve~_H@ec!*&-tF$8M`(8~ZiPYC?O7m!IdmW)4zSF@8DkkEkcBa4Vq*zeLaN0{ zkpx%`rEYpnYnKTbYEL|>Cvk|=Fs_?X7>6=xta{p7lTJLOX(*{PYHPci&^pdELuu-9 zi>|Nt`}2LCTRBeqPbU*;ckkKf$M^aCcz=F=zx$*jrL-b_DvhMUz4uLT{^@^tWGbcN zD)!#9_t8B^Zr}dGmXt>LUOf58p%eSJy!<Y-CUU;@=_fIaJ)2Zt#vq z8cVk!m&9lO*O)DSE;S+Gw;G>e*E8|A_(s}XjOdi0C7(R>=k`O#k1@(%@D@ju6>p__iyay&3NfwEpPjLou^|uSMTbc`m6mK zi*Y4!kEL@e8sMSW&igp;@A7VOth!#;f9rsF~&*N#Iu17|eh3Ju7xmPWBbYj#9LZPuMGJ4jsr$BR8@NV(WIxiY*=so<8 zgCnEOR(tHC73Io{S9Q9*x2(Qoyuaqsw_bMn+PAHn7)-u>{S{Yk*m%{|@3>~_+D+5f zUB7wD4O?%Vx#{Lx-ns2vx6a;n``o+lc+Z{h-TuD2-hcOwd+y!&fnE1~@cs`yF#q8P zf8rw_eduGm_YB|dFW7!~?*#|;UGR-BAHQJ7{&euKJ#xToed2-xKkiE~Kbqz__?FbQ z?LC!#8NcxBgVlq#4&FBS$`S^c2I;xH@6y3q?OXcKOJ84lY3ZxqzVF-jetXA%`scM6 zhwp+km!40rH=arp2}@tyUO!!IogUjd+t@PE9G_^6w^w(H_0x^Kqy3AM1RoZjR7D&dP;} zhtG!O$L5I0=9E!=V`F9uF~#AEj18)*fKW`eykSQeKp1RKZnS&T`TajV5Hx)I<1Q}RKm{qAC~KQ&{y zO#v(o3%~%DL?!7-ry(>1IwO2a^vHS=SxxmOy9*jHjHOqWijW}jC1TuIx-xA~yV4^m 
z&1?jA6UA(?rZc*Jx-E-nkGE$!D*@f?O5;j^1ez1Y%$jV=7}Dr?`>CM3+n<3tfrq=9 z%(-l@F!v&Rc0dkz%t(RpUU#n5Jdv@~QDTT>L7+;TT}5Oj82`Vsjaw z>T${r-FGT5?<<>7a6%(M)OTtbmk1w88g3F30$1c%dcJtMu?;+3o<0VerW>}Z0$2eS z*dE;?zJ(UKHKA&DOwO${s$;S}UoqZ3E6cL~3hT$3V3yDBYe;6Mdj^$cMNZnLd=!Gs z?v$8?f7Citic4$@f@ko8FU5Es`~0s<`C-1?>lsi1%n+L4Fb{L{)rd77ra(f7A4Sc` zJi8)A*ivw8rnBi37PTRJORPkmCmiIMH2sT)J0e5K;;DXRuh1%92=xnW;sFL~WTHw4 z4j}4oORp5`8oSc1+`6-3d}5^xatw(1jzV0!xEOf>7;qf6BurSb70l-1AarnuPlF%M zTdl?*$b`SLdrFXG%j8!W0vnUPMLduMk5LM0lAdT(j9dCvv72~y8@{^J0s-qc%E8V; zVw1=weRs#(#l5CrQN9TpQP+H?B^w};n(-ZnZW4+@{;!&n#{i(JDkf#%^WPsOl6u13k1jFX5Bgl&?p4O zzVTAupIi064!Lq<;w*fu!W5f95K6XyjFMNY5E_MUr^FI(xa$aTcj0Ag1fSUVj{+R7mYG;78*nS4$Uau=N}@|Y&;2dYqSF!NS0lA{u@5txvH1W* zHs(s%KSsw3h_bz-1zl*VxFaAR76)ACRvfCIqUBpOVZ)S1J2( z;0X*!e35#T2nR`r4I!`@6jR5c@MSa5d{P+RZsAFN;%o+y>1}uHzux8=a20?>y(mfA zK_`JT>0`zY!axqoY;+slv|E-mY8IPjh+J7FrHnfKq%1Z>Fx!}v4J9Zp#E%9s>#S zis&G(4H)K^3hmn7BarQaV-%Z-d>1*g>-h*35S0S1gx+BV%T7H}przFXMPm zcD|4#3IiQ%WQx6&2}T3d0g8Y-8=nO=c*ml1k%Exx#rVv5mE2?{uIXYDWKFC3w)4P2 zHvV>{N$!g>Lc{Ut(p$_7zbvAbqI)K73g7WEJlJ|jszp1T>!|<8{f0X9SFn>A2nIuP zaD}5W>^QjJREZ}+peoN@@jQ!%+>pk}pY}WUHi3iPK(Pj}bSP{<3NXQEQ1gU!ls2G- z{lep7O`}DM+ed#>6s!>E?uei=C+}6Bc!o9&JQy9pI_01?s+?EVib5q75rKgN3d47q z8?1!5DXW%KeXR~75quPIC!g^p23ReC9hS)j`8e5tdP`6XEiQW^&p+2VU@wt-1LF#P z_(uBz#fAtEvJnSEHL})s#v~kUg}SKt{UIeYxVi(@uj@1EY<`*b1sqz~KBP>-l`|6I zIr2IQg5xUw>>wVrXOqJs+at-#cIhK#>ZrHVW1fcZz_lhF_uRdOV}dgJ3X@hS#mVZ>#l# z<;n`l%Rx3i?YKpd&bq6|U$BF;&~5i&^`R)NH2_@cBvgnm^<2l2s{eDTlcv(&QhXPd z6ti8~)At;60~ERFMd`2BUf2?+w#TupiKZb}C<|ES+NwmY^o;{vhRJ6X!orQn^s78W z)`i;Do)j8UHLl1e;rv+AP~`E2?r7QvWd!1b?D!q39h4;d?^{^3rX9{McULAN7NbN2 z(5>)mxEH4m_&w-xTgxO%sthC`#-`kG;7iST+^X9%%N&(l8&&8|a^@9jZTd+hxplg& z=%-*PX-0KMZYV(M!yOjPGlvLmL^{wG!f+zOWbt$`+nQ3tqDli?^XxSn8BK^!{!=H1 z*c~}U`0xvfi4VCuSg4M)PPztR=M*9luwoVPZ3^@ zNUFA<@b+A*dp5fc(Xr_82myEy32Hd&6s+XY-Am3Hk*0uP+kgm>149*@RUsZ23fmEq zhCLlghT4T0rViVP`urboo4Rl(|E~hNES}pA=@V5DT5jNG5s*!v%VE*taYbFAKS1gX 
zi9OlB-qLuOY8>1(I~U0ea}CmqsR$Y(QG8>=^gfU)AS=cj59ATiy7ZH85KP(%GDzU2 z>TyK9%6^cSlb{8267?Bp0uA*Yg#-6Ol#xu)T@WA6GXi*(Md?fpYOTOH*r|oaMWH@K zM=r-#f~OFw3{1R7*oe^MSWd+>RXQ8kPuq#;4yNoCk8_aAi{B1Ws4wBhkmU{7)K88>eCjKBtsOK~iMO#L#<3Mu>4{Si=O91N z%;^u)PRXHR7regHHXgFpnFA%ZH6;-8H@AJA$WE$r`jYM!Y*&%UDzYOK5QrhR2oVjT z(r%_IzB$g*UlWfncmhWZ;V^A!g+N&cy&1@B5FrqR^gOaRb$Ih4Tm5Jh#aW$ z7d8;O9ClXLX5Y51c9c5UR$+~0c*lmhi#xmmh5Q2g6SMJxnEaBjUfwr{^N0F6SDfS0 z2wDsm#oiP%I3X?diZ`VD@s)@{;nxz03SG)~`I6%@p*Wk141nL~X^1>5p zeyG;F8LP+>L)KOa6XXx#i0>g)TTGu*}pAIj3(57xTr z!{JS2MwL^Q0O^U1vb;cZnQj4Qtm%&RDz zlCLe+Sv9sjgTdE@ji^#^Kz24m%>5O1*h`$IkGGB4dUx#c0d@mDfuZOrM~&F0nHr|= z&Su$Vs}*J~^e7b#ldwdD*VtfKo?lxw!2bmIW`d5 za!Sryh(jy3NT_4&#sf|pLVC7uS3icnV}Eu5a`Ls3O{ek*i&id5XVE>;j&KIF!vM4! z4zIKK43Y$0avz*Y$3sGll+YFuEmyT&IT3|smldX`WLi7zqBfHYw_+xd0=mkVN zTlN2BiapBTi&bUW1z|Bg`(c%-7{RD^Fa7cMM1vphv7Ct8X7utx8zZV=JJY$*^VZiLaBJj;UdYFSoNu3OHm>W**PVO(R`b`;m{i|UE}M9``C!xxgHG^ z*D|L9_?ClmDz(O7e0gu3L2D@W%a*Z%bDrdT)eaU}3?DD5%ScNN9CYt=mTrs+n^Qv+ z3M(OR@@ zatyeahy?>91S46yGOTLi01@;idM@G^k~KKk zcUXb$HK%%2OoyiqEtETetU@oRYIzc`xmt@^f4bK7L|o4&#Dg4bw{_N9rMs57a7<;8 zxllgx6N)eg%`tY%8CPN8m78Mz2*>b2}fcd(m^B9CwA>?DH$IhuSxgg|^!xNQF2&>84qJZayFqXt`Hh|#sQsKxr=3>Igtx#<9`L4_@a5X zh(w4Ob3#CXB4+~*Bn}%x>pQYRS`Erqn@jgMHc+=*tBi~Kq#%ykd`6sxAQv`VovE|k z7`OeNbmAB*yTVRXQmX4;1%P_euzUWC$cE`LaBqjfgfn&mNGQjoQ(L&IjOg2t`CZvz zMJYs(Kacg41yNA-R9sYx+N3!5`#r<6WR|ETDBk7PppZz51?e7$G$h6Cr;uFK{=-Hj zP1I8WBIs2OqxmAz)C%G@^x`>%dK)VBr3E2ViPlBT@CqAapm&2@WI+=X+?E>5i1?K`JXl^(++>Q8(J1P+Bk+e~FT%CV}zC2-|@ zWf^DToFpq3%SS>K!D`KlSO>+hUNu={vN>fAZ<>pRuyv_KQecq?<{gOF+4-WpsrH~} zH(7JSZ!fxumE{Hb4Jc@L#`{a{=o z4;P4yB0bw~F4|C>U5IefT%xN&b@e`12Ka6-R-bKq792h!q(=zEZ#TA?CSb2z4q~td zZE)X6@cAx-FlX{iC#)_=iCM{0qH00s0%lAGS-~O{N;0G+GhE4^Nf}Dui#4&5A~$)4 zH-I?Bm#QPAEM;5zpfKK1FxH~>>7J^ziHm3H8px4cV*oqUX`mB!BW&1xg@Pgw>JuAP zDraq>FzL+dF>`jcWFT({tP!iVaYA3ctQb-EJqIPsA@O)gQO-sk-F024#Oz84@KsqX zUH1obUE@xl&_)w9Pd-N93Mh6icKlF?@r0xeHF|Z>5+=X`Y_wtrq|Y7P=cMjTYbmNO zQ-vUY&st;yuPRvCq>L^(wau1F$f>G5A`tMo!?KdZgz69~68XJ4U8pC2ZTT)o<(p2Y 
zvv8{UO`@SwM{LYiIzofnKHU>wCo*K4u_ZH~JR~aO!5HrVFUPA%Gsn>nD3iQNj zc@RKMhm!27No_xo=8NZ;=eUSFOltA(q}C)qjc^hCZb+JM%8?^vd|UeSx8%NYy&Gx~9j zezu?`OXOC5Ii_cmp)G(r@YEHSRP z7<(Djc@_eZTzjNPs%MS=?SW->%L|coF+R7(Tv?SXYpuM2+=r=(eSm2>+L&vK?pV7Z3p8jv&j#hgIW)IrzCV;$>ATMyne*{zhoJuG%>?j$ zRzwSEcPdv^zXnfKXAJ2CAD7G003Q%KrNOqPN5nzCn`oO7O{4hfmeP!n-_a!%jnqbau zO$HwVOKk(8cR@R2Zq-DT}xA9XiEcR8msa(-=h1 zR#uF+!rgipxRH9Yjrv;+lBkANRm9W`at@gF6~)vQd5P5~@0AyU8|5bESFSiZBm{Ej z#`d*JOOOWBO}`2xTqy|;%&2Gzz>&rQE;=}nNY2RZYIQ?~ltvGUAFte9`)8U;YU2|3 z9>w3CnxVgWgKP!>arLhA5>&I~_$tkzR@Du{$j z=zoGoL9%KBXm6+n{++}wyEv#ciKHP}vz)0P$oUd^NJkYrUrHcL<`azp`3ag;kC)YE zni(=)@qr`jH#&eK33WN7azv&AQN1sLmnk=x2x~UCUagCAyjO(go1%HaGxsTSLHH_b zn9-xSTMW|7ldJw%^r_wc$&&l`p;@Ix((Ea296apzZ?(uL^~+XcvdVw#ve2bkmep^z zWERVzTh=2o6*pQe$dWC3eX|3R6uKP7pj0 z>AgVo4`bA4RNhlz3?c3*rUAcldQ{CcHKOsruBN;UsWDs1NcCcpH-5<`N}vIx2@Tng88F(z?Y-N2(b6h7 z3e15Smk7bEb>Vq@LC?#okablO8&F7}AK0Kh`EeDT#1&McJ||%-B!yRVPg6|meSUjr zXBJ<*E8T->XKRiQo$O0YRvu}UW}TF$VB4croo@jQN2Z(!)Ivof={Gkvx@#=m25W4GeWD%MH#Pv352SugYl4LX~LfkgZB_v-10dW6i7VWX5$H^)r22GIP2idA*yk?s)}GJXJGN;?n^~ z6AeSYi_Vt~U(+m%FDN>1s+FKv9DWM$xF@~W`a|ZpVDUZUM8e4gh7{rowc2h32FWh^ z2WKr7W21tFByvj9b7zmU5gM0hv1;YzR6=qB^EHv`s2>rTzWxf&Dx2kod}CB2AXC;# zPWwF{e}&fi8V3qSO|N2*%27NEe6cjd&_+t5dH_Pz6p2K&{tTQgSSa zIRREPpxHWU5|ACbc&!3#xY}Wxs%&d6->9a=bJMB?mDxW_Oh1qgDKoY+5|WhpS}t3s zOZ`Mnv*AUjJK<`$_*$-jp;1B5i|oUWq&{4_Wgzzu>-S_82$2crT##AK*+u#+*&84b zrlfMr5zfjDbrx#`DpkVp4J zUBRfx5m%-0#U=2~FCbY2~T@M?F| zXBAc<0JY1Z*!lQ;oxA+&V4-x74j`yHCvdXaK}vjV76$9XJE`%pT?qZfg@@ z1M(Ee>Rsly{;FKT;tHYFpPH{)76(MD?OFVcxs?9p_{6cYx2oF9xE|scUe(9-*+z|7 z*0p*~#kw_4Qq;A7*3aw%C8t42*DKCF^Sqp)GW$X4+U)Rq58PzDt*t%LzbRV1iIp?CmQ78RTRi}hT1-jB%3<%MSVFv%f#)na5L?E`w#_LH`myk07zhI9k# zRi0MW$2+l}4QWqUkC#1(Dd}A(DfJ^Jmkij=h}MSkvmw~8EN0j+oPDB~430Q7bb^x6 zs&{7fVIYP;IG6Bvn2ULg1Tk#pjO-;Ah*oh+p}>r0!^s+pl|_<~u7u-2Nt`U8RFUm; z<0-h-%qDe1r#UGXp$S|xihIp*KdRQIS9)qrqw#1!k-;foxCvyPbXYN1WZHQeO-PDW zgcPuZ80V#T{tG{e@ZQ49q$;bu4MF&U4o9 z)J-23=)oYU!=M&1Se;U=yN;Dz{S#)6UQU;&H 
zA(}+;6rPYnk7q?<1?x=w%BJC|>RByvmNNCBc&RFw87KT3jXl=}{A$t~Lkof13>@qP zl~zKLXpB+E^1i0(b6S>9kp-R!S7`9cG)^=1YQ) zU`u1w!~@URz?Zfg2kQ2Qe4Kv;7Kwo)@Uj4Z6k<6k$*D<991J@h*-a%9sl}?Pn=*En z*r}Bn4#FYB(hA!v9%qOU4!(`Zn|q}8r@5>R1jJ{MTDvAmn+YD@Ht2{iN-gSyN{*0M zw0+EBZm(CE^9c}Pn6Q8cJe+^6@v}S^bR@M|`Mxf_JzYh1dL2h@O55mM-J9+sR(+ml z7XE7b$LXJ@Z=`>nemDKQ^sV$C(s$Bp>5tPJ=?Ce57pWL6Rurp>ONvX2wZ)*=P`snq zRBSG8EN&@oE#`_li@S<@3Z8K~vvte3-Xj|^BIO6P^^`8Os{=C|XST5BkyF~dSL@XL z*Ozg2TaU;Y!71sWPwF(Z2OMqHBj|*q^=$%+p(10|*;{6GD}iLSC&RFPw(8TWJ#o>6pO?cyS<+KhEEmhNxPkLtOwIIUm&FE>*h1XxCpCqD9J{^q-waceAd1~(`8(z>K8xqdGARAD z_d-~4zxY)Y5Vh=RKn#Cabq6UVuf>Q@h1|1zS;qi6ppjV3%PwSailrlFsJN>XLG0p% z`=qu$&2O}CR0g6alHni0uv_LdED247@JPV0H+&~vQw?*~dHkAOExU-W{gNyb;mL=n z`-hux(9Hhm8T%H(jQ_$uG9w}RY}MFjdDsFei?PO-;g;%vr2yaW@@sVciN{rVVmrzs zbCLS2E%}drDu5PQXd=k1axFb_>0rWHWFmTzdpwgM;(G4nI}CaaQn5}Bl$g|M2v$zvQR*{Uts#kRDk2tMx?JzgqOHB!cS6+Fxch(naZFp8qzU z-o{Fzt66{ip7cU7PCiOZBKoW!-!jfyf7j^mMi9P+ps}&jSOdcIAw97-*Gx*XM+p$> z&a(3&!jf{%tuhUTK%Oc{iJ@q)Q~jxReLUS1W&||9bDmJbhhkV6Mi>b1P`do$5}g>0}aEm&DHfD zZQx;+U*uf=X@H$Q4z_B#v1k?qy;TY~+|sJ}SI(w4l&U3_r5PDGpYHK5Gq&92%$Mm>ktRl+rQ@q(*-p?| z%S~&y|Du1-;*8%;l7588U3`V-w|||w_Ftsbx{mk1NvVB2r7^w#ETxO?e|b(zX96rA^Q^y+5VvZb<3+b-Yiew0Q;ZBfS4D@AD~b zxih63pnL1{JhuNA$W0KPZbyrHW@Nyeo;P&~H=AirC zyHmP@bKi3+r8~LTdv~U^owc^_ga5mJHl_D3rgZmRDeYLno9pjE2KT&_($2Y*KG@>@ z&6MuHgZJO#jf_78-yeD-r3bj~!CMmo^OMxs;NNd)Re=8$=@LD0-TxYCRWX~lN7C-% zALi{*&Y|z*f6cUkXTQh4R(eb0U50w{>2;YzS%Kg4# zkTz3MU6*d=X-@<0aFpYFIKGdubj=AqpYZ=pX+NVaaO@yk2RY*?*X(8AqrUbr#@$0? 
zxt{wUgwAWJ)Apywq2v0rb^BwF9zHs_Z_mOp>gvb1;xJbasNY2Ov0eKfdF;@hqcE`# z#vWsUJ?ZHG{gOK?Q4Kb4x_NNb^)s8V{_`Rn42^N;pCewZZ-Q|N@%l8wHwp6p&FtXE zvGAdNM~@vmj9#H;^G(xRrmwp>eaJF8hBPF%<=bthex2rbnA4KGN&FGPWU&b64SkSB ze?jXrf6Tu_>?=5HgeRakQ;SM@j99;dr&g~d;;-UaKwaYgTX;s&B}D%|Pt>}UXVYHB zGp*N>3#{Ym`~zl^-_FxkuOKtn09LL-4Da9xa8qb-lWDvj(QHARTbaL~f%cozE$N+9 z;P2vzRkP`~bbFcuId_m#+{qfM?d1A*rT6oT*&VE3xHs)gA3(A9@e76fk;ntA7yK~m n`+p*Rgr^ig#FMpmqwK}>chZ;AKS(d67pddFntmgFEv5ehM(RRK literal 0 HcmV?d00001 diff --git a/jcvi/utils/data/TREDs.meta.csv b/jcvi/utils/data/TREDs.meta.csv new file mode 100644 index 00000000..9e8a4ef8 --- /dev/null +++ b/jcvi/utils/data/TREDs.meta.csv @@ -0,0 +1,33 @@ +abbreviation,title,variant_type,gene_name,gene_location,id,motif,repeat,repeat_location,repeat_location.hg19,prefix,suffix,gene_part,inheritance,omim_id,url,src,mutation_nature,cutoff_prerisk,cutoff_risk,normal,prerisk,risk,symptom,allele_freq +DM1,Myotonic dystrophy 1,short tandem repeats,DMPK,chr19:45769708-45782556:-,chr19_45770205_CAG,CTG,CAG,chr19:45770205-45770264,chr19:46273463-46273522,AATGGTCTGTGATCCCCC,CATTCCCGGCTACAAGGA,3' UTR,AD,160900,http://en.wikipedia.org/wiki/Myotonic_dystrophy,wikipedia,increase,35,50,5-34 repeats,35-49 repeats,50+ repeats,"Myotonic dystrophy (DM) is a chronic, slowly progressing, highly variable, inherited multisystemic disease. In DM1, the affected gene is called DMPK, which codes for myotonic dystrophy protein kinase, a protein expressed predominantly in skeletal muscle. In DM1, there is an expansion of the cytosine-thymine-guanine (CTG) triplet repeat in the DMPK gene. 
","{3:1,4:2,5:8666,6:59,7:122,8:227,9:41,10:517,11:3263,12:3742,13:4178,14:1566,15:274,16:234,17:96,18:44,19:91,20:345,21:459,22:309,23:194,24:197,25:145,26:122,27:93,28:77,29:43,30:40,31:37,32:15,33:16,34:4,35:4,36:5,37:2,39:3,40:2,41:3,42:1,43:1,45:3,46:2,53:1,54:3,55:2,57:1,58:1,60:3,62:1,67:2,73:1}" +DM2,Myotonic dystrophy 2,short tandem repeats,ZNF9,chr3:129167814-129183966:-,chr3_129172577_CAGG,CCTG,CAGG,chr3:129172577-129172656,chr3:128891420-128891499,GGGACAAAGTGAGACAGA,CAGACAGACAGACAGACA,intron 1,AD,602668,http://en.wikipedia.org/wiki/Myotonic_dystrophy,wikipedia,increase,27,75,11-26 repeats,27-74 repeats,75+ repeats,"Myotonic dystrophy (DM) is a multisystem disorder and the most common form of muscular dystrophy in adults. Individuals with DM2 have muscle pain and stiffness, progressive muscle weakness, myotonia, male hypogonadism, cardiac arrhythmias, diabetes, and early cataracts. ","{1:1,3:2,4:5,5:5,6:39,7:125,8:38,9:29,10:115,11:127,12:193,13:63,14:405,15:15176,16:4884,17:1462,18:633,19:946,20:330,21:128,22:112,23:43,24:40,25:52,26:176,27:38,28:13,29:6,30:6,31:14,32:7,33:3,34:1,40:1,41:4,42:2,43:5,44:2,45:2,46:3,47:1,48:1,49:3,50:3,51:1,52:1,53:3,54:2,55:1,57:1,62:1,64:1,68:1,71:1,72:1}" +DRPLA,Dentatorubro-pallidoluysian atrophy,short tandem repeats,ATN1,chr12:6924462-6942320:+,chr12_6936729_CAG,CAG,CAG,chr12:6936729-6936773,chr12:7045892-7045936,CACCACCAGCAACAGCAA,CATCACGGAAACTCTGGG,coding region,AD,125370,http://en.wikipedia.org/wiki/Dentatorubral-pallidoluysian_atrophy,wikipedia,increase,36,48,6-35 repeats,36-47 repeats,48+ repeats,"Dentatorubro-pallidoluysian atrophy (DRPLA) can be juvenile-onset (< 20 years), early adult-onset (20-40 years), or late adult-onset (> 40 years). Early adult-onset DRPLA also includes seizures and myoclonus. Late adult-onset DRPLA is characterized by ataxia, choreoathetosis and dementia. 
DRPLA is an autosomal dominant spinocerebellar degeneration caused by an expansion of a CAG repeat encoding a polyglutamine tract in the atrophin-1 protein.","{5:13,6:18,7:64,8:2685,9:386,10:2265,11:375,12:1370,13:1595,14:1330,15:8475,16:3796,17:1467,18:488,19:295,20:313,21:130,22:58,23:76,24:27,25:17,26:6,27:1,28:2,29:2,30:1,34:2,39:1}" +FXTAS,Fragile X-associated tremor/ataxia syndrome,short tandem repeats,FMR1,chrX:147911950-147951126:+,chrX_147912051_CGG,CGG,CGG,chrX:147912051-147912110,chrX:146993569-146993628,AGGGGGCGTGCGGCAGCG,CTGGGCCTCGAGCGCCCG,5' UTR,XLD,300623,http://en.wikipedia.org/wiki/Fragile_X-associated_tremor/ataxia_syndrome,wikipedia,increase,55,55,5-44 repeats,-,55-200 repeats,"Fragile X-associated tremor/ataxia syndrome (FXTAS) is a late onset neurodegenerative disorder associated with problems of movement, memory, and the autonomic nervous system. It is related to the disease fragile X syndrome, although FXTAS is a clinically distinct syndrome. In fragile X syndrome (FXS), the fragile X mental retardation 1 gene, FMR1, is silenced; in FXTAS FMR1 is overexpressed and interferes with brain function. Both FXS and FXTAS are caused by a trinucleotide repeat expansion in FMR1. This CGG repeat expansion is smaller in FXTAS: the disease only occurs in individuals with a Fragile X permutation. It most often occurs in men, but can present in women. 
There is no cure for FXTAS, but several of the symptoms can be managed with medication.","{4:201,5:430,6:517,7:642,8:730,9:934,10:2266,11:525,12:844,13:1461,14:1585,15:1633,16:1483,17:1210,18:750,19:583,20:894,21:192,22:225,23:332,24:179,25:139,26:130,27:85,28:93,29:264,30:378,31:135,32:69,33:24,34:6,35:8,36:20,37:8,38:16,39:11,40:3,41:2,43:1,59:1,60:1}" +FXS,Fragile X syndrome,short tandem repeats,FMR1,chrX:147911950-147951126:+,chrX_147912051_CGG,CGG,CGG,chrX:147912051-147912110,chrX:146993569-146993628,AGGGGGCGTGCGGCAGCG,CTGGGCCTCGAGCGCCCG,5' UTR,XLD,300624,http://en.wikipedia.org/wiki/Fragile_X_syndrome,wikipedia,increase,200,200,5-44 repeats,-,200+ repeats,"Fragile X syndrome (FXS) results in a spectrum of intellectual disabilities ranging from mild to severe as well as physical characteristics such as an elongated face, large or protruding ears, and large testes (macroorchidism), and behavioral characteristics such as stereotypic movements (e.g. hand-flapping), and social anxiety. Fragile X syndrome is associated with the expansion of the CGG trinucleotide repeat affecting the Fragile X mental retardation 1 (FMR1) gene on the X chromosome, resulting in a failure to express the fragile X mental retardation protein (FMRP), which is required for normal neural development. Fragile X-associated tremor/ataxia syndrome (FXTAS) is a late onset neurodegenerative disorder associated with problems of movement, memory, and the autonomic nervous system. It is related to the disease fragile X syndrome, although FXTAS is a clinically distinct syndrome. Both FXS and FXTAS are caused by a trinucleotide repeat expansion in FMR1. 
This CGG repeat expansion is smaller in FXTAS: the disease only occurs in individuals with a Fragile X premutation.","{4:201,5:432,6:517,7:642,8:730,9:934,10:2266,11:525,12:844,13:1461,14:1587,15:1633,16:1483,17:1210,18:750,19:583,20:894,21:192,22:225,23:332,24:179,25:139,26:130,27:85,28:93,29:264,30:378,31:135,32:69,33:24,34:6,35:8,36:20,37:8,38:16,39:11,40:3,41:2,43:1,59:1,60:1}" +FRAXE,"Mental retardation, FRAXE type",short tandem repeats,FMR2,chrX:148500618-149000662:+,chrX_148500639_CCG,GCC,GCC,chrX:148500638-148500682,chrX:147582158-147582202,GCCGCCTGTGCAGCCGCT,GCTGCCGCCCCGGCTGCC,5' UTR,XLR,309548,http://en.wikipedia.org/wiki/Fragile_mental_retardation_2,wikipedia,increase,26,200,6-25 repeats,26-200 repeats,200+ repeats,Fragile XE mental retardation (FRAXE) is one of the most common forms of non-syndromic X-linked mental retardation. The most common mutation giving rise to this syndrome is a triplet expansion of CCG in the 5' untranslated region which leads to a silencing of the FMR2 gene.,"{3:5,4:723,5:887,6:1141,7:1402,8:1431,9:1535,10:1657,11:1705,12:1562,13:1115,14:759,15:1478,16:552,17:264,18:373,19:128,20:76,21:59,22:37,23:29,24:25,25:10,26:3,27:13,28:6,29:10,30:6,31:1,32:3,35:2,36:3,38:3,39:3,41:7,42:2,43:1,44:2}" +FRDA,Friedreich ataxia,short tandem repeats,FXN,chr9:69035562-69100177:+,chr9_69037287_GAA,GAA,GAA,chr9:69037287-69037304,chr9:71652203-71652220,ACAAAAAAAAAAAAAAAA,AATAAAGAAAAGTTAGCC,intron 1,AR,229300,http://en.wikipedia.org/wiki/Friedreich%27s_ataxia,wikipedia,increase,23,66,7-22 repeats,23-65 repeats,66+ repeats,"Friedreich's ataxia (FRDA) is an autosomal recessive inherited disease that causes progressive damage to the nervous system. It manifests in initial symptoms of poor coordination such as gait disturbance; it can also lead to scoliosis, heart disease and diabetes, but does not affect cognitive function. 
The particular genetic mutation (expansion of an intronic GAA triplet repeat in the FXN gene) leads to reduced expression of the mitochondrial protein frataxin. ","{2:1,3:1,5:246,6:333,7:152,8:7600,9:12407,10:219,11:29,12:163,13:147,14:151,15:117,16:247,17:460,18:649,19:543,20:421,21:306,22:200,23:199,24:122,25:129,26:98,27:50,28:52,29:26,30:21,31:19,32:8,33:7,34:3,35:4,36:4,38:2,40:2,41:1,43:3,44:1,45:1,46:1,52:1,53:1,54:4,55:1,56:4,57:7,58:6,59:7,60:8,61:7,62:9,63:8,64:5,65:9,66:1,67:5,68:5,69:3,70:3,71:5,72:4,73:1,74:1,75:1,77:3,78:1,79:1,80:1,83:1,84:1,86:1}" +HD,Huntington disease,short tandem repeats,HTT,chr4:3074509-3243959:+,chr4_3074877_CAG,CAG,CAG,chr4:3074877-3074933,chr4:3076604-3076660,GAGTCCCTCAAGTCCTTC,CAACAGCCGCCACCGCCG,coding region,AD,143100,http://en.wikipedia.org/wiki/Huntington%27s_disease,wikipedia,increase,36,40,<26 repeats,36-39 repeats,40+ repeats,Huntington's disease (HD) is a neurodegenerative genetic disorder that affects muscle coordination and leads to mental decline and behavioral symptoms. HD is one of several trinucleotide repeat disorders which are caused by the length of a repeated section of a gene exceeding a normal range.,"{3:1,4:1,8:1,9:106,10:140,11:35,12:148,13:56,14:234,15:2837,16:1383,17:8329,18:3441,19:2280,20:1650,21:1064,22:753,23:770,24:570,25:370,26:283,27:196,28:176,29:124,30:75,31:52,32:58,33:43,34:30,35:20,36:16,37:5,38:4,39:4,40:3,41:1,44:1}" +HDL,Huntington disease-like 2,short tandem repeats,JPH3,chr16:87601834-87698155:+,chr16_87604288_CTG,CTG,CTG,chr16:87604288-87604329,chr16:87637894-87637935,CGGAAGCCAGGGAGCTGC,TAAGATGGTTTCTGTGCA,3' UTR,AD,606438,http://www.omim.org/entry/606438,omim,increase,29,40,6-28 repeats,29-39 repeats,40+ repeats,"Huntington disease-like 2 (HDL2) is clinically similar to Huntington disease but arose from a CAG expansion in a different gene. 
The disorder is characterized by onset in the fourth decade, involuntary movements and abnormalities of voluntary movements, psychiatric symptoms, weight loss, dementia, and relentless course with death about 20 years after disease onset. ","{5:10,6:1,7:19,8:82,9:18,10:39,11:606,12:113,13:645,14:12231,15:2128,16:6958,17:1739,18:315,19:185,20:15,21:2,22:11,23:17,24:6,25:24,26:38,27:43,28:14,29:2,33:1}" +ULD,"Epilepsy, progressive myoclonic 1A/Unverricht-Lundborg Disease",short tandem repeats,CSTB,chr21:43773664-43776374:-,chr21_43776444_CGCGGGGCGGGG,CCCCGCCCCGCG,CGCGGGGCGGGG,chr21:43776444-43776479,chr21:45196325-45196360,GCCCCGCAAGAAGGGACG,AACCTGGCCACCACTCGC,5' UTR/Promoter,AR,254800,http://www.omim.org/entry/254800,omim,increase,30,30,2-3 repeats,-,30+ repeats,"Myoclonic epilepsy of Unverricht and Lundborg (ULD) is an autosomal recessive disorder characterized by onset of neurodegeneration between 6 and 13 years of age. Although it is considered a progressive myoclonic epilepsy, it differs from other forms in that is appears to be progressive only in adolescence, with dramatic worsening of myoclonus and ataxia in the first 6 years after onset. ","{1:6,2:9403,3:15740,4:61,5:30,6:11,10:1}" +OPMD,Oculopharyngeal muscular dystrophy,short tandem repeats,PABPN1,chr14:23320187-23326184:+,chr14_23321473_GCN,GCN,GCN,chr14:23321473-23321502,chr14:23790682-23790711,CCAGTCTGAGCGGCGATG,GGGGCTGCGGGCGGTCGG,coding region,AD,164300,http://www.ncbi.nlm.nih.gov/books/NBK1126/,omim,increase,11,12,2-10 repeats,11 repeats,12-17 repeats,Oculopharyngeal muscular dystrophy (OPMD) is an autosomal dominant disorder presenting in late life and characterized by dysphagia and progressive ptosis of the eyelids. 
,"{6:1,7:1,8:4,9:98,10:25073,11:73,12:2,13:6}" +SBMA,Spinal and bulbar muscular atrophy of Kennedy,short tandem repeats,AR,chrX:67544031-67730618:+,chrX_67545318_CAG,CAG,CAG,chrX:67545318-67545383,chrX:66765160-66765225,GCCAGTTTGCTGCTGCTG,CAAGAGACTAGCCCCAGG,exon 1,XLR,313200,http://en.wikipedia.org/wiki/Spinal_and_bulbar_muscular_atrophy,wikipedia,increase,36,36,<34 repeats,-,36+ repeats,"Spinal and bulbar muscular atrophy (SBMA) is a debilitating neurodegenerative disorder resulting in muscle cramps and progressive weakness due to degeneration of motor neurons in the brain stem and spinal cord. SBMA is caused by expansion of a CAG repeat in the first exon of the androgen receptor gene (trinucleotide repeats). The greater the expansion of the CAG repeat, the earlier the disease onset and more severe the disease manifestations. ","{1:1,2:3,4:2,5:2,6:1,7:24,8:13,9:8,10:5,11:11,12:30,13:76,14:183,15:159,16:251,17:678,18:1456,19:1964,20:2114,21:3202,22:2191,23:2098,24:2013,25:1465,26:859,27:477,28:281,29:242,30:86,31:53,32:19,33:15,34:7,35:6,36:10,37:3,38:1,40:2,41:1}" +SCA1,Spinocerebellar ataxia 1,short tandem repeats,ATXN1,chr6:16299111-16761489:-,chr6_16327636_CTG,CAG,CTG,chr6:16327636-16327722,chr6:16327867-16327953,CGGAGCCCTGCTGAGGTG,CTCAGCCTTGTGTCCCGG,coding region,AD,164400,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,36,39,6-35 repeats,36-38 repeats,39+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by an expanded (CAG)n trinucleotide repeat in the ataxin-1 gene (ATXN1). 
SCA1 patient experiences hypermetric and slow saccades.","{2:2,3:2,4:1,5:2,6:2,7:3,8:5,9:1,10:3,11:2,12:5,13:1,14:2,15:2,16:8,17:2,18:4,19:53,20:51,21:25,22:40,23:63,24:35,25:64,26:842,27:589,28:1933,29:7441,30:8337,31:2190,32:1995,33:711,34:252,35:226,36:291,37:32,38:17,39:10,40:2,41:2,42:1,43:3,44:1,45:2,55:2,66:1,68:1,76:1}" +SCA2,Spinocerebellar ataxia 2,short tandem repeats,ATXN2,chr12:111452213-111599675:-,chr12_111598951_CTG,CAG,CTG,chr12:111598951-111599019,chr12:112036755-112036823,GGCAGCCGCGGGCGGCGG,GGGCTTCAGCGACATGGT,coding region,AD,183090,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,32,33,<31 repeats,32 repeats,33+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by an expanded (CAG)n trinucleotide repeat in the gene encoding ataxin-2 (ATXN2). SCA2 patient experiences diminished velocity saccades and areflexia (absense of neurologic reflexes).","{8:2,9:4,10:5,11:2,12:10,13:20,14:6,15:21,16:18,17:108,18:14,19:89,20:88,21:978,22:20300,23:2877,24:181,25:75,26:24,27:274,28:22,29:66,30:35,31:29,32:10,33:1,36:1,39:1,60:1}" +SCA3,Spinocerebellar ataxia 3,short tandem repeats,ATXN3,chr14:92058551-92106620:-,chr14_92071011_CTG,CAG,CTG,chr14:92071011-92071034,chr14:92537355-92537378,CTGTCCTGATAGGTCCCC,TTGCTGCTTTTGCTGCTG,coding region,AD,109150,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,45,60,12-44 repeats,45 repeats,~60+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by a (CAG)n trinucleotide repeat expansion encoding glutamine repeats in the ataxin-3 gene (ATXN3). 
SCA3 patient experiences Gaze-evoked nystagmus (a rapid, involuntary, oscillatory motion of the eyeball) and slow saccades.","{2:1,3:1,6:5,7:9,8:5377,9:41,10:28,11:15,12:171,13:270,14:1191,15:1990,16:1126,17:6820,18:1129,19:219,20:423,21:3156,22:1233,23:465,24:412,25:233,26:196,27:218,28:205,29:125,30:90,31:46,32:23,33:12,34:12,35:10,36:2,37:2,38:2,42:1,55:1}" +SCA6,Spinocerebellar ataxia 6,short tandem repeats,CACNA1A,chr19:13206441-13506459:-,chr19_13207859_CTG,CAG,CTG,chr19:13207859-13207897,chr19:13318673-13318711,GCCCGGCCTGGCCACCGC,CGGGGGCCCCGAGCCGCC,coding region,AD,183086,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,19,20,<18 repeats,19 repeats,20+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease. SCA6 patient experiences fownbeating nystagmus and positional vertigo.","{3:1,4:223,5:2,6:10,7:2068,8:124,9:18,10:194,11:9596,12:4467,13:7557,14:878,15:75,16:22,17:10,18:12,19:3,20:2}" +SCA7,Spinocerebellar ataxia 7,short tandem repeats,ATXN7,chr3:63864556-64003461:+,chr3_63912686_CAG,CAG,CAG,chr3:63912686-63912715,chr3:63898362-63898391,GCGGCCGCGGCCGCCCGG,CCGCCGCCTCCGCAGCCC,coding region,AD,164500,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,28,34,<19 repeats,28-33 repeats,34+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by an expanded trinucleotide repeat in the gene encoding ataxin-7 (ATXN7). 
SCA7 patient experiences macular degeneration, upper motor neuron and slow saccades.","{4:2,5:12,6:18,7:340,8:66,9:488,10:17886,11:1697,12:3183,13:1272,14:138,15:48,16:26,17:14,18:9,19:13,20:4,23:3,32:1}" +SCA8,Spinocerebellar ataxia 8,short tandem repeats,ATXN8OS/ATXN8,chr13:70107213-70139429:+,chr13_70139384_CTG,CTG/CAG,CTG,chr13:70139384-70139428,chr13:70713516-70713560,CTACTACTACTACTACTA,CATTTTTTAAAAATATAT,"untranslated RNA, coding region",AD,603680,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,80,80,15-50 repeats,-,80+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by a CTG/CAG trinucleotide repeat expansion. SCA8 patient experiences horizontal nystagmus (a rapid, involuntary, oscillatory motion of the eyeball), instability and lack of coordination.","{4:2,5:5,6:111,7:32,8:123,9:3757,10:226,11:755,12:5301,13:546,14:1226,15:5791,16:2824,17:1647,18:627,19:566,20:663,21:353,22:120,23:116,24:58,25:29,26:23,27:25,28:17,29:6,30:7,31:4,32:12,33:18,34:11,35:11,36:31,37:18,38:13,39:5,40:4,41:6,42:2,43:6,44:3,45:3,46:3,47:1,51:1,52:6,53:10,54:8,55:13,56:14,57:9,58:5,59:11,60:7,61:7,62:8,63:9,64:5,65:5,66:5,67:5,68:1,69:2,70:3,71:4,72:3,73:1,74:2,75:2,76:2,78:1,80:1,89:1,92:1,100:1}" +SCA10,Spinocerebellar ataxia 10,short tandem repeats,ATXN10,chr22:45671797-45845306:+,chr22_45795355_ATTCT,ATTCT,ATTCT,chr22:45795355-45795424,chr22:46191235-46191304,AAAAGACTACTAGAATGG,TTTTGAGATGAAGTCTCT,intron 9,AD,603516,http://www.omim.org/entry/603516,omim,increase,800,800,10-32 repeats,-,800+ repeats,"Spinocerebellar ataxia type 10 (SCA10) is a progressive, genetic neurodegenerative disease, caused by a ATTCT repeat expansion. The autosomal dominant cerebellar ataxias (ADCAs) are a clinically and genetically heterogeneous group of disorders characterized by ataxia, dysarthria, dysmetria, and intention tremor. 
","{7:58,8:11,9:18,10:89,11:457,12:3296,13:6573,14:8078,15:3069,16:1826,17:875,18:488,19:183,20:132,21:47,22:21,23:8,24:2,25:5,31:1,34:2,35:1,36:3,37:5,38:2,39:2,40:1,41:2,42:1,44:3,45:1,46:1,47:1}" +SCA12,Spinocerebellar ataxia 12,short tandem repeats,PPP2R2B,chr5:146589503-147081519:-,chr5_146878729_CTG,CAG,CTG,chr5:146878729-146878758,chr5:146258292-146258321,ACACGCGCGCACTCGCAG,CAGGAGGCTGGAGGCGGC,5' UTR/Promoter,AD,604326,http://www.omim.org/entry/604326,omim,increase,51,51,8-23 repeats,-,51+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by a CTG/CAG trinucleotide repeat expansion. Spinocerebellar ataxia type 12 (SCA12) is a prototypic phenotype was that of a classic spinocerebellar ataxia, and the disease resembled the spinocerebellar ataxias more closely than any other form of neurodegenerative disorder. ","{2:1,4:2,5:2,6:9,7:8,8:76,9:1482,10:13389,11:232,12:40,13:3734,14:2041,15:2703,16:687,17:301,18:337,19:92,20:35,21:19,22:8,23:18,24:12,25:10,26:8,27:5,28:4,31:2,33:2,35:1}" +SCA17,Spinocerebellar ataxia 17,short tandem repeats,TBP,chr6:170554332-170572869:+,chr6_170561908_CAG,CAG,CAG,chr6:170561908-170562021,chr6:170870996-170871109,TTGGAAGAGCAACAAAGG,GCAGTGGCAGCTGCAGCC,coding region,AD,607136,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,43,43,25-40 repeats,-,43+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease. SCA17 patient experiences mental retardation. 
SCA17 can be caused by heterozygous expansion of a trinucleotide repeat encoding glutamine (CAG or CAA) in the TATA box-binding protein TBP.","{5:1,7:1,11:1,12:2,14:1,15:4,16:2,17:7,18:2,19:5,20:6,21:4,22:1,23:3,24:5,25:11,26:10,27:24,28:26,29:140,30:178,31:79,32:446,33:546,34:730,35:2429,36:4571,37:6039,38:7589,39:1762,40:400,41:128,42:48,43:15,44:26,45:7,46:4,52:1,53:2}" +SCA36,Spinocerebellar ataxia 36,short tandem repeats,NOP56,chr20:2652531-2658392:+,chr20_2652734_GGCCTG,GGCCTG,GGCCTG,chr20:2652734-2652757,chr20:2633380-2633403,CGTTCGGGCCGCAGACAG,CGCCTGCGCCTGCGCCTG,intron 1,AD,614153,http://www.omim.org/entry/614153,omim,increase,650,650,3-14 repeats,-,650+ repeats,"Spinocerebellar ataxia type 36 (SCA36) is a slowly progressive neurodegenerative disorder characterized by adult-onset gait ataxia, eye movement abnormalities, tongue fasciculations, and variable upper motor neuron signs. ","{3:44,4:8914,5:1250,6:833,7:10748,8:809,9:2222,10:227,11:165,12:35,13:8,14:3}" +EIEE1,"Epileptic encephalopathy, early infantile, 1",short tandem repeats,ARX,chrX:25003693-25015947:-,chrX_25013662_CGC,GCG,CGC,chrX:25013662-25013691,chrX:25031779-25031808,GGCCGTGGCGGCCGCTGC,TGCCGCACCCTGAAGGAG,coding region,XLR,308350,http://www.omim.org/entry/308350,omim,increase,20,20,<12 repeats,-,20+ repeats,"Early infantile epileptic encephalopathy (EIEE1) is characterized by frequent tonic seizures or spasms beginning in infancy with a specific EEG finding of suppression-burst patterns, characterized by high-voltage bursts alternating with almost flat suppression phases. 
","{3:1,4:55,5:72,6:119,7:146,8:158,9:584,10:18651,11:25,12:18,13:2,14:2,15:1}" +BPES,"Blepharophimosis, epicanthus inversus, and ptosis",short tandem repeats,FOXL2,chr3:138944223-138947139:-,chr3_138946021_NGC,GCN,NGC,chr3:138946021-138946062,chr3:138664863-138664904,GCCAGGGCTACCGGGGCC,CATCTGGCAGGAGGCATA,coding region,AD,110100,http://www.omim.org/entry/110100,omim,increase,19,19,14 repeats,-,19-24 repeats,"Blepharophimosis, epicanthus inversus, and ptosis (BPES) patients show dysplasia of the eyelids. In addition to small palpebral fissures, features include epicanthus inversus (fold curving in the mediolateral direction, inferior to the inner canthus), low nasal bridge, and ptosis of the eyelids.","{4:77,5:1,8:1,9:6,12:1,13:31,14:25129,15:7,16:2,19:1}" +CCD,Cleidocranial dysplasia,short tandem repeats,RUNX2,chr6:45327799-45664031:+,chr6_45422751_GCN,GCN,GCN,chr6:45422751-45422801,chr6:45390488-45390538,CAGCAGCAGCAGCAGGAG,GTGCCCCGGTTGCGGCCG,coding region,AD,119600,http://www.omim.org/entry/119600,omim,increase,27,27,17 repeats,-,27 repeats,"Cleidocranial dysplasia (CCD) patients show persistently open skull sutures with bulging calvaria, hypoplasia or aplasia of the clavicles permitting abnormal facility in apposing the shoulders, wide pubic symphysis, short middle phalanx of the fifth fingers, dental anomalies, and often vertebral malformation.","{6:1,10:4,11:1337,12:5,14:16,16:73,17:23694,18:111,19:2,20:2,21:2,22:2,23:3,24:1,29:1,30:2,36:1,39:1}" +CCHS,Central hypoventilation syndrome,short tandem repeats,PHOX2B,chr4:41744081-41748969:-,chr4_41745972_NGC,GCN,NGC,chr4:41745972-41746031,chr4:41747989-41748048,AGCCGCAGCCAGGCCTCC,GCCGCCCTTGCCGGGTTC,coding region,AD,209880,http://www.omim.org/entry/209880,omim,increase,24,24,20 repeats,-,24 repeats,"Central hypoventilation syndrome (CCHS), also known as 'Ondine's curse', is a rare disorder characterized by abnormal control of respiration in the absence of neuromuscular, lung or cardiac disease, or an identifiable 
brainstem lesion. ","{6:6,7:41,8:18,9:24,10:25,11:39,12:39,13:156,14:353,15:227,16:85,17:115,18:124,19:210,20:23733,21:9,22:15,23:2,24:1,25:5,26:1,27:4}" +HFG,Hand-foot-uterus syndrome,short tandem repeats,HOXA13,chr7:27193418-27200265:-,chr7_27199679_NGC,GCN,NGC,chr7:27199925-27199966,chr7:27239544-27239585,GCCCCCGCCCCCGGCCCC,CCCTTCCATGTTCTTGTT,exon 1,AD,140000,http://www.omim.org/entry/140000,omim,increase,22,22,14/12/12 repeats,-,22/18/18 repeats,"Hand-foot-uterus (HFG) patients show small feet with unusually short great toes and abnormal thumbs. Females with the disorder have duplication of the genital tract, including longitudinal vaginal septum ","{4:3,5:248,6:17,7:7,8:10,9:17,10:12,11:30,12:35,13:102,14:24620,15:16,16:16,17:5,18:2,20:1,21:1,22:1,23:1}" +HPE5,Holoprosencephaly-5,short tandem repeats,ZIC2,chr13:99981771-99986764:+,chr13_99985449_GCN,GCN,GCN,chr13:99985449-99985493,chr13:100637703-100637747,AGCTCCAACCTGTCCCCA,GTGTCCGCGGTGCACCGG,coding region,AD,609637,http://www.omim.org/entry/609637,omim,increase,25,25,15 repeats,-,25 repeats,Holoprosencephaly (HPE5) is the most common structural anomaly of the human brain and is one of the anomalies seen in patients with deletions and duplications of chromosome 13. ,"{3:1,5:16,6:11,7:3,8:7,9:8,10:55,11:18,12:24,13:14,14:435,15:24561,16:44,17:30,18:11,19:5,20:12,21:1}" +SD5,Syndactyly,short tandem repeats,HOXD13,chr2:176087504-176095937:+,chr2_176093059_GCN,GCN,GCN,chr2:176093059-176093103,chr2:176957787-176957831,GGGACGCATTCGGGGCGG,TCCGGCTTTGCGTACCCC,coding region,AD,186300,http://www.omim.org/entry/186300,omim,increase,22,22,15 repeats,-,22 repeats,Syndactyly (SD5) patients show the characteristic of the presence of an associated metacarpal and metatarsal fusion. 
,"{6:8,7:5,8:4,9:1,11:8,12:75,13:2,14:67,15:25047,16:18,17:19,18:5,24:1}" +XLMR,"Mental retardation, X-linked, with isolated growth hormone deficiency",short tandem repeats,SOX3,chrX:140502986-140505059:-,chrX_140504317_NGC,GCN,NGC,chrX:140504317-140504361,chrX:139586482-139586526,CACGCCCACCGGACTGCT,ACCGGGAGGCAGGAGGCC,coding region,XLR,300123,http://www.omim.org/entry/300123,omim,increase,22,22,15 repeats,-,22 repeats,"Mental retardation, X-linked, with hormone deficiencies (XLMR) is caused by defects of SOX3, which encodes a transcription factor that regulates embryonic development and determines cell fate.","{6:3,7:1,8:34,9:37,10:1,11:1,13:3,14:8,15:19874,16:34,17:15,23:1}" +AR,Susceptibility to prostate cancer due to Androgen Receptor expression,short tandem repeats,AR,chrX:67544032-67730619:+,chrX_67545318_CAG,CAG,CAG,chrX:67545318-67545383,chrX:66765160-66765225,GCCAGTTTGCTGCTGCTG,CAAGAGACTAGCCCCAGG,coding region,XLR,176807,http://www.omim.org/entry/176807,omim,decrease,18,8,22 repeats,<=18 repeats,<=8 repeats,"The length of a polymorphic CAG repeat sequence, occurring in the androgen receptor gene, is inversely correlated with transcriptional activity by the androgen receptor. Because heightened androgenic stimulation may increase risk of prostate cancer development and progression, we examined whether shorter CAG repeats in the androgen receptor gene are related to higher risk of prostate cancer. An association existed between fewer androgen receptor gene CAG repeats and higher risk of total prostate cancer [relative risk (RR) = 1.52; 95% confidence interval (CI) = 0.92-2.49; P trend = 0.04; for men with CAG repeat lengths <=18 relative to >=26 repeats]. In particular, a shorter CAG repeat sequence was associated with cancers characterized by extraprostatic extension or distant metastases (stage C or D) or high histologic grade (RR = 2.14; CI = 1.14-4.01; P trend = 0.001). 
This association was observed individually both for high stage (RR = 2.23) and high grade prostate cancer (RR = 1.89). Men with shorter repeats were at particularly high risk for distant metastatic and fatal prostate cancer. Variability in the CAG repeat length was not associated with low grade or low stage disease. These results demonstrate that a shorter CAG repeat sequence in the androgen receptor gene predicts higher grade and advanced stage of prostate cancer at diagnosis, and metastasis and mortality from the disease.","{1:1,2:3,4:2,5:2,6:1,7:24,8:13,9:8,10:5,11:11,12:30,13:76,14:183,15:159,16:252,17:677,18:1458,19:1964,20:2114,21:3202,22:2191,23:2098,24:2013,25:1465,26:859,27:477,28:281,29:242,30:86,31:53,32:19,33:15,34:7,35:6,36:10,37:3,38:1,40:2,41:1}" +ALS,"Amyotrophic lateral sclerosis, or familial frontotemporal dementia",short tandem repeats,C9orf72,chr9:27546546-27573866:-,chr9_27573529_GGCCCC,GGGGCC,GGCCCC,chr9:27573529-27573546,chr9:27573527-27573544,CCGCCCCGACCACGCCCC,TAGCGCGCGACTCCTGAG,intron 1,AD,105550,http://www.omim.org/entry/105550,omim,increase,31,31,<31 repeats,-,31+ repeats,The hexamer G4C2 repeat expansion in the C9orf72 locus is a major cause of both ALS and frontotemporal dementia. 
The pathogenic repeat length (>30 repeats) is present in ~10% of all ALS patients including ~40% of familial ALS cases and ~6-8% of sporadic ALS cases in some populations.,"{1:12,2:13703,3:307,4:1005,5:2987,6:1830,7:1194,8:2548,9:407,10:662,11:285,12:181,13:70,14:36,15:12,16:9,17:5,18:1,19:2}" \ No newline at end of file diff --git a/jcvi/utils/data/__init__.py b/jcvi/utils/data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/jcvi/utils/data/adapters.fasta b/jcvi/utils/data/adapters.fasta new file mode 100644 index 00000000..bf72c1e0 --- /dev/null +++ b/jcvi/utils/data/adapters.fasta @@ -0,0 +1,38 @@ +>PrefixNX/1 +AGATGTGTATAAGAGACAG +>Trans1 +TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG +>Trans1_rc +CTGTCTCTTATACACATCTGACGCTGCCGACGA +>Trans2 +GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG +>Trans2_rc +CTGTCTCTTATACACATCTCCGAGCCCACGAGAC +>PrefixPE/1 +AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +>PrefixPE/2 +CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATC +T +>PCR_Primer1_rc +AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT +>PCR_Primer2_rc +AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTT +G +>FlowCell1 +TTTTTTTTTTAATGATACGGCGACCACCGAGATCTACAC +>FlowCell2 +TTTTTTTTTTCAAGCAGAAGACGGCATACGA +>TruSeq2_SE +AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG +>TruSeq2_PE_f +AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT +>TruSeq2_PE_r +AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG +>PE1 +TACACTCTTTCCCTACACGACGCTCTTCCGATCT +>PE1_rc +AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA +>PE2 +GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT +>PE2_rc +AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC diff --git a/jcvi/utils/data/blosum80.mat b/jcvi/utils/data/blosum80.mat new file mode 100644 index 00000000..e6008c23 --- /dev/null +++ b/jcvi/utils/data/blosum80.mat @@ -0,0 +1,40 @@ +# Blosum80 +# Matrix made by matblas from blosum80.iij +# * column uses minimum score +# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units +# Blocks Database = /data/blocks_5.0/blocks.dat +# Cluster Percentage: >= 80 +# Entropy = 
0.9868, Expected = -0.7442 +GAP-PENALTIES=12 6 6 + A R N D C Q E G H I L K M F P S T W Y V B Z X ? a g t c u ] n +A 7 -3 -3 -3 -1 -2 -2 0 -3 -3 -3 -1 -2 -4 -1 2 0 -5 -4 -1 -3 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9 +R -3 9 -1 -3 -6 1 -1 -4 0 -5 -4 3 -3 -5 -3 -2 -2 -5 -4 -4 -2 0 -2 -9 -9 -9 -9 -9 -9 -9 -9 +N -3 -1 9 2 -5 0 -1 -1 1 -6 -6 0 -4 -6 -4 1 0 -7 -4 -5 5 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9 +D -3 -3 2 10 -7 -1 2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6 6 1 -3 -9 -9 -9 -9 -9 -9 -9 -9 +C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4 -9 -9 -9 -9 -9 -9 -9 -9 +Q -2 1 0 -1 -5 9 3 -4 1 -5 -4 2 -1 -5 -3 -1 -1 -4 -3 -4 -1 5 -2 -9 -9 -9 -9 -9 -9 -9 -9 +E -2 -1 -1 2 -7 3 8 -4 0 -6 -6 1 -4 -6 -2 -1 -2 -6 -5 -4 1 6 -2 -9 -9 -9 -9 -9 -9 -9 -9 +G 0 -4 -1 -3 -6 -4 -4 9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9 +H -3 0 1 -2 -7 1 0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4 3 -5 -1 0 -2 -9 -9 -9 -9 -9 -9 -9 -9 +I -3 -5 -6 -7 -2 -5 -6 -7 -6 7 2 -5 2 -1 -5 -4 -2 -5 -3 4 -6 -6 -2 -9 -9 -9 -9 -9 -9 -9 -9 +L -3 -4 -6 -7 -3 -4 -6 -7 -5 2 6 -4 3 0 -5 -4 -3 -4 -2 1 -7 -5 -2 -9 -9 -9 -9 -9 -9 -9 -9 +K -1 3 0 -2 -6 2 1 -3 -1 -5 -4 8 -3 -5 -2 -1 -1 -6 -4 -4 -1 1 -2 -9 -9 -9 -9 -9 -9 -9 -9 +M -2 -3 -4 -6 -3 -1 -4 -5 -4 2 3 -3 9 0 -4 -3 -1 -3 -3 1 -5 -3 -2 -9 -9 -9 -9 -9 -9 -9 -9 +F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1 0 -5 0 10 -6 -4 -4 0 4 -2 -6 -6 -3 -9 -9 -9 -9 -9 -9 -9 -9 +P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3 -9 -9 -9 -9 -9 -9 -9 -9 +S 2 -2 1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2 7 2 -6 -3 -3 0 -1 -1 -9 -9 -9 -9 -9 -9 -9 -9 +T 0 -2 0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3 2 8 -5 -3 0 -1 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9 +W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3 0 -7 -6 -5 16 3 -5 -8 -5 -5 -9 -9 -9 -9 -9 -9 -9 -9 +Y -4 -4 -4 -6 -5 -3 -5 -6 3 -3 -2 -4 -3 4 -6 -3 -3 3 11 -3 -5 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9 +V -1 -4 -5 -6 -2 -4 -4 -6 -5 4 1 -4 1 -2 -4 -3 0 -5 -3 7 -6 -4 -2 -9 -9 -9 -9 -9 -9 -9 -9 +B -3 -2 5 6 -6 -1 1 -2 -1 -6 -7 -1 -5 -6 -4 0 -1 -8 
-5 -6 6 0 -3 -9 -9 -9 -9 -9 -9 -9 -9 +Z -2 0 -1 1 -7 5 6 -4 0 -6 -5 1 -3 -6 -2 -1 -2 -5 -4 -4 0 6 -1 -9 -9 -9 -9 -9 -9 -9 -9 +X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9 +? -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 +a -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 4 -2 -2 -2 -2 -9 0 +g -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 4 -2 -2 -2 -9 0 +t -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 4 -2 4 -9 0 +c -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 -2 4 -2 -9 0 +u -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 4 -2 4 -9 0 +] -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 +n -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 0 0 0 0 0 -9 0 diff --git a/jcvi/utils/data/chrY.hg38.unique_ccn.gc b/jcvi/utils/data/chrY.hg38.unique_ccn.gc new file mode 100644 index 00000000..6f771a4e --- /dev/null +++ b/jcvi/utils/data/chrY.hg38.unique_ccn.gc @@ -0,0 +1,300 @@ +chrY 2784557 2791188 39 +chrY 2801260 2807624 38 +chrY 3001290 3010033 46 +chrY 4208387 4214156 44 +chrY 6532336 6537578 32 +chrY 6556575 6562243 34 +chrY 6749492 6759172 41 +chrY 6780750 6788302 40 +chrY 6807376 6813277 37 +chrY 6824953 6830628 37 +chrY 6840559 6848076 40 +chrY 6858179 6866161 39 +chrY 6890149 6895762 44 +chrY 6948842 6956239 38 +chrY 6958596 6965916 41 +chrY 7021311 7026777 40 +chrY 7129616 7136420 41 +chrY 7168704 7184007 43 +chrY 7192517 7204933 43 +chrY 7475993 7481505 41 +chrY 7499729 7505044 46 +chrY 7550843 7557901 44 +chrY 7654904 7661272 48 +chrY 7706983 7712325 42 +chrY 7719870 7726971 45 +chrY 7760061 7767544 40 +chrY 7828110 7844640 42 +chrY 7884997 7892945 40 +chrY 7894240 7899526 35 +chrY 7909385 7914546 39 +chrY 7952588 7961745 40 +chrY 7985485 7999389 39 +chrY 
8096558 8111236 35 +chrY 8121937 8127095 42 +chrY 8127642 8133073 37 +chrY 8195288 8200644 38 +chrY 8202366 8213685 42 +chrY 8233698 8238943 31 +chrY 8250681 8255940 51 +chrY 8263716 8269483 35 +chrY 8398026 8404720 35 +chrY 8475928 8482558 38 +chrY 8535415 8540967 46 +chrY 8645758 8653464 43 +chrY 8712894 8718966 44 +chrY 8803075 8812356 44 +chrY 8863076 8870962 45 +chrY 8943669 8952269 36 +chrY 8978916 8990553 44 +chrY 8995434 9001730 42 +chrY 9217975 9225816 35 +chrY 9238021 9245857 40 +chrY 9272802 9278977 43 +chrY 9280714 9289444 44 +chrY 9571046 9576635 41 +chrY 9920843 9926256 42 +chrY 9971808 9980413 41 +chrY 10042785 10053869 41 +chrY 10055746 10062704 39 +chrY 10093144 10098264 38 +chrY 10108505 10114835 38 +chrY 10158930 10166993 38 +chrY 11071908 11077229 49 +chrY 11490469 11500385 37 +chrY 11647705 11652919 37 +chrY 11653098 11660229 38 +chrY 11664384 11669803 37 +chrY 11674119 11721364 38 +chrY 11863344 11869166 42 +chrY 11888418 11900677 44 +chrY 11967783 11975761 44 +chrY 11998168 12004009 40 +chrY 12006921 12013128 39 +chrY 12056971 12063288 40 +chrY 12066290 12073346 40 +chrY 12148758 12153839 40 +chrY 12155881 12162156 44 +chrY 12166502 12172555 37 +chrY 12180152 12185912 41 +chrY 12212568 12217982 47 +chrY 12259952 12266900 43 +chrY 12321134 12328048 44 +chrY 12353997 12361755 48 +chrY 12471583 12477750 41 +chrY 12556061 12566816 43 +chrY 12567189 12574918 41 +chrY 12574969 12581385 37 +chrY 12593138 12599159 45 +chrY 12599753 12608733 45 +chrY 12623423 12630790 39 +chrY 12920390 12926991 44 +chrY 12993164 12999140 34 +chrY 13004137 13009294 41 +chrY 13039768 13045527 36 +chrY 13051834 13056895 40 +chrY 13115138 13120449 43 +chrY 13167104 13175874 40 +chrY 13185822 13191485 40 +chrY 13252351 13260053 41 +chrY 13263004 13269401 39 +chrY 13280964 13289829 39 +chrY 13328822 13337861 38 +chrY 13338803 13347288 46 +chrY 13424970 13430518 35 +chrY 13437155 13442971 41 +chrY 13484161 13492046 39 +chrY 13514966 13525010 42 +chrY 13529624 13540690 41 
+chrY 13565815 13571309 37 +chrY 13573461 13578852 36 +chrY 13590814 13599779 41 +chrY 13600364 13621396 40 +chrY 13627105 13638297 44 +chrY 13644651 13652259 40 +chrY 13657736 13666461 43 +chrY 13670624 13679488 38 +chrY 13806885 13812979 40 +chrY 13887283 13897986 48 +chrY 13971831 13979428 40 +chrY 14082459 14087728 39 +chrY 14098242 14105696 40 +chrY 14157891 14165038 43 +chrY 14205230 14211091 40 +chrY 14233882 14245283 37 +chrY 14307467 14316895 46 +chrY 14339301 14346615 42 +chrY 14347799 14362566 43 +chrY 14374247 14382286 41 +chrY 14461230 14469256 38 +chrY 14504544 14513770 49 +chrY 14560376 14567068 39 +chrY 14572407 14578086 41 +chrY 14590607 14597155 46 +chrY 14762319 14775326 37 +chrY 14813142 14824402 42 +chrY 14862780 14876511 38 +chrY 14885221 14891091 39 +chrY 14984297 14992792 43 +chrY 15007186 15015808 39 +chrY 15035858 15044755 36 +chrY 15059569 15066537 36 +chrY 15098478 15103894 43 +chrY 15105936 15111341 40 +chrY 15113514 15120079 37 +chrY 15133822 15139556 39 +chrY 15254586 15270417 47 +chrY 15314346 15324591 40 +chrY 15336790 15342225 38 +chrY 15368318 15374523 40 +chrY 15384959 15394356 44 +chrY 15394940 15403276 42 +chrY 15500660 15508764 38 +chrY 15509090 15518502 43 +chrY 15607733 15621522 40 +chrY 15626346 15632174 40 +chrY 15662806 15669452 37 +chrY 15682681 15690135 36 +chrY 15731661 15736805 35 +chrY 15741948 15749918 41 +chrY 15815883 15824613 40 +chrY 15849529 15859619 38 +chrY 15941092 15948139 39 +chrY 15964963 15970803 38 +chrY 15973607 15983695 39 +chrY 16019020 16025198 41 +chrY 16057227 16062612 33 +chrY 16068290 16083002 42 +chrY 16098947 16105913 39 +chrY 16128439 16134046 36 +chrY 16292454 16302067 37 +chrY 16694618 16703847 37 +chrY 16746896 16753868 39 +chrY 16760739 16770576 39 +chrY 16832687 16839381 43 +chrY 16874803 16880861 37 +chrY 16948322 16961811 41 +chrY 17002365 17009878 40 +chrY 17042309 17048817 47 +chrY 17129821 17138633 43 +chrY 17138980 17146209 42 +chrY 17166159 17171544 40 +chrY 17183940 17195160 47 
+chrY 17212703 17219558 41 +chrY 17235070 17241735 38 +chrY 17285494 17291488 34 +chrY 17301431 17306508 41 +chrY 17340923 17347005 45 +chrY 17347434 17356449 42 +chrY 17437924 17445856 38 +chrY 18664679 18675178 42 +chrY 18882369 18888650 33 +chrY 18943921 18950373 43 +chrY 18995361 19004635 41 +chrY 19039184 19044596 47 +chrY 19101931 19108189 42 +chrY 19111372 19120891 41 +chrY 19126528 19140151 41 +chrY 19145280 19151619 40 +chrY 19168180 19183890 40 +chrY 19185299 19191204 40 +chrY 19230146 19235234 44 +chrY 19244362 19249681 38 +chrY 19253056 19258881 41 +chrY 19258931 19273153 39 +chrY 19277079 19283223 32 +chrY 19289712 19300316 40 +chrY 19307035 19314959 38 +chrY 19321316 19328613 38 +chrY 19359504 19366637 38 +chrY 19378363 19385822 37 +chrY 19393798 19400012 33 +chrY 19401507 19412484 41 +chrY 19418100 19430919 44 +chrY 19431245 19441997 39 +chrY 19442560 19453698 40 +chrY 19468938 19476722 42 +chrY 19505168 19512242 40 +chrY 19514508 19520050 41 +chrY 19520649 19525914 36 +chrY 19568898 19575669 35 +chrY 19577857 19582872 39 +chrY 19610784 19617219 39 +chrY 19617552 19624111 39 +chrY 19624483 19632100 37 +chrY 19632154 19638440 37 +chrY 19640429 19647772 38 +chrY 19652361 19666665 37 +chrY 19685100 19690709 40 +chrY 19706127 19711247 48 +chrY 19743571 19751093 36 +chrY 19792080 19809048 35 +chrY 19827253 19834589 37 +chrY 19867154 19878600 36 +chrY 19887017 19893244 43 +chrY 19893835 19905779 34 +chrY 19906368 19915050 40 +chrY 19915636 19926525 43 +chrY 19945933 19951015 33 +chrY 19953983 19959945 43 +chrY 19960558 19966518 38 +chrY 19967063 19978729 34 +chrY 19990375 19995406 41 +chrY 20006275 20012407 44 +chrY 20012734 20019861 37 +chrY 20047396 20053206 34 +chrY 20299012 20306346 43 +chrY 20389947 20396652 31 +chrY 20400867 20407806 39 +chrY 20443875 20449892 39 +chrY 20459613 20465100 41 +chrY 20476363 20483543 40 +chrY 20483894 20495078 34 +chrY 20513862 20519704 38 +chrY 20541890 20550721 39 +chrY 20562467 20570492 35 +chrY 20595959 20607549 34 
+chrY 20615642 20628183 37 +chrY 20628911 20638868 35 +chrY 20642520 20650592 45 +chrY 20658635 20663699 42 +chrY 20672133 20680570 38 +chrY 20682288 20687454 36 +chrY 20693591 20702020 43 +chrY 20703710 20712554 33 +chrY 20742660 20747871 46 +chrY 20763657 20768831 43 +chrY 20793884 20799934 43 +chrY 20800596 20805931 40 +chrY 20815851 20825197 40 +chrY 20841153 20856303 38 +chrY 20856442 20863199 40 +chrY 20891499 20901661 41 +chrY 20920061 20926313 40 +chrY 20926855 20935649 37 +chrY 20937111 20944700 39 +chrY 20963842 20978947 38 +chrY 20979543 20986990 38 +chrY 20989470 20995932 42 +chrY 21021445 21030068 43 +chrY 21035642 21045925 44 +chrY 21086840 21092451 41 +chrY 21123674 21131429 39 +chrY 21142994 21149100 34 +chrY 21159742 21165259 40 +chrY 21173822 21181450 43 +chrY 21189214 21195774 37 +chrY 21203550 21210182 36 +chrY 21235593 21249194 41 +chrY 21281787 21290096 38 +chrY 21293897 21305309 40 +chrY 21447788 21455746 43 +chrY 21571245 21578854 44 +chrY 21623157 21629316 38 +chrY 21844530 21850799 43 +chrY 21851973 21858034 44 +chrY 22229489 22234807 38 +chrY 22247024 22264166 42 +chrY 22292662 22298501 47 +chrY 22346580 22352845 32 +chrY 26623418 26628476 45 diff --git a/jcvi/utils/data/colorchecker.txt b/jcvi/utils/data/colorchecker.txt new file mode 100644 index 00000000..ad8999d1 --- /dev/null +++ b/jcvi/utils/data/colorchecker.txt @@ -0,0 +1,4 @@ +115,82,68 194,150,130 98,122,157 87,108,67 133,128,177 103,189,170 +214,126,44 80,91,166 193,90,99 94,60,108 157,188,64 224,163,46 +56,61,150 70,148,73 175,54,60 231,199,31 187,86,149 8,133,161 +243,243,242 200,200,200 160,160,160 122,122,121 85,85,85 52,52,52 diff --git a/jcvi/utils/data/hg38.band.txt b/jcvi/utils/data/hg38.band.txt new file mode 100644 index 00000000..1f52ac81 --- /dev/null +++ b/jcvi/utils/data/hg38.band.txt @@ -0,0 +1,1294 @@ +#chrom chromStart chromEnd name gieStain +chr1 0 2300000 p36.33 gneg +chr1 2300000 5300000 p36.32 gpos25 +chr1 5300000 7100000 p36.31 gneg +chr1 7100000 9100000 
p36.23 gpos25 +chr1 9100000 12500000 p36.22 gneg +chr1 12500000 15900000 p36.21 gpos50 +chr1 15900000 20100000 p36.13 gneg +chr1 20100000 23600000 p36.12 gpos25 +chr1 23600000 27600000 p36.11 gneg +chr1 27600000 29900000 p35.3 gpos25 +chr1 29900000 32300000 p35.2 gneg +chr1 32300000 34300000 p35.1 gpos25 +chr1 34300000 39600000 p34.3 gneg +chr1 39600000 43700000 p34.2 gpos25 +chr1 43700000 46300000 p34.1 gneg +chr1 46300000 50200000 p33 gpos75 +chr1 50200000 55600000 p32.3 gneg +chr1 55600000 58500000 p32.2 gpos50 +chr1 58500000 60800000 p32.1 gneg +chr1 60800000 68500000 p31.3 gpos50 +chr1 68500000 69300000 p31.2 gneg +chr1 69300000 84400000 p31.1 gpos100 +chr1 84400000 87900000 p22.3 gneg +chr1 87900000 91500000 p22.2 gpos75 +chr1 91500000 94300000 p22.1 gneg +chr1 94300000 99300000 p21.3 gpos75 +chr1 99300000 101800000 p21.2 gneg +chr1 101800000 106700000 p21.1 gpos100 +chr1 106700000 111200000 p13.3 gneg +chr1 111200000 115500000 p13.2 gpos50 +chr1 115500000 117200000 p13.1 gneg +chr1 117200000 120400000 p12 gpos50 +chr1 120400000 121700000 p11.2 gneg +chr1 121700000 123400000 p11.1 acen +chr1 123400000 125100000 q11 acen +chr1 125100000 143200000 q12 gvar +chr1 143200000 147500000 q21.1 gneg +chr1 147500000 150600000 q21.2 gpos50 +chr1 150600000 155100000 q21.3 gneg +chr1 155100000 156600000 q22 gpos50 +chr1 156600000 159100000 q23.1 gneg +chr1 159100000 160500000 q23.2 gpos50 +chr1 160500000 165500000 q23.3 gneg +chr1 165500000 167200000 q24.1 gpos50 +chr1 167200000 170900000 q24.2 gneg +chr1 170900000 173000000 q24.3 gpos75 +chr1 173000000 176100000 q25.1 gneg +chr1 176100000 180300000 q25.2 gpos50 +chr1 180300000 185800000 q25.3 gneg +chr1 185800000 190800000 q31.1 gpos100 +chr1 190800000 193800000 q31.2 gneg +chr1 193800000 198700000 q31.3 gpos100 +chr1 198700000 207100000 q32.1 gneg +chr1 207100000 211300000 q32.2 gpos25 +chr1 211300000 214400000 q32.3 gneg +chr1 214400000 223900000 q41 gpos100 +chr1 223900000 224400000 q42.11 gneg +chr1 224400000 
226800000 q42.12 gpos25 +chr1 226800000 230500000 q42.13 gneg +chr1 230500000 234600000 q42.2 gpos50 +chr1 234600000 236400000 q42.3 gneg +chr1 236400000 243500000 q43 gpos75 +chr1 243500000 248956422 q44 gneg +chr2 0 4400000 p25.3 gneg +chr2 4400000 6900000 p25.2 gpos50 +chr2 6900000 12000000 p25.1 gneg +chr2 12000000 16500000 p24.3 gpos75 +chr2 16500000 19000000 p24.2 gneg +chr2 19000000 23800000 p24.1 gpos75 +chr2 23800000 27700000 p23.3 gneg +chr2 27700000 29800000 p23.2 gpos25 +chr2 29800000 31800000 p23.1 gneg +chr2 31800000 36300000 p22.3 gpos75 +chr2 36300000 38300000 p22.2 gneg +chr2 38300000 41500000 p22.1 gpos50 +chr2 41500000 47500000 p21 gneg +chr2 47500000 52600000 p16.3 gpos100 +chr2 52600000 54700000 p16.2 gneg +chr2 54700000 61000000 p16.1 gpos100 +chr2 61000000 63900000 p15 gneg +chr2 63900000 68400000 p14 gpos50 +chr2 68400000 71300000 p13.3 gneg +chr2 71300000 73300000 p13.2 gpos50 +chr2 73300000 74800000 p13.1 gneg +chr2 74800000 83100000 p12 gpos100 +chr2 83100000 91800000 p11.2 gneg +chr2 91800000 93900000 p11.1 acen +chr2 93900000 96000000 q11.1 acen +chr2 96000000 102100000 q11.2 gneg +chr2 102100000 105300000 q12.1 gpos50 +chr2 105300000 106700000 q12.2 gneg +chr2 106700000 108700000 q12.3 gpos25 +chr2 108700000 112200000 q13 gneg +chr2 112200000 118100000 q14.1 gpos50 +chr2 118100000 121600000 q14.2 gneg +chr2 121600000 129100000 q14.3 gpos50 +chr2 129100000 131700000 q21.1 gneg +chr2 131700000 134300000 q21.2 gpos25 +chr2 134300000 136100000 q21.3 gneg +chr2 136100000 141500000 q22.1 gpos100 +chr2 141500000 143400000 q22.2 gneg +chr2 143400000 147900000 q22.3 gpos100 +chr2 147900000 149000000 q23.1 gneg +chr2 149000000 149600000 q23.2 gpos25 +chr2 149600000 154000000 q23.3 gneg +chr2 154000000 158900000 q24.1 gpos75 +chr2 158900000 162900000 q24.2 gneg +chr2 162900000 168900000 q24.3 gpos75 +chr2 168900000 177100000 q31.1 gneg +chr2 177100000 179700000 q31.2 gpos50 +chr2 179700000 182100000 q31.3 gneg +chr2 182100000 188500000 q32.1 
gpos75 +chr2 188500000 191100000 q32.2 gneg +chr2 191100000 196600000 q32.3 gpos75 +chr2 196600000 202500000 q33.1 gneg +chr2 202500000 204100000 q33.2 gpos50 +chr2 204100000 208200000 q33.3 gneg +chr2 208200000 214500000 q34 gpos100 +chr2 214500000 220700000 q35 gneg +chr2 220700000 224300000 q36.1 gpos75 +chr2 224300000 225200000 q36.2 gneg +chr2 225200000 230100000 q36.3 gpos100 +chr2 230100000 234700000 q37.1 gneg +chr2 234700000 236400000 q37.2 gpos50 +chr2 236400000 242193529 q37.3 gneg +chr3 0 2800000 p26.3 gpos50 +chr3 2800000 4000000 p26.2 gneg +chr3 4000000 8100000 p26.1 gpos50 +chr3 8100000 11600000 p25.3 gneg +chr3 11600000 13200000 p25.2 gpos25 +chr3 13200000 16300000 p25.1 gneg +chr3 16300000 23800000 p24.3 gpos100 +chr3 23800000 26300000 p24.2 gneg +chr3 26300000 30800000 p24.1 gpos75 +chr3 30800000 32000000 p23 gneg +chr3 32000000 36400000 p22.3 gpos50 +chr3 36400000 39300000 p22.2 gneg +chr3 39300000 43600000 p22.1 gpos75 +chr3 43600000 44100000 p21.33 gneg +chr3 44100000 44200000 p21.32 gpos50 +chr3 44200000 50600000 p21.31 gneg +chr3 50600000 52300000 p21.2 gpos25 +chr3 52300000 54400000 p21.1 gneg +chr3 54400000 58600000 p14.3 gpos50 +chr3 58600000 63800000 p14.2 gneg +chr3 63800000 69700000 p14.1 gpos50 +chr3 69700000 74100000 p13 gneg +chr3 74100000 79800000 p12.3 gpos75 +chr3 79800000 83500000 p12.2 gneg +chr3 83500000 87100000 p12.1 gpos75 +chr3 87100000 87800000 p11.2 gneg +chr3 87800000 90900000 p11.1 acen +chr3 90900000 94000000 q11.1 acen +chr3 94000000 98600000 q11.2 gvar +chr3 98600000 100300000 q12.1 gneg +chr3 100300000 101200000 q12.2 gpos25 +chr3 101200000 103100000 q12.3 gneg +chr3 103100000 106500000 q13.11 gpos75 +chr3 106500000 108200000 q13.12 gneg +chr3 108200000 111600000 q13.13 gpos50 +chr3 111600000 113700000 q13.2 gneg +chr3 113700000 117600000 q13.31 gpos75 +chr3 117600000 119300000 q13.32 gneg +chr3 119300000 122200000 q13.33 gpos75 +chr3 122200000 124100000 q21.1 gneg +chr3 124100000 126100000 q21.2 gpos25 +chr3 
126100000 129500000 q21.3 gneg +chr3 129500000 134000000 q22.1 gpos25 +chr3 134000000 136000000 q22.2 gneg +chr3 136000000 139000000 q22.3 gpos25 +chr3 139000000 143100000 q23 gneg +chr3 143100000 149200000 q24 gpos100 +chr3 149200000 152300000 q25.1 gneg +chr3 152300000 155300000 q25.2 gpos50 +chr3 155300000 157300000 q25.31 gneg +chr3 157300000 159300000 q25.32 gpos50 +chr3 159300000 161000000 q25.33 gneg +chr3 161000000 167900000 q26.1 gpos100 +chr3 167900000 171200000 q26.2 gneg +chr3 171200000 176000000 q26.31 gpos75 +chr3 176000000 179300000 q26.32 gneg +chr3 179300000 183000000 q26.33 gpos75 +chr3 183000000 184800000 q27.1 gneg +chr3 184800000 186300000 q27.2 gpos25 +chr3 186300000 188200000 q27.3 gneg +chr3 188200000 192600000 q28 gpos75 +chr3 192600000 198295559 q29 gneg +chr4 0 4500000 p16.3 gneg +chr4 4500000 6000000 p16.2 gpos25 +chr4 6000000 11300000 p16.1 gneg +chr4 11300000 15000000 p15.33 gpos50 +chr4 15000000 17700000 p15.32 gneg +chr4 17700000 21300000 p15.31 gpos75 +chr4 21300000 27700000 p15.2 gneg +chr4 27700000 35800000 p15.1 gpos100 +chr4 35800000 41200000 p14 gneg +chr4 41200000 44600000 p13 gpos50 +chr4 44600000 48200000 p12 gneg +chr4 48200000 50000000 p11 acen +chr4 50000000 51800000 q11 acen +chr4 51800000 58500000 q12 gneg +chr4 58500000 65500000 q13.1 gpos100 +chr4 65500000 69400000 q13.2 gneg +chr4 69400000 75300000 q13.3 gpos75 +chr4 75300000 78000000 q21.1 gneg +chr4 78000000 81500000 q21.21 gpos50 +chr4 81500000 83200000 q21.22 gneg +chr4 83200000 86000000 q21.23 gpos25 +chr4 86000000 87100000 q21.3 gneg +chr4 87100000 92800000 q22.1 gpos75 +chr4 92800000 94200000 q22.2 gneg +chr4 94200000 97900000 q22.3 gpos75 +chr4 97900000 100100000 q23 gneg +chr4 100100000 106700000 q24 gpos50 +chr4 106700000 113200000 q25 gneg +chr4 113200000 119900000 q26 gpos75 +chr4 119900000 122800000 q27 gneg +chr4 122800000 127900000 q28.1 gpos50 +chr4 127900000 130100000 q28.2 gneg +chr4 130100000 138500000 q28.3 gpos100 +chr4 138500000 140600000 q31.1 
gneg +chr4 140600000 145900000 q31.21 gpos25 +chr4 145900000 147500000 q31.22 gneg +chr4 147500000 150200000 q31.23 gpos25 +chr4 150200000 154600000 q31.3 gneg +chr4 154600000 160800000 q32.1 gpos100 +chr4 160800000 163600000 q32.2 gneg +chr4 163600000 169200000 q32.3 gpos100 +chr4 169200000 171000000 q33 gneg +chr4 171000000 175400000 q34.1 gpos75 +chr4 175400000 176600000 q34.2 gneg +chr4 176600000 182300000 q34.3 gpos100 +chr4 182300000 186200000 q35.1 gneg +chr4 186200000 190214555 q35.2 gpos25 +chr5 0 4400000 p15.33 gneg +chr5 4400000 6300000 p15.32 gpos25 +chr5 6300000 9900000 p15.31 gneg +chr5 9900000 15000000 p15.2 gpos50 +chr5 15000000 18400000 p15.1 gneg +chr5 18400000 23300000 p14.3 gpos100 +chr5 23300000 24600000 p14.2 gneg +chr5 24600000 28900000 p14.1 gpos100 +chr5 28900000 33800000 p13.3 gneg +chr5 33800000 38400000 p13.2 gpos25 +chr5 38400000 42500000 p13.1 gneg +chr5 42500000 46100000 p12 gpos50 +chr5 46100000 48800000 p11 acen +chr5 48800000 51400000 q11.1 acen +chr5 51400000 59600000 q11.2 gneg +chr5 59600000 63600000 q12.1 gpos75 +chr5 63600000 63900000 q12.2 gneg +chr5 63900000 67400000 q12.3 gpos75 +chr5 67400000 69100000 q13.1 gneg +chr5 69100000 74000000 q13.2 gpos50 +chr5 74000000 77600000 q13.3 gneg +chr5 77600000 82100000 q14.1 gpos50 +chr5 82100000 83500000 q14.2 gneg +chr5 83500000 93000000 q14.3 gpos100 +chr5 93000000 98900000 q15 gneg +chr5 98900000 103400000 q21.1 gpos100 +chr5 103400000 105100000 q21.2 gneg +chr5 105100000 110200000 q21.3 gpos100 +chr5 110200000 112200000 q22.1 gneg +chr5 112200000 113800000 q22.2 gpos50 +chr5 113800000 115900000 q22.3 gneg +chr5 115900000 122100000 q23.1 gpos100 +chr5 122100000 127900000 q23.2 gneg +chr5 127900000 131200000 q23.3 gpos100 +chr5 131200000 136900000 q31.1 gneg +chr5 136900000 140100000 q31.2 gpos25 +chr5 140100000 145100000 q31.3 gneg +chr5 145100000 150400000 q32 gpos75 +chr5 150400000 153300000 q33.1 gneg +chr5 153300000 156300000 q33.2 gpos50 +chr5 156300000 160500000 q33.3 gneg 
+chr5 160500000 169000000 q34 gpos100 +chr5 169000000 173300000 q35.1 gneg +chr5 173300000 177100000 q35.2 gpos25 +chr5 177100000 181538259 q35.3 gneg +chr6 0 2300000 p25.3 gneg +chr6 2300000 4200000 p25.2 gpos25 +chr6 4200000 7100000 p25.1 gneg +chr6 7100000 10600000 p24.3 gpos50 +chr6 10600000 11600000 p24.2 gneg +chr6 11600000 13400000 p24.1 gpos25 +chr6 13400000 15200000 p23 gneg +chr6 15200000 25200000 p22.3 gpos75 +chr6 25200000 27100000 p22.2 gneg +chr6 27100000 30500000 p22.1 gpos50 +chr6 30500000 32100000 p21.33 gneg +chr6 32100000 33500000 p21.32 gpos25 +chr6 33500000 36600000 p21.31 gneg +chr6 36600000 40500000 p21.2 gpos25 +chr6 40500000 46200000 p21.1 gneg +chr6 46200000 51800000 p12.3 gpos100 +chr6 51800000 53000000 p12.2 gneg +chr6 53000000 57200000 p12.1 gpos100 +chr6 57200000 58500000 p11.2 gneg +chr6 58500000 59800000 p11.1 acen +chr6 59800000 62600000 q11.1 acen +chr6 62600000 62700000 q11.2 gneg +chr6 62700000 69200000 q12 gpos100 +chr6 69200000 75200000 q13 gneg +chr6 75200000 83200000 q14.1 gpos50 +chr6 83200000 84200000 q14.2 gneg +chr6 84200000 87300000 q14.3 gpos50 +chr6 87300000 92500000 q15 gneg +chr6 92500000 98900000 q16.1 gpos100 +chr6 98900000 100000000 q16.2 gneg +chr6 100000000 105000000 q16.3 gpos100 +chr6 105000000 114200000 q21 gneg +chr6 114200000 117900000 q22.1 gpos75 +chr6 117900000 118100000 q22.2 gneg +chr6 118100000 125800000 q22.31 gpos100 +chr6 125800000 126800000 q22.32 gneg +chr6 126800000 130000000 q22.33 gpos75 +chr6 130000000 130900000 q23.1 gneg +chr6 130900000 134700000 q23.2 gpos50 +chr6 134700000 138300000 q23.3 gneg +chr6 138300000 142200000 q24.1 gpos75 +chr6 142200000 145100000 q24.2 gneg +chr6 145100000 148500000 q24.3 gpos75 +chr6 148500000 152100000 q25.1 gneg +chr6 152100000 155200000 q25.2 gpos50 +chr6 155200000 160600000 q25.3 gneg +chr6 160600000 164100000 q26 gpos50 +chr6 164100000 170805979 q27 gneg +chr7 0 2800000 p22.3 gneg +chr7 2800000 4500000 p22.2 gpos25 +chr7 4500000 7200000 p22.1 gneg +chr7 
7200000 13700000 p21.3 gpos100 +chr7 13700000 16500000 p21.2 gneg +chr7 16500000 20900000 p21.1 gpos100 +chr7 20900000 25500000 p15.3 gneg +chr7 25500000 27900000 p15.2 gpos50 +chr7 27900000 28800000 p15.1 gneg +chr7 28800000 34900000 p14.3 gpos75 +chr7 34900000 37100000 p14.2 gneg +chr7 37100000 43300000 p14.1 gpos75 +chr7 43300000 45400000 p13 gneg +chr7 45400000 49000000 p12.3 gpos75 +chr7 49000000 50500000 p12.2 gneg +chr7 50500000 53900000 p12.1 gpos75 +chr7 53900000 58100000 p11.2 gneg +chr7 58100000 60100000 p11.1 acen +chr7 60100000 62100000 q11.1 acen +chr7 62100000 67500000 q11.21 gneg +chr7 67500000 72700000 q11.22 gpos50 +chr7 72700000 77900000 q11.23 gneg +chr7 77900000 86700000 q21.11 gpos100 +chr7 86700000 88500000 q21.12 gneg +chr7 88500000 91500000 q21.13 gpos75 +chr7 91500000 93300000 q21.2 gneg +chr7 93300000 98400000 q21.3 gpos75 +chr7 98400000 104200000 q22.1 gneg +chr7 104200000 104900000 q22.2 gpos50 +chr7 104900000 107800000 q22.3 gneg +chr7 107800000 115000000 q31.1 gpos75 +chr7 115000000 117700000 q31.2 gneg +chr7 117700000 121400000 q31.31 gpos75 +chr7 121400000 124100000 q31.32 gneg +chr7 124100000 127500000 q31.33 gpos75 +chr7 127500000 129600000 q32.1 gneg +chr7 129600000 130800000 q32.2 gpos25 +chr7 130800000 132900000 q32.3 gneg +chr7 132900000 138500000 q33 gpos50 +chr7 138500000 143400000 q34 gneg +chr7 143400000 148200000 q35 gpos75 +chr7 148200000 152800000 q36.1 gneg +chr7 152800000 155200000 q36.2 gpos25 +chr7 155200000 159345973 q36.3 gneg +chr8 0 2300000 p23.3 gneg +chr8 2300000 6300000 p23.2 gpos75 +chr8 6300000 12800000 p23.1 gneg +chr8 12800000 19200000 p22 gpos100 +chr8 19200000 23500000 p21.3 gneg +chr8 23500000 27500000 p21.2 gpos50 +chr8 27500000 29000000 p21.1 gneg +chr8 29000000 36700000 p12 gpos75 +chr8 36700000 38500000 p11.23 gneg +chr8 38500000 39900000 p11.22 gpos25 +chr8 39900000 43200000 p11.21 gneg +chr8 43200000 45200000 p11.1 acen +chr8 45200000 47200000 q11.1 acen +chr8 47200000 51300000 q11.21 gneg +chr8 
51300000 51700000 q11.22 gpos75 +chr8 51700000 54600000 q11.23 gneg +chr8 54600000 60600000 q12.1 gpos50 +chr8 60600000 61300000 q12.2 gneg +chr8 61300000 65100000 q12.3 gpos50 +chr8 65100000 67100000 q13.1 gneg +chr8 67100000 69600000 q13.2 gpos50 +chr8 69600000 72000000 q13.3 gneg +chr8 72000000 74600000 q21.11 gpos100 +chr8 74600000 74700000 q21.12 gneg +chr8 74700000 83500000 q21.13 gpos75 +chr8 83500000 85900000 q21.2 gneg +chr8 85900000 92300000 q21.3 gpos100 +chr8 92300000 97900000 q22.1 gneg +chr8 97900000 100500000 q22.2 gpos25 +chr8 100500000 105100000 q22.3 gneg +chr8 105100000 109500000 q23.1 gpos75 +chr8 109500000 111100000 q23.2 gneg +chr8 111100000 116700000 q23.3 gpos100 +chr8 116700000 118300000 q24.11 gneg +chr8 118300000 121500000 q24.12 gpos50 +chr8 121500000 126300000 q24.13 gneg +chr8 126300000 130400000 q24.21 gpos50 +chr8 130400000 135400000 q24.22 gneg +chr8 135400000 138900000 q24.23 gpos75 +chr8 138900000 145138636 q24.3 gneg +chr9 0 2200000 p24.3 gneg +chr9 2200000 4600000 p24.2 gpos25 +chr9 4600000 9000000 p24.1 gneg +chr9 9000000 14200000 p23 gpos75 +chr9 14200000 16600000 p22.3 gneg +chr9 16600000 18500000 p22.2 gpos25 +chr9 18500000 19900000 p22.1 gneg +chr9 19900000 25600000 p21.3 gpos100 +chr9 25600000 28000000 p21.2 gneg +chr9 28000000 33200000 p21.1 gpos100 +chr9 33200000 36300000 p13.3 gneg +chr9 36300000 37900000 p13.2 gpos25 +chr9 37900000 39000000 p13.1 gneg +chr9 39000000 40000000 p12 gpos50 +chr9 40000000 42200000 p11.2 gneg +chr9 42200000 43000000 p11.1 acen +chr9 43000000 45500000 q11 acen +chr9 45500000 61500000 q12 gvar +chr9 61500000 65000000 q13 gneg +chr9 65000000 69300000 q21.11 gpos25 +chr9 69300000 71300000 q21.12 gneg +chr9 71300000 76600000 q21.13 gpos50 +chr9 76600000 78500000 q21.2 gneg +chr9 78500000 81500000 q21.31 gpos50 +chr9 81500000 84300000 q21.32 gneg +chr9 84300000 87800000 q21.33 gpos50 +chr9 87800000 89200000 q22.1 gneg +chr9 89200000 91200000 q22.2 gpos25 +chr9 91200000 93900000 q22.31 gneg +chr9 
93900000 96500000 q22.32 gpos25 +chr9 96500000 99800000 q22.33 gneg +chr9 99800000 105400000 q31.1 gpos100 +chr9 105400000 108500000 q31.2 gneg +chr9 108500000 112100000 q31.3 gpos25 +chr9 112100000 114900000 q32 gneg +chr9 114900000 119800000 q33.1 gpos75 +chr9 119800000 123100000 q33.2 gneg +chr9 123100000 127500000 q33.3 gpos25 +chr9 127500000 130600000 q34.11 gneg +chr9 130600000 131100000 q34.12 gpos25 +chr9 131100000 133100000 q34.13 gneg +chr9 133100000 134500000 q34.2 gpos25 +chr9 134500000 138394717 q34.3 gneg +chrM 0 16569 gneg +chrX 0 4400000 p22.33 gneg +chrX 4400000 6100000 p22.32 gpos50 +chrX 6100000 9600000 p22.31 gneg +chrX 9600000 17400000 p22.2 gpos50 +chrX 17400000 19200000 p22.13 gneg +chrX 19200000 21900000 p22.12 gpos50 +chrX 21900000 24900000 p22.11 gneg +chrX 24900000 29300000 p21.3 gpos100 +chrX 29300000 31500000 p21.2 gneg +chrX 31500000 37800000 p21.1 gpos100 +chrX 37800000 42500000 p11.4 gneg +chrX 42500000 47600000 p11.3 gpos75 +chrX 47600000 50100000 p11.23 gneg +chrX 50100000 54800000 p11.22 gpos25 +chrX 54800000 58100000 p11.21 gneg +chrX 58100000 61000000 p11.1 acen +chrX 61000000 63800000 q11.1 acen +chrX 63800000 65400000 q11.2 gneg +chrX 65400000 68500000 q12 gpos50 +chrX 68500000 73000000 q13.1 gneg +chrX 73000000 74700000 q13.2 gpos50 +chrX 74700000 76800000 q13.3 gneg +chrX 76800000 85400000 q21.1 gpos100 +chrX 85400000 87000000 q21.2 gneg +chrX 87000000 92700000 q21.31 gpos100 +chrX 92700000 94300000 q21.32 gneg +chrX 94300000 99100000 q21.33 gpos75 +chrX 99100000 103300000 q22.1 gneg +chrX 103300000 104500000 q22.2 gpos50 +chrX 104500000 109400000 q22.3 gneg +chrX 109400000 117400000 q23 gpos75 +chrX 117400000 121800000 q24 gneg +chrX 121800000 129500000 q25 gpos100 +chrX 129500000 131300000 q26.1 gneg +chrX 131300000 134500000 q26.2 gpos25 +chrX 134500000 138900000 q26.3 gneg +chrX 138900000 141200000 q27.1 gpos75 +chrX 141200000 143000000 q27.2 gneg +chrX 143000000 148000000 q27.3 gpos100 +chrX 148000000 156040895 q28 gneg 
+chrY 0 300000 p11.32 gneg +chrY 300000 600000 p11.31 gpos50 +chrY 600000 10300000 p11.2 gneg +chrY 10300000 10400000 p11.1 acen +chrY 10400000 10600000 q11.1 acen +chrY 10600000 12400000 q11.21 gneg +chrY 12400000 17100000 q11.221 gpos50 +chrY 17100000 19600000 q11.222 gneg +chrY 19600000 23800000 q11.223 gpos50 +chrY 23800000 26600000 q11.23 gneg +chrY 26600000 57227415 q12 gvar +chr10 0 3000000 p15.3 gneg +chr10 3000000 3800000 p15.2 gpos25 +chr10 3800000 6600000 p15.1 gneg +chr10 6600000 12200000 p14 gpos75 +chr10 12200000 17300000 p13 gneg +chr10 17300000 18300000 p12.33 gpos75 +chr10 18300000 18400000 p12.32 gneg +chr10 18400000 22300000 p12.31 gpos75 +chr10 22300000 24300000 p12.2 gneg +chr10 24300000 29300000 p12.1 gpos50 +chr10 29300000 31100000 p11.23 gneg +chr10 31100000 34200000 p11.22 gpos25 +chr10 34200000 38000000 p11.21 gneg +chr10 38000000 39800000 p11.1 acen +chr10 39800000 41600000 q11.1 acen +chr10 41600000 45500000 q11.21 gneg +chr10 45500000 48600000 q11.22 gpos25 +chr10 48600000 51100000 q11.23 gneg +chr10 51100000 59400000 q21.1 gpos100 +chr10 59400000 62800000 q21.2 gneg +chr10 62800000 68800000 q21.3 gpos100 +chr10 68800000 73100000 q22.1 gneg +chr10 73100000 75900000 q22.2 gpos50 +chr10 75900000 80300000 q22.3 gneg +chr10 80300000 86100000 q23.1 gpos100 +chr10 86100000 87700000 q23.2 gneg +chr10 87700000 91100000 q23.31 gpos75 +chr10 91100000 92300000 q23.32 gneg +chr10 92300000 95300000 q23.33 gpos50 +chr10 95300000 97500000 q24.1 gneg +chr10 97500000 100100000 q24.2 gpos50 +chr10 100100000 101200000 q24.31 gneg +chr10 101200000 103100000 q24.32 gpos25 +chr10 103100000 104000000 q24.33 gneg +chr10 104000000 110100000 q25.1 gpos100 +chr10 110100000 113100000 q25.2 gneg +chr10 113100000 117300000 q25.3 gpos75 +chr10 117300000 119900000 q26.11 gneg +chr10 119900000 121400000 q26.12 gpos50 +chr10 121400000 125700000 q26.13 gneg +chr10 125700000 128800000 q26.2 gpos50 +chr10 128800000 133797422 q26.3 gneg +chr11 0 2800000 p15.5 gneg +chr11 
2800000 11700000 p15.4 gpos50 +chr11 11700000 13800000 p15.3 gneg +chr11 13800000 16900000 p15.2 gpos50 +chr11 16900000 22000000 p15.1 gneg +chr11 22000000 26200000 p14.3 gpos100 +chr11 26200000 27200000 p14.2 gneg +chr11 27200000 31000000 p14.1 gpos75 +chr11 31000000 36400000 p13 gneg +chr11 36400000 43400000 p12 gpos100 +chr11 43400000 48800000 p11.2 gneg +chr11 48800000 51000000 p11.12 gpos75 +chr11 51000000 53400000 p11.11 acen +chr11 53400000 55800000 q11 acen +chr11 55800000 60100000 q12.1 gpos75 +chr11 60100000 61900000 q12.2 gneg +chr11 61900000 63600000 q12.3 gpos25 +chr11 63600000 66100000 q13.1 gneg +chr11 66100000 68700000 q13.2 gpos25 +chr11 68700000 70500000 q13.3 gneg +chr11 70500000 75500000 q13.4 gpos50 +chr11 75500000 77400000 q13.5 gneg +chr11 77400000 85900000 q14.1 gpos100 +chr11 85900000 88600000 q14.2 gneg +chr11 88600000 93000000 q14.3 gpos100 +chr11 93000000 97400000 q21 gneg +chr11 97400000 102300000 q22.1 gpos100 +chr11 102300000 103000000 q22.2 gneg +chr11 103000000 110600000 q22.3 gpos100 +chr11 110600000 112700000 q23.1 gneg +chr11 112700000 114600000 q23.2 gpos50 +chr11 114600000 121300000 q23.3 gneg +chr11 121300000 124000000 q24.1 gpos50 +chr11 124000000 127900000 q24.2 gneg +chr11 127900000 130900000 q24.3 gpos50 +chr11 130900000 135086622 q25 gneg +chr12 0 3200000 p13.33 gneg +chr12 3200000 5300000 p13.32 gpos25 +chr12 5300000 10000000 p13.31 gneg +chr12 10000000 12600000 p13.2 gpos75 +chr12 12600000 14600000 p13.1 gneg +chr12 14600000 19800000 p12.3 gpos100 +chr12 19800000 21100000 p12.2 gneg +chr12 21100000 26300000 p12.1 gpos100 +chr12 26300000 27600000 p11.23 gneg +chr12 27600000 30500000 p11.22 gpos50 +chr12 30500000 33200000 p11.21 gneg +chr12 33200000 35500000 p11.1 acen +chr12 35500000 37800000 q11 acen +chr12 37800000 46000000 q12 gpos100 +chr12 46000000 48700000 q13.11 gneg +chr12 48700000 51100000 q13.12 gpos25 +chr12 51100000 54500000 q13.13 gneg +chr12 54500000 56200000 q13.2 gpos25 +chr12 56200000 57700000 q13.3 gneg 
+chr12 57700000 62700000 q14.1 gpos75 +chr12 62700000 64700000 q14.2 gneg +chr12 64700000 67300000 q14.3 gpos50 +chr12 67300000 71100000 q15 gneg +chr12 71100000 75300000 q21.1 gpos75 +chr12 75300000 79900000 q21.2 gneg +chr12 79900000 86300000 q21.31 gpos100 +chr12 86300000 88600000 q21.32 gneg +chr12 88600000 92200000 q21.33 gpos100 +chr12 92200000 95800000 q22 gneg +chr12 95800000 101200000 q23.1 gpos75 +chr12 101200000 103500000 q23.2 gneg +chr12 103500000 108600000 q23.3 gpos50 +chr12 108600000 111300000 q24.11 gneg +chr12 111300000 111900000 q24.12 gpos25 +chr12 111900000 113900000 q24.13 gneg +chr12 113900000 116400000 q24.21 gpos50 +chr12 116400000 117700000 q24.22 gneg +chr12 117700000 120300000 q24.23 gpos50 +chr12 120300000 125400000 q24.31 gneg +chr12 125400000 128700000 q24.32 gpos50 +chr12 128700000 133275309 q24.33 gneg +chr13 0 4600000 p13 gvar +chr13 4600000 10100000 p12 stalk +chr13 10100000 16500000 p11.2 gvar +chr13 16500000 17700000 p11.1 acen +chr13 17700000 18900000 q11 acen +chr13 18900000 22600000 q12.11 gneg +chr13 22600000 24900000 q12.12 gpos25 +chr13 24900000 27200000 q12.13 gneg +chr13 27200000 28300000 q12.2 gpos25 +chr13 28300000 31600000 q12.3 gneg +chr13 31600000 33400000 q13.1 gpos50 +chr13 33400000 34900000 q13.2 gneg +chr13 34900000 39500000 q13.3 gpos75 +chr13 39500000 44600000 q14.11 gneg +chr13 44600000 45200000 q14.12 gpos25 +chr13 45200000 46700000 q14.13 gneg +chr13 46700000 50300000 q14.2 gpos50 +chr13 50300000 54700000 q14.3 gneg +chr13 54700000 59000000 q21.1 gpos100 +chr13 59000000 61800000 q21.2 gneg +chr13 61800000 65200000 q21.31 gpos75 +chr13 65200000 68100000 q21.32 gneg +chr13 68100000 72800000 q21.33 gpos100 +chr13 72800000 74900000 q22.1 gneg +chr13 74900000 76700000 q22.2 gpos50 +chr13 76700000 78500000 q22.3 gneg +chr13 78500000 87100000 q31.1 gpos100 +chr13 87100000 89400000 q31.2 gneg +chr13 89400000 94400000 q31.3 gpos100 +chr13 94400000 97500000 q32.1 gneg +chr13 97500000 98700000 q32.2 gpos25 +chr13 
98700000 101100000 q32.3 gneg +chr13 101100000 104200000 q33.1 gpos100 +chr13 104200000 106400000 q33.2 gneg +chr13 106400000 109600000 q33.3 gpos100 +chr13 109600000 114364328 q34 gneg +chr14 0 3600000 p13 gvar +chr14 3600000 8000000 p12 stalk +chr14 8000000 16100000 p11.2 gvar +chr14 16100000 17200000 p11.1 acen +chr14 17200000 18200000 q11.1 acen +chr14 18200000 24100000 q11.2 gneg +chr14 24100000 32900000 q12 gpos100 +chr14 32900000 34800000 q13.1 gneg +chr14 34800000 36100000 q13.2 gpos50 +chr14 36100000 37400000 q13.3 gneg +chr14 37400000 43000000 q21.1 gpos100 +chr14 43000000 46700000 q21.2 gneg +chr14 46700000 50400000 q21.3 gpos100 +chr14 50400000 53600000 q22.1 gneg +chr14 53600000 55000000 q22.2 gpos25 +chr14 55000000 57600000 q22.3 gneg +chr14 57600000 61600000 q23.1 gpos75 +chr14 61600000 64300000 q23.2 gneg +chr14 64300000 67400000 q23.3 gpos50 +chr14 67400000 69800000 q24.1 gneg +chr14 69800000 73300000 q24.2 gpos50 +chr14 73300000 78800000 q24.3 gneg +chr14 78800000 83100000 q31.1 gpos100 +chr14 83100000 84400000 q31.2 gneg +chr14 84400000 89300000 q31.3 gpos100 +chr14 89300000 91400000 q32.11 gneg +chr14 91400000 94200000 q32.12 gpos25 +chr14 94200000 95800000 q32.13 gneg +chr14 95800000 100900000 q32.2 gpos50 +chr14 100900000 102700000 q32.31 gneg +chr14 102700000 103500000 q32.32 gpos50 +chr14 103500000 107043718 q32.33 gneg +chr15 0 4200000 p13 gvar +chr15 4200000 9700000 p12 stalk +chr15 9700000 17500000 p11.2 gvar +chr15 17500000 19000000 p11.1 acen +chr15 19000000 20500000 q11.1 acen +chr15 20500000 25500000 q11.2 gneg +chr15 25500000 27800000 q12 gpos50 +chr15 27800000 30000000 q13.1 gneg +chr15 30000000 30900000 q13.2 gpos50 +chr15 30900000 33400000 q13.3 gneg +chr15 33400000 39800000 q14 gpos75 +chr15 39800000 42500000 q15.1 gneg +chr15 42500000 43300000 q15.2 gpos25 +chr15 43300000 44500000 q15.3 gneg +chr15 44500000 49200000 q21.1 gpos75 +chr15 49200000 52600000 q21.2 gneg +chr15 52600000 58800000 q21.3 gpos75 +chr15 58800000 59000000 
q22.1 gneg +chr15 59000000 63400000 q22.2 gpos25 +chr15 63400000 66900000 q22.31 gneg +chr15 66900000 67000000 q22.32 gpos25 +chr15 67000000 67200000 q22.33 gneg +chr15 67200000 72400000 q23 gpos25 +chr15 72400000 74900000 q24.1 gneg +chr15 74900000 76300000 q24.2 gpos25 +chr15 76300000 78000000 q24.3 gneg +chr15 78000000 81400000 q25.1 gpos50 +chr15 81400000 84700000 q25.2 gneg +chr15 84700000 88500000 q25.3 gpos50 +chr15 88500000 93800000 q26.1 gneg +chr15 93800000 98000000 q26.2 gpos50 +chr15 98000000 101991189 q26.3 gneg +chr16 0 7800000 p13.3 gneg +chr16 7800000 10400000 p13.2 gpos50 +chr16 10400000 12500000 p13.13 gneg +chr16 12500000 14700000 p13.12 gpos50 +chr16 14700000 16700000 p13.11 gneg +chr16 16700000 21200000 p12.3 gpos50 +chr16 21200000 24200000 p12.2 gneg +chr16 24200000 28500000 p12.1 gpos50 +chr16 28500000 35300000 p11.2 gneg +chr16 35300000 36800000 p11.1 acen +chr16 36800000 38400000 q11.1 acen +chr16 38400000 47000000 q11.2 gvar +chr16 47000000 52600000 q12.1 gneg +chr16 52600000 56000000 q12.2 gpos50 +chr16 56000000 57300000 q13 gneg +chr16 57300000 66600000 q21 gpos100 +chr16 66600000 70800000 q22.1 gneg +chr16 70800000 72800000 q22.2 gpos50 +chr16 72800000 74100000 q22.3 gneg +chr16 74100000 79200000 q23.1 gpos75 +chr16 79200000 81600000 q23.2 gneg +chr16 81600000 84100000 q23.3 gpos50 +chr16 84100000 87000000 q24.1 gneg +chr16 87000000 88700000 q24.2 gpos25 +chr16 88700000 90338345 q24.3 gneg +chr17 0 3400000 p13.3 gneg +chr17 3400000 6500000 p13.2 gpos50 +chr17 6500000 10800000 p13.1 gneg +chr17 10800000 16100000 p12 gpos75 +chr17 16100000 22700000 p11.2 gneg +chr17 22700000 25100000 p11.1 acen +chr17 25100000 27400000 q11.1 acen +chr17 27400000 33500000 q11.2 gneg +chr17 33500000 39800000 q12 gpos50 +chr17 39800000 40200000 q21.1 gneg +chr17 40200000 42800000 q21.2 gpos25 +chr17 42800000 46800000 q21.31 gneg +chr17 46800000 49300000 q21.32 gpos25 +chr17 49300000 52100000 q21.33 gneg +chr17 52100000 59500000 q22 gpos75 +chr17 59500000 
60200000 q23.1 gneg +chr17 60200000 63100000 q23.2 gpos75 +chr17 63100000 64600000 q23.3 gneg +chr17 64600000 66200000 q24.1 gpos50 +chr17 66200000 69100000 q24.2 gneg +chr17 69100000 72900000 q24.3 gpos75 +chr17 72900000 76800000 q25.1 gneg +chr17 76800000 77200000 q25.2 gpos25 +chr17 77200000 83257441 q25.3 gneg +chr18 0 2900000 p11.32 gneg +chr18 2900000 7200000 p11.31 gpos50 +chr18 7200000 8500000 p11.23 gneg +chr18 8500000 10900000 p11.22 gpos25 +chr18 10900000 15400000 p11.21 gneg +chr18 15400000 18500000 p11.1 acen +chr18 18500000 21500000 q11.1 acen +chr18 21500000 27500000 q11.2 gneg +chr18 27500000 35100000 q12.1 gpos100 +chr18 35100000 39500000 q12.2 gneg +chr18 39500000 45900000 q12.3 gpos75 +chr18 45900000 50700000 q21.1 gneg +chr18 50700000 56200000 q21.2 gpos75 +chr18 56200000 58600000 q21.31 gneg +chr18 58600000 61300000 q21.32 gpos50 +chr18 61300000 63900000 q21.33 gneg +chr18 63900000 69100000 q22.1 gpos100 +chr18 69100000 71000000 q22.2 gneg +chr18 71000000 75400000 q22.3 gpos25 +chr18 75400000 80373285 q23 gneg +chr19 0 6900000 p13.3 gneg +chr19 6900000 12600000 p13.2 gpos25 +chr19 12600000 13800000 p13.13 gneg +chr19 13800000 16100000 p13.12 gpos25 +chr19 16100000 19900000 p13.11 gneg +chr19 19900000 24200000 p12 gvar +chr19 24200000 26200000 p11 acen +chr19 26200000 28100000 q11 acen +chr19 28100000 31900000 q12 gvar +chr19 31900000 35100000 q13.11 gneg +chr19 35100000 37800000 q13.12 gpos25 +chr19 37800000 38200000 q13.13 gneg +chr19 38200000 42900000 q13.2 gpos25 +chr19 42900000 44700000 q13.31 gneg +chr19 44700000 47500000 q13.32 gpos25 +chr19 47500000 50900000 q13.33 gneg +chr19 50900000 53100000 q13.41 gpos25 +chr19 53100000 55800000 q13.42 gneg +chr19 55800000 58617616 q13.43 gpos25 +chr20 0 5100000 p13 gneg +chr20 5100000 9200000 p12.3 gpos75 +chr20 9200000 12000000 p12.2 gneg +chr20 12000000 17900000 p12.1 gpos75 +chr20 17900000 21300000 p11.23 gneg +chr20 21300000 22300000 p11.22 gpos25 +chr20 22300000 25700000 p11.21 gneg +chr20 
25700000 28100000 p11.1 acen +chr20 28100000 30400000 q11.1 acen +chr20 30400000 33500000 q11.21 gneg +chr20 33500000 35800000 q11.22 gpos25 +chr20 35800000 39000000 q11.23 gneg +chr20 39000000 43100000 q12 gpos75 +chr20 43100000 43500000 q13.11 gneg +chr20 43500000 47800000 q13.12 gpos25 +chr20 47800000 51200000 q13.13 gneg +chr20 51200000 56400000 q13.2 gpos75 +chr20 56400000 57800000 q13.31 gneg +chr20 57800000 59700000 q13.32 gpos50 +chr20 59700000 64444167 q13.33 gneg +chr21 0 3100000 p13 gvar +chr21 3100000 7000000 p12 stalk +chr21 7000000 10900000 p11.2 gvar +chr21 10900000 12000000 p11.1 acen +chr21 12000000 13000000 q11.1 acen +chr21 13000000 15000000 q11.2 gneg +chr21 15000000 22600000 q21.1 gpos100 +chr21 22600000 25500000 q21.2 gneg +chr21 25500000 30200000 q21.3 gpos75 +chr21 30200000 34400000 q22.11 gneg +chr21 34400000 36400000 q22.12 gpos50 +chr21 36400000 38300000 q22.13 gneg +chr21 38300000 41200000 q22.2 gpos50 +chr21 41200000 46709983 q22.3 gneg +chr22 0 4300000 p13 gvar +chr22 4300000 9400000 p12 stalk +chr22 9400000 13700000 p11.2 gvar +chr22 13700000 15000000 p11.1 acen +chr22 15000000 17400000 q11.1 acen +chr22 17400000 21700000 q11.21 gneg +chr22 21700000 23100000 q11.22 gpos25 +chr22 23100000 25500000 q11.23 gneg +chr22 25500000 29200000 q12.1 gpos50 +chr22 29200000 31800000 q12.2 gneg +chr22 31800000 37200000 q12.3 gpos50 +chr22 37200000 40600000 q13.1 gneg +chr22 40600000 43800000 q13.2 gpos50 +chr22 43800000 48100000 q13.31 gneg +chr22 48100000 49100000 q13.32 gpos50 +chr22 49100000 50818468 q13.33 gneg +chrUn_GL000195v1 0 182896 gneg +chrUn_GL000213v1 0 164239 gneg +chrUn_GL000214v1 0 137718 gneg +chrUn_GL000216v2 0 176608 gneg +chrUn_GL000218v1 0 161147 gneg +chrUn_GL000219v1 0 179198 gneg +chrUn_GL000220v1 0 161802 gneg +chrUn_GL000224v1 0 179693 gneg +chrUn_GL000226v1 0 15008 gneg +chrUn_KI270302v1 0 2274 gneg +chrUn_KI270303v1 0 1942 gneg +chrUn_KI270304v1 0 2165 gneg +chrUn_KI270305v1 0 1472 gneg +chrUn_KI270310v1 0 1201 gneg 
+chrUn_KI270311v1 0 12399 gneg +chrUn_KI270312v1 0 998 gneg +chrUn_KI270315v1 0 2276 gneg +chrUn_KI270316v1 0 1444 gneg +chrUn_KI270317v1 0 37690 gneg +chrUn_KI270320v1 0 4416 gneg +chrUn_KI270322v1 0 21476 gneg +chrUn_KI270329v1 0 1040 gneg +chrUn_KI270330v1 0 1652 gneg +chrUn_KI270333v1 0 2699 gneg +chrUn_KI270334v1 0 1368 gneg +chrUn_KI270335v1 0 1048 gneg +chrUn_KI270336v1 0 1026 gneg +chrUn_KI270337v1 0 1121 gneg +chrUn_KI270338v1 0 1428 gneg +chrUn_KI270340v1 0 1428 gneg +chrUn_KI270362v1 0 3530 gneg +chrUn_KI270363v1 0 1803 gneg +chrUn_KI270364v1 0 2855 gneg +chrUn_KI270366v1 0 8320 gneg +chrUn_KI270371v1 0 2805 gneg +chrUn_KI270372v1 0 1650 gneg +chrUn_KI270373v1 0 1451 gneg +chrUn_KI270374v1 0 2656 gneg +chrUn_KI270375v1 0 2378 gneg +chrUn_KI270376v1 0 1136 gneg +chrUn_KI270378v1 0 1048 gneg +chrUn_KI270379v1 0 1045 gneg +chrUn_KI270381v1 0 1930 gneg +chrUn_KI270382v1 0 4215 gneg +chrUn_KI270383v1 0 1750 gneg +chrUn_KI270384v1 0 1658 gneg +chrUn_KI270385v1 0 990 gneg +chrUn_KI270386v1 0 1788 gneg +chrUn_KI270387v1 0 1537 gneg +chrUn_KI270388v1 0 1216 gneg +chrUn_KI270389v1 0 1298 gneg +chrUn_KI270390v1 0 2387 gneg +chrUn_KI270391v1 0 1484 gneg +chrUn_KI270392v1 0 971 gneg +chrUn_KI270393v1 0 1308 gneg +chrUn_KI270394v1 0 970 gneg +chrUn_KI270395v1 0 1143 gneg +chrUn_KI270396v1 0 1880 gneg +chrUn_KI270411v1 0 2646 gneg +chrUn_KI270412v1 0 1179 gneg +chrUn_KI270414v1 0 2489 gneg +chrUn_KI270417v1 0 2043 gneg +chrUn_KI270418v1 0 2145 gneg +chrUn_KI270419v1 0 1029 gneg +chrUn_KI270420v1 0 2321 gneg +chrUn_KI270422v1 0 1445 gneg +chrUn_KI270423v1 0 981 gneg +chrUn_KI270424v1 0 2140 gneg +chrUn_KI270425v1 0 1884 gneg +chrUn_KI270429v1 0 1361 gneg +chrUn_KI270435v1 0 92983 gneg +chrUn_KI270438v1 0 112505 gneg +chrUn_KI270442v1 0 392061 gneg +chrUn_KI270448v1 0 7992 gneg +chrUn_KI270465v1 0 1774 gneg +chrUn_KI270466v1 0 1233 gneg +chrUn_KI270467v1 0 3920 gneg +chrUn_KI270468v1 0 4055 gneg +chrUn_KI270507v1 0 5353 gneg +chrUn_KI270508v1 0 1951 gneg 
+chrUn_KI270509v1 0 2318 gneg +chrUn_KI270510v1 0 2415 gneg +chrUn_KI270511v1 0 8127 gneg +chrUn_KI270512v1 0 22689 gneg +chrUn_KI270515v1 0 6361 gneg +chrUn_KI270516v1 0 1300 gneg +chrUn_KI270517v1 0 3253 gneg +chrUn_KI270518v1 0 2186 gneg +chrUn_KI270519v1 0 138126 gneg +chrUn_KI270521v1 0 7642 gneg +chrUn_KI270522v1 0 5674 gneg +chrUn_KI270528v1 0 2983 gneg +chrUn_KI270529v1 0 1899 gneg +chrUn_KI270530v1 0 2168 gneg +chrUn_KI270538v1 0 91309 gneg +chrUn_KI270539v1 0 993 gneg +chrUn_KI270544v1 0 1202 gneg +chrUn_KI270548v1 0 1599 gneg +chrUn_KI270579v1 0 31033 gneg +chrUn_KI270580v1 0 1553 gneg +chrUn_KI270581v1 0 7046 gneg +chrUn_KI270582v1 0 6504 gneg +chrUn_KI270583v1 0 1400 gneg +chrUn_KI270584v1 0 4513 gneg +chrUn_KI270587v1 0 2969 gneg +chrUn_KI270588v1 0 6158 gneg +chrUn_KI270589v1 0 44474 gneg +chrUn_KI270590v1 0 4685 gneg +chrUn_KI270591v1 0 5796 gneg +chrUn_KI270593v1 0 3041 gneg +chrUn_KI270741v1 0 157432 gneg +chrUn_KI270742v1 0 186739 gneg +chrUn_KI270743v1 0 210658 gneg +chrUn_KI270744v1 0 168472 gneg +chrUn_KI270745v1 0 41891 gneg +chrUn_KI270746v1 0 66486 gneg +chrUn_KI270747v1 0 198735 gneg +chrUn_KI270748v1 0 93321 gneg +chrUn_KI270749v1 0 158759 gneg +chrUn_KI270750v1 0 148850 gneg +chrUn_KI270751v1 0 150742 gneg +chrUn_KI270752v1 0 27745 gneg +chrUn_KI270753v1 0 62944 gneg +chrUn_KI270754v1 0 40191 gneg +chrUn_KI270755v1 0 36723 gneg +chrUn_KI270756v1 0 79590 gneg +chrUn_KI270757v1 0 71251 gneg +chr1_GL383518v1_alt 0 182439 gneg +chr1_GL383519v1_alt 0 110268 gneg +chr1_GL383520v2_alt 0 366580 gneg +chr1_KI270759v1_alt 0 425601 gneg +chr1_KI270760v1_alt 0 109528 gneg +chr1_KI270761v1_alt 0 165834 gneg +chr1_KI270762v1_alt 0 354444 gneg +chr1_KI270763v1_alt 0 911658 gneg +chr1_KI270764v1_alt 0 50258 gneg +chr1_KI270765v1_alt 0 185285 gneg +chr1_KI270766v1_alt 0 256271 gneg +chr1_KI270892v1_alt 0 162212 gneg +chr2_GL383521v1_alt 0 143390 gneg +chr2_GL383522v1_alt 0 123821 gneg +chr2_GL582966v2_alt 0 96131 gneg +chr2_KI270767v1_alt 0 161578 gneg 
+chr2_KI270768v1_alt 0 110099 gneg +chr2_KI270769v1_alt 0 120616 gneg +chr2_KI270770v1_alt 0 136240 gneg +chr2_KI270771v1_alt 0 110395 gneg +chr2_KI270772v1_alt 0 133041 gneg +chr2_KI270773v1_alt 0 70887 gneg +chr2_KI270774v1_alt 0 223625 gneg +chr2_KI270775v1_alt 0 138019 gneg +chr2_KI270776v1_alt 0 174166 gneg +chr2_KI270893v1_alt 0 161218 gneg +chr2_KI270894v1_alt 0 214158 gneg +chr3_GL383526v1_alt 0 180671 gneg +chr3_JH636055v2_alt 0 173151 gneg +chr3_KI270777v1_alt 0 173649 gneg +chr3_KI270778v1_alt 0 248252 gneg +chr3_KI270779v1_alt 0 205312 gneg +chr3_KI270780v1_alt 0 224108 gneg +chr3_KI270781v1_alt 0 113034 gneg +chr3_KI270782v1_alt 0 162429 gneg +chr3_KI270783v1_alt 0 109187 gneg +chr3_KI270784v1_alt 0 184404 gneg +chr3_KI270895v1_alt 0 162896 gneg +chr3_KI270924v1_alt 0 166540 gneg +chr3_KI270934v1_alt 0 163458 gneg +chr3_KI270935v1_alt 0 197351 gneg +chr3_KI270936v1_alt 0 164170 gneg +chr3_KI270937v1_alt 0 165607 gneg +chr4_GL000257v2_alt 0 586476 gneg +chr4_GL383527v1_alt 0 164536 gneg +chr4_GL383528v1_alt 0 376187 gneg +chr4_KI270785v1_alt 0 119912 gneg +chr4_KI270786v1_alt 0 244096 gneg +chr4_KI270787v1_alt 0 111943 gneg +chr4_KI270788v1_alt 0 158965 gneg +chr4_KI270789v1_alt 0 205944 gneg +chr4_KI270790v1_alt 0 220246 gneg +chr4_KI270896v1_alt 0 378547 gneg +chr4_KI270925v1_alt 0 555799 gneg +chr5_GL339449v2_alt 0 1612928 gneg +chr5_GL383530v1_alt 0 101241 gneg +chr5_GL383531v1_alt 0 173459 gneg +chr5_GL383532v1_alt 0 82728 gneg +chr5_GL949742v1_alt 0 226852 gneg +chr5_KI270791v1_alt 0 195710 gneg +chr5_KI270792v1_alt 0 179043 gneg +chr5_KI270793v1_alt 0 126136 gneg +chr5_KI270794v1_alt 0 164558 gneg +chr5_KI270795v1_alt 0 131892 gneg +chr5_KI270796v1_alt 0 172708 gneg +chr5_KI270897v1_alt 0 1144418 gneg +chr5_KI270898v1_alt 0 130957 gneg +chr6_GL000250v2_alt 0 4672374 gneg +chr6_GL000251v2_alt 0 4795265 gneg +chr6_GL000252v2_alt 0 4604811 gneg +chr6_GL000253v2_alt 0 4677643 gneg +chr6_GL000254v2_alt 0 4827813 gneg +chr6_GL000255v2_alt 0 4606388 
gneg +chr6_GL000256v2_alt 0 4929269 gneg +chr6_GL383533v1_alt 0 124736 gneg +chr6_KB021644v2_alt 0 185823 gneg +chr6_KI270758v1_alt 0 76752 gneg +chr6_KI270797v1_alt 0 197536 gneg +chr6_KI270798v1_alt 0 271782 gneg +chr6_KI270799v1_alt 0 152148 gneg +chr6_KI270800v1_alt 0 175808 gneg +chr6_KI270801v1_alt 0 870480 gneg +chr6_KI270802v1_alt 0 75005 gneg +chr7_GL383534v2_alt 0 119183 gneg +chr7_KI270803v1_alt 0 1111570 gneg +chr7_KI270804v1_alt 0 157952 gneg +chr7_KI270805v1_alt 0 209988 gneg +chr7_KI270806v1_alt 0 158166 gneg +chr7_KI270807v1_alt 0 126434 gneg +chr7_KI270808v1_alt 0 271455 gneg +chr7_KI270809v1_alt 0 209586 gneg +chr7_KI270899v1_alt 0 190869 gneg +chr8_KI270810v1_alt 0 374415 gneg +chr8_KI270811v1_alt 0 292436 gneg +chr8_KI270812v1_alt 0 282736 gneg +chr8_KI270813v1_alt 0 300230 gneg +chr8_KI270814v1_alt 0 141812 gneg +chr8_KI270815v1_alt 0 132244 gneg +chr8_KI270816v1_alt 0 305841 gneg +chr8_KI270817v1_alt 0 158983 gneg +chr8_KI270818v1_alt 0 145606 gneg +chr8_KI270819v1_alt 0 133535 gneg +chr8_KI270820v1_alt 0 36640 gneg +chr8_KI270821v1_alt 0 985506 gneg +chr8_KI270822v1_alt 0 624492 gneg +chr8_KI270900v1_alt 0 318687 gneg +chr8_KI270901v1_alt 0 136959 gneg +chr8_KI270926v1_alt 0 229282 gneg +chr9_GL383539v1_alt 0 162988 gneg +chr9_GL383540v1_alt 0 71551 gneg +chr9_GL383541v1_alt 0 171286 gneg +chr9_GL383542v1_alt 0 60032 gneg +chr9_KI270823v1_alt 0 439082 gneg +chrX_KI270880v1_alt 0 284869 gneg +chrX_KI270881v1_alt 0 144206 gneg +chrX_KI270913v1_alt 0 274009 gneg +chr10_GL383545v1_alt 0 179254 gneg +chr10_GL383546v1_alt 0 309802 gneg +chr10_KI270824v1_alt 0 181496 gneg +chr10_KI270825v1_alt 0 188315 gneg +chr11_GL383547v1_alt 0 154407 gneg +chr11_JH159136v1_alt 0 200998 gneg +chr11_JH159137v1_alt 0 191409 gneg +chr11_KI270826v1_alt 0 186169 gneg +chr11_KI270827v1_alt 0 67707 gneg +chr11_KI270829v1_alt 0 204059 gneg +chr11_KI270830v1_alt 0 177092 gneg +chr11_KI270831v1_alt 0 296895 gneg +chr11_KI270832v1_alt 0 210133 gneg +chr11_KI270902v1_alt 0 
106711 gneg +chr11_KI270903v1_alt 0 214625 gneg +chr11_KI270927v1_alt 0 218612 gneg +chr12_GL383549v1_alt 0 120804 gneg +chr12_GL383550v2_alt 0 169178 gneg +chr12_GL383551v1_alt 0 184319 gneg +chr12_GL383552v1_alt 0 138655 gneg +chr12_GL383553v2_alt 0 152874 gneg +chr12_GL877875v1_alt 0 167313 gneg +chr12_GL877876v1_alt 0 408271 gneg +chr12_KI270833v1_alt 0 76061 gneg +chr12_KI270834v1_alt 0 119498 gneg +chr12_KI270835v1_alt 0 238139 gneg +chr12_KI270836v1_alt 0 56134 gneg +chr12_KI270837v1_alt 0 40090 gneg +chr12_KI270904v1_alt 0 572349 gneg +chr13_KI270838v1_alt 0 306913 gneg +chr13_KI270839v1_alt 0 180306 gneg +chr13_KI270840v1_alt 0 191684 gneg +chr13_KI270841v1_alt 0 169134 gneg +chr13_KI270842v1_alt 0 37287 gneg +chr13_KI270843v1_alt 0 103832 gneg +chr14_KI270844v1_alt 0 322166 gneg +chr14_KI270845v1_alt 0 180703 gneg +chr14_KI270846v1_alt 0 1351393 gneg +chr14_KI270847v1_alt 0 1511111 gneg +chr15_GL383554v1_alt 0 296527 gneg +chr15_GL383555v2_alt 0 388773 gneg +chr15_KI270848v1_alt 0 327382 gneg +chr15_KI270849v1_alt 0 244917 gneg +chr15_KI270850v1_alt 0 430880 gneg +chr15_KI270851v1_alt 0 263054 gneg +chr15_KI270852v1_alt 0 478999 gneg +chr15_KI270905v1_alt 0 5161414 gneg +chr15_KI270906v1_alt 0 196384 gneg +chr16_GL383556v1_alt 0 192462 gneg +chr16_GL383557v1_alt 0 89672 gneg +chr16_KI270853v1_alt 0 2659700 gneg +chr16_KI270854v1_alt 0 134193 gneg +chr16_KI270855v1_alt 0 232857 gneg +chr16_KI270856v1_alt 0 63982 gneg +chr17_GL000258v2_alt 0 1821992 gneg +chr17_GL383563v3_alt 0 375691 gneg +chr17_GL383564v2_alt 0 133151 gneg +chr17_GL383565v1_alt 0 223995 gneg +chr17_GL383566v1_alt 0 90219 gneg +chr17_JH159146v1_alt 0 278131 gneg +chr17_JH159147v1_alt 0 70345 gneg +chr17_JH159148v1_alt 0 88070 gneg +chr17_KI270857v1_alt 0 2877074 gneg +chr17_KI270858v1_alt 0 235827 gneg +chr17_KI270859v1_alt 0 108763 gneg +chr17_KI270860v1_alt 0 178921 gneg +chr17_KI270861v1_alt 0 196688 gneg +chr17_KI270862v1_alt 0 391357 gneg +chr17_KI270907v1_alt 0 137721 gneg 
+chr17_KI270908v1_alt 0 1423190 gneg +chr17_KI270909v1_alt 0 325800 gneg +chr17_KI270910v1_alt 0 157099 gneg +chr18_GL383567v1_alt 0 289831 gneg +chr18_GL383568v1_alt 0 104552 gneg +chr18_GL383569v1_alt 0 167950 gneg +chr18_GL383570v1_alt 0 164789 gneg +chr18_GL383571v1_alt 0 198278 gneg +chr18_GL383572v1_alt 0 159547 gneg +chr18_KI270863v1_alt 0 167999 gneg +chr18_KI270864v1_alt 0 111737 gneg +chr18_KI270911v1_alt 0 157710 gneg +chr18_KI270912v1_alt 0 174061 gneg +chr19_GL000209v2_alt 0 177381 gneg +chr19_GL383573v1_alt 0 385657 gneg +chr19_GL383574v1_alt 0 155864 gneg +chr19_GL383575v2_alt 0 170222 gneg +chr19_GL383576v1_alt 0 188024 gneg +chr19_GL949746v1_alt 0 987716 gneg +chr19_GL949747v2_alt 0 729520 gneg +chr19_GL949748v2_alt 0 1064304 gneg +chr19_GL949749v2_alt 0 1091841 gneg +chr19_GL949750v2_alt 0 1066390 gneg +chr19_GL949751v2_alt 0 1002683 gneg +chr19_GL949752v1_alt 0 987100 gneg +chr19_GL949753v2_alt 0 796479 gneg +chr19_KI270865v1_alt 0 52969 gneg +chr19_KI270866v1_alt 0 43156 gneg +chr19_KI270867v1_alt 0 233762 gneg +chr19_KI270868v1_alt 0 61734 gneg +chr19_KI270882v1_alt 0 248807 gneg +chr19_KI270883v1_alt 0 170399 gneg +chr19_KI270884v1_alt 0 157053 gneg +chr19_KI270885v1_alt 0 171027 gneg +chr19_KI270886v1_alt 0 204239 gneg +chr19_KI270887v1_alt 0 209512 gneg +chr19_KI270888v1_alt 0 155532 gneg +chr19_KI270889v1_alt 0 170698 gneg +chr19_KI270890v1_alt 0 184499 gneg +chr19_KI270891v1_alt 0 170680 gneg +chr19_KI270914v1_alt 0 205194 gneg +chr19_KI270915v1_alt 0 170665 gneg +chr19_KI270916v1_alt 0 184516 gneg +chr19_KI270917v1_alt 0 190932 gneg +chr19_KI270918v1_alt 0 123111 gneg +chr19_KI270919v1_alt 0 170701 gneg +chr19_KI270920v1_alt 0 198005 gneg +chr19_KI270921v1_alt 0 282224 gneg +chr19_KI270922v1_alt 0 187935 gneg +chr19_KI270923v1_alt 0 189352 gneg +chr19_KI270929v1_alt 0 186203 gneg +chr19_KI270930v1_alt 0 200773 gneg +chr19_KI270931v1_alt 0 170148 gneg +chr19_KI270932v1_alt 0 215732 gneg +chr19_KI270933v1_alt 0 170537 gneg 
+chr19_KI270938v1_alt 0 1066800 gneg +chr20_GL383577v2_alt 0 128386 gneg +chr20_KI270869v1_alt 0 118774 gneg +chr20_KI270870v1_alt 0 183433 gneg +chr20_KI270871v1_alt 0 58661 gneg +chr21_GL383578v2_alt 0 63917 gneg +chr21_GL383579v2_alt 0 201197 gneg +chr21_GL383580v2_alt 0 74653 gneg +chr21_GL383581v2_alt 0 116689 gneg +chr21_KI270872v1_alt 0 82692 gneg +chr21_KI270873v1_alt 0 143900 gneg +chr21_KI270874v1_alt 0 166743 gneg +chr22_GL383582v2_alt 0 162811 gneg +chr22_GL383583v2_alt 0 96924 gneg +chr22_KB663609v1_alt 0 74013 gneg +chr22_KI270875v1_alt 0 259914 gneg +chr22_KI270876v1_alt 0 263666 gneg +chr22_KI270877v1_alt 0 101331 gneg +chr22_KI270878v1_alt 0 186262 gneg +chr22_KI270879v1_alt 0 304135 gneg +chr22_KI270928v1_alt 0 176103 gneg +chr1_KI270706v1_random 0 175055 gneg +chr1_KI270707v1_random 0 32032 gneg +chr1_KI270708v1_random 0 127682 gneg +chr1_KI270709v1_random 0 66860 gneg +chr1_KI270710v1_random 0 40176 gneg +chr1_KI270711v1_random 0 42210 gneg +chr1_KI270712v1_random 0 176043 gneg +chr1_KI270713v1_random 0 40745 gneg +chr1_KI270714v1_random 0 41717 gneg +chr2_KI270715v1_random 0 161471 gneg +chr2_KI270716v1_random 0 153799 gneg +chr3_GL000221v1_random 0 155397 gneg +chr4_GL000008v2_random 0 209709 gneg +chr5_GL000208v1_random 0 92689 gneg +chr9_KI270717v1_random 0 40062 gneg +chr9_KI270718v1_random 0 38054 gneg +chr9_KI270719v1_random 0 176845 gneg +chr9_KI270720v1_random 0 39050 gneg +chrY_KI270740v1_random 0 37240 gneg +chr11_KI270721v1_random 0 100316 gneg +chr14_GL000009v2_random 0 201709 gneg +chr14_GL000194v1_random 0 191469 gneg +chr14_GL000225v1_random 0 211173 gneg +chr14_KI270722v1_random 0 194050 gneg +chr14_KI270723v1_random 0 38115 gneg +chr14_KI270724v1_random 0 39555 gneg +chr14_KI270725v1_random 0 172810 gneg +chr14_KI270726v1_random 0 43739 gneg +chr15_KI270727v1_random 0 448248 gneg +chr16_KI270728v1_random 0 1872759 gneg +chr17_GL000205v2_random 0 185591 gneg +chr17_KI270729v1_random 0 280839 gneg +chr17_KI270730v1_random 0 
112551 gneg +chr22_KI270731v1_random 0 150754 gneg +chr22_KI270732v1_random 0 41543 gneg +chr22_KI270733v1_random 0 179772 gneg +chr22_KI270734v1_random 0 165050 gneg +chr22_KI270735v1_random 0 42811 gneg +chr22_KI270736v1_random 0 181920 gneg +chr22_KI270737v1_random 0 103838 gneg +chr22_KI270738v1_random 0 99375 gneg +chr22_KI270739v1_random 0 73985 gneg diff --git a/jcvi/utils/data/hg38.chrom.sizes b/jcvi/utils/data/hg38.chrom.sizes new file mode 100644 index 00000000..39a3ef9a --- /dev/null +++ b/jcvi/utils/data/hg38.chrom.sizes @@ -0,0 +1,455 @@ +chr1 248956422 +chr2 242193529 +chr3 198295559 +chr4 190214555 +chr5 181538259 +chr6 170805979 +chr7 159345973 +chrX 156040895 +chr8 145138636 +chr9 138394717 +chr11 135086622 +chr10 133797422 +chr12 133275309 +chr13 114364328 +chr14 107043718 +chr15 101991189 +chr16 90338345 +chr17 83257441 +chr18 80373285 +chr20 64444167 +chr19 58617616 +chrY 57227415 +chr22 50818468 +chr21 46709983 +chr15_KI270905v1_alt 5161414 +chr6_GL000256v2_alt 4929269 +chr6_GL000254v2_alt 4827813 +chr6_GL000251v2_alt 4795265 +chr6_GL000253v2_alt 4677643 +chr6_GL000250v2_alt 4672374 +chr6_GL000255v2_alt 4606388 +chr6_GL000252v2_alt 4604811 +chr17_KI270857v1_alt 2877074 +chr16_KI270853v1_alt 2659700 +chr16_KI270728v1_random 1872759 +chr17_GL000258v2_alt 1821992 +chr5_GL339449v2_alt 1612928 +chr14_KI270847v1_alt 1511111 +chr17_KI270908v1_alt 1423190 +chr14_KI270846v1_alt 1351393 +chr5_KI270897v1_alt 1144418 +chr7_KI270803v1_alt 1111570 +chr19_GL949749v2_alt 1091841 +chr19_KI270938v1_alt 1066800 +chr19_GL949750v2_alt 1066390 +chr19_GL949748v2_alt 1064304 +chr19_GL949751v2_alt 1002683 +chr19_GL949746v1_alt 987716 +chr19_GL949752v1_alt 987100 +chr8_KI270821v1_alt 985506 +chr1_KI270763v1_alt 911658 +chr6_KI270801v1_alt 870480 +chr19_GL949753v2_alt 796479 +chr19_GL949747v2_alt 729520 +chr8_KI270822v1_alt 624492 +chr4_GL000257v2_alt 586476 +chr12_KI270904v1_alt 572349 +chr4_KI270925v1_alt 555799 +chr15_KI270852v1_alt 478999 +chr15_KI270727v1_random 
448248 +chr9_KI270823v1_alt 439082 +chr15_KI270850v1_alt 430880 +chr1_KI270759v1_alt 425601 +chr12_GL877876v1_alt 408271 +chrUn_KI270442v1 392061 +chr17_KI270862v1_alt 391357 +chr15_GL383555v2_alt 388773 +chr19_GL383573v1_alt 385657 +chr4_KI270896v1_alt 378547 +chr4_GL383528v1_alt 376187 +chr17_GL383563v3_alt 375691 +chr8_KI270810v1_alt 374415 +chr1_GL383520v2_alt 366580 +chr1_KI270762v1_alt 354444 +chr15_KI270848v1_alt 327382 +chr17_KI270909v1_alt 325800 +chr14_KI270844v1_alt 322166 +chr8_KI270900v1_alt 318687 +chr10_GL383546v1_alt 309802 +chr13_KI270838v1_alt 306913 +chr8_KI270816v1_alt 305841 +chr22_KI270879v1_alt 304135 +chr8_KI270813v1_alt 300230 +chr11_KI270831v1_alt 296895 +chr15_GL383554v1_alt 296527 +chr8_KI270811v1_alt 292436 +chr18_GL383567v1_alt 289831 +chrX_KI270880v1_alt 284869 +chr8_KI270812v1_alt 282736 +chr19_KI270921v1_alt 282224 +chr17_KI270729v1_random 280839 +chr17_JH159146v1_alt 278131 +chrX_KI270913v1_alt 274009 +chr6_KI270798v1_alt 271782 +chr7_KI270808v1_alt 271455 +chr22_KI270876v1_alt 263666 +chr15_KI270851v1_alt 263054 +chr22_KI270875v1_alt 259914 +chr1_KI270766v1_alt 256271 +chr19_KI270882v1_alt 248807 +chr3_KI270778v1_alt 248252 +chr15_KI270849v1_alt 244917 +chr4_KI270786v1_alt 244096 +chr12_KI270835v1_alt 238139 +chr17_KI270858v1_alt 235827 +chr19_KI270867v1_alt 233762 +chr16_KI270855v1_alt 232857 +chr8_KI270926v1_alt 229282 +chr5_GL949742v1_alt 226852 +chr3_KI270780v1_alt 224108 +chr17_GL383565v1_alt 223995 +chr2_KI270774v1_alt 223625 +chr4_KI270790v1_alt 220246 +chr11_KI270927v1_alt 218612 +chr19_KI270932v1_alt 215732 +chr11_KI270903v1_alt 214625 +chr2_KI270894v1_alt 214158 +chr14_GL000225v1_random 211173 +chrUn_KI270743v1 210658 +chr11_KI270832v1_alt 210133 +chr7_KI270805v1_alt 209988 +chr4_GL000008v2_random 209709 +chr7_KI270809v1_alt 209586 +chr19_KI270887v1_alt 209512 +chr4_KI270789v1_alt 205944 +chr3_KI270779v1_alt 205312 +chr19_KI270914v1_alt 205194 +chr19_KI270886v1_alt 204239 +chr11_KI270829v1_alt 204059 
+chr14_GL000009v2_random 201709 +chr21_GL383579v2_alt 201197 +chr11_JH159136v1_alt 200998 +chr19_KI270930v1_alt 200773 +chrUn_KI270747v1 198735 +chr18_GL383571v1_alt 198278 +chr19_KI270920v1_alt 198005 +chr6_KI270797v1_alt 197536 +chr3_KI270935v1_alt 197351 +chr17_KI270861v1_alt 196688 +chr15_KI270906v1_alt 196384 +chr5_KI270791v1_alt 195710 +chr14_KI270722v1_random 194050 +chr16_GL383556v1_alt 192462 +chr13_KI270840v1_alt 191684 +chr14_GL000194v1_random 191469 +chr11_JH159137v1_alt 191409 +chr19_KI270917v1_alt 190932 +chr7_KI270899v1_alt 190869 +chr19_KI270923v1_alt 189352 +chr10_KI270825v1_alt 188315 +chr19_GL383576v1_alt 188024 +chr19_KI270922v1_alt 187935 +chrUn_KI270742v1 186739 +chr22_KI270878v1_alt 186262 +chr19_KI270929v1_alt 186203 +chr11_KI270826v1_alt 186169 +chr6_KB021644v2_alt 185823 +chr17_GL000205v2_random 185591 +chr1_KI270765v1_alt 185285 +chr19_KI270916v1_alt 184516 +chr19_KI270890v1_alt 184499 +chr3_KI270784v1_alt 184404 +chr12_GL383551v1_alt 184319 +chr20_KI270870v1_alt 183433 +chrUn_GL000195v1 182896 +chr1_GL383518v1_alt 182439 +chr22_KI270736v1_random 181920 +chr10_KI270824v1_alt 181496 +chr14_KI270845v1_alt 180703 +chr3_GL383526v1_alt 180671 +chr13_KI270839v1_alt 180306 +chr22_KI270733v1_random 179772 +chrUn_GL000224v1 179693 +chr10_GL383545v1_alt 179254 +chrUn_GL000219v1 179198 +chr5_KI270792v1_alt 179043 +chr17_KI270860v1_alt 178921 +chr19_GL000209v2_alt 177381 +chr11_KI270830v1_alt 177092 +chr9_KI270719v1_random 176845 +chrUn_GL000216v2 176608 +chr22_KI270928v1_alt 176103 +chr1_KI270712v1_random 176043 +chr6_KI270800v1_alt 175808 +chr1_KI270706v1_random 175055 +chr2_KI270776v1_alt 174166 +chr18_KI270912v1_alt 174061 +chr3_KI270777v1_alt 173649 +chr5_GL383531v1_alt 173459 +chr3_JH636055v2_alt 173151 +chr14_KI270725v1_random 172810 +chr5_KI270796v1_alt 172708 +chr9_GL383541v1_alt 171286 +chr19_KI270885v1_alt 171027 +chr19_KI270919v1_alt 170701 +chr19_KI270889v1_alt 170698 +chr19_KI270891v1_alt 170680 +chr19_KI270915v1_alt 170665 
+chr19_KI270933v1_alt 170537 +chr19_KI270883v1_alt 170399 +chr19_GL383575v2_alt 170222 +chr19_KI270931v1_alt 170148 +chr12_GL383550v2_alt 169178 +chr13_KI270841v1_alt 169134 +chrUn_KI270744v1 168472 +chr18_KI270863v1_alt 167999 +chr18_GL383569v1_alt 167950 +chr12_GL877875v1_alt 167313 +chr21_KI270874v1_alt 166743 +chr3_KI270924v1_alt 166540 +chr1_KI270761v1_alt 165834 +chr3_KI270937v1_alt 165607 +chr22_KI270734v1_random 165050 +chr18_GL383570v1_alt 164789 +chr5_KI270794v1_alt 164558 +chr4_GL383527v1_alt 164536 +chrUn_GL000213v1 164239 +chr3_KI270936v1_alt 164170 +chr3_KI270934v1_alt 163458 +chr9_GL383539v1_alt 162988 +chr3_KI270895v1_alt 162896 +chr22_GL383582v2_alt 162811 +chr3_KI270782v1_alt 162429 +chr1_KI270892v1_alt 162212 +chrUn_GL000220v1 161802 +chr2_KI270767v1_alt 161578 +chr2_KI270715v1_random 161471 +chr2_KI270893v1_alt 161218 +chrUn_GL000218v1 161147 +chr18_GL383572v1_alt 159547 +chr8_KI270817v1_alt 158983 +chr4_KI270788v1_alt 158965 +chrUn_KI270749v1 158759 +chr7_KI270806v1_alt 158166 +chr7_KI270804v1_alt 157952 +chr18_KI270911v1_alt 157710 +chrUn_KI270741v1 157432 +chr17_KI270910v1_alt 157099 +chr19_KI270884v1_alt 157053 +chr19_GL383574v1_alt 155864 +chr19_KI270888v1_alt 155532 +chr3_GL000221v1_random 155397 +chr11_GL383547v1_alt 154407 +chr2_KI270716v1_random 153799 +chr12_GL383553v2_alt 152874 +chr6_KI270799v1_alt 152148 +chr22_KI270731v1_random 150754 +chrUn_KI270751v1 150742 +chrUn_KI270750v1 148850 +chr8_KI270818v1_alt 145606 +chrX_KI270881v1_alt 144206 +chr21_KI270873v1_alt 143900 +chr2_GL383521v1_alt 143390 +chr8_KI270814v1_alt 141812 +chr12_GL383552v1_alt 138655 +chrUn_KI270519v1 138126 +chr2_KI270775v1_alt 138019 +chr17_KI270907v1_alt 137721 +chrUn_GL000214v1 137718 +chr8_KI270901v1_alt 136959 +chr2_KI270770v1_alt 136240 +chr16_KI270854v1_alt 134193 +chr8_KI270819v1_alt 133535 +chr17_GL383564v2_alt 133151 +chr2_KI270772v1_alt 133041 +chr8_KI270815v1_alt 132244 +chr5_KI270795v1_alt 131892 +chr5_KI270898v1_alt 130957 +chr20_GL383577v2_alt 
128386 +chr1_KI270708v1_random 127682 +chr7_KI270807v1_alt 126434 +chr5_KI270793v1_alt 126136 +chr6_GL383533v1_alt 124736 +chr2_GL383522v1_alt 123821 +chr19_KI270918v1_alt 123111 +chr12_GL383549v1_alt 120804 +chr2_KI270769v1_alt 120616 +chr4_KI270785v1_alt 119912 +chr12_KI270834v1_alt 119498 +chr7_GL383534v2_alt 119183 +chr20_KI270869v1_alt 118774 +chr21_GL383581v2_alt 116689 +chr3_KI270781v1_alt 113034 +chr17_KI270730v1_random 112551 +chrUn_KI270438v1 112505 +chr4_KI270787v1_alt 111943 +chr18_KI270864v1_alt 111737 +chr2_KI270771v1_alt 110395 +chr1_GL383519v1_alt 110268 +chr2_KI270768v1_alt 110099 +chr1_KI270760v1_alt 109528 +chr3_KI270783v1_alt 109187 +chr17_KI270859v1_alt 108763 +chr11_KI270902v1_alt 106711 +chr18_GL383568v1_alt 104552 +chr22_KI270737v1_random 103838 +chr13_KI270843v1_alt 103832 +chr22_KI270877v1_alt 101331 +chr5_GL383530v1_alt 101241 +chr11_KI270721v1_random 100316 +chr22_KI270738v1_random 99375 +chr22_GL383583v2_alt 96924 +chr2_GL582966v2_alt 96131 +chrUn_KI270748v1 93321 +chrUn_KI270435v1 92983 +chr5_GL000208v1_random 92689 +chrUn_KI270538v1 91309 +chr17_GL383566v1_alt 90219 +chr16_GL383557v1_alt 89672 +chr17_JH159148v1_alt 88070 +chr5_GL383532v1_alt 82728 +chr21_KI270872v1_alt 82692 +chrUn_KI270756v1 79590 +chr6_KI270758v1_alt 76752 +chr12_KI270833v1_alt 76061 +chr6_KI270802v1_alt 75005 +chr21_GL383580v2_alt 74653 +chr22_KB663609v1_alt 74013 +chr22_KI270739v1_random 73985 +chr9_GL383540v1_alt 71551 +chrUn_KI270757v1 71251 +chr2_KI270773v1_alt 70887 +chr17_JH159147v1_alt 70345 +chr11_KI270827v1_alt 67707 +chr1_KI270709v1_random 66860 +chrUn_KI270746v1 66486 +chr16_KI270856v1_alt 63982 +chr21_GL383578v2_alt 63917 +chrUn_KI270753v1 62944 +chr19_KI270868v1_alt 61734 +chr9_GL383542v1_alt 60032 +chr20_KI270871v1_alt 58661 +chr12_KI270836v1_alt 56134 +chr19_KI270865v1_alt 52969 +chr1_KI270764v1_alt 50258 +chrUn_KI270589v1 44474 +chr14_KI270726v1_random 43739 +chr19_KI270866v1_alt 43156 +chr22_KI270735v1_random 42811 +chr1_KI270711v1_random 42210 
+chrUn_KI270745v1 41891 +chr1_KI270714v1_random 41717 +chr22_KI270732v1_random 41543 +chr1_KI270713v1_random 40745 +chrUn_KI270754v1 40191 +chr1_KI270710v1_random 40176 +chr12_KI270837v1_alt 40090 +chr9_KI270717v1_random 40062 +chr14_KI270724v1_random 39555 +chr9_KI270720v1_random 39050 +chr14_KI270723v1_random 38115 +chr9_KI270718v1_random 38054 +chrUn_KI270317v1 37690 +chr13_KI270842v1_alt 37287 +chrY_KI270740v1_random 37240 +chrUn_KI270755v1 36723 +chr8_KI270820v1_alt 36640 +chr1_KI270707v1_random 32032 +chrUn_KI270579v1 31033 +chrUn_KI270752v1 27745 +chrUn_KI270512v1 22689 +chrUn_KI270322v1 21476 +chrM 16569 +chrUn_GL000226v1 15008 +chrUn_KI270311v1 12399 +chrUn_KI270366v1 8320 +chrUn_KI270511v1 8127 +chrUn_KI270448v1 7992 +chrUn_KI270521v1 7642 +chrUn_KI270581v1 7046 +chrUn_KI270582v1 6504 +chrUn_KI270515v1 6361 +chrUn_KI270588v1 6158 +chrUn_KI270591v1 5796 +chrUn_KI270522v1 5674 +chrUn_KI270507v1 5353 +chrUn_KI270590v1 4685 +chrUn_KI270584v1 4513 +chrUn_KI270320v1 4416 +chrUn_KI270382v1 4215 +chrUn_KI270468v1 4055 +chrUn_KI270467v1 3920 +chrUn_KI270362v1 3530 +chrUn_KI270517v1 3253 +chrUn_KI270593v1 3041 +chrUn_KI270528v1 2983 +chrUn_KI270587v1 2969 +chrUn_KI270364v1 2855 +chrUn_KI270371v1 2805 +chrUn_KI270333v1 2699 +chrUn_KI270374v1 2656 +chrUn_KI270411v1 2646 +chrUn_KI270414v1 2489 +chrUn_KI270510v1 2415 +chrUn_KI270390v1 2387 +chrUn_KI270375v1 2378 +chrUn_KI270420v1 2321 +chrUn_KI270509v1 2318 +chrUn_KI270315v1 2276 +chrUn_KI270302v1 2274 +chrUn_KI270518v1 2186 +chrUn_KI270530v1 2168 +chrUn_KI270304v1 2165 +chrUn_KI270418v1 2145 +chrUn_KI270424v1 2140 +chrUn_KI270417v1 2043 +chrUn_KI270508v1 1951 +chrUn_KI270303v1 1942 +chrUn_KI270381v1 1930 +chrUn_KI270529v1 1899 +chrUn_KI270425v1 1884 +chrUn_KI270396v1 1880 +chrUn_KI270363v1 1803 +chrUn_KI270386v1 1788 +chrUn_KI270465v1 1774 +chrUn_KI270383v1 1750 +chrUn_KI270384v1 1658 +chrUn_KI270330v1 1652 +chrUn_KI270372v1 1650 +chrUn_KI270548v1 1599 +chrUn_KI270580v1 1553 +chrUn_KI270387v1 1537 +chrUn_KI270391v1 
1484 +chrUn_KI270305v1 1472 +chrUn_KI270373v1 1451 +chrUn_KI270422v1 1445 +chrUn_KI270316v1 1444 +chrUn_KI270338v1 1428 +chrUn_KI270340v1 1428 +chrUn_KI270583v1 1400 +chrUn_KI270334v1 1368 +chrUn_KI270429v1 1361 +chrUn_KI270393v1 1308 +chrUn_KI270516v1 1300 +chrUn_KI270389v1 1298 +chrUn_KI270466v1 1233 +chrUn_KI270388v1 1216 +chrUn_KI270544v1 1202 +chrUn_KI270310v1 1201 +chrUn_KI270412v1 1179 +chrUn_KI270395v1 1143 +chrUn_KI270376v1 1136 +chrUn_KI270337v1 1121 +chrUn_KI270335v1 1048 +chrUn_KI270378v1 1048 +chrUn_KI270379v1 1045 +chrUn_KI270329v1 1040 +chrUn_KI270419v1 1029 +chrUn_KI270336v1 1026 +chrUn_KI270312v1 998 +chrUn_KI270539v1 993 +chrUn_KI270385v1 990 +chrUn_KI270423v1 981 +chrUn_KI270392v1 971 +chrUn_KI270394v1 970 diff --git a/jcvi/utils/data/instance.json b/jcvi/utils/data/instance.json new file mode 100644 index 00000000..32c35876 --- /dev/null +++ b/jcvi/utils/data/instance.json @@ -0,0 +1,42 @@ +{ + "AvailabilityZone": "us-west-2b", + "InstanceId": "", + "LaunchSpec": { + "BlockDeviceMappings": [ + { + "DeviceName": "/dev/sda1", + "Ebs": { + "VolumeSize": 80, + "VolumeType": "gp2" + } + } + ], + "EbsOptimized": true, + "IamInstanceProfile": { + "Arn": "", + "Name": "" + }, + "ImageId": "ami-1bc98663", + "InstanceType": "c4.8xlarge", + "KeyName": "mvrad-pdx-htang", + "Monitoring": { + "Enabled": false + }, + "SecurityGroupIds": [ + "sg-31982956", + "sg-76bd4f11" + ], + "SubnetId": "subnet-123ab865" + }, + "PrivateIpAddress": "", + "Volumes": [ + { + "Device": "/dev/sdf", + "VolumeId": "vol-aad57e1f" + }, + { + "Device": "/dev/sdg", + "VolumeId": "vol-0fee51beb98eee8c5" + } + ] +} diff --git a/jcvi/utils/db.py b/jcvi/utils/db.py new file mode 100644 index 00000000..6e2e4b94 --- /dev/null +++ b/jcvi/utils/db.py @@ -0,0 +1,334 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Connect to databases (Sybase, MySQL and PostgreSQL database backends) +""" +import os.path as op +import re +import sys + +from ..apps.base import ActionDispatcher, 
OptionParser, getusername, logger, sh +from ..formats.base import must_open +from ..utils.cbook import AutoVivification + + +# set up valid database connection params +valid_dbconn = AutoVivification() +for dbconn, port, module, host in zip( + ("Sybase", "MySQL", "PostgreSQL", "Oracle"), + (2025, 3306, 5432, 1521), + ("Sybase", "MySQLdb", "psycopg2", "cx_Oracle"), + ("SYBPROD", "mysql-lan-dev", "pgsql-lan-dev", "DBNAME.tacc.utexas.edu"), +): + valid_dbconn[dbconn]["port"] = port + valid_dbconn[dbconn]["module"] = module + valid_dbconn[dbconn]["hostname"] = host + + +def db_defaults(connector="Sybase"): + """ + JCVI legacy Sybase, MySQL and PostgreSQL database connection defaults + """ + return valid_dbconn[connector]["hostname"], "access", "access" + + +def get_profile( + sqshrc="~/.sqshrc", connector="Sybase", hostname=None, username=None, password=None +): + """ + get database, username, password from .sqshrc file e.g. + \set username="user" + """ + if connector == "Sybase": + shost, suser, spass = None, None, None + _ = lambda x: x.split("=")[-1].translate(None, "\"'").strip() + sqshrc = op.expanduser(sqshrc) + if op.exists(sqshrc): + for row in open(sqshrc): + row = row.strip() + if not row.startswith("\\set") or "prompt" in row: + continue + if "password" in row: + spass = _(row) + if "hostname" in row: + shost = _(row) + if "username" in row: + suser = _(row) + else: + print("[warning] file `{0}` not found".format(sqshrc), file=sys.stderr) + + if suser and spass: + username, password = suser, spass + if shost: + hostname = shost + + dhost, duser, dpass = db_defaults(connector=connector) + if not password: + username, password = duser, dpass + elif not username: + username = getusername() + + if not hostname: + hostname = dhost + + return hostname, username, password + + +def connect( + dbname, connector="Sybase", hostname=None, username=None, password=None, port=None +): + if None in (hostname, username, password): + hostname, username, password = 
get_profile( + hostname=hostname, username=username, password=password + ) + if port is None: + port = valid_dbconn[connector]["port"] + + dbconn = __import__(valid_dbconn[connector]["module"]) + if connector == "PostgreSQL": + dsn = "host={0} user={1} password={2} dbname={3} port={4}".format( + hostname, username, password, dbname, port + ) + dbh = dbconn.connect(dsn) + elif connector == "Oracle": + dsn = dbconn.makedsn(hostname, port, dbname) + dbh = dbconn.connect(username, password, dsn) + else: + dbh = dbconn.connect(hostname, username, password, dbname, port) + + cur = dbh.cursor() + return dbh, cur + + +def fetchall(cur, sql, connector=None): + cur.execute(sql) + return cur if connector == "Oracle" else cur.fetchall() + + +def execute(cur, sql): + cur.execute(sql) + + +def commit(dbh): + return dbh.commit() + + +def main(): + + actions = ( + ("libs", "get list of lib_ids to to run by pull"), + ("pull", "pull the sequences from the TIGR database"), + ("query", "run query using input from datafile"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def libs(args): + """ + %prog libs libfile + + Get list of lib_ids to be run by pull(). 
The SQL commands: + + select library.lib_id, library.name from library join bac on + library.bac_id=bac.id where bac.lib_name="Medicago"; + select seq_name from sequence where seq_name like 'MBE%' + and trash is null; + """ + p = OptionParser(libs.__doc__) + p.set_db_opts(dbname="track", credentials=None) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (libfile,) = args + + sqlcmd = ( + "select library.lib_id, library.name, bac.gb# from library join bac on " + + "library.bac_id=bac.id where bac.lib_name='Medicago'" + ) + cur = connect(opts.dbname) + results = fetchall(cur, sqlcmd) + + fw = open(libfile, "w") + for lib_id, name, gb in results: + name = name.translate(None, "\n") + if not gb: + gb = "None" + + print("|".join((lib_id, name, gb)), file=fw) + fw.close() + + +def pull(args): + """ + %prog pull libfile + + Pull the sequences using the first column in the libfile. + """ + p = OptionParser(pull.__doc__) + p.set_db_opts(dbname="mtg2", credentials=None) + p.add_argument( + "--frag", + default=False, + action="store_true", + help="The command to pull sequences from db", + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (libfile,) = args + + dbname = opts.dbname + frag = opts.frag + fp = open(libfile) + hostname, username, password = get_profile() + + for row in fp: + lib_id, name = row.split("|", 1) + sqlfile = lib_id + ".sql" + + if not op.exists(sqlfile): + fw = open(sqlfile, "w") + print( + "select seq_name from sequence where seq_name like" + + " '{0}%' and trash is null".format(lib_id), + file=fw, + ) + fw.close() + + if frag: + cmd = "pullfrag -D {0} -n {1}.sql -o {1} -q -S {2}".format( + dbname, lib_id, hostname + ) + cmd += " -U {0} -P {1}".format(username, password) + else: + cmd = "pullseq -D {0} -n {1}.sql -o {1} -q".format(dbname, lib_id) + sh(cmd) + + +to_commit_re = re.compile( + "|".join("^{0}".format(x) for x in ("update", "insert", "delete")), re.I +) + + 
+def to_commit(query): + """ + check if query needs to be committed (only if "update", "insert" or "delete") + """ + if re.search(to_commit_re, query): + return True + return None + + +def query(args): + """ + %prog query "SELECT feat_name FROM asm_feature WHERE feat_type = \\"{0}\\" AND end5 <= \\"{1}\\" AND end3 >= \\"{2}\\"" ::: datafile1 .... + + Script takes the data from tab-delimited datafile(s) and replaces the placeholders + in the query which is then executed. Depending upon the type of query, results are + either printed out (when running `select`) or not (when running `insert`, `update` + or `delete`) + + If the query contains quotes around field values, then these need to be escaped with \\ + """ + p = OptionParser(query.__doc__) + p.set_db_opts() + p.add_argument( + "--dryrun", + default=False, + action="store_true", + help="Don't commit to database. Just print queries", + ) + p.set_sep(help="Specify output field separator") + p.set_verbose(help="Print out all the queries") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) == 0: + sys.exit(not p.print_help()) + + fieldsep = opts.sep + + sep = ":::" + files = None + if sep in args: + sepidx = args.index(sep) + files = args[sepidx + 1 :] + args = args[:sepidx] + if not files: + files = [""] + + qrys = [] + qry = " ".join(args) + if ";" in qry: + for q in qry.split(";"): + if len(q.strip()) > 0: + qrys.append(q) + else: + qrys.append(qry) + + queries = set() + if files: + for datafile in files: + datafile = datafile.strip() + fp = must_open(datafile) + for row in fp: + for qry in qrys: + qry = qry.strip() + m = re.findall(r"{\d+}", qry) + if m: + mi = [int(x.strip("{}")) for x in m] + atoms = row.strip().split("\t") + assert max(mi) <= len( + atoms + ), "Number of columns in `datafile`({0})".format( + len(atoms) + ) + " != number of `placeholders`({0})".format( + len(m) + ) + natoms = [atoms[x] for x in mi] + for idx, (match, atom) in enumerate(zip(m, natoms)): + qry = 
def _powershell_quote(value):
    """Return `value` as a PowerShell single-quoted (verbatim) string literal."""
    # Inside single quotes PowerShell expands nothing; an embedded single
    # quote is written by doubling it.
    return "'" + str(value).replace("'", "''") + "'"


def download_file_powershell(url, target, cookies=None):
    """
    Download the file at url to target using Powershell (which will validate
    trust). Raise an exception if the command cannot complete.

    Args:
        url: Address of the file to fetch.
        target: Destination path; converted to an absolute path first.
        cookies: Not supported by this downloader.

    Raises:
        NotImplementedError: If `cookies` is given.
        subprocess.CalledProcessError: If the PowerShell command fails.
    """
    if cookies:
        raise NotImplementedError
    target = os.path.abspath(target)
    # Both arguments must reach PowerShell as string literals; unquoted they
    # are parsed as bare tokens and the DownloadFile() call is a syntax error.
    command = "(new-object System.Net.WebClient).DownloadFile({0}, {1})".format(
        _powershell_quote(url), _powershell_quote(target)
    )
    cmd = ["powershell", "-Command", command]
    subprocess.check_call(cmd)
class Grouper(object):
    """
    This class provides a lightweight way to group arbitrary objects
    together into disjoint sets when a full-blown graph data structure
    would be overkill.

    Objects can be joined using .join(), tested for connectedness
    using .joined(), and all disjoint sets can be retrieved using list(g)
    The objects being joined must be hashable.

    Internally every member maps to the list object holding its whole group,
    so two members are connected exactly when they map to the same list.

    >>> g = Grouper()
    >>> g.join('a', 'b')
    >>> g.join('b', 'c')
    >>> g.join('d', 'e')
    >>> list(g)
    [['a', 'b', 'c'], ['d', 'e']]
    >>> g.joined('a', 'b')
    True
    >>> g.joined('a', 'c')
    True
    >>> 'f' in g
    False
    >>> g.joined('a', 'd')
    False
    >>> del g['b']
    >>> list(g)
    [['a', 'c'], ['d', 'e']]
    """

    def __init__(self, init=None):
        """
        Create a grouper where each element of `init` is its own group.
        """
        # `None` replaces the former shared `[]` default argument.
        mapping = self._mapping = {}
        for x in () if init is None else init:
            mapping[x] = [x]

    def join(self, a, *args):
        """
        Join given arguments into the same set. Accepts one or more arguments.
        """
        mapping = self._mapping
        set_a = mapping.setdefault(a, [a])

        for arg in args:
            set_b = mapping.get(arg)
            if set_b is None:
                # First time we see `arg`: it simply joins a's group.
                set_a.append(arg)
                mapping[arg] = set_a
            elif set_b is not set_a:
                # Merge the smaller group into the larger one so fewer
                # members need to be re-pointed.
                if len(set_b) > len(set_a):
                    set_a, set_b = set_b, set_a
                set_a.extend(set_b)
                for elem in set_b:
                    mapping[elem] = set_a

    def joined(self, a, b):
        """
        Returns True if a and b are members of the same set.
        """
        mapping = self._mapping
        try:
            return mapping[a] is mapping[b]
        except KeyError:
            return False

    def __iter__(self):
        """
        Returns an iterator returning each of the disjoint sets as a list.
        """
        seen = set()
        for elem, group in self._mapping.items():
            if elem not in seen:
                yield group
                seen.update(group)

    def __getitem__(self, key):
        """
        Returns the set that a certain key belongs to, as a tuple.
        """
        return tuple(self._mapping[key])

    def __contains__(self, key):
        """Return True if `key` has been added to any group."""
        return key in self._mapping

    def __len__(self):
        """Return the number of disjoint groups."""
        # All members of a group share one list object, so counting distinct
        # identities counts groups without copying each group per member.
        return len({id(group) for group in self._mapping.values()})

    def __delitem__(self, key):
        """Remove `key` from its group; raises KeyError if it is unknown."""
        group = self._mapping[key]
        group.remove(key)
        del self._mapping[key]

    @property
    def num_members(self):
        """Total number of members across all groups."""
        return sum(len(x) for x in self)

    def keys(self):
        """Return a view of every member currently tracked."""
        return self._mapping.keys()
class DefaultOrderedDict(OrderedDict):
    """
    An OrderedDict that creates missing values with `default_factory`,
    combining the behaviour of `defaultdict` and `OrderedDict`.

    Args:
        default_factory: Zero-argument callable producing the value for a
            missing key, or None to raise KeyError like a plain dict.
        *a, **kw: Forwarded unchanged to `OrderedDict.__init__`.

    Raises:
        TypeError: If `default_factory` is neither None nor callable.
    """

    def __init__(self, default_factory=None, *a, **kw):
        if default_factory is not None and not isinstance(default_factory, Callable):
            raise TypeError("first argument must be callable")
        OrderedDict.__init__(self, *a, **kw)
        self.default_factory = default_factory

    def __getitem__(self, key):
        try:
            return OrderedDict.__getitem__(self, key)
        except KeyError:
            return self.__missing__(key)

    def __missing__(self, key):
        """Create, store and return the default for `key`, or raise KeyError."""
        if self.default_factory is None:
            raise KeyError(key)
        self[key] = value = self.default_factory()
        return value

    def __reduce__(self):
        """Support pickling: rebuild from the factory, then replay the items."""
        if self.default_factory is None:
            args = tuple()
        else:
            args = (self.default_factory,)
        # pickle requires the fifth element to be an iterator; a bare
        # `items()` view is rejected on Python 3.
        return type(self), args, None, None, iter(self.items())

    def copy(self):
        """Return a shallow copy that keeps the same default factory."""
        return self.__copy__()

    def __copy__(self):
        return type(self)(self.default_factory, self)

    def __deepcopy__(self, memo):
        import copy

        # Materialise the view first: dict views cannot be deep-copied.
        # Passing `memo` keeps shared references shared inside the copy.
        return type(self)(
            self.default_factory, copy.deepcopy(list(self.items()), memo)
        )

    def __repr__(self):
        return OrderedDict.__repr__(self)
class SortedCollection(object):
    """Sequence sorted by a key function.

    SortedCollection() is much easier to work with than using bisect() directly.
    It supports key functions like those used in sorted(), min(), and max().
    The result of the key function call is saved so that keys can be searched
    efficiently.

    Instead of returning an insertion-point which can be hard to interpret, the
    five find-methods return a specific item in the sequence.  They can scan for
    exact matches, the last item less-than-or-equal to a key, or the first item
    greater-than-or-equal to a key.

    NOTE: in this implementation the find-methods apply the key function to
    their argument.  Pass a probe *item* shaped like the stored items (as in
    the examples below), not a bare key value.

    Once found, an item's ordinal position can be located with the index() method.
    New items can be added with the insert() and insert_right() methods.
    Old items can be deleted with the remove() method.

    The usual sequence methods are provided to support indexing, slicing,
    length lookup, clearing, copying, forward and reverse iteration, contains
    checking, item counts, item removal, and a nice looking repr.

    Finding and indexing are O(log n) operations while iteration and insertion
    are O(n).  The initial sort is O(n log n).

    The key function is stored in the 'key' attribute for easy introspection or
    so that you can assign a new key function (triggering an automatic re-sort).

    In short, the class was designed to handle all of the common use cases for
    bisect but with a simpler API and support for key functions.

    >>> from pprint import pprint
    >>> from operator import itemgetter

    >>> s = SortedCollection(key=itemgetter(2))
    >>> for record in [
    ...         ('roger', 'young', 30),
    ...         ('angela', 'jones', 28),
    ...         ('bill', 'smith', 22),
    ...         ('david', 'thomas', 32)]:
    ...     s.insert(record)

    >>> pprint(list(s))         # show records sorted by age
    [('bill', 'smith', 22),
     ('angela', 'jones', 28),
     ('roger', 'young', 30),
     ('david', 'thomas', 32)]

    >>> s.find_le(('', '', 29))     # find oldest person aged 29 or younger
    ('angela', 'jones', 28)
    >>> s.find_lt(('', '', 28))     # find oldest person under 28
    ('bill', 'smith', 22)
    >>> s.find_gt(('', '', 28))     # find youngest person over 28
    ('roger', 'young', 30)

    >>> r = s.find_ge(('', '', 32)) # find youngest person aged 32 or older
    >>> s.index(r)              # get the index of their record
    3
    >>> s[3]                    # fetch the record at that index
    ('david', 'thomas', 32)

    >>> s.key = itemgetter(0)   # now sort by first name
    >>> pprint(list(s))
    [('angela', 'jones', 28),
     ('bill', 'smith', 22),
     ('david', 'thomas', 32),
     ('roger', 'young', 30)]

    """

    def __init__(self, iterable=(), key=None):
        # Remember the key exactly as given (possibly None) for repr/pickling.
        self._given_key = key
        key = (lambda x: x) if key is None else key
        # Sorting (key, item) pairs means ties on the key fall back to
        # comparing the items themselves.
        decorated = sorted((key(item), item) for item in iterable)
        # Parallel lists: _keys[i] is the cached key of _items[i].
        self._keys = [k for k, item in decorated]
        self._items = [item for k, item in decorated]
        self._key = key

    def _getkey(self):
        return self._key

    def _setkey(self, key):
        # Re-running __init__ re-sorts the existing items under the new key.
        if key is not self._key:
            self.__init__(self._items, key=key)

    def _delkey(self):
        self._setkey(None)

    key = property(_getkey, _setkey, _delkey, "key function")

    def clear(self):
        """Remove all items, keeping the current key function."""
        self.__init__([], self._key)

    def copy(self):
        """Return a new collection with the same items and key function."""
        return self.__class__(self, self._key)

    def __len__(self):
        return len(self._items)

    def __getitem__(self, i):
        return self._items[i]

    def __iter__(self):
        return iter(self._items)

    def __reversed__(self):
        return reversed(self._items)

    def __repr__(self):
        return "%s(%r, key=%s)" % (
            self.__class__.__name__,
            self._items,
            getattr(self._given_key, "__name__", repr(self._given_key)),
        )

    def __reduce__(self):
        return self.__class__, (self._items, self._given_key)

    def __contains__(self, item):
        # Only the slice of items sharing item's key needs to be scanned.
        k = self._key(item)
        i = bisect_left(self._keys, k)
        j = bisect_right(self._keys, k)
        return item in self._items[i:j]

    def index(self, item):
        """Find the position of an item.  Raise ValueError if not found."""
        k = self._key(item)
        i = bisect_left(self._keys, k)
        j = bisect_right(self._keys, k)
        return self._items[i:j].index(item) + i

    def count(self, item):
        """Return number of occurrences of item"""
        k = self._key(item)
        i = bisect_left(self._keys, k)
        j = bisect_right(self._keys, k)
        return self._items[i:j].count(item)

    def insert(self, item):
        """Insert a new item.  If equal keys are found, add to the left"""
        k = self._key(item)
        i = bisect_left(self._keys, k)
        self._keys.insert(i, k)
        self._items.insert(i, item)

    def insert_right(self, item):
        """Insert a new item.  If equal keys are found, add to the right"""
        k = self._key(item)
        i = bisect_right(self._keys, k)
        self._keys.insert(i, k)
        self._items.insert(i, item)

    def remove(self, item):
        """Remove first occurrence of item.  Raise ValueError if not found"""
        i = self.index(item)
        del self._keys[i]
        del self._items[i]

    def find(self, item):
        """Return first stored item whose key == key(item).  Raise ValueError if not found."""
        k = self._key(item)
        i = bisect_left(self._keys, k)
        if i != len(self) and self._keys[i] == k:
            return self._items[i]
        raise ValueError("No item found with key equal to: %r" % (k,))

    def find_le(self, item):
        """Return last stored item whose key <= key(item).  Raise ValueError if not found."""
        k = self._key(item)
        i = bisect_right(self._keys, k)
        if i:
            return self._items[i - 1]
        raise ValueError("No item found with key at or below: %r" % (k,))

    def find_lt(self, item):
        """Return last stored item whose key < key(item).  Raise ValueError if not found."""
        k = self._key(item)
        i = bisect_left(self._keys, k)
        if i:
            return self._items[i - 1]
        raise ValueError("No item found with key below: %r" % (k,))

    def find_ge(self, item):
        """Return first stored item whose key >= key(item).  Raise ValueError if not found"""
        k = self._key(item)
        i = bisect_left(self._keys, k)
        if i != len(self):
            return self._items[i]
        raise ValueError("No item found with key at or above: %r" % (k,))

    def find_gt(self, item):
        """Return first stored item whose key > key(item).  Raise ValueError if not found"""
        k = self._key(item)
        i = bisect_right(self._keys, k)
        if i != len(self):
            return self._items[i]
        raise ValueError("No item found with key above: %r" % (k,))
def range_overlap(a, b, ratio=False):
    """
    Return the size of the overlap between two (chr, start, end) ranges.

    Coordinates are inclusive and each range may be given in either
    orientation. Ranges on different chromosomes never overlap. With
    ratio=True the overlap is returned as a fraction of the shorter of the
    two ranges instead of a count.

    >>> range_overlap(("1", 30, 45), ("1", 41, 55))
    5
    >>> range_overlap(("1", 21, 45), ("1", 41, 75), ratio=True)
    0.2
    >>> range_overlap(("1", 30, 45), ("1", 15, 55))
    16
    >>> range_overlap(("1", 30, 45), ("1", 15, 55), ratio=True)
    1.0
    >>> range_overlap(("1", 30, 45), ("1", 57, 68))
    0
    >>> range_overlap(("1", 30, 45), ("2", 42, 55))
    0
    >>> range_overlap(("1", 30, 45), ("2", 42, 55), ratio=True)
    0.0
    """
    a_chr, a_lo, a_hi = a
    b_chr, b_lo, b_hi = b
    # Normalise so that lo <= hi whatever the orientation of the input.
    a_lo, a_hi = sorted((a_lo, a_hi))
    b_lo, b_hi = sorted((b_lo, b_hi))
    shorter = min(a_hi - a_lo, b_hi - b_lo) + 1

    if a_chr != b_chr:
        # must be on the same chromosome
        ov = 0
    else:
        # The overlap can never exceed the shorter range, and is clamped at
        # zero when the ranges are disjoint.
        ov = max(0, min(shorter, a_hi - b_lo + 1, b_hi - a_lo + 1))

    return ov / float(shorter) if ratio else ov
+ + distmode is ss, se, es, ee and sets the place on read one and two to + measure the distance (s = start, e = end) + + >>> range_distance(("1", 30, 45, '+'), ("1", 45, 55, '+')) + (26, '++') + >>> range_distance(("1", 30, 45, '-'), ("1", 57, 68, '-')) + (39, '--') + >>> range_distance(("1", 30, 42, '-'), ("1", 45, 55, '+')) + (26, '-+') + >>> range_distance(("1", 30, 42, '+'), ("1", 45, 55, '-'), distmode='ee') + (2, '+-') + """ + assert distmode in ("ss", "ee") + + a_chr, a_min, a_max, a_strand = a + b_chr, b_min, b_max, b_strand = b + # must be on the same chromosome + if a_chr != b_chr: + dist = -1 + # elif range_overlap(a[:3], b[:3]): + # dist = 0 + else: + # If the two ranges do not overlap, check stranded-ness and distance + if a_min > b_min: + a_min, b_min = b_min, a_min + a_max, b_max = b_max, a_max + a_strand, b_strand = b_strand, a_strand + + if distmode == "ss": + dist = b_max - a_min + 1 + elif distmode == "ee": + dist = b_min - a_max - 1 + + orientation = a_strand + b_strand + + return dist, orientation + + +def range_minmax(ranges): + """ + Returns the span of a collection of ranges where start is the smallest of + all starts, and end is the largest of all ends. + + >>> ranges = [(30, 45), (40, 50), (10, 100)] + >>> range_minmax(ranges) + (10, 100) + """ + rmin = min(ranges)[0] + rmax = max(ranges, key=lambda x: x[1])[1] + return rmin, rmax + + +def range_closest(ranges, b, left=True): + """ + Returns the range that's closest to the given position. Notice that the + behavior is to return ONE closest range to the left end (if left is True). + This is a SLOW method. 
def range_interleave(ranges, sizes=None, empty=False):
    """
    Returns the ranges in between the given ranges.

    The input is merged with range_merge() first, so overlapping ranges
    produce no gap. When `sizes` maps a seqid to its length, the leading gap
    (from position 1) and the trailing gap (up to that length) are reported
    too. With empty=True a None placeholder is appended wherever a gap
    would be empty.

    >>> ranges = [("1", 30, 40), ("1", 45, 50), ("1", 10, 30)]
    >>> range_interleave(ranges)
    [('1', 41, 44)]
    >>> ranges = [("1", 30, 40), ("1", 42, 50)]
    >>> range_interleave(ranges)
    [('1', 41, 41)]
    >>> range_interleave(ranges, sizes={"1": 70})
    [('1', 1, 29), ('1', 41, 41), ('1', 51, 70)]
    """
    # `None` replaces the former shared `{}` default argument.
    if sizes is None:
        sizes = {}

    ranges = range_merge(ranges)
    interleaved_ranges = []

    for ch, cranges in groupby(ranges, key=lambda x: x[0]):
        cranges = list(cranges)
        size = sizes.get(ch, None)

        # Gap before the first range (only known when the size is given).
        if size:
            ch, astart, aend = cranges[0]
            if astart > 1:
                interleaved_ranges.append((ch, 1, astart - 1))
            elif empty:
                interleaved_ranges.append(None)

        # Gaps between consecutive merged ranges.
        for a, b in pairwise(cranges):
            ch, astart, aend = a
            ch, bstart, bend = b
            istart, iend = aend + 1, bstart - 1
            if istart <= iend:
                interleaved_ranges.append((ch, istart, iend))
            elif empty:
                interleaved_ranges.append(None)

        # Gap after the last range, up to the end of the sequence.
        if size:
            ch, astart, aend = cranges[-1]
            if aend < size:
                interleaved_ranges.append((ch, aend + 1, size))
            elif empty:
                interleaved_ranges.append(None)

    return interleaved_ranges
def range_span(ranges):
    """
    Returns the total span between the left most range to the right most range.

    Ranges are (seqid, start, end) tuples. The span is computed per seqid,
    from the smallest coordinate to the largest coordinate, and the
    per-seqid spans are summed. The input is not modified.

    >>> ranges = [("1", 30, 45), ("1", 40, 50), ("1", 10, 50)]
    >>> range_span(ranges)
    41
    >>> ranges = [("1", 30, 45), ("2", 40, 50)]
    >>> range_span(ranges)
    27
    >>> ranges = [("1", 30, 45), ("1", 45, 50)]
    >>> range_span(ranges)
    21
    >>> range_span([("1", 10, 100), ("1", 30, 45)])
    91
    >>> range_span([])
    0
    """
    if not ranges:
        return 0

    total = 0
    # groupby() needs the seqids contiguous; sort a copy so the caller's
    # list keeps its order.
    for _, group in groupby(sorted(ranges), key=lambda x: x[0]):
        group = list(group)
        # Scan every range: the one with the largest start does not
        # necessarily own the right-most end.
        right = max(max(r[1:3]) for r in group)
        left = min(min(r[1:3]) for r in group)
        total += right - left + 1
    return total
def range_chain(ranges):
    """
    Take list of weighted intervals, find non-overlapping set with max weight.
    We proceed with each end point (sorted by their relative positions).

    The input is a list of 5-field ranges (seqid, start, end, score, id), as
    unpacked by _make_endpoints(); the output is a tuple of (the subset of
    non-overlapping ranges that gives the highest total score, that score).
    Ranges are referred to by their position in `ranges`, not by their `id`
    field.

    >>> ranges = [Range("1", 0, 9, 22, 0), Range("1", 3, 18, 24, 1), Range("1", 10, 28, 20, 2)]
    >>> range_chain(ranges)
    ([Range(seqid='1', start=0, end=9, score=22, id=0), Range(seqid='1', start=10, end=28, score=20, id=2)], 42)
    >>> ranges = [Range("2", 0, 1, 3, 0), Range("2", 1, 4, 3, 1), Range("3", 5, 7, 3, 2)]
    >>> range_chain(ranges)
    ([Range(seqid='2', start=0, end=1, score=3, id=0), Range(seqid='3', start=5, end=7, score=3, id=2)], 6)
    """
    # Sorted (seqid, pos, LEFT/RIGHT, range_index, score) tuples. At equal
    # positions a LEFT end sorts before a RIGHT end, so ranges that merely
    # touch are treated as overlapping (see the second example above).
    endpoints = _make_endpoints(ranges)

    # stores the left end index for quick retrieval
    left_index = {}
    # dynamic programming, each entry [score, from_index, which_chain]:
    # best total score up to this endpoint, the endpoint index to continue
    # backtracking from, and the range chosen last (-1 when none)
    scores = []

    for i, (seqid, pos, leftright, j, score) in enumerate(endpoints):

        # By default carry the best solution of the previous endpoint forward
        # (copied, so the stored entry is not aliased).
        cur_score = [0, -1, -1] if i == 0 else scores[-1][:]

        if leftright == LEFT:
            left_index[j] = i

        else:  # this is right end of j-th interval
            # update if chaining j-th interval gives a better score
            left_j = left_index[j]
            # Best score achievable at j's left end, plus j's own score.
            chain_score = scores[left_j][0] + score
            if chain_score > cur_score[0]:
                cur_score = [chain_score, left_j, j]

        scores.append(cur_score)

    chains = []
    score, last, chain_id = scores[-1]  # start backtracking
    # Follow the from_index links until the initial [0, -1, -1] entry.
    while last != -1:
        if chain_id != -1:
            chains.append(chain_id)
        _, last, chain_id = scores[last]

    # Backtracking collected the ranges right-to-left.
    chains.reverse()

    selected = [ranges[x] for x in chains]

    return selected, score
def banner(header, rows, major="=", minor="-"):
    """
    Frame `header` and `rows` with horizontal rulers.

    The rulers are as wide as the longest physical line found in the header
    or in any row (entries may span several lines).

    Args:
        header: Header text.
        rows: Sequence of already formatted row strings.
        major: Character used for the top edge.
        minor: Character used for the separators under the header and at the
            bottom.

    Returns:
        A string made of the top edge, header, separator, rows and a closing
        separator, joined by newlines.
    """
    formatted = [header, *rows]
    # `default=0` covers empty strings, whose splitlines() yields nothing;
    # the bare max() used to raise ValueError there.
    rulersize = max(
        max((len(line) for line in text.splitlines()), default=0)
        for text in formatted
    )
    table_edge = major * rulersize
    table_sep = minor * rulersize
    body = "\n".join(rows)

    return "\n".join((table_edge, header, table_sep, body, table_sep))
allcontents = [header] + contents if header else contents + cols = len(contents[0]) + for content in allcontents: + assert len(content) == cols + + # Stringify the contents + for i, content in enumerate(allcontents): + if thousands: + content = [int(x) if is_number(x, cast=int) else x for x in content] + content = [ + th(x) if (is_number(x, cast=int) and x >= 1000) else x for x in content + ] + allcontents[i] = [str(x) for x in content] + + colwidths = [max(len(x[i]) for x in allcontents) for i in range(cols)] + sep += " " + formatted_contents = [] + for content in allcontents: + rjusted = ( + [x.rjust(cw) for x, cw in zip(content, colwidths)] if align else content + ) + formatted = sep.join(rjusted) + formatted_contents.append(formatted) + + return formatted_contents + + +def write_csv( + header, + contents, + sep=",", + filename="stdout", + thousands=False, + tee=False, + align=True, + comment=False, +): + """ + Write csv that are aligned with the column headers. + + >>> header = ["x_value", "y_value"] + >>> contents = [(1, 100), (2, 200)] + >>> write_csv(header, contents) + x_value, y_value + 1, 100 + 2, 200 + """ + from jcvi.formats.base import must_open + + formatted = load_csv(header, contents, sep=sep, thousands=thousands, align=align) + if comment: + formatted[0] = "#" + formatted[0][1:] + formatted = "\n".join(formatted) + output = must_open(filename, "w") + print(formatted, file=output) + if tee and filename != "stdout": + print(formatted) + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/jcvi/utils/taxonomy.py b/jcvi/utils/taxonomy.py new file mode 100644 index 00000000..dd79fec4 --- /dev/null +++ b/jcvi/utils/taxonomy.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +r""" +From my blog post: + + +Example: +>>> mylist = [3702, 3649, 3694, 3880] +>>> t = TaxIDTree(mylist) +>>> print t +(((Carica_papaya,Arabidopsis_thaliana)Brassicales,(Medicago_truncatula,Populus_trichocarpa)fabids)rosids); +>>> 
t.print_tree() + + /-Carica_papaya + + /---| + + | \-Arabidopsis_thaliana + +---- /---| + + | /-Medicago_truncatula + + \---| + + \-Populus_trichocarpa +""" +import sys +import time + +from functools import lru_cache + +from urllib.request import urlopen +from urllib.error import HTTPError, URLError + +from ete3 import Tree + +from ClientForm import ParseResponse +from BeautifulSoup import BeautifulSoup + +from ..apps.base import ActionDispatcher, OptionParser, logger + + +URL = "http://itol.embl.de/other_trees.shtml" + + +class TaxIDTree(object): + def __init__(self, list_of_taxids): + # If only one taxid provided, get full tree with nameExp + # else, get default tree + if isinstance(list_of_taxids, int): # single taxon + list_of_taxids = [list_of_taxids] + form_element_id = "nameExp" + else: + form_element_id = "nameCol" + + # the data to send in + form_data = "\n".join(str(x) for x in list_of_taxids) + + success = False + while not success: + try: + response = urlopen(URL) + success = True + except (URLError, HTTPError, RuntimeError) as e: + logger.error(e) + logger.debug("wait 5 seconds to reconnect...") + time.sleep(5) + + forms = ParseResponse(response, backwards_compat=False) + form = forms[0] + + form["ncbiIDs"] = form_data + page = urlopen(form.click()).read() + soup = BeautifulSoup(page) + + self.newick = "" + for element in soup("textarea"): + + if element["id"] == form_element_id: + self.newick = str(element.contents[0]) + + if self.newick == "": + print(soup) + + def __str__(self): + return self.newick + + def print_tree(self): + t = Tree(self.newick, format=8) + print(t) + + +def get_names(list_of_taxids): + """ + >>> mylist = [3702, 3649, 3694, 3880] + >>> get_names(mylist) + ['Arabidopsis thaliana', 'Carica papaya', 'Populus trichocarpa', 'Medicago truncatula'] + """ + from jcvi.apps.fetch import batch_taxonomy + + list_of_taxids = [str(x) for x in list_of_taxids] + return list(batch_taxonomy(list_of_taxids)) + + +def get_taxids(list_of_names): + 
""" + >>> mylist = ['Arabidopsis thaliana', 'Carica papaya'] + >>> get_taxids(mylist) + [1, 2] + """ + from jcvi.apps.fetch import batch_taxids + + return [int(x) for x in batch_taxids(list_of_names)] + + +def MRCA(list_of_taxids): + """ + This gets the most recent common ancester (MRCA) for a list of taxids + + >>> mylist = [3702, 3649, 3694, 3880] + >>> MRCA(mylist) + 'rosids' + """ + + t = TaxIDTree(list_of_taxids) + t = Tree(str(t), format=8) + + ancestor = t.get_common_ancestor(*t.get_leaves()) + + return ancestor.name + + +@lru_cache(maxsize=None) +def isPlantOrigin(taxid): + """ + Given a taxid, this gets the expanded tree which can then be checked to + see if the organism is a plant or not + + >>> isPlantOrigin(29760) + True + """ + + assert isinstance(taxid, int) + + t = TaxIDTree(taxid) + try: + return "Viridiplantae" in str(t) + except AttributeError: + raise ValueError("{0} is not a valid ID".format(taxid)) + + +def main(): + + actions = ( + ("newick", "query a list of IDs to newick"), + ("test", "test taxonomy module"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def test(args): + print("Testing isPlantOrigin():") + print(3702, isPlantOrigin(3702)) # Arabidopsis thaliana + print(10090, isPlantOrigin(10090)) # Mus musculus + + print("\nTest cache by 10K calls:") + for i in range(10000): + isPlantOrigin(3702) + isPlantOrigin(10090) + print("done") + + print("\nTest invalid ID:") + print(10099, isPlantOrigin(10099)) # Wrong ID + + +def newick(args): + """ + %prog newick idslist + + Query a list of IDs to retrieve phylogeny. 
+ """ + p = OptionParser(newick.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (idsfile,) = args + mylist = [x.strip() for x in open(idsfile) if x.strip()] + print(get_taxids(mylist)) + + t = TaxIDTree(mylist) + print(t) + + +if __name__ == "__main__": + main() diff --git a/jcvi/utils/validator.py b/jcvi/utils/validator.py new file mode 100644 index 00000000..6836e891 --- /dev/null +++ b/jcvi/utils/validator.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Simple validator to make sure certain values match expectation. +""" + +from typing import Collection, Union, TypeVar + +ComparableType = Union[int, float] +T = TypeVar("T") + + +class ValidationError(Exception): + pass + + +def validate_in_choices(value: T, choices: Collection[T], tag: str = "Value") -> bool: + """ + Validate if certain value is among a collection. + Args: + value: value of interest + choices (Collection): a collection (list, tuple, dict, set etc.) of values + tag (str): the semantic meaning of value to be shown in error + + Returns: + True if validation passes. Raises ValidationError if it fails + """ + if value not in choices: + raise ValidationError(f"{tag} must be one of {choices}, you have: {value}") + return True + + +def validate_in_range( + value: ComparableType, + min_value: ComparableType, + max_value: ComparableType, + tag: str = "Value", +) -> bool: + """ + Validate if certain value is numerically within range. + + Args: + value: value of interest + min_value: minimum expected value + max_value: maximum expected value + tag (str): the semantic meaning of value to be shown in error + + Returns: + True if validation passes. Raises ValidationError if it fails. 
def closest_color(requested_color):
    """
    Find closest color name for the request RGB tuple.

    Every CSS3 named color is compared against `requested_color` with
    `color_diff`, and the name with the smallest difference is returned.
    """
    # DEBUG-level logging is silenced while the comparisons run.
    logging.disable(logging.DEBUG)
    try:
        colors = [
            (color_diff(hex_to_rgb(hex_code), requested_color), name)
            for name, hex_code in _CSS3_NAMES_TO_HEX.items()
        ]
    finally:
        # Always re-enable logging, even when a color conversion raises;
        # otherwise DEBUG output stays disabled for the rest of the process.
        logging.disable(logging.NOTSET)
    _, min_color = min(colors)

    return min_color
class CopyNumberHMM(object):
    """
    Gaussian HMM with fixed (not fitted) parameters that segments per-bin
    copy number values into runs of constant copy number.
    """

    def __init__(
        self, workdir, betadir="beta", mu=0.003, sigma=10, step=0.1, threshold=0.2
    ):
        """
        Args:
            workdir: folder holding `<samplekey>-cn/<samplekey>.<chr>.cn` files.
            betadir: folder holding `<chr>.beta` and `<chr>.std` files; synced
                from S3 when it does not exist.
            mu: off-diagonal value of the transition matrix.
            sigma: value assigned to the covariance of every state.
            step: spacing between the means of consecutive states.
            threshold: bins whose `.std` value exceeds this are set to NaN.
        """
        self.model = self.initialize(mu=mu, sigma=sigma, step=step)
        self.workdir = workdir
        self.betadir = betadir
        if not op.exists(betadir):
            sync_from_s3("s3://hli-mv-data-science/htang/ccn/beta", target_dir=betadir)
        self.mu = mu
        self.sigma = sigma
        self.step = step
        self.threshold = threshold

    def run(self, samplekey, chrs=allsomes):
        """Run `run_one` on each chromosome and return all events in one list."""
        if isinstance(chrs, str):
            chrs = [chrs]
        allevents = []
        for chr in chrs:
            X, Z, clen, events = self.run_one(samplekey, chr)
            allevents.extend(events)
        return allevents

    def run_one(self, samplekey, chr):
        """
        Segment one chromosome of one sample.

        Returns:
            (X, Z, clen, events): the normalized observations, the predicted
            copy number per bin, the number of bins, and a list of
            (mean_cn, rr, CopyNumberSegment) sorted by segment start.
        """
        cov = np.fromfile(
            "{}/{}-cn/{}.{}.cn".format(self.workdir, samplekey, samplekey, chr)
        )
        # Read the correction files from the configured betadir. The path was
        # previously hard-coded to "beta/", which ignored the constructor
        # argument (and the directory that __init__ synced from S3).
        beta = np.fromfile(op.join(self.betadir, "{}.beta".format(chr)))
        std = np.fromfile(op.join(self.betadir, "{}.std".format(chr)))
        # Check if the two arrays have different dimensions; pad the shorter
        # one with NaN so that the division below is element-wise
        clen, blen = cov.shape[0], beta.shape[0]
        tlen = max(clen, blen)
        if tlen > clen:
            cov = np.array(list(cov) + [np.nan] * (tlen - clen))
        elif tlen > blen:
            beta = np.array(list(beta) + [np.nan] * (tlen - blen))
        clen, blen = cov.shape[0], beta.shape[0]
        assert clen == blen, "cov ({}) and correction ({}) not same dimension".format(
            clen, blen
        )
        normalized = cov / beta
        fixed = normalized.copy()
        fixed[np.where(std > self.threshold)] = np.nan
        X = fixed
        Z = self.predict(X)

        med_cn = np.median(fixed[np.isfinite(fixed)])
        print(chr, med_cn)

        # Annotate segments
        segments = self.annotate_segments(Z)
        events = []
        for mean_cn, rr in segments:
            ss = fixed[rr[0] : rr[1]]
            realbins = np.sum(np.isfinite(ss))
            # Determine whether this is an outlier
            segment = self.tag(chr, mean_cn, rr, med_cn, realbins)
            if segment:
                events.append((mean_cn, rr, segment))
        events.sort(key=lambda x: x[-1].start)

        # Send some debug info to screen
        for mean_cn, rr, segment in events:
            print(segment)

        return X, Z, clen, events

    def tag(self, chr, mean_cn, rr, med_cn, realbins, base=2):
        """
        Decide whether a segment deviates from the expected copy number.

        Returns:
            None when the segment has at most one real bin or sits within
            `around_value` of the expected copy number; otherwise a
            CopyNumberSegment tagged "DUP" (mean_cn > base) or "DEL".
        """
        around_0 = around_value(mean_cn, 0)
        around_1 = around_value(mean_cn, 1)
        around_2 = around_value(mean_cn, 2)
        # Only chrX segments can be flagged as PAR
        is_PAR = False
        if realbins <= 1:  # Remove singleton bins
            return
        if chr == "chrX":
            start, end = rr
            is_PAR = end < 5000 or start > 155000
            if med_cn < 1.25:  # Male
                # PAR ~ 2, rest ~ 1
                if is_PAR:
                    base = 2
                    if around_2:
                        return
                else:
                    base = 1
                    if around_1:
                        return
            else:
                # All ~ 2
                if around_2:
                    return
        elif chr == "chrY":
            if med_cn < 0.25:  # Female
                base = 0
                if around_0:
                    return
            else:
                base = 1
                if around_1:
                    return
        else:
            if around_2:
                return
        tag = "DUP" if mean_cn > base else "DEL"
        # Pass the computed flag; it used to be hard-coded to False, so the
        # ":PAR" marker in CopyNumberSegment.__str__ could never appear.
        segment = CopyNumberSegment(
            chr, rr, tag, mean_cn, realbins, is_PAR=bool(is_PAR)
        )
        return segment

    def initialize(self, mu, sigma, step):
        """Build a GaussianHMM with int(10 / step) states and preset parameters."""
        from hmmlearn import hmm

        # Initial population probability
        n = int(10 / step)
        startprob = 1.0 / n * np.ones(n)
        transmat = mu * np.ones((n, n))
        np.fill_diagonal(transmat, 1 - (n - 1) * mu)

        # The means of each component
        means = np.arange(0, step * n, step)
        means.resize((n, 1, 1))
        # The covariance of each component
        covars = sigma * np.ones((n, 1, 1))

        # Build an HMM instance and set parameters
        model = hmm.GaussianHMM(n_components=n, covariance_type="full")

        # Instead of fitting it from the data, we directly set the estimated
        # parameters, the means and covariance of the components
        model.startprob_ = startprob
        model.transmat_ = transmat
        model.means_ = means
        model.covars_ = covars
        return model

    def predict(self, X):
        """
        Predict a state for every finite value of X.

        Returns a masked array as long as X: predicted state index times
        `self.step`, masked wherever X was not finite.
        """
        # Handle missing values
        X = ma.masked_invalid(X)
        mask = X.mask
        dX = ma.compressed(X).reshape(-1, 1)
        dZ = self.model.predict(dX)
        Z = np.array([np.nan for _ in range(X.shape[0])])
        Z[~mask] = dZ
        Z = ma.masked_invalid(Z)

        return Z * self.step

    def annotate_segments(self, Z):
        """Report the copy number and start-end segment"""
        # We need a way to go from compressed indices to original indices
        P = Z.copy()
        P[~np.isfinite(P)] = -1
        _, mapping = np.unique(np.cumsum(P >= 0), return_index=True)

        dZ = Z.compressed()
        uniq, idx = np.unique(dZ, return_inverse=True)
        segments = []
        for i, mean_cn in enumerate(uniq):
            if not np.isfinite(mean_cn):
                continue
            for rr in contiguous_regions(idx == i):
                segments.append((mean_cn, mapping[rr]))

        return segments

    def plot(
        self, samplekey, chrs=allsomes, color=None, dx=None, ymax=8, ms=2, alpha=0.7
    ):
        """
        Draw one panel per region in `chrs` with the observations, the
        predicted copy number, and text labels for the called events.
        """
        from brewer2mpl import get_map
        import matplotlib.pyplot as plt

        props = dict(boxstyle="round", facecolor="wheat", alpha=0.2)

        if isinstance(chrs, str):
            chrs = [chrs]
        f, axs = plt.subplots(1, len(chrs), sharey=True)
        if not isinstance(axs, np.ndarray):
            axs = np.array([axs])
        plt.tight_layout()
        if color is None:
            color = choice(get_map("Set2", "qualitative", 8).mpl_colors)

        for region, ax in zip(chrs, axs):
            chr, start, end = parse_region(region)
            X, Z, clen, events = self.run_one(samplekey, chr)
            ax.plot(X, ".", label="observations", ms=ms, mfc=color, alpha=alpha)
            ax.plot(Z, "k.", label="hidden", ms=6)

            if start is None and end is None:
                ax.set_xlim(0, clen)
            else:
                # Region coordinates are in bases, the x-axis is in 1Kb bins
                ax.set_xlim(start / 1000, end / 1000)

            ax.set_ylim(0, ymax)
            ax.set_xlabel("1Kb bins")
            title = "{} {}".format(samplekey.split("_")[1], chr)
            if dx:
                title += " ({})".format(dx)
            ax.set_title(title)

            # The final calls
            yy = 0.9
            abnormal = [x for x in events if x[-1]]
            # Shrink the labels when there are many events to list
            if len(abnormal) > 5:
                yinterval = 0.02
                size = 10
            else:
                yinterval = 0.05
                size = 12
            for mean_cn, rr, event in events:
                if mean_cn > ymax:
                    continue
                ax.text(np.mean(rr), mean_cn + 0.2, mean_cn, ha="center", bbox=props)
                if event is None:
                    continue
                ax.text(
                    0.5,
                    yy,
                    str(event).rsplit(" ", 1)[0],
                    color="r",
                    ha="center",
                    transform=ax.transAxes,
                    size=size,
                )
                yy -= yinterval

        axs[0].set_ylabel("Copy number")
def gcdepth(args):
    """
    %prog gcdepth sample_name tag

    Plot GC content vs depth vs genomnic bins. Inputs are mosdepth output:
    - NA12878_S1.mosdepth.global.dist.txt
    - NA12878_S1.mosdepth.region.dist.txt
    - NA12878_S1.regions.bed.gz
    - NA12878_S1.regions.bed.gz.csi
    - NA12878_S1.regions.gc.bed.gz

    A sample mosdepth.sh script might look like:
    ```
    #!/bin/bash
    LD_LIBRARY_PATH=mosdepth/htslib/ mosdepth/mosdepth $1 \\
      bams/$1.bam -t 4 -c chr1 -n --by 1000

    bedtools nuc -fi GRCh38/WholeGenomeFasta/genome.fa \\
      -bed $1.regions.bed.gz \\
      | pigz -c > $1.regions.gc.bed.gz
    ```
    """
    # NOTE: the docstring above doubles as the command-line usage text, so it
    # is kept as-is. The plot is written to `<sample_name>.gcdepth.png`.
    import hashlib
    from jcvi.algorithms.formula import MAD_interval
    from jcvi.graphics.base import latex, plt, savefig, set2

    p = OptionParser(gcdepth.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    sample_name, tag = args
    # The tag is used to add to title, also provide a random (hashed) color.
    # hashlib only accepts bytes, so the str tag must be encoded first.
    coloridx = int(hashlib.sha256(tag.encode("utf-8")).hexdigest(), 16) % len(set2)
    color = set2[coloridx]

    # mosdepth outputs a table that we can use to plot relationship
    gcbedgz = sample_name + ".regions.gc.bed.gz"
    df = pd.read_csv(gcbedgz, delimiter="\t")
    mf = df.loc[:, ["4_usercol", "6_pct_gc"]]
    mf.columns = ["depth", "gc"]

    # We discard any bins that are gaps
    mf = mf[(mf["depth"] > 0.001) | (mf["gc"] > 0.001)]

    # Create GC bins: group depths by GC content rounded to whole percents
    gcbins = defaultdict(list)
    for gc, depth in zip(mf["gc"], mf["depth"]):
        gcp = int(round(gc * 100))
        gcbins[gcp].append(depth)
    gcd = sorted((k * 0.01, MAD_interval(v)) for (k, v) in gcbins.items())
    gcd_x, gcd_y = zip(*gcd)
    m, lo, hi = zip(*gcd_y)

    # Plot
    plt.plot(
        mf["gc"],
        mf["depth"],
        ".",
        color="lightslategray",
        ms=2,
        mec="lightslategray",
        alpha=0.1,
    )
    patch = plt.fill_between(
        gcd_x,
        lo,
        hi,
        facecolor=color,
        alpha=0.25,
        zorder=10,
        linewidth=0.0,
        label="Median +/- MAD band",
    )
    plt.plot(gcd_x, m, "-", color=color, lw=2, zorder=20)

    ax = plt.gca()
    ax.legend(handles=[patch], loc="best")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 100)
    ax.set_title("{} ({})".format(latex(sample_name), tag))
    ax.set_xlabel("GC content")
    ax.set_ylabel("Depth")
    savefig(sample_name + ".gcdepth.png")
def counter_mean_and_median(counter):
    """
    Return (mean, median) of the values described by a value -> count mapping.

    The median is the smallest value whose cumulative count reaches half of
    the total count. An empty counter yields (nan, nan).
    """
    if not counter:
        return np.nan, np.nan

    total = sum(counter.values())
    halfway = total / 2
    weighted = 0
    running = 0
    found = False
    for value, count in sorted(counter.items()):
        weighted += value * count
        running += count
        # Keep accumulating once the median is known or not yet reached
        if found or running < halfway:
            continue
        median = value
        found = True
    return weighted * 1.0 / total, median
def df_to_tsv(df, tsvfile, suffix):
    """
    Serialize the dataframe as a tsv

    Args:
        df: dataframe with a `SampleKey` column plus per-gene columns.
        tsvfile: output path prefix; `suffix` is appended to form the filename.
        suffix: only columns ending with this suffix are written, in sorted
            order after `SampleKey`.

    Rows are written sorted by `SampleKey`; missing values appear as "na".
    """
    tsvfile += suffix
    columns = ["SampleKey"] + sorted(x for x in df.columns if x.endswith(suffix))
    # `reindex(columns=...)` replaces `reindex_axis`, which is no longer
    # available in current pandas. `sort_values` returns a new frame, so its
    # result has to be kept; previously it was discarded and rows stayed unsorted.
    tf = df.reindex(columns=columns).sort_values("SampleKey")
    tf.to_csv(tsvfile, sep="\t", index=False, float_format="%.4g", na_rep="na")
    print(
        "TSV output written to `{}` (# samples={})".format(tsvfile, tf.shape[0]),
        file=sys.stderr,
    )
def compare_worker(arg):
    """
    Compare one CNV output file against a truth set.

    Args:
        arg: tuple of (cnvoutput, truths) file paths.

    Returns:
        A tab-joined string of: cnvoutput, truths, number of reciprocal 50%
        overlaps reported by intersectBed, number of lines in cnvoutput,
        number of lines in truths, precision (%) and recall (%). Precision or
        recall is `nan` when the corresponding file has no lines.
    """
    cnvoutput, truths = arg
    cmd = "intersectBed -f .5 -F .5"
    cmd += " -a {} -b {} | wc -l".format(cnvoutput, truths)
    nlines = int(popen(cmd, debug=False).read())
    # Count lines with context managers so the handles are closed promptly
    with open(cnvoutput) as fp:
        target_lines = sum(1 for _ in fp)
    with open(truths) as fp:
        truths_lines = sum(1 for _ in fp)
    # An empty file would raise ZeroDivisionError here; inside a pool worker
    # that aborts the whole batch, so report an undefined ratio instead.
    nan = float("nan")
    precision = nlines * 100.0 / target_lines if target_lines else nan
    recall = nlines * 100.0 / truths_lines if truths_lines else nan
    d = "\t".join(
        str(x)
        for x in (
            cnvoutput,
            truths,
            nlines,
            target_lines,
            truths_lines,
            precision,
            recall,
        )
    )
    return d
def batchcn(args):
    """
    %prog batchcn workdir samples.csv

    Run CNV segmentation caller in batch mode. Scans a workdir.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    # One `cn` command is printed for every sample in samples.csv that does
    # not already have a .seg file under `<upload>/<workdir>/`.
    p = OptionParser(batchcn.__doc__)
    p.add_argument(
        "--upload",
        default="s3://hli-mv-data-science/htang/ccn",
        help="Upload cn and seg results to s3",
    )
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    workdir, samples = args
    upload = opts.upload
    store = upload + "/{}/*.seg".format(workdir)
    computed = {op.basename(x).split(".")[0] for x in glob_s3(store)}

    # Generate a bunch of cn commands
    nskipped = ntotal = 0
    cmd = "python -m jcvi.variation.cnv cn --hmm --cleanup {}".format(workdir)
    with open(samples) as fp:
        for row in fp:
            row = row.strip()
            if not row:
                # Blank lines (e.g. a trailing newline) carry no sample
                continue
            samplekey, path = row.split(",")
            ntotal += 1
            if samplekey in computed:
                nskipped += 1
                continue
            print(" ".join((cmd, samplekey, path)))

    logger.debug("Skipped: {}".format(percentage(nskipped, ntotal)))
+ """ + p = OptionParser(hmm.__doc__) + p.add_argument("--mu", default=0.003, type=float, help="Transition probability") + p.add_argument( + "--sigma", + default=0.1, + type=float, + help="Standard deviation of Gaussian emission distribution", + ) + p.add_argument( + "--threshold", + default=1, + type=float, + help="Standard deviation must be < this in the baseline population", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + workdir, sample_key = args + model = CopyNumberHMM( + workdir=workdir, mu=opts.mu, sigma=opts.sigma, threshold=opts.threshold + ) + events = model.run(sample_key) + params = ".mu-{}.sigma-{}.threshold-{}".format(opts.mu, opts.sigma, opts.threshold) + hmmfile = op.join(workdir, sample_key + params + ".seg") + fw = open(hmmfile, "w") + nevents = 0 + for mean_cn, rr, event in events: + if event is None: + continue + print(" ".join((event.bedline, sample_key)), file=fw) + nevents += 1 + fw.close() + logger.debug( + "A total of {} aberrant events written to `{}`".format(nevents, hmmfile) + ) + return hmmfile + + +def batchccn(args): + """ + %prog batchccn test.csv + + Run CCN script in batch. Write makefile. 
+ """ + p = OptionParser(batchccn.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (csvfile,) = args + mm = MakeManager() + pf = op.basename(csvfile).split(".")[0] + mkdir(pf) + + header = next(open(csvfile)) + header = None if header.strip().endswith(".bam") else "infer" + logger.debug("Header={}".format(header)) + df = pd.read_csv(csvfile, header=header) + cmd = "perl /mnt/software/ccn_gcn_hg38_script/ccn_gcn_hg38.pl" + cmd += " -n {} -b {}" + cmd += " -o {} -r hg38".format(pf) + for i, (sample_key, bam) in df.iterrows(): + cmdi = cmd.format(sample_key, bam) + outfile = "{}/{}/{}.ccn".format(pf, sample_key, sample_key) + mm.add(csvfile, outfile, cmdi) + mm.write() + + +def mergecn(args): + """ + %prog mergecn FACE.csv + + Compile matrix of GC-corrected copy numbers. Place a bunch of folders in + csv file. Each folder will be scanned, one chromosomes after another. + """ + p = OptionParser(mergecn.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (csvfile,) = args + samples = [x.replace("-cn", "").strip().strip("/") for x in open(csvfile)] + betadir = "beta" + mkdir(betadir) + for seqid in allsomes: + names = [ + op.join(s + "-cn", "{}.{}.cn".format(op.basename(s), seqid)) + for s in samples + ] + arrays = [np.fromfile(name, dtype=np.float) for name in names] + shapes = [x.shape[0] for x in arrays] + med_shape = np.median(shapes) + arrays = [x for x in arrays if x.shape[0] == med_shape] + ploidy = 2 if seqid not in ("chrY", "chrM") else 1 + if seqid in sexsomes: + chr_med = [np.median([x for x in a if x > 0]) for a in arrays] + chr_med = np.array(chr_med) + idx = get_kmeans(chr_med, k=2) + zero_med = np.median(chr_med[idx == 0]) + one_med = np.median(chr_med[idx == 1]) + logger.debug("K-means with {} c0:{} c1:{}".format(seqid, zero_med, one_med)) + higher_idx = 1 if one_med > zero_med else 0 + # Use the higher mean coverage componen + arrays = 
np.array(arrays)[idx == higher_idx] + arrays = [[x] for x in arrays] + ar = np.concatenate(arrays) + print(seqid, ar.shape) + rows, columns = ar.shape + beta = [] + std = [] + for j in range(columns): + a = ar[:, j] + beta.append(np.median(a)) + std.append(np.std(a) / np.mean(a)) + beta = np.array(beta) / ploidy + betafile = op.join(betadir, "{}.beta".format(seqid)) + beta.tofile(betafile) + stdfile = op.join(betadir, "{}.std".format(seqid)) + std = np.array(std) + std.tofile(stdfile) + logger.debug("Written to `{}`".format(betafile)) + ar.tofile("{}.bin".format(seqid)) + + +def is_matching_gz(origfile, gzfile): + if not op.exists(origfile): + return False + if not op.exists(gzfile): + return False + return getfilesize(origfile) == getfilesize(gzfile) + + +def load_cib(cibfile, n=1000): + cibgzfile = cibfile + ".gz" + # When we try unzip if cib not found, or cib does not match cibgz + if not op.exists(cibfile) or not is_matching_gz(cibfile, cibgzfile): + if op.exists(cibgzfile): + cibfile = cibgzfile + if cibfile.endswith(".gz"): + sh("pigz -d -k -f {}".format(cibfile)) + cibfile = cibfile.replace(".gz", "") + if not op.exists(cibfile): + return + + cib = np.fromfile(cibfile, dtype=np.int8) + 128 + rm = pd.rolling_mean(cib, n, min_periods=n / 2) + a = rm[n - 1 :: n].copy() + del cib + del rm + return a + + +def build_gc_array(fastafile="/mnt/ref/hg38.upper.fa", gcdir="gc", n=1000): + from pyfasta import Fasta + + f = Fasta(fastafile) + mkdir(gcdir) + for seqid in allsomes: + if seqid not in f: + logger.debug("Seq {} not found. 
Continue anyway.".format(seqid)) + continue + c = np.array(f[seqid]) + gc = (c == "G") | (c == "C") # If base is GC + rr = ~(c == "N") # If base is real + mgc = pd.rolling_sum(gc, n, min_periods=n / 2)[n - 1 :: n] + mrr = pd.rolling_sum(rr, n, min_periods=n / 2)[n - 1 :: n] + gc_pct = np.rint(mgc * 100 / mrr) + gc_pct = np.asarray(gc_pct, dtype=np.uint8) + arfile = op.join(gcdir, "{}.{}.gc".format(seqid, n)) + gc_pct.tofile(arfile) + print(seqid, gc_pct, arfile, file=sys.stderr) + + +def cn(args): + """ + %prog cn workdir 102340_NA12878 \ + s3://hli-bix-us-west-2/kubernetes/wf-root-test/102340_NA12878/lpierce-ccn_gcn-v2/ + + Download CCN output folder and convert cib to copy number per 1Kb. + """ + p = OptionParser(cn.__doc__) + p.add_argument( + "--binsize", default=1000, type=int, help="Window size along chromosome" + ) + p.add_argument( + "--cleanup", + default=False, + action="store_true", + help="Clean up downloaded s3 folder", + ) + p.add_argument( + "--hmm", + default=False, + action="store_true", + help="Run HMM caller after computing CN", + ) + p.add_argument( + "--upload", + default="s3://hli-mv-data-science/htang/ccn", + help="Upload cn and seg results to s3", + ) + p.add_argument( + "--rebuildgc", help="Rebuild GC directory rather than pulling from S3" + ) + opts, args = p.parse_args(args) + + if len(args) == 2: + workdir, sample_key = args + s3dir = None + elif len(args) == 3: + workdir, sample_key, s3dir = args + else: + sys.exit(not p.print_help()) + + n = opts.binsize + rebuildgc = opts.rebuildgc + mkdir(workdir) + sampledir = op.join(workdir, sample_key) + if s3dir: + sync_from_s3(s3dir, target_dir=sampledir) + + assert op.exists(sampledir), "Directory {} doesn't exist!".format(sampledir) + + cndir = op.join(workdir, sample_key + "-cn") + if op.exists(cndir): + logger.debug("Directory {} exists. 
Skipped.".format(cndir)) + return + + gcdir = "gc" + if rebuildgc: + build_gc_array(fastafile=rebuildgc, n=n, gcdir=gcdir) + if not op.exists(gcdir): + sync_from_s3("s3://hli-mv-data-science/htang/ccn/gc", target_dir=gcdir) + + # Build GC correction table + gc_bin = defaultdict(list) + gc_med = {} + coverage = [] + + for seqid in allsomes: + gcfile = op.join(gcdir, "{}.{}.gc".format(seqid, n)) + if not op.exists(gcfile): + logger.error("File {} not found. Continue anyway.".format(gcfile)) + continue + gc = np.fromfile(gcfile, dtype=np.uint8) + cibfile = op.join(sampledir, "{}.{}.cib".format(sample_key, seqid)) + cib = load_cib(cibfile) + print(seqid, gc.shape[0], cib.shape[0], file=sys.stderr) + if seqid in autosomes: + for gci, k in zip(gc, cib): + gc_bin[gci].append(k) + coverage.append((seqid, gc, cib)) + + for gci, k in gc_bin.items(): + nonzero_k = [x for x in k if x] + gc_med[gci] = med = np.median(nonzero_k) / 2 + print(gci, len(nonzero_k), med, file=sys.stderr) + + mkdir(cndir) + apply_fun = np.vectorize(gc_med.get) + # Apply the GC correction over coverage + for seqid, gc, cib in coverage: + nitems = cib.shape[0] + beta = apply_fun(gc[:nitems]) + beta_cn = cib / beta + cnfile = op.join(cndir, "{}.{}.cn".format(sample_key, seqid)) + beta_cn.tofile(cnfile) + + # Run HMM caller if asked + segfile = hmm([workdir, sample_key]) if opts.hmm else None + + upload = opts.upload + if upload: + push_to_s3(upload, cndir) + if segfile: + push_to_s3(upload, segfile) + + if opts.cleanup: + from jcvi.apps.base import cleanup + + cleanup(cndir, sampledir) + + +@dataclass +class CNV: + chr: str + start: int + end: int + type: str + name: str + is_pass: bool + cn: int + + +def validate(args): + """ + %prog validate sample.bcc sample.cnv.vcf.gz + + Plot RDR/BAF/CN for validation of CNV calls in `sample.vcf.gz`. 
+ """ + p = OptionParser(validate.__doc__) + p.add_argument( + "--no-rdr-logy", + default=False, + action="store_true", + help="Do not make y-axis of RDR log-scale", + ) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + import holoviews as hv + import hvplot.pandas + + hv.extension("bokeh") + + ( + bccfile, + vcffile, + ) = args + rdr_logy = not opts.no_rdr_logy + df = pd.read_csv(bccfile, sep="\t") + + sample = op.basename(bccfile).split(".", 1)[0] + sizes, xlim = get_hg19_chr_sizes_and_xlim() + b = np.cumsum(sizes["size"]) + a = pd.Series(b[:-1]) + a.index += 1 + sizes["cumsize"] = pd.concat([pd.Series([0]), a]) + jf = pd.merge(df, sizes, how="left", left_on="#chr", right_on="chr") + jf["pos"] = jf["start"] + jf["cumsize"] + model, rfx = get_model_and_dataframe(vcffile, sizes) + + rdr_ylim = (0.5, 4) if rdr_logy else (0, 8) + rdr = jf.hvplot.scatter( + x="pos", + y="rdr", + logy=rdr_logy, + xlim=xlim, + ylim=rdr_ylim, + s=1, + width=1440, + height=240, + c="chr", + title=f"{sample}, Tumor RD/Normal RD (RDR)", + legend=False, + ) + baf = jf.hvplot.scatter( + x="pos", + y="baf", + xlim=xlim, + ylim=(0, 0.5), + s=1, + width=1440, + height=240, + c="chr", + title=f"{sample}, Germline Variant B-Allele Fraction (BAF)", + legend=False, + ) + vaf = jf.hvplot.scatter( + x="pos", + y="tumor_vaf", + xlim=xlim, + ylim=(0, 1), + s=1, + width=1440, + height=180, + c="chr", + title=f"{sample}, Somatic Variant Allele Fraction (VAF)", + legend=False, + ) + comp = get_segments(rfx) + for _, row in sizes.iterrows(): + chr = row["chr"] + cb = row["cumsize"] + vline = hv.VLine(cb).opts(color="lightgray", line_width=1) + ctext1 = hv.Text( + cb, 0.5, chr.replace("chr", ""), halign="left", valign="bottom" + ) + ctext2 = hv.Text(cb, 0, chr.replace("chr", ""), halign="left", valign="bottom") + rdr = rdr * vline * ctext1 + baf = baf * vline * ctext2 + comp = comp * vline + vaf = vaf * vline + model_kv = " ".join(f"{k}={v}" for k, v in 
model.items()) + comp.opts( + width=1440, + height=240, + xlim=xlim, + ylim=(0, 10), + title=f"{sample}, CNV calls Copy Number (CN) - Red: GAIN, Blue: LOSS, Black: REF, Magenta: CNLOH, Cyan: GAINLOH\n{model_kv}", + ) + cc = (rdr + baf + comp + vaf).cols(1) + htmlfile = f"{sample}.html" + hv.save(cc, htmlfile) + logger.info("Report written to `%s`", htmlfile) + + +def get_segments(rfx: pd.DataFrame): + """ + Return a holoviews object for segments. + """ + import holoviews as hv + + rfx_gain = rfx[(rfx["type"] == "GAIN") & rfx["is_pass"]] + rfx_loss = rfx[(rfx["type"] == "LOSS") & rfx["is_pass"]] + rfx_ref = rfx[(rfx["type"] == "REF") & rfx["is_pass"]] + rfx_cnloh = rfx[(rfx["type"] == "CNLOH") & rfx["is_pass"]] + rfx_gainloh = rfx[(rfx["type"] == "GAINLOH") & rfx["is_pass"]] + rfx_nonpass = rfx[~rfx["is_pass"]] + seg_gain = hv.Segments( + rfx_gain, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] + ) + seg_loss = hv.Segments( + rfx_loss, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] + ) + seg_ref = hv.Segments( + rfx_ref, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] + ) + seg_cnloh = hv.Segments( + rfx_cnloh, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] + ) + seg_gainloh = hv.Segments( + rfx_gainloh, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] + ) + seg_nonpass = hv.Segments( + rfx_nonpass, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] + ) + seg_gain.opts(color="r", line_width=5, tools=["hover"]) + seg_loss.opts(color="b", line_width=5, tools=["hover"]) + seg_ref.opts(color="k", line_width=5, tools=["hover"]) + seg_cnloh.opts(color="m", line_width=5, tools=["hover"]) + seg_gainloh.opts(color="c", line_width=5, tools=["hover"]) + seg_nonpass.opts(color="lightgray", line_width=5, tools=["hover"]) + comp = seg_gain * seg_ref * seg_loss * seg_cnloh * seg_gainloh * seg_nonpass + return comp + + +def get_model_and_dataframe( + vcffile: str, sizes: pd.DataFrame +) -> tuple[dict, pd.DataFrame]: + 
""" + Get the model and dataframe from the VCF file. + """ + model = get_purity_and_model(vcffile) + records = get_CNV_records(vcffile) + rf = pd.DataFrame(x.__dict__ for x in records) + rfx = pd.merge(rf, sizes, how="left", left_on="chr", right_on="chr") + rfx["pos"] = rfx["start"] + rfx["cumsize"] + rfx["pos_end"] = rfx["end"] + rfx["cumsize"] + return model, rfx + + +def get_hg19_chr_sizes_and_xlim() -> tuple[pd.DataFrame, tuple[int, int]]: + """ + Get chromosome sizes for hg19 + """ + from io import StringIO + + # hg19 + s = """ + chr size + chr1 249250621 + chr2 243199373 + chr3 198022430 + chr4 191154276 + chr5 180915260 + chr6 171115067 + chr7 159138663 + chr8 146364022 + chr9 141213431 + chr10 135534747 + chr11 135006516 + chr12 133851895 + chr13 115169878 + chr14 107349540 + chr15 102531392 + chr16 90354753 + chr17 81195210 + chr18 78077248 + chr19 59128983 + chr20 63025520 + chr21 48129895 + chr22 51304566 + chrX 155270560 + chrY 59373566""" + sizes = pd.read_csv(StringIO(s), delim_whitespace=True) + return sizes, (0, 2881033286) + + +def get_CNV_records(vcffile: str) -> list[CNV]: + """ + Get CNV records from a VCF file. + """ + from cyvcf2 import VCF + + vcf_reader = VCF(vcffile) + records = [] + for record in vcf_reader: + name = record.ID + dragen, type, chr, start_end = name.split(":") + start, end = [int(x) for x in start_end.split("-")] + is_pass = "PASS" in record.FILTERS + (cn,) = record.format("CN")[0] + record = CNV(chr, start, end, type, name, is_pass, cn) + records.append(record) + logger.info("A total of %d records imported", len(records)) + return records + + +def get_purity_and_model(vcffile: str) -> dict[str, str]: + """ + Get purity and model from VCF header. 
+ """ + model = { + "ModelSource": None, + "EstimatedTumorPurity": None, + # "DiploidCoverage": None, + "OverallPloidy": None, + } + import gzip + + for row in gzip.open(vcffile): + row = row.decode("utf-8") + if not row.startswith("##"): + continue + a, b = row[2:].split("=", 1) + if a in model: + model[a] = b + return model + + +def wes_vs_wgs(args): + """ + %prog wes_vs_wgs sample.bcc sample.wes.cnv.vcf.gz sample.wgs.cnv.vcf.gz + + Compare WES and WGS CNVs. + """ + p = OptionParser(wes_vs_wgs.__doc__) + p.add_argument( + "--no-rdr-logy", + default=False, + action="store_true", + help="Do not make y-axis of RDR log-scale", + ) + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + import holoviews as hv + import hvplot.pandas + + hv.extension("bokeh") + + bccfile, wesfile, wgsfile = args + df = pd.read_csv(bccfile, sep="\t") + rdr_logy = not opts.no_rdr_logy + + sample = op.basename(bccfile).split(".", 1)[0] + sizes, xlim = get_hg19_chr_sizes_and_xlim() + b = np.cumsum(sizes["size"]) + a = pd.Series(b[:-1]) + a.index += 1 + sizes["cumsize"] = pd.concat([pd.Series([0]), a]) + jf = pd.merge(df, sizes, how="left", left_on="#chr", right_on="chr") + jf["pos"] = jf["start"] + jf["cumsize"] + + wes_model, wes_rfx = get_model_and_dataframe(wesfile, sizes) + wgs_model, wgs_rfx = get_model_and_dataframe(wgsfile, sizes) + + rdr_ylim = (0.5, 4) if rdr_logy else (0, 8) + rdr = jf.hvplot.scatter( + x="pos", + y="rdr", + logy=rdr_logy, + xlim=xlim, + ylim=rdr_ylim, + s=1, + width=1440, + height=240, + c="chr", + title=f"{sample}, Tumor RD/Normal RD (RDR)", + legend=False, + ylabel="Read depth ratio", + ) + wes_model = " ".join(f"{k}={v}" for k, v in wes_model.items()) + wes_comp = get_segments(wes_rfx) + wgs_model = " ".join(f"{k}={v}" for k, v in wgs_model.items()) + wgs_comp = get_segments(wgs_rfx) + for _, row in sizes.iterrows(): + chr = row["chr"] + cb = row["cumsize"] + vline = hv.VLine(cb).opts(color="lightgray", line_width=1) + 
ctext1 = hv.Text( + cb, 0.5, chr.replace("chr", ""), halign="left", valign="bottom" + ) + rdr = rdr * vline * ctext1 + wes_comp = wes_comp * vline + wgs_comp = wgs_comp * vline + cc = (rdr + wes_comp + wgs_comp).cols(1) + for label, c, model_kv in zip( + ("WES", "WGS"), (wes_comp, wgs_comp), (wes_model, wgs_model) + ): + c.opts( + width=1440, + height=240, + xlim=xlim, + ylim=(0, 10), + title=f"{label} {sample}, CNV calls Copy Number (CN) - Red: GAIN, Blue: LOSS, Black: REF, Magenta: CNLOH, Cyan: GAINLOH, Gray: NON-PASS\n{model_kv}", + ) + htmlfile = f"{sample}.html" + hv.save(cc, htmlfile) + logger.info("Report written to `%s`", htmlfile) + + +if __name__ == "__main__": + main() diff --git a/jcvi/variation/deconvolute.py b/jcvi/variation/deconvolute.py new file mode 100644 index 00000000..c525c8a9 --- /dev/null +++ b/jcvi/variation/deconvolute.py @@ -0,0 +1,258 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Deconvolute fastq files according to barcodes. +""" +import os.path as op +import sys + +from collections import namedtuple +from itertools import product, groupby, islice +from multiprocessing import Pool + +from Bio.Data.IUPACData import ambiguous_dna_values +from Bio.SeqIO.QualityIO import FastqGeneralIterator + +from ..apps.base import ActionDispatcher, OptionParser, flatten, glob, logger, mkdir +from ..formats.base import FileMerger, must_open +from ..formats.fastq import FastqPairedIterator + + +def main(): + + actions = ( + ("split", "split fastqfile into subsets"), + ("merge", "consolidate split contents"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +BarcodeLine = namedtuple("BarcodeLine", ["id", "seq"]) + + +def unpack_ambiguous(s): + """ + List sequences with ambiguous characters in all possibilities. 
+ """ + sd = [ambiguous_dna_values[x] for x in s] + return ["".join(x) for x in list(product(*sd))] + + +def is_barcode_sample(seq, barcode, excludebarcode, trim): + if seq[:trim] != barcode.seq: + return False + hasexclude = any(seq.startswith(x.seq) for x in excludebarcode) + if hasexclude: + return False + return True + + +def split_barcode_paired(t): + + barcode, excludebarcode, outdir, inputfile = t + trim = len(barcode.seq) + outfastq = op.join(outdir, "{0}.{1}.fastq".format(barcode.id, barcode.seq)) + + r1, r2 = inputfile + p1fp, p2fp = FastqPairedIterator(r1, r2) + fw = open(outfastq, "w") + while True: + a = list(islice(p1fp, 4)) + if not a: + break + + b = list(islice(p2fp, 4)) + title, seq, plus, qual = a + title, seq, qual = title.strip(), seq.strip(), qual.strip() + if not is_barcode_sample(seq, barcode, excludebarcode, trim): + continue + + print("{0}\n{1}\n+\n{2}".format(title, seq[trim:], qual[trim:]), file=fw) + fw.writelines(b) + + fw.close() + + +def append_barcode_paired(t): + + barcode, excludebarcode, outdir, inputfile = t + bs = barcode.seq + trim = len(bs) + fake_qual = len(bs) * "#" + outfastq = op.join(outdir, "{0}.{1}.fastq".format(barcode.id, barcode.seq)) + + r1, r2 = inputfile + p1fp, p2fp = FastqPairedIterator(r1, r2) + fw = open(outfastq, "w") + while True: + a = list(islice(p1fp, 4)) + if not a: + break + + title, seq, plus, qual = a + seq = seq.strip() + if not is_barcode_sample(seq, barcode, excludebarcode, trim): + continue + + fw.writelines(a) + + title, seq, plus, qual = list(islice(p2fp, 4)) + title, seq, qual = title.strip(), seq.strip(), qual.strip() + # append barcode + seq = bs + seq + qual = fake_qual + qual + print("{0}\n{1}\n+\n{2}".format(title, seq, qual), file=fw) + + fw.close() + + +def split_barcode(t): + + barcode, excludebarcode, outdir, inputfile = t + trim = len(barcode.seq) + outfastq = op.join(outdir, "{0}.{1}.fastq".format(barcode.id, barcode.seq)) + + fp = must_open(inputfile) + fw = open(outfastq, "w") + 
for title, seq, qual in FastqGeneralIterator(fp): + if not is_barcode_sample(seq, barcode, excludebarcode, trim): + continue + print("@{0}\n{1}\n+\n{2}".format(title, seq[trim:], qual[trim:]), file=fw) + + fw.close() + + +def split(args): + """ + %prog split barcodefile fastqfile1 .. + + Deconvolute fastq files into subsets of fastq reads, based on the barcodes + in the barcodefile, which is a two-column file like: + ID01 AGTCCAG + + Input fastqfiles can be several files. Output files are ID01.fastq, + ID02.fastq, one file per line in barcodefile. + + When --paired is set, the number of input fastqfiles must be two. Output + file (the deconvoluted reads) will be in interleaved format. + """ + p = OptionParser(split.__doc__) + p.set_outdir(outdir="deconv") + p.add_argument( + "--nocheckprefix", + default=False, + action="store_true", + help="Don't check shared prefix", + ) + p.add_argument( + "--paired", + default=False, + action="store_true", + help="Paired-end data", + ) + p.add_argument( + "--append", + default=False, + action="store_true", + help="Append barcode to 2nd read", + ) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + barcodefile = args[0] + fastqfile = args[1:] + paired = opts.paired + append = opts.append + if append: + assert paired, "--append only works with --paired" + + nfiles = len(fastqfile) + + barcodes = [] + fp = open(barcodefile) + for row in fp: + id, seq = row.split() + for s in unpack_ambiguous(seq): + barcodes.append(BarcodeLine._make((id, s))) + + nbc = len(barcodes) + logger.debug("Imported {0} barcodes (ambiguous codes expanded).".format(nbc)) + checkprefix = not opts.nocheckprefix + + if checkprefix: + # Sanity check of shared prefix + excludebarcodes = [] + for bc in barcodes: + exclude = [] + for s in barcodes: + if bc.id == s.id: + continue + + assert bc.seq != s.seq + if s.seq.startswith(bc.seq) and len(s.seq) > len(bc.seq): + logger.error("{0} shares same prefix as 
{1}.".format(s, bc)) + exclude.append(s) + excludebarcodes.append(exclude) + else: + excludebarcodes = nbc * [[]] + + outdir = opts.outdir + mkdir(outdir) + + cpus = opts.cpus + logger.debug("Create a pool of {0} workers.".format(cpus)) + pool = Pool(cpus) + + if paired: + assert nfiles == 2, "You asked for --paired, but sent in {0} files".format( + nfiles + ) + split_fun = append_barcode_paired if append else split_barcode_paired + mode = "paired" + else: + split_fun = split_barcode + mode = "single" + + logger.debug("Mode: {0}".format(mode)) + + pool.map( + split_fun, zip(barcodes, excludebarcodes, nbc * [outdir], nbc * [fastqfile]) + ) + + +def merge(args): + """ + %prog merge folder1 ... + + Consolidate split contents in the folders. The folders can be generated by + the split() process and several samples may be in separate fastq files. This + program merges them. + """ + p = OptionParser(merge.__doc__) + p.set_outdir(outdir="outdir") + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + folders = args + outdir = opts.outdir + mkdir(outdir) + + files = flatten(glob("{0}/*.*.fastq".format(x)) for x in folders) + files = list(files) + key = lambda x: op.basename(x).split(".")[0] + files.sort(key=key) + for id, fns in groupby(files, key=key): + fns = list(fns) + outfile = op.join(outdir, "{0}.fastq".format(id)) + FileMerger(fns, outfile=outfile).merge(checkexists=True) + + +if __name__ == "__main__": + main() diff --git a/jcvi/variation/delly.py b/jcvi/variation/delly.py new file mode 100644 index 00000000..a8f1b5e0 --- /dev/null +++ b/jcvi/variation/delly.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Convert delly output to BED format. 
+""" + +import os.path as op +import sys + +from ..apps.base import ActionDispatcher, OptionParser, logger, need_update, sh +from ..formats.base import BaseFile, read_until, must_open +from ..formats.sam import coverage +from ..utils.aws import ls_s3, push_to_s3 +from ..utils.cbook import percentage + + +class DelLine(object): + def __init__(self, line): + args = line.strip().split("\t") + self.seqid = args[0] + self.start = int(args[1]) + 1 + self.end = int(args[2]) + self.size = int(args[3]) + assert self.size == self.end - self.start + 1 + self.supporting_pairs = int(args[4]) + self.avg_mapping_quality = float(args[5]) + self.accn = args[6] + + @property + def bedline(self): + return "\t".join( + str(x) + for x in ( + self.seqid, + self.start - 1, + self.end, + self.accn, + self.supporting_pairs, + "+", + ) + ) + + +class Delly(BaseFile): + def __init__(self, filename): + super().__init__(filename) + + def __iter__(self): + fp = must_open(self.filename) + while True: + read_until(fp, "-----") + nextline = fp.readline() + nextline = fp.readline() + if not nextline.strip(): + break + d = DelLine(nextline) + yield d + + def write_bed(self, bedfile="stdout"): + fw = must_open(bedfile, "w") + for d in self: + print(d.bedline, file=fw) + logger.debug("File written to `%s`.", bedfile) + + +def main(): + + actions = ( + ("bed", "Convert del.txt to del.bed"), + ("mito", "Find mito deletions in BAM"), + ("mitosomatic", "Find mito mosaic somatic mutations in piledriver results"), + ("mitocompile", "Compile mito deletions from multiple VCF files"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def mitosomatic(args): + """ + %prog mitosomatic t.piledriver + + Find mito mosaic somatic mutations in piledriver results. 
+ """ + import pandas as pd + + p = OptionParser(mitosomatic.__doc__) + p.add_argument("--minaf", default=0.005, type=float, help="Minimum allele fraction") + p.add_argument("--maxaf", default=0.1, type=float, help="Maximum allele fraction") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (df,) = args + af_file = df.rsplit(".", 1)[0] + ".af" + fw = open(af_file, "w") + df = pd.read_csv(df, sep="\t") + for i, row in df.iterrows(): + na = row["num_A"] + nt = row["num_T"] + nc = row["num_C"] + ng = row["num_G"] + nd = row["num_D"] + ni = row["num_I"] + depth = row["depth"] + # major, minor = sorted([na, nt, nc, ng], reverse=True)[:2] + # af = minor * 1. / (major + minor) + af = (nd + ni) * 1.0 / depth + if not (opts.minaf <= af <= opts.maxaf): + continue + print("{}\t{}\t{:.6f}".format(row["chrom"], row["start"], af), file=fw) + fw.close() + + logger.debug("Allele freq written to `{}`".format(af_file)) + + +def bed(args): + """ + %prog bed del.txt + + Convert `del.txt` to BED format. DELLY manual here: + + + Deletion: + chr, start, end, size, #supporting_pairs, avg._mapping_quality, deletion_id + chr1, 10180, 10509, 329, 75, 15.8667, Deletion_Sample_00000000 + """ + p = OptionParser(bed.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (delt,) = args + dt = Delly(delt) + dt.write_bed("del.bed") + + +def mitocompile(args): + """ + %prog mitcompile *.vcf.gz + + Extract information about deletions in vcf file. 
+ """ + from urllib.parse import parse_qsl + from jcvi.formats.vcf import VcfLine + + p = OptionParser(mitocompile.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + vcfs = args + print("\t".join("vcf samplekey depth seqid pos alt svlen pe sr".split())) + for i, vcf in enumerate(vcfs): + if (i + 1) % 100 == 0: + logger.debug("Process `{}` [{}]".format(vcf, percentage(i + 1, len(vcfs)))) + depthfile = vcf.replace(".sv.vcf.gz", ".depth") + fp = must_open(depthfile) + _, depth = next(fp).split() + depth = int(float(depth)) + samplekey = op.basename(vcf).split("_")[0] + + fp = must_open(vcf) + for row in fp: + if row[0] == "#": + continue + v = VcfLine(row) + info = dict(parse_qsl(v.info)) + print( + "\t".join( + str(x) + for x in ( + vcf, + samplekey, + depth, + v.seqid, + v.pos, + v.alt, + info.get("SVLEN"), + info["PE"], + info["SR"], + ) + ) + ) + + +def mito(args): + """ + %prog mito chrM.fa input.bam + + Identify mitochondrial deletions. + """ + p = OptionParser(mito.__doc__) + p.set_aws_opts(store="hli-mv-data-science/htang/mito-deletions") + p.add_argument( + "--realignonly", default=False, action="store_true", help="Realign only" + ) + p.add_argument( + "--svonly", + default=False, + action="store_true", + help="Run Realign => SV calls only", + ) + p.add_argument( + "--support", default=1, type=int, help="Minimum number of supporting reads" + ) + p.set_home("speedseq", default="/mnt/software/speedseq/bin") + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + chrMfa, bamfile = args + store = opts.output_path + cleanup = not opts.nocleanup + + if not op.exists(chrMfa): + logger.debug("File `{}` missing. 
Exiting.".format(chrMfa)) + return + + chrMfai = chrMfa + ".fai" + if not op.exists(chrMfai): + cmd = "samtools index {}".format(chrMfa) + sh(cmd) + + if not bamfile.endswith(".bam"): + bamfiles = [x.strip() for x in open(bamfile)] + else: + bamfiles = [bamfile] + + if store: + computed = ls_s3(store) + computed = [ + op.basename(x).split(".")[0] for x in computed if x.endswith(".depth") + ] + remaining_samples = [ + x for x in bamfiles if op.basename(x).split(".")[0] not in computed + ] + + logger.debug( + "Already computed on `{}`: {}".format( + store, len(bamfiles) - len(remaining_samples) + ) + ) + bamfiles = remaining_samples + + logger.debug("Total samples: {}".format(len(bamfiles))) + + for bamfile in bamfiles: + run_mito( + chrMfa, + bamfile, + opts, + realignonly=opts.realignonly, + svonly=opts.svonly, + store=store, + cleanup=cleanup, + ) + + +def run_mito( + chrMfa, bamfile, opts, realignonly=False, svonly=False, store=None, cleanup=False +): + from jcvi.formats.sam import get_minibam + + region = "chrM" + minibam = op.basename(bamfile).replace(".bam", ".{}.bam".format(region)) + if not op.exists(minibam): + get_minibam(bamfile, region) + else: + logger.debug("{} found. 
Skipped.".format(minibam)) + + speedseq_bin = op.join(opts.speedseq_home, "speedseq") + + realign = minibam.rsplit(".", 1)[0] + ".realign" + realignbam = realign + ".bam" + margs = " -v -t {} -o {}".format(opts.cpus, realign) + if need_update(minibam, realign + ".bam", warn=True): + cmd = speedseq_bin + " realign" + cmd += margs + cmd += " {} {}".format(chrMfa, minibam) + sh(cmd) + + if realignonly: + return + + depthfile = realign + ".depth" + if need_update(realignbam, depthfile): + coverage( + [ + chrMfa, + realignbam, + "--nosort", + "--format=coverage", + "--outfile={}".format(depthfile), + ] + ) + + if store: + push_to_s3(store, depthfile) + + vcffile = realign + ".sv.vcf.gz" + if need_update(realignbam, vcffile, warn=True): + cmd = speedseq_bin + " sv" + cmd += margs + cmd += " -R {}".format(chrMfa) + cmd += " -m {}".format(opts.support) + cmd += " -B {} -D {} -S {}".format( + realignbam, realign + ".discordants.bam", realign + ".splitters.bam" + ) + sh(cmd) + + if store: + push_to_s3(store, vcffile) + + if svonly: + if cleanup: + do_cleanup(minibam, realignbam) + return + + piledriver = realign + ".piledriver" + if need_update(realignbam, piledriver): + cmd = "bamtools piledriver -fasta {}".format(chrMfa) + cmd += " -in {}".format(realignbam) + sh(cmd, outfile=piledriver) + + if store: + push_to_s3(store, piledriver) + + if cleanup: + do_cleanup(minibam, realignbam) + + +def do_cleanup(minibam, realignbam): + sh("rm -f {}* {}*".format(minibam, realignbam)) + + +if __name__ == "__main__": + main() diff --git a/jcvi/variation/impute.py b/jcvi/variation/impute.py new file mode 100644 index 00000000..3807c206 --- /dev/null +++ b/jcvi/variation/impute.py @@ -0,0 +1,384 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Impute unknown variations given an input vcf file. 
+""" +import os.path as op +import sys + +from ..apps.base import ActionDispatcher, OptionParser, logger +from ..apps.grid import MakeManager +from ..formats.base import must_open +from ..formats.vcf import VcfLine, CM +from ..utils.cbook import percentage + + +def main(): + + actions = ( + ("beagle", "use BEAGLE4.1 to impute vcf"), + ("impute", "use IMPUTE2 to impute vcf"), + ("minimac", "use MINIMAC3 to impute vcf"), + ("passthrough", "pass through Y and MT vcf"), + ("validate", "validate imputation against withheld variants"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def passthrough(args): + """ + %prog passthrough chrY.vcf chrY.new.vcf + + Pass through Y and MT vcf. + """ + p = OptionParser(passthrough.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + vcffile, newvcffile = args + fp = open(vcffile) + fw = open(newvcffile, "w") + gg = ["0/0", "0/1", "1/1"] + for row in fp: + if row[0] == "#": + print(row.strip(), file=fw) + continue + + v = VcfLine(row) + v.filter = "PASS" + v.format = "GT:GP" + probs = [0] * 3 + probs[gg.index(v.genotype)] = 1 + v.genotype = v.genotype.replace("/", "|") + ":{0}".format( + ",".join("{0:.3f}".format(x) for x in probs) + ) + print(v, file=fw) + fw.close() + + +def validate(args): + """ + %prog validate imputed.vcf withheld.vcf + + Validate imputation against withheld variants. 
+ """ + p = OptionParser(validate.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + imputed, withheld = args + register = {} + fp = open(withheld) + for row in fp: + if row[0] == "#": + continue + v = VcfLine(row) + register[(v.seqid, v.pos)] = v.genotype + + logger.debug("Imported %d records from `%s`", len(register), withheld) + + fp = must_open(imputed) + hit = concordant = 0 + seen = set() + for row in fp: + if row[0] == "#": + continue + v = VcfLine(row) + chr, pos, genotype = v.seqid, v.pos, v.genotype + if (chr, pos) in seen: + continue + seen.add((chr, pos)) + if (chr, pos) not in register: + continue + truth = register[(chr, pos)] + imputed = genotype.split(":")[0] + if "|" in imputed: + imputed = "/".join(sorted(genotype.split(":")[0].split("|"))) + # probs = [float(x) for x in genotype.split(":")[-1].split(",")] + # imputed = max(zip(probs, ["0/0", "0/1", "1/1"]))[-1] + hit += 1 + if truth == imputed: + concordant += 1 + else: + print(row.strip(), "truth={0}".format(truth), file=sys.stderr) + + logger.debug("Total concordant: %s", percentage(concordant, hit)) + + +def minimac(args): + """ + %prog batchminimac input.txt + + Use MINIMAC3 to impute vcf on all chromosomes. 
+ """ + p = OptionParser(minimac.__doc__) + p.set_home("shapeit") + p.set_home("minimac") + p.set_outfile() + p.set_chr() + p.set_ref() + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (txtfile,) = args + ref = opts.ref + mm = MakeManager() + pf = txtfile.split(".")[0] + allrawvcf = [] + alloutvcf = [] + chrs = opts.chr.split(",") + for x in chrs: + px = CM[x] + chrvcf = pf + ".{0}.vcf".format(px) + if txtfile.endswith(".vcf"): + cmd = "vcftools --vcf {0} --chr {1}".format(txtfile, x) + cmd += " --out {0}.{1} --recode".format(pf, px) + cmd += " && mv {0}.{1}.recode.vcf {2}".format(pf, px, chrvcf) + else: # 23andme + cmd = "python -m jcvi.formats.vcf from23andme {0} {1}".format(txtfile, x) + cmd += " --ref {0}".format(ref) + mm.add(txtfile, chrvcf, cmd) + + chrvcf_hg38 = pf + ".{0}.23andme.hg38.vcf".format(px) + minimac_liftover(mm, chrvcf, chrvcf_hg38, opts) + allrawvcf.append(chrvcf_hg38) + + minimacvcf = "{0}.{1}.minimac.dose.vcf".format(pf, px) + if x == "X": + minimac_X(mm, x, chrvcf, opts) + elif x in ["Y", "MT"]: + cmd = "python -m jcvi.variation.impute passthrough" + cmd += " {0} {1}".format(chrvcf, minimacvcf) + mm.add(chrvcf, minimacvcf, cmd) + else: + minimac_autosome(mm, x, chrvcf, opts) + + # keep the best line for multi-allelic markers + uniqvcf = "{0}.{1}.minimac.uniq.vcf".format(pf, px) + cmd = "python -m jcvi.formats.vcf uniq {0} > {1}".format(minimacvcf, uniqvcf) + mm.add(minimacvcf, uniqvcf, cmd) + + minimacvcf_hg38 = "{0}.{1}.minimac.hg38.vcf".format(pf, px) + minimac_liftover(mm, uniqvcf, minimacvcf_hg38, opts) + alloutvcf.append(minimacvcf_hg38) + + if len(allrawvcf) > 1: + rawhg38vcfgz = pf + ".all.23andme.hg38.vcf.gz" + cmd = "vcf-concat {0} | bgzip > {1}".format(" ".join(allrawvcf), rawhg38vcfgz) + mm.add(allrawvcf, rawhg38vcfgz, cmd) + + if len(alloutvcf) > 1: + outhg38vcfgz = pf + ".all.minimac.hg38.vcf.gz" + cmd = "vcf-concat {0} | bgzip > {1}".format(" ".join(alloutvcf), 
outhg38vcfgz) + mm.add(alloutvcf, outhg38vcfgz, cmd) + + mm.write() + + +def minimac_liftover(mm, chrvcf, chrvcf_hg38, opts): + cmd = "python -m jcvi.formats.vcf liftover {0} {1}/hg19ToHg38.over.chain.gz {2}".format( + chrvcf, opts.ref, chrvcf_hg38 + ) + mm.add(chrvcf, chrvcf_hg38, cmd) + + +def minimac_X(mm, chr, vcffile, opts): + """See details here: + http://genome.sph.umich.edu/wiki/Minimac3_Cookbook_:_Chromosome_X_Imputation + """ + pf = vcffile.rsplit(".", 1)[0] + ranges = [(1, 2699519), (2699520, 154931043), (154931044, 155270560)] + tags = ["PAR1", "NONPAR", "PAR2"] + Xvcf = [] + phasedfiles = [] + for tag, (start, end) in zip(tags, ranges): + recodefile = pf + "_{0}.recode.vcf".format(tag) + cmd = "vcftools --vcf {0} --out {1}_{2}".format(vcffile, pf, tag) + cmd += " --chr X --from-bp {0} --to-bp {1} --recode".format(start, end) + mm.add(vcffile, recodefile, cmd) + + phasedfile = shapeit_phasing(mm, chr + "_{0}".format(tag), recodefile, opts) + phasedfiles.append(phasedfile) + + pars = [x for x in phasedfiles if "_PAR" in x] + parfile = pf + "_PAR.recode.phased.vcf" + nonparfile = pf + "_NONPAR.recode.phased.vcf" + cmd = "vcf-concat {0} > {1}".format(" ".join(pars), parfile) + mm.add(pars, parfile, cmd) + + for phasedfile in (parfile, nonparfile): + outvcf = minimac_autosome(mm, chr, phasedfile, opts, phasing=False) + Xvcf.append(outvcf) + + minimacvcf = pf + ".minimac.dose.vcf" + cmd = "vcf-concat {0} | vcf-sort -c > {1}".format(" ".join(Xvcf), minimacvcf) + mm.add(Xvcf, minimacvcf, cmd) + + +def minimac_autosome(mm, chr, vcffile, opts, phasing=True): + pf = vcffile.rsplit(".", 1)[0] + kg = op.join(opts.ref, "1000GP_Phase3") + if phasing: + shapeit_phasing(mm, chr, vcffile, opts) + phasedfile = pf + ".phased.vcf" + else: + phasedfile = vcffile + + chrtag = chr + if chr == "X": + chrtag = "X.Non.Pseudo.Auto" if "NONPAR" in vcffile else "X.Pseudo.Auto" + + opf = pf + ".minimac" + minimac_cmd = op.join(opts.minimac_home, "Minimac3") + + cmd = minimac_cmd + " 
--chr {0}".format(chr) + cmd += ( + " --refHaps {0}/{1}.1000g.Phase3.v5.With.Parameter.Estimates.m3vcf.gz".format( + kg, chrtag + ) + ) + cmd += " --haps {0} --prefix {1}".format(phasedfile, opf) + cmd += " --format GT,GP --nobgzip" + outvcf = opf + ".dose.vcf" + mm.add(phasedfile, outvcf, cmd) + + return outvcf + + +def beagle(args): + """ + %prog beagle input.vcf 1 + + Use BEAGLE4.1 to impute vcf on chromosome 1. + """ + p = OptionParser(beagle.__doc__) + p.set_home("beagle") + p.set_ref() + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + vcffile, chr = args + pf = vcffile.rsplit(".", 1)[0] + outpf = pf + ".beagle" + outfile = outpf + ".vcf.gz" + + mm = MakeManager() + beagle_cmd = opts.beagle_home + kg = op.join(opts.ref, "1000GP_Phase3") + cmd = beagle_cmd + " gt={0}".format(vcffile) + cmd += " ref={0}/chr{1}.1kg.phase3.v5a.bref".format(kg, chr) + cmd += " map={0}/plink.chr{1}.GRCh37.map".format(kg, chr) + cmd += " out={0}".format(outpf) + cmd += " nthreads=16 gprobs=true" + mm.add(vcffile, outfile, cmd) + + mm.write() + + +def shapeit_phasing(mm, chr, vcffile, opts, vcf=True): + kg = op.join(opts.ref, "1000GP_Phase3") + shapeit_cmd = op.join(opts.shapeit_home, "shapeit") + + rpf = "{0}/1000GP_Phase3_chr{1}".format(kg, chr) + pf = vcffile.rsplit(".", 1)[0] + mapfile = "{0}/genetic_map_chr{1}_combined_b37.txt".format(kg, chr) + mapfile = mapfile.replace("NONPAR", "nonPAR") + + hapsfile = pf + ".haps" + cmd = shapeit_cmd + " --input-vcf {0}".format(vcffile) + cmd += " --input-map {0}".format(mapfile) + cmd += " --effective-size 11418" + cmd += " --output-max {0}.haps {0}.sample".format(pf) + cmd += " --input-ref {0}.hap.gz {0}.legend.gz".format(rpf) + cmd += " {0}/1000GP_Phase3.sample --output-log {1}.log".format(kg, pf) + if chr == "X": + cmd += " --chrX" + mm.add(vcffile, hapsfile, cmd) + + if not vcf: + return + + phasedfile = pf + ".phased.vcf" + cmd = shapeit_cmd + " -convert --input-haps 
{0}".format(pf) + cmd += " --output-vcf {0}".format(phasedfile) + mm.add(hapsfile, phasedfile, cmd) + + return phasedfile + + +def impute(args): + """ + %prog impute input.vcf hs37d5.fa 1 + + Use IMPUTE2 to impute vcf on chromosome 1. + """ + from pyfaidx import Fasta + + p = OptionParser(impute.__doc__) + p.set_home("shapeit") + p.set_home("impute") + p.set_ref() + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 3: + sys.exit(not p.print_help()) + + vcffile, fastafile, chr = args + mm = MakeManager() + pf = vcffile.rsplit(".", 1)[0] + hapsfile = pf + ".haps" + kg = op.join(opts.ref, "1000GP_Phase3") + shapeit_phasing(mm, chr, vcffile, opts) + + fasta = Fasta(fastafile) + size = len(fasta[chr]) + binsize = 5000000 + bins = size / binsize # 5Mb bins + if size % binsize: + bins += 1 + impute_cmd = op.join(opts.impute_home, "impute2") + chunks = [] + for x in range(bins + 1): + chunk_start = x * binsize + 1 + chunk_end = min(chunk_start + binsize - 1, size) + outfile = pf + ".chunk{0:02d}.impute2".format(x) + mapfile = "{0}/genetic_map_chr{1}_combined_b37.txt".format(kg, chr) + rpf = "{0}/1000GP_Phase3_chr{1}".format(kg, chr) + cmd = impute_cmd + " -m {0}".format(mapfile) + cmd += " -known_haps_g {0}".format(hapsfile) + cmd += " -h {0}.hap.gz -l {0}.legend.gz".format(rpf) + cmd += " -Ne 20000 -int {0} {1}".format(chunk_start, chunk_end) + cmd += " -o {0} -allow_large_regions -seed 367946".format(outfile) + cmd += " && touch {0}".format(outfile) + mm.add(hapsfile, outfile, cmd) + chunks.append(outfile) + + # Combine all the files + imputefile = pf + ".impute2" + cmd = "cat {0} > {1}".format(" ".join(chunks), imputefile) + mm.add(chunks, imputefile, cmd) + + # Convert to vcf + vcffile = pf + ".impute2.vcf" + cmd = "python -m jcvi.formats.vcf fromimpute2 {0} {1} {2} > {3}".format( + imputefile, fastafile, chr, vcffile + ) + mm.add(imputefile, vcffile, cmd) + mm.write() + + +if __name__ == "__main__": + main() diff --git a/jcvi/variation/phase.py 
b/jcvi/variation/phase.py new file mode 100644 index 00000000..d1a6f70c --- /dev/null +++ b/jcvi/variation/phase.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Read-based phasing. +""" +import sys + +try: + import vcf +except ImportError: + pass +import pysam + +from ..apps.base import ActionDispatcher, OptionParser, logger + + +class CPRA: + def __init__(self, vcf_record): + r = vcf_record + self.chr = r.CHROM + self.pos = r.POS + self.ref = r.REF + self.alt = r.ALT + self.alleles = [self.ref] + self.alt + + @property + def is_valid(self): + """Only retain SNPs or single indels, and are bi-allelic""" + return len(self.ref) == 1 and len(self.alt) == 1 and len(self.alt[0]) == 1 + + def __str__(self): + return "_".join(str(x) for x in (self.chr, self.pos, self.ref, self.alt[0])) + + __repr__ = __str__ + + +def main(): + + actions = ( + ("prepare", "convert vcf and bam to variant list"), + ("counts", "collect allele counts from RO/AO fields"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def counts(args): + """ + %prog counts vcffile + + Collect allele counts from RO and AO fields. + """ + p = OptionParser(counts.__doc__) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (vcffile,) = args + vcf_reader = vcf.Reader(open(vcffile)) + for r in vcf_reader: + v = CPRA(r) + if not v.is_valid: + continue + for sample in r.samples: + ro = sample["RO"] + ao = sample["AO"] + print("\t".join(str(x) for x in (v, ro, ao))) + + +def prepare(args): + """ + %prog prepare vcffile bamfile + + Convert vcf and bam to variant list. 
Inputs are: + - vcffile: contains the positions of variants + - bamfile: contains the reads that hold the variants + + Outputs: + - reads_to_phase: phasing for each read + - variants_to_phase: in format of phased vcf + """ + p = OptionParser(prepare.__doc__) + p.add_argument("--accuracy", default=0.85, help="Sequencing per-base accuracy") + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + vcffile, bamfile = args + right = "{:.2f}".format(opts.accuracy) + wrong = "{:.2f}".format(1 - opts.accuracy) + vcf_reader = vcf.Reader(open(vcffile)) + variants = [] + for r in vcf_reader: + v = CPRA(r) + if not v.is_valid: + continue + variants.append(v) + + logger.debug( + "A total of %d bi-allelic SNVs imported from `%s`", len(variants), vcffile + ) + + bamfile = pysam.AlignmentFile(bamfile, "rb") + for v in variants: + pos = v.pos - 1 + for column in bamfile.pileup(v.chr, pos, pos + 1, truncate=True): + for read in column.pileups: + query_position = read.query_position + if query_position is None: + continue + read_name = read.alignment.query_name + query_base = read.alignment.query_sequence[query_position] + a, b = v.alleles + if query_base == a: + other_base = b + elif query_base == b: + other_base = a + else: + continue + print( + " ".join( + str(x) + for x in (v, read_name, query_base, right, other_base, wrong) + ) + ) + + +if __name__ == "__main__": + main() diff --git a/jcvi/variation/snp.py b/jcvi/variation/snp.py new file mode 100644 index 00000000..f289a45d --- /dev/null +++ b/jcvi/variation/snp.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Analyze SNPs in re-sequencing panels. 
+""" +import sys + +from ..apps.base import ActionDispatcher, OptionParser, logger, need_update, sh +from ..apps.grid import MakeManager +from ..formats.base import is_number, write_file +from ..formats.fasta import Fasta + + +def main(): + + actions = ( + ("frommaf", "convert to four-column tabular format from MAF"), + ("freq", "call snp frequencies and keep AO and RO"), + ("rmdup", "remove PCR duplicates from BAM files"), + ("freebayes", "call snps using freebayes"), + ("mpileup", "call snps using samtools-mpileup"), + ("gatk", "call snps using GATK"), + ("somatic", "generate series of SPEEDSESQ-somatic commands"), + ("mappability", "generate 50mer mappability for reference genome"), + ) + p = ActionDispatcher(actions) + p.dispatch(globals()) + + +def mappability(args): + """ + %prog mappability reference.fasta + + Generate 50mer mappability for reference genome. Commands are based on gem + mapper. See instructions: + + """ + p = OptionParser(mappability.__doc__) + p.add_argument("--mer", default=50, type=int, help="User mer size") + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (ref,) = args + K = opts.mer + pf = ref.rsplit(".", 1)[0] + mm = MakeManager() + + gem = pf + ".gem" + cmd = "gem-indexer -i {} -o {}".format(ref, pf) + mm.add(ref, gem, cmd) + + mer = pf + ".{}mer".format(K) + mapb = mer + ".mappability" + cmd = "gem-mappability -I {} -l {} -o {} -T {}".format(gem, K, mer, opts.cpus) + mm.add(gem, mapb, cmd) + + wig = mer + ".wig" + cmd = "gem-2-wig -I {} -i {} -o {}".format(gem, mapb, mer) + mm.add(mapb, wig, cmd) + + bw = mer + ".bw" + cmd = "wigToBigWig {} {}.sizes {}".format(wig, mer, bw) + mm.add(wig, bw, cmd) + + bg = mer + ".bedGraph" + cmd = "bigWigToBedGraph {} {}".format(bw, bg) + mm.add(bw, bg, cmd) + + merged = mer + ".filtered-1.merge.bed" + cmd = "python -m jcvi.formats.bed filterbedgraph {} 1".format(bg) + mm.add(bg, merged, cmd) + + mm.write() + + +def gatk(args): + """ + %prog 
gatk bamfile reference.fasta + + Call SNPs based on GATK best practices. + """ + p = OptionParser(gatk.__doc__) + p.add_argument( + "--indelrealign", + default=False, + action="store_true", + help="Perform indel realignment", + ) + p.set_home("gatk") + p.set_home("picard") + p.set_phred() + p.set_cpus(cpus=24) + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + bamfile, ref = args + pf = bamfile.rsplit(".", 1)[0] + mm = MakeManager() + picard = "java -Xmx32g -jar {0}/picard.jar".format(opts.picard_home) + tk = "java -Xmx32g -jar {0}/GenomeAnalysisTK.jar".format(opts.gatk_home) + tk += " -R {0}".format(ref) + + # Step 0 - build reference + dictfile = ref.rsplit(".", 1)[0] + ".dict" + cmd1 = picard + " CreateSequenceDictionary" + cmd1 += " R={0} O={1}".format(ref, dictfile) + cmd2 = "samtools faidx {0}".format(ref) + mm.add(ref, dictfile, (cmd1, cmd2)) + + # Step 1 - sort bam + sortedbamfile = pf + ".sorted.bam" + cmd = picard + " SortSam" + cmd += " INPUT={0} OUTPUT={1}".format(bamfile, sortedbamfile) + cmd += " SORT_ORDER=coordinate CREATE_INDEX=true" + mm.add(bamfile, sortedbamfile, cmd) + + # Step 2 - mark duplicates + dedupbamfile = pf + ".dedup.bam" + cmd = picard + " MarkDuplicates" + cmd += " INPUT={0} OUTPUT={1}".format(sortedbamfile, dedupbamfile) + cmd += " METRICS_FILE=dedup.log CREATE_INDEX=true" + mm.add(sortedbamfile, dedupbamfile, cmd) + + if opts.indelrealign: + # Step 3 - create indel realignment targets + intervals = pf + ".intervals" + cmd = tk + " -T RealignerTargetCreator" + cmd += " -I {0} -o {1}".format(dedupbamfile, intervals) + mm.add(dedupbamfile, intervals, cmd) + + # Step 4 - indel realignment + realignedbamfile = pf + ".realigned.bam" + cmd = tk + " -T IndelRealigner" + cmd += " -targetIntervals {0}".format(intervals) + cmd += " -I {0} -o {1}".format(dedupbamfile, realignedbamfile) + mm.add((dictfile, intervals), realignedbamfile, cmd) + else: + realignedbamfile = dedupbamfile + + # Step 5 - SNP 
calling + vcf = pf + ".vcf" + cmd = tk + " -T HaplotypeCaller" + cmd += " -I {0}".format(realignedbamfile) + cmd += " --genotyping_mode DISCOVERY" + cmd += " -stand_emit_conf 10 -stand_call_conf 30" + cmd += " -nct {0}".format(opts.cpus) + cmd += " -o {0}".format(vcf) + if opts.phred == "64": + cmd += " --fix_misencoded_quality_scores" + mm.add(realignedbamfile, vcf, cmd) + + # Step 6 - SNP filtering + filtered_vcf = pf + ".filtered.vcf" + cmd = tk + " -T VariantFiltration" + cmd += " -V {0}".format(vcf) + cmd += ' --filterExpression "DP < 10 || DP > 300 || QD < 2.0 || FS > 60.0 || MQ < 40.0"' + cmd += ' --filterName "LOWQUAL"' + cmd += ' --genotypeFilterExpression "isHomVar == 1"' + cmd += ' --genotypeFilterName "HOMOVAR"' + cmd += ' --genotypeFilterExpression "isHet == 1"' + cmd += ' --genotypeFilterName "HET"' + cmd += " -o {0}".format(filtered_vcf) + mm.add(vcf, filtered_vcf, cmd) + + mm.write() + + +def somatic(args): + """ + %prog somatic ref.fasta *.bam > somatic.sh + + Useful to identify somatic mutations in each sample compared to all other + samples. Script using SPEEDSEQ-somatic will be written to stdout. + """ + p = OptionParser(somatic.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 3: + sys.exit(not p.print_help()) + + ref, bams = args[0], args[1:] + tcmd = "~/export/speedseq/bin/speedseq somatic" + tcmd += " -t 32 -F .2 -C 3 -q 30" + cmds = [] + for b in bams: + pf = b.split(".")[0] + cmd = tcmd + cmd += " -o {0}".format(pf) + others = ",".join(sorted(set(bams) - {b})) + cmd += " {0} {1} {2}".format(ref, others, b) + cmds.append(cmd) + + write_file("somatic.sh", "\n".join(cmds)) + + +def rmdup(args): + """ + %prog rmdup *.bam > rmdup.cmds + + Remove PCR duplicates from BAM files, generate a list of commands. 
+ """ + p = OptionParser(rmdup.__doc__) + p.add_argument( + "-S", default=False, action="store_true", help="Treat PE reads as SE in rmdup" + ) + opts, args = p.parse_args(args) + + if len(args) < 1: + sys.exit(not p.print_help()) + + bams = args + cmd = "samtools rmdup" + if opts.S: + cmd += " -S" + for b in bams: + if "rmdup" in b: + continue + rb = b.rsplit(".", 1)[0] + ".rmdup.bam" + if not need_update(b, rb): + continue + print(" ".join((cmd, b, rb))) + + +def mpileup(args): + """ + %prog mpileup prefix ref.fa *.bam + + Call SNPs using samtools mpileup. + """ + p = OptionParser(mpileup.__doc__) + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + prefix, ref = args[0:2] + bams = args[2:] + cmd = "samtools mpileup -P ILLUMINA -E -ugD -r {0}" + cmd += " -f {0} {1}".format(ref, " ".join(bams)) + fmd = "bcftools view -cvg -" + seqids = list(Fasta(ref).iterkeys_ordered()) + for s in seqids: + outfile = prefix + ".{0}.vcf".format(s) + print(cmd.format(s), "|", fmd, ">", outfile) + + +def freebayes(args): + """ + %prog freebayes prefix ref.fa *.bam + + Call SNPs using freebayes. + """ + p = OptionParser(freebayes.__doc__) + p.add_argument("--mindepth", default=3, type=int, help="Minimum depth") + p.add_argument("--minqual", default=20, type=int, help="Minimum quality") + opts, args = p.parse_args(args) + + if len(args) < 2: + sys.exit(not p.print_help()) + + prefix, ref = args[0:2] + bams = args[2:] + cmd = "bamaddrg -R {0}" + cmd += " " + " ".join("-b {0}".format(x) for x in bams) + fmd = "freebayes --stdin -C {0} -f {1}".format(opts.mindepth, ref) + seqids = list(Fasta(ref).iterkeys_ordered()) + for s in seqids: + outfile = prefix + ".{0}.vcf".format(s) + print(cmd.format(s), "|", fmd + " -r {0} -v {1}".format(s, outfile)) + + +def freq(args): + """ + %prog freq fastafile bamfile + + Call SNP frequencies and generate GFF file. 
+ """ + p = OptionParser(freq.__doc__) + p.add_argument("--mindepth", default=3, type=int, help="Minimum depth") + p.add_argument("--minqual", default=20, type=int, help="Minimum quality") + p.set_outfile() + opts, args = p.parse_args(args) + + if len(args) != 2: + sys.exit(not p.print_help()) + + fastafile, bamfile = args + cmd = "freebayes -f {0} --pooled-continuous {1}".format(fastafile, bamfile) + cmd += " -F 0 -C {0}".format(opts.mindepth) + cmd += ' | vcffilter -f "QUAL > {0}"'.format(opts.minqual) + cmd += " | vcfkeepinfo - AO RO TYPE" + sh(cmd, outfile=opts.outfile) + + +def frommaf(args): + """ + %prog frommaf maffile + + Convert to four-column tabular format from MAF. + """ + p = OptionParser(frommaf.__doc__) + p.add_argument("--validate", help="Validate coordinates against FASTA") + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (maf,) = args + snpfile = maf.rsplit(".", 1)[0] + ".vcf" + fp = open(maf) + fw = open(snpfile, "w") + total = 0 + id = "." 
+ qual = 20 + filter = "PASS" + info = "DP=20" + print("##fileformat=VCFv4.0", file=fw) + print("#CHROM POS ID REF ALT QUAL FILTER INFO".replace(" ", "\t"), file=fw) + for row in fp: + atoms = row.split() + c, pos, ref, alt = atoms[:4] + if is_number(c, int): + c = int(c) + else: + continue + c = "chr{0:02d}".format(c) + pos = int(pos) + print( + "\t".join(str(x) for x in (c, pos, id, ref, alt, qual, filter, info)), + file=fw, + ) + total += 1 + fw.close() + + validate = opts.validate + if not validate: + return + + from jcvi.utils.cbook import percentage + + f = Fasta(validate) + fp = open(snpfile) + nsnps = 0 + for row in fp: + if row[0] == "#": + continue + + c, pos, id, ref, alt, qual, filter, info = row.split("\t") + pos = int(pos) + feat = dict(chr=c, start=pos, stop=pos) + s = f.sequence(feat) + s = str(s) + assert s == ref, "Validation error: {0} is {1} (expect: {2})".format( + feat, s, ref + ) + nsnps += 1 + if nsnps % 50000 == 0: + logger.debug("SNPs parsed: %s", percentage(nsnps, total)) + logger.debug("A total of %d SNPs validated and written to `%s`.", nsnps, snpfile) + + +if __name__ == "__main__": + main() diff --git a/jcvi/variation/str.py b/jcvi/variation/str.py new file mode 100644 index 00000000..750df228 --- /dev/null +++ b/jcvi/variation/str.py @@ -0,0 +1,1568 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +""" +Identify repeat numbers in STR repeats. 
+""" +import re +import os +import os.path as op +import json +import sys + +from math import log, ceil +from collections import Counter, defaultdict +from multiprocessing import Pool + +import numpy as np +import pandas as pd +import pyfasta + +try: + import vcf +except ImportError: + pass + +from ..apps.base import ( + ActionDispatcher, + OptionParser, + datafile, + logger, + mkdir, + need_update, + sh, +) +from ..apps.grid import MakeManager +from ..formats.base import LineFile, must_open +from ..formats.base import timestamp +from ..formats.bed import natsorted +from ..utils.aws import check_exists_s3, ls_s3, pull_from_s3, push_to_s3 +from ..utils.cbook import percentage, uniqify + + +REF = "hg38" +REPO = datafile("TREDs.meta.csv") + +READLEN = 150 +MINSCORE = 36 +YSEARCH_HAPLOTYPE = """ +DYS393 DYS390 DYS19/DYS394 DYS19b DYS391 DYS385a DYS385b DYS426 DYS388 DYS439 +DYS389I DYS392 DYS389B DYS458 DYS459a/b DYS459a/b DYS455 DYS454 DYS447 DYS437 +DYS448 DYS449 DYS464a/b/c/d DYS464a/b/c/d DYS464a/b/c/d DYS464a/b/c/d DYS464e DYS464f DYS464g DYS460 +GATA-H4 YCAIIa YCAIIb DYS456 DYS607 DYS576 DYS570 CDYa CDYb DYS442 +DYS438 DYS531 DYS578 DYS395S1a/b DYS395S1a/b DYS590 DYS537 DYS641 DYS472 DYS406S1 +DYS511 DYS425 DYS413a DYS413b DYS557 DYS594 DYS436 DYS490 DYS534 DYS450 +DYS444 DYS481 DYS520 DYS446 DYS617 DYS568 DYS487 DYS572 DYS640 DYS492 +DYS565 DYS461 DYS462 GATA-A10 DYS635 GAAT1B07 DYS441 DYS445 DYS452 DYS463 +DYS434 DYS435 DYS485 DYS494 DYS495 DYS505 DYS522 DYS533 DYS549 DYS556 +DYS575 DYS589 DYS636 DYS638 DYS643 DYS714 DYS716 DYS717 DYS726 DXYS156-Y +""".split() +YSEARCH_LL = """ +L1 L2 L3 L4 L5 L6 L7 L8 L9 L10 +L11 L12 L13 L14 L15 L16 L17 L18 L19 L20 +L21 L22 L23 L24 L25 L26 L27 L28 L29 L30 +L31 L32 L33 L34 L35 L36 L37 L38 L39 L40 +L41 L54 L55 L56 L57 L58 L59 L60 L61 L62 +L63 L42 L64 L65 L66 L67 L68 L69 L70 L71 +L49 L72 L73 L51 L74 L75 L76 L77 L78 L79 +L80 L43 L44 L45 L46 L47 L48 L50 L52 L53 +L81 L82 L83 L84 L85 L86 L87 L88 L89 L90 +L91 L92 L93 L94 L95 L96 L97 
L98 L99 L100 +""".split() +YHRD_YFILER = """ +DYS456 DYS389I DYS390 DYS389B DYS458 DYS19/DYS394 DYS385 +DYS393 DYS391 DYS439 DYS635 DYS392 GATA-H4 DYS437 DYS438 DYS448 +""".split() +YHRD_YFILERPLUS = """ +DYS576 DYS389I DYS635 DYS389B DYS627 DYS460 DYS458 DYS19/DYS394 GATA-H4 DYS448 DYS391 +DYS456 DYS390 DYS438 DYS392 DYS518 DYS570 DYS437 DYS385a DYS449 +DYS393 DYS439 DYS481 DYF387S1 DYS533 +""".split() +USYSTR_ALL = """ +DYF387S1 DYS19/DYS394 DYS385 DYS389I +DYS389B DYS390 DYS391 DYS392 +DYS393 DYS437 DYS438 DYS439 +DYS448 DYS449 DYS456 DYS458 +DYS460 DYS481 DYS518 DYS533 +DYS549 DYS570 DYS576 DYS627 +DYS635 DYS643 GATA-H4 +""".split() + + +class TREDsRepo(dict): + def __init__(self, ref=REF): + super().__init__() + self.ref = ref + df = pd.read_csv(REPO, index_col=0) + self.names = [] + for name, row in df.iterrows(): + self[name] = TRED(name, row, ref=ref) + self.names.append(name) + self.df = df + + def to_json(self): + s = self.df.to_json(orient="index") + s = s.decode("windows-1252").encode("utf8") + s = json.dumps(json.loads(s), sort_keys=True, indent=2) + return s + + def set_ploidy(self, haploid): + if not haploid: + return + for k, v in self.items(): + if v.chr in haploid: + v.ploidy = 1 + + def get_info(self, tredName): + tr = self.get(tredName) + info = "END={};MOTIF={};NS=1;REF={};CR={};IH={};RL={};VT=STR".format( + tr.repeat_end, + tr.repeat, + tr.ref_copy, + tr.cutoff_risk, + tr.inheritance, + tr.ref_copy * len(tr.repeat), + ) + return tr.chr, tr.repeat_start, tr.ref_copy, tr.repeat, info + + +class TRED(object): + def __init__(self, name, row, ref=REF): + + self.row = row + self.name = name + self.repeat = row["repeat"] + self.motif = row["motif"] + repeat_location_field = "repeat_location" + if ref != REF: + repeat_location_field += "." 
+ ref.split("_")[0] + repeat_location = row[repeat_location_field] + if "_nochr" in ref: # Some reference version do not have chr + repeat_location = repeat_location.replace("chr", "") + self.chr, repeat_location = repeat_location.split(":") + repeat_start, repeat_end = repeat_location.split("-") + self.repeat_start = int(repeat_start) + self.repeat_end = int(repeat_end) + self.ref_copy = (self.repeat_end - self.repeat_start + 1) / len(self.repeat) + self.prefix = row["prefix"] + self.suffix = row["suffix"] + self.cutoff_prerisk = row["cutoff_prerisk"] + self.cutoff_risk = row["cutoff_risk"] + self.inheritance = row["inheritance"] + self.is_xlinked = self.inheritance[0] == "X" + self.is_recessive = self.inheritance[-1] == "R" + self.is_expansion = row["mutation_nature"] == "increase" + self.ploidy = 2 + + def __repr__(self): + return "{} inheritance={} id={}_{}_{}".format( + self.name, self.inheritance, self.chr, self.repeat_start, self.repeat + ) + + def __str__(self): + return ";".join( + str(x) + for x in ( + self.name, + self.repeat, + self.chr, + self.repeat_start, + self.repeat_end, + self.prefix, + self.suffix, + ) + ) + + +class STRLine(object): + def __init__(self, line): + args = line.split() + self.seqid = args[0] + self.start = int(args[1]) + self.end = int(args[2]) + self.period = int(args[3]) + self.copynum = args[4] + self.consensusSize = int(args[5]) + self.pctmatch = int(args[6]) + self.pctindel = int(args[7]) + self.score = args[8] + self.A = args[9] + self.C = args[10] + self.G = args[11] + self.T = args[12] + self.entropy = float(args[13]) + self.motif = args[14] + assert self.period == len(self.motif) + self.name = args[15] if len(args) > 15 else None + + def __str__(self): + fields = [ + self.seqid, + self.start, + self.end, + self.period, + self.copynum, + self.consensusSize, + self.pctmatch, + self.pctindel, + self.score, + self.A, + self.C, + self.G, + self.T, + "{0:.2f}".format(self.entropy), + self.motif, + ] + if self.name is not None: + 
fields += [self.name] + return "\t".join(str(x) for x in fields) + + @property + def longname(self): + return "_".join(str(x) for x in (self.seqid, self.start, self.motif)) + + def is_valid(self, maxperiod=6, maxlength=READLEN, minscore=MINSCORE): + return ( + 1 <= self.period <= maxperiod + and (self.end - self.start + 1) <= maxlength + and self.score >= minscore + ) + + def calc_entropy(self): + total = self.A + self.C + self.G + self.T + if total == 0: # Perhaps they are all Ns - might crash in lobstrindex() + return 0 + fractions = [x * 1.0 / total for x in [self.A, self.C, self.G, self.T]] + entropy = sum([-1.0 * x * log(x, 2) for x in fractions if x != 0]) + return entropy + + def iter_exact_str(self, genome): + pat = re.compile("(({0}){{2,}})".format(self.motif)) + start = self.start + s = genome[self.seqid][self.start - 1 : self.end].upper() + for m in re.finditer(pat, s): + self.start = start + m.start() + length = m.end() - m.start() + subseq = m.group(0) + assert length % self.period == 0 + assert subseq.startswith(self.motif) + + self.end = self.start - 1 + length + self.copynum = length / self.period + self.pctmatch = 100 + self.pctindel = 0 + self.score = 2 * length + self.fix_counts(subseq) + yield self + + def fix_counts(self, subseq): + length = int(ceil(self.period * float(self.copynum))) + # Sanity check for length, otherwise lobSTR misses units + self.end = max(self.end, self.start + length - 1) + self.A = subseq.count("A") + self.C = subseq.count("C") + self.G = subseq.count("G") + self.T = subseq.count("T") + self.entropy = self.calc_entropy() + + +class STRFile(LineFile): + def __init__(self, lobstr_home, db="hg38"): + filename = op.join(lobstr_home, "{0}/index.info".format(db)) + super().__init__(filename) + fp = open(filename) + for row in fp: + self.append(STRLine(row)) + + @property + def ids(self): + return [s.longname for s in self] + + @property + def register(self): + return dict(((s.seqid, s.start), s.name) for s in self) + + +class 
LobSTRvcf(dict): + def __init__(self, columnidsfile="STR.ids"): + super().__init__() + self.samplekey = None + self.evidence = {} # name: (supporting reads, stutter reads) + if columnidsfile: + fp = open(columnidsfile) + self.columns = [x.strip() for x in fp] + logger.debug( + "A total of {} markers imported from `{}`".format( + len(self.columns), columnidsfile + ) + ) + + def parse(self, filename, filtered=True, cleanup=False): + self.samplekey = op.basename(filename).split(".")[0] + logger.debug("Parse `{}` (filtered={})".format(filename, filtered)) + fp = must_open(filename) + reader = vcf.Reader(fp) + for record in reader: + if filtered and record.FILTER: + continue + info = record.INFO + ref = float(info["REF"]) + rpa = info.get("RPA", ref) + motif = info["MOTIF"] + name = "_".join(str(x) for x in (record.CHROM, record.POS, motif)) + for sample in record.samples: + gt = sample["GT"] + if filtered and sample["FT"] != "PASS": + continue + if gt == "0/0": + alleles = (ref, ref) + elif gt in ("0/1", "1/0"): + alleles = (ref, rpa[0]) + elif gt == "1/1": + alleles = (rpa[0], rpa[0]) + elif gt == "1/2": + alleles = rpa + try: + self[name] = ",".join(str(int(x)) for x in sorted(alleles)) + except: + self[name] = "-,-" + + # Collect supporting read evidence + motif_length = len(motif) + adjusted_alleles = [(x - ref) * motif_length for x in alleles] + support = stutters = 0 + allreads = sample["ALLREADS"] + for r in allreads.split(";"): + k, v = r.split("|") + k, v = int(k), int(v) + min_dist = min([abs(k - x) for x in adjusted_alleles]) + if motif_length * 0.5 < min_dist < motif_length * 1.5: + stutters += v + support += v + self.evidence[name] = "{},{}".format(stutters, support) + + if cleanup: + sh("rm -f {}".format(op.basename(filename))) + + @property + def csvline(self): + return ",".join([self.get(c, "-1,-1") for c in self.columns]) + + @property + def evline(self): + return ",".join([self.evidence.get(c, "-1,-1") for c in self.columns]) + + +def main(): + + 
def treds(args):
    """
    %prog treds hli.tred.tsv

    Compile allele_frequency for TREDs results. Write data.tsv, meta.tsv and
    mask.tsv in one go.
    """
    from jcvi.apps.base import datafile

    p = OptionParser(treds.__doc__)
    p.add_argument(
        "--csv", default=False, action="store_true", help="Also write `meta.csv`"
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    calls = pd.read_csv(args[0], sep="\t")
    tred_meta = pd.read_csv(datafile("TREDs.meta.csv"))

    # `kept_tags` are the columns pulled from the calls table; `kept_ids` are
    # the names those columns get in the data file.
    kept_tags = ["SampleKey"]
    kept_ids = ["SampleKey"]
    frequencies = []
    for abbreviation, tred_id in zip(tred_meta["abbreviation"], tred_meta["id"]):
        first = "{}.1".format(abbreviation)
        second = "{}.2".format(abbreviation)
        if second in calls:
            kept_tags.append(second)
            kept_ids.append(tred_id)
            # Pool both allele columns before counting.
            pooled = np.array(list(calls[first]) + list(calls[second]))
            frequencies.append(counts_to_af(alleles_to_counts(pooled)))
        else:
            # TRED absent from the calls table: empty frequency spectrum.
            frequencies.append("{}")

    tred_meta["allele_frequency"] = frequencies

    metafile = "TREDs_{}_SEARCH.meta.tsv".format(timestamp())
    tred_meta.to_csv(metafile, sep="\t", index=False)
    logger.debug("File `{}` written.".format(metafile))
    if opts.csv:
        metacsvfile = metafile.rsplit(".", 1)[0] + ".csv"
        tred_meta.to_csv(metacsvfile, index=False)
        logger.debug("File `{}` written.".format(metacsvfile))

    selected = calls[kept_tags]
    selected.columns = kept_ids
    datatsv = "TREDs_{}_SEARCH.data.tsv".format(timestamp())
    selected.to_csv(datatsv, sep="\t", index=False)
    logger.debug("File `{}` written.".format(datatsv))

    mask([datatsv, metafile])
def write_filtered(vcffile, lhome, store=None):
    """
    Run lobSTR's `lobSTR_filter_vcf.py` on one VCF and return the output name.

    An ``s3://`` input is first fetched with `pull_from_s3`. The filtered VCF
    is written next to the working directory under the input's basename with
    ``.vcf`` replaced by ``.filtered.vcf``, and pushed to `store` when given.
    """
    local_vcf = pull_from_s3(vcffile) if vcffile.startswith("s3://") else vcffile
    filteredvcf = op.basename(local_vcf).replace(".vcf", ".filtered.vcf")

    # Stricter filters kept for reference but currently disabled:
    #   --loc-call-rate 0.8 --loc-max-ref-length 80
    #   --call-cov 5 --call-log-score 0.8 --call-dist-end 20
    cmd = "".join(
        (
            "python {}/scripts/lobSTR_filter_vcf.py".format(lhome),
            " --vcf {}".format(local_vcf),
            " --loc-cov 5 --loc-log-score 0.8",
        )
    )
    sh(cmd, outfile=filteredvcf)

    if store:
        push_to_s3(store, filteredvcf)
    return filteredvcf
def write_meta(af_file, gene_map, blacklist, filename="meta.tsv"):
    """
    Convert an allele-frequency table into the tab-separated meta file.

    Args:
        af_file: Path of a whitespace-separated table with three columns per
            row: locus (``seqid_pos_motif``), allele frequency, filter verdict.
        gene_map: Mapping of ``(seqid, pos)`` (both strings) to a gene name.
        blacklist: Container of locus ids to leave out.
        filename: Output path.

    Rows whose verdict is ``MISSING`` and blacklisted loci are skipped.
    """
    header = "id title gene_name variant_type motif allele_frequency".replace(" ", "\t")
    variant_type = "short tandem repeats"
    title = "Short tandem repeats ({})n"
    # Both handles are closed even when a malformed row raises.
    with open(af_file) as fp, open(filename, "w") as fw:
        print(header, file=fw)
        for row in fp:
            locus, af, remove = row.split()
            if remove == "MISSING":
                continue
            if locus in blacklist:
                continue

            # Split from the right: the seqid itself may contain underscores
            # (e.g. `chr1_KI270706v1_random`), position and motif do not.
            seqid, pos, motif = locus.rsplit("_", 2)
            gene_name = gene_map.get((seqid, pos), "")
            print(
                "\t".join(
                    (locus, title.format(motif), gene_name, variant_type, motif, af)
                ),
                file=fw,
            )
    logger.debug("Write meta file to `{}`".format(filename))
treds, df + + +def meta(args): + """ + %prog meta data.bin samples STR.ids STR-exons.wo.bed + + Compute allele frequencies and prune sites based on missingness. + + Filter subset of loci that satisfy: + 1. no redundancy (unique chr:pos) + 2. variable (n_alleles > 1) + 3. low level of missing data (>= 50% autosomal + X, > 25% for Y) + + Write meta file with the following infor: + 1. id + 2. title + 3. gene_name + 4. variant_type + 5. motif + 6. allele_frequency + + `STR-exons.wo.bed` can be generated like this: + $ tail -n 694105 /mnt/software/lobSTR/hg38/index.tab | cut -f1-3 > all-STR.bed + $ intersectBed -a all-STR.bed -b all-exons.bed -wo > STR-exons.wo.bed + """ + p = OptionParser(meta.__doc__) + p.add_argument( + "--cutoff", + default=0.5, + type=float, + help="Percent observed required (chrY half cutoff)", + ) + p.set_cpus() + opts, args = p.parse_args(args) + + if len(args) != 4: + sys.exit(not p.print_help()) + + binfile, sampleids, strids, wobed = args + cutoff = opts.cutoff + + af_file = "allele_freq" + if need_update(binfile, af_file): + df, m, samples, loci = read_binfile(binfile, sampleids, strids) + nalleles = len(samples) + fw = must_open(af_file, "w") + for i, locus in enumerate(loci): + a = m[:, i] + counts = alleles_to_counts(a) + af = counts_to_af(counts) + seqid = locus.split("_")[0] + remove = counts_filter(counts, nalleles, seqid, cutoff=cutoff) + print("\t".join((locus, af, remove)), file=fw) + fw.close() + + logger.debug("Load gene intersections from `{}`".format(wobed)) + fp = open(wobed) + gene_map = defaultdict(set) + for row in fp: + chr1, start1, end1, chr2, start2, end2, name, ov = row.split() + gene_map[(chr1, start1)] |= set(name.split(",")) + for k, v in gene_map.items(): + non_enst = sorted(x for x in v if not x.startswith("ENST")) + # enst = sorted(x.rsplit(".", 1)[0] for x in v if x.startswith("ENST")) + gene_map[k] = ",".join(non_enst) + + TREDS, df = read_treds() + + metafile = "STRs_{}_SEARCH.meta.tsv".format(timestamp()) + 
def counts_to_percentile(counts):
    """
    Turn allele counts into upper-tail cumulative fractions.

    Alleles are visited from largest to smallest; each allele maps to the
    fraction of all observations that are greater than or equal to it,
    formatted with six decimals. Keys of the result follow that descending
    order.
    """
    cumulative = {}
    running = 0
    for allele in sorted(counts, reverse=True):
        running += counts[allele]
        cumulative[allele] = running
    # After the loop `running` holds the grand total.
    return {
        allele: "{:.6f}".format(seen * 1.0 / running)
        for allele, seen in cumulative.items()
    }
def write_csv(csvfile, m, index, columns, sep="\t", index_label="SampleKey"):
    """
    Write a matrix as delimited text with a header row and a label column.

    Args:
        csvfile: Output path.
        m: Iterable of rows; every cell is written with ``str``.
        index: Row labels, looked up by position (``index[i]``).
        columns: Column names for the header.
        sep: Field separator.
        index_label: Header cell above the row labels.
    """
    # The context manager closes the file even if a row fails to serialise.
    with open(csvfile, "w") as fw:
        print(sep.join([index_label, *(str(c) for c in columns)]), file=fw)
        for i, a in enumerate(m):
            # str() lets non-string labels through, e.g. an integer
            # DataFrame index; `label + sep` raised TypeError for those.
            print(str(index[i]) + sep + sep.join(str(x) for x in a), file=fw)
def mask(args):
    """
    %prog mask data.bin samples.ids STR.ids meta.tsv

    OR

    %prog mask data.tsv meta.tsv

    Compute P-values based on meta and data. The `data.bin` should be the matrix
    containing filtered loci and the output mask.tsv will have the same
    dimension.
    """
    p = OptionParser(mask.__doc__)
    opts, args = p.parse_args(args)

    if len(args) not in (2, 4):
        sys.exit(not p.print_help())

    if len(args) == 4:
        databin, sampleids, strids, metafile = args
        df, m, samples, loci = read_binfile(databin, sampleids, strids)
        mode = "STRs"
    elif len(args) == 2:
        databin, metafile = args
        df = pd.read_csv(databin, sep="\t", index_col=0)
        # `DataFrame.as_matrix()` was removed in pandas 1.0; `.values`
        # returns the same ndarray and exists in every pandas release.
        m = df.values
        samples = df.index
        loci = list(df.columns)
        mode = "TREDs"

    pf = "{}_{}_SEARCH".format(mode, timestamp())
    final_columns, percentiles = read_meta(metafile)

    maskfile = pf + ".mask.tsv"
    # One work item per locus: (column index, allele column, percentile table)
    run_args = []
    for i, locus in enumerate(loci):
        a = m[:, i]
        percentile = percentiles[locus]
        run_args.append((i, a, percentile))

    # TREDs mode always rewrites the mask; STRs mode only when stale.
    if mode == "TREDs" or need_update(databin, maskfile):
        cpus = min(8, len(run_args))
        write_mask(cpus, samples, final_columns, run_args, filename=maskfile)
        logger.debug("File `{}` written.".format(maskfile))
def write_csv_ev(filename, filtered, cleanup, store=None):
    """
    Parse one lobSTR VCF and write its genotype (.csv) and evidence (.ev) rows.

    Output files are named after the basename of `filename` and written to
    the current directory; both are pushed to `store` when it is set.
    """
    lv = LobSTRvcf()
    lv.parse(filename, filtered=filtered, cleanup=cleanup)
    csvfile = op.basename(filename) + ".csv"
    evfile = op.basename(filename) + ".ev"

    # Build both lines before opening any output, so a failure cannot leave
    # an empty file behind that a later `need_update` check treats as fresh.
    csvline = lv.csvline
    evline = lv.evline

    with open(csvfile, "w") as fw:
        print(csvline, file=fw)

    with open(evfile, "w") as fw:
        print(evline, file=fw)

    # Save to s3
    if store:
        push_to_s3(store, csvfile)
        push_to_s3(store, evfile)
def build_yhrd_link(r, panel, ban=["DYS385"]):
    """
    Print one space-separated line of marker values for a YHRD panel.

    Each marker in `panel` is looked up in `r`; markers that are missing from
    `r` or listed in `ban` are printed as ``--``. `ban` is only read, never
    modified.
    """
    values = ("--" if marker in ban else r.get(marker, "--") for marker in panel)
    print(" ".join(map(str, values)))
def get_motif(s, motif_length):
    """
    Return the most frequent k-mer of length `motif_length` in `s`.

    Candidates are ranked by non-overlapping occurrence count
    (``str.count``); ties go to the k-mer that appears first in `s`.

    Raises:
        IndexError: if `s` is shorter than `motif_length`, so that no k-mer
            exists.
    """
    sl = len(s)
    # `+ 1` includes the final window. Without it a sequence exactly one
    # motif long produced no candidates and the lookup below raised.
    kmers = {s[i : i + motif_length] for i in range(sl - motif_length + 1)}

    # Negated index so that, under reverse sort, earlier positions win ties.
    kmer_counts = [(s.count(kmer), -s.index(kmer), kmer) for kmer in kmers]

    return sorted(kmer_counts, reverse=True)[0][-1]
+ """ + from jcvi.apps.base import iglob + + cparams = "1 1 2 80 5 200 2000" + + p = OptionParser(trf.__doc__) + p.add_argument("--mismatch", default=31, type=int, help="Mismatch and gap penalty") + p.add_argument( + "--minscore", default=MINSCORE, type=int, help="Minimum score to report" + ) + p.add_argument("--period", default=6, type=int, help="Maximum period to report") + p.add_argument( + "--lobstr", + default=False, + action="store_true", + help="Generate output for lobSTR", + ) + p.add_argument( + "--telomeres", + default=False, + action="store_true", + help="Run telomere search: minscore=140 period=7", + ) + p.add_argument( + "--centromeres", + default=False, + action="store_true", + help="Run centromere search: {}".format(cparams), + ) + opts, args = p.parse_args(args) + + if len(args) != 1: + sys.exit(not p.print_help()) + + (outdir,) = args + minlength = opts.minscore / 2 + mm = MakeManager() + if opts.telomeres: + opts.minscore, opts.period = 140, 7 + + params = "2 {0} {0} 80 10 {1} {2}".format( + opts.mismatch, opts.minscore, opts.period + ).split() + if opts.centromeres: + params = cparams.split() + + bedfiles = [] + for fastafile in natsorted(iglob(outdir, "*.fa,*.fasta")): + pf = op.basename(fastafile).rsplit(".", 1)[0] + # Commands starting with trf ignores errors + cmd1 = "-trf {0} {1} -d -h".format(fastafile, " ".join(params)) + datfile = op.basename(fastafile) + "." 
def batchlobstr(args):
    """
    %prog batchlobstr samples.csv

    Run lobSTR sequentially on list of samples. Each line contains:
    sample-name,s3-location
    """
    p = OptionParser(batchlobstr.__doc__)
    p.add_argument("--sep", default=",", help="Separator for building commandline")
    p.set_home("lobstr", default="s3://hli-mv-data-science/htang/str-build/lobSTR/")
    p.set_aws_opts(store="hli-mv-data-science/htang/str-data")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (samplesfile,) = args
    store = opts.output_path
    computed = ls_s3(store)
    skipped = total = 0
    # Context manager: the handle is released even if a row is malformed
    # and the unpacking below raises.
    with open(samplesfile) as fp:
        for row in fp:
            total += 1
            sample, s3file = row.strip().split(",")[:2]
            exec_id, sample_id = sample.split("_")
            # The BAM name is derived from the listed location by dropping
            # `.gz` and swapping `.vcf` for `.bam`.
            bamfile = s3file.replace(".gz", "").replace(".vcf", ".bam")

            gzfile = sample + ".{0}.vcf.gz".format("hg38")
            if gzfile in computed:
                skipped += 1
                continue

            # Emit one `lobstr` command line per sample still to compute.
            print(
                opts.sep.join(
                    "python -m jcvi.variation.str lobstr".split()
                    + [
                        "hg38",
                        "--input_bam_path",
                        bamfile,
                        "--output_path",
                        store,
                        "--sample_id",
                        sample_id,
                        "--workflow_execution_id",
                        exec_id,
                        "--lobstr_home",
                        opts.lobstr_home,
                        "--workdir",
                        opts.workdir,
                    ]
                )
            )
    logger.debug("Total skipped: {0}".format(percentage(skipped, total)))
+ + Run lobSTR on a big BAM file. There can be multiple lobSTR indices. In + addition, bamfile can be S3 location and --lobstr_home can be S3 location + (e.g. s3://hli-mv-data-science/htang/str-build/lobSTR/) + """ + p = OptionParser(lobstr.__doc__) + p.add_argument( + "--haploid", default="chrY,chrM", help="Use haploid model for these chromosomes" + ) + p.add_argument("--chr", help="Run only this chromosome") + p.add_argument( + "--simulation", default=False, action="store_true", help="Simulation mode" + ) + p.set_home("lobstr", default="s3://hli-mv-data-science/htang/str-build/lobSTR/") + p.set_cpus() + p.set_aws_opts(store="hli-mv-data-science/htang/str-data") + opts, args = p.parse_args(args) + bamfile = opts.input_bam_path + + if len(args) < 1 or bamfile is None: + sys.exit(not p.print_help()) + + lbindices = args + if opts.simulation: # Simulation mode + cmd, vcf_file = allelotype_on_chr( + bamfile, "chr4", "/mnt/software/lobSTR/", "TREDs", haploid=opts.haploid + ) + stats_file = vcf_file.rsplit(".", 1)[0] + ".allelotype.stats" + results_dir = "lobstr_results" + mkdir(results_dir) + sh(cmd) + sh("mv {} {}/ && rm {}".format(vcf_file, results_dir, stats_file)) + return + + s3mode = bamfile.startswith("s3") + store = opts.output_path + cleanup = not opts.nocleanup + workdir = opts.workdir + mkdir(workdir) + os.chdir(workdir) + + lhome = opts.lobstr_home + if lhome.startswith("s3://"): + lhome = pull_from_s3(lhome, overwrite=False) + + exec_id, sample_id = opts.workflow_execution_id, opts.sample_id + prefix = [x for x in (exec_id, sample_id) if x] + if prefix: + pf = "_".join(prefix) + else: + pf = bamfile.split("/")[-1].split(".")[0] + + if s3mode: + gzfile = pf + ".{0}.vcf.gz".format(lbindices[-1]) + remotegzfile = "{0}/{1}".format(store, gzfile) + if check_exists_s3(remotegzfile): + logger.debug( + "Object `{0}` exists. 
Computation skipped.".format(remotegzfile) + ) + return + localbamfile = pf + ".bam" + localbaifile = localbamfile + ".bai" + if op.exists(localbamfile): + logger.debug("BAM file already downloaded.") + else: + pull_from_s3(bamfile, localbamfile) + if op.exists(localbaifile): + logger.debug("BAM index file already downloaded.") + else: + remotebaifile = bamfile + ".bai" + if check_exists_s3(remotebaifile): + pull_from_s3(remotebaifile, localbaifile) + else: + remotebaifile = bamfile.rsplit(".")[0] + ".bai" + if check_exists_s3(remotebaifile): + pull_from_s3(remotebaifile, localbaifile) + else: + logger.debug("BAM index cannot be found in S3!") + sh("samtools index {0}".format(localbamfile)) + bamfile = localbamfile + + chrs = [opts.chr] if opts.chr else (range(1, 23) + ["X", "Y"]) + for lbidx in lbindices: + makefile = "makefile.{0}".format(lbidx) + mm = MakeManager(filename=makefile) + vcffiles = [] + for chr in chrs: + cmd, vcffile = allelotype_on_chr( + bamfile, chr, lhome, lbidx, haploid=opts.haploid + ) + mm.add(bamfile, vcffile, cmd) + filteredvcffile = vcffile.replace(".vcf", ".filtered.vcf") + cmd = "python -m jcvi.variation.str filtervcf {}".format(vcffile) + cmd += " --lobstr_home {}".format(lhome) + mm.add(vcffile, filteredvcffile, cmd) + vcffiles.append(filteredvcffile) + + gzfile = bamfile.split(".")[0] + ".{0}.vcf.gz".format(lbidx) + cmd = "vcf-concat {0} | vcf-sort".format(" ".join(vcffiles)) + cmd += " | bgzip -c > {0}".format(gzfile) + mm.add(vcffiles, gzfile, cmd) + + mm.run(cpus=opts.cpus) + + if s3mode: + push_to_s3(store, gzfile) + + if cleanup: + mm.clean() + sh("rm -f {} {} *.bai *.stats".format(bamfile, mm.makefile)) + + +def allelotype_on_chr(bamfile, chr, lhome, lbidx, haploid="chrY,chrM"): + if "chr" not in chr.lower(): + chr = "chr" + chr + outfile = "{0}.{1}".format(bamfile.split(".")[0], chr) + cmd = "allelotype --command classify --bam {}".format(bamfile) + cmd += " --noise_model {0}/models/illumina_v3.pcrfree".format(lhome) + cmd += 
def locus(args):
    """
    %prog locus bamfile

    Extract selected locus from a list of TREDs for validation, and run lobSTR.
    """
    from jcvi.formats.sam import get_minibam

    # See `Format-lobSTR-database.ipynb` for a list of TREDs for validation
    INCLUDE = ["HD", "SBMA", "SCA1", "SCA2", "SCA8", "SCA17", "DM1", "DM2", "FXTAS"]
    db_choices = ("hg38", "hg19")

    p = OptionParser(locus.__doc__)
    p.add_argument("--tred", choices=INCLUDE, help="TRED name")
    p.add_argument("--ref", choices=db_choices, default="hg38", help="Reference genome")
    p.set_home("lobstr")
    opts, args = p.parse_args(args)

    # --tred has no default; without it the table lookup below cannot work,
    # so show the usage instead of failing with a KeyError.
    if len(args) != 1 or opts.tred is None:
        sys.exit(not p.print_help())

    (bamfile,) = args
    ref = opts.ref
    lhome = opts.lobstr_home
    tred = opts.tred

    tredsfile = datafile("TREDs.meta.csv")
    tf = pd.read_csv(tredsfile, index_col=0)
    # `DataFrame.ix` was removed in pandas 1.0; `.loc` is the label-based
    # equivalent for this lookup by TRED name.
    row = tf.loc[tred]
    tag = "repeat_location"
    ldb = "TREDs"
    if ref == "hg19":
        tag += "." + ref
        ldb += "-" + ref
    seqid, start_end = row[tag].split(":")

    # Widen the repeat interval on both sides before extracting reads.
    PAD = 1000
    start, end = start_end.split("-")
    start, end = int(start) - PAD, int(end) + PAD
    region = "{}:{}-{}".format(seqid, start, end)

    minibamfile = get_minibam(bamfile, region)
    c = seqid.replace("chr", "")
    cmd, vcffile = allelotype_on_chr(minibamfile, c, lhome, ldb)
    sh(cmd)

    parser = LobSTRvcf(columnidsfile=None)
    parser.parse(vcffile, filtered=False)
    # Materialise the view: dict views cannot be indexed on Python 3.
    items = list(parser.items())
    if not items:
        print("No entry found!", file=sys.stderr)
        return

    _, v = items[0]
    print("{} => {}".format(tred, v.replace(",", "/")), file=sys.stderr)
+ cmd += " {0} {1} > {2}".format(newbedfile, fastafile, tabfile) + mm.add((newbedfile, fastafile), tabfile, cmd) + + infofile = "{0}/index.info".format(pf) + cmd = "cp {0} {1}".format(newbedfile, infofile) + mm.add(trfbed, infofile, cmd) + mm.write() + + +if __name__ == "__main__": + main() From daf10f38622a972ec34d768f3ad540720733a9c6 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 12:33:18 +1100 Subject: [PATCH 30/43] Move jcvi to src/ directory with git mv --- jcvi/__init__.py | 21 - jcvi/_version.py | 16 - jcvi/algorithms/__init__.py | 0 jcvi/algorithms/__main__.py | 11 - jcvi/algorithms/ec.py | 213 - jcvi/algorithms/formula.py | 255 - jcvi/algorithms/graph.py | 514 - jcvi/algorithms/lis.py | 214 - jcvi/algorithms/lpsolve.py | 802 - jcvi/algorithms/matrix.py | 209 - jcvi/algorithms/maxsum.py | 51 - jcvi/algorithms/supermap.py | 176 - jcvi/algorithms/tsp.py | 393 - jcvi/annotation/__init__.py | 0 jcvi/annotation/__main__.py | 12 - jcvi/annotation/ahrd.py | 708 - jcvi/annotation/automaton.py | 287 - jcvi/annotation/depth.py | 240 - jcvi/annotation/evm.py | 268 - jcvi/annotation/maker.py | 537 - jcvi/annotation/pasa.py | 595 - jcvi/annotation/qc.py | 376 - jcvi/annotation/reformat.py | 1398 -- jcvi/annotation/stats.py | 386 - jcvi/annotation/train.py | 227 - jcvi/annotation/trinity.py | 173 - jcvi/apps/__init__.py | 0 jcvi/apps/__main__.py | 11 - jcvi/apps/align.py | 713 - jcvi/apps/base.py | 2277 --- jcvi/apps/biomart.py | 426 - jcvi/apps/blastplus.py | 132 - jcvi/apps/bowtie.py | 213 - jcvi/apps/bwa.py | 301 - jcvi/apps/cdhit.py | 260 - jcvi/apps/emboss.py | 103 - jcvi/apps/fetch.py | 729 - jcvi/apps/gbsubmit.py | 676 - jcvi/apps/gmap.py | 253 - jcvi/apps/grid.py | 664 - jcvi/apps/lastz.py | 272 - jcvi/apps/mask.py | 126 - jcvi/apps/phylo.py | 1204 -- jcvi/apps/r.py | 82 - jcvi/apps/restriction.py | 168 - jcvi/apps/softlink.py | 155 - jcvi/apps/uclust.py | 1106 -- jcvi/apps/uniprot.py | 216 - jcvi/apps/vecscreen.py | 133 - 
jcvi/assembly/__init__.py | 0 jcvi/assembly/__main__.py | 11 - jcvi/assembly/allmaps.py | 2018 --- jcvi/assembly/allpaths.py | 530 - jcvi/assembly/automaton.py | 482 - jcvi/assembly/base.py | 210 - jcvi/assembly/chic.c | 14222 ------------------ jcvi/assembly/chic.pyx | 105 - jcvi/assembly/coverage.py | 160 - jcvi/assembly/gaps.py | 294 - jcvi/assembly/geneticmap.py | 714 - jcvi/assembly/goldenpath.py | 1192 -- jcvi/assembly/hic.py | 1772 --- jcvi/assembly/kmer.py | 1410 -- jcvi/assembly/opticalmap.py | 427 - jcvi/assembly/patch.py | 968 -- jcvi/assembly/postprocess.py | 537 - jcvi/assembly/preprocess.py | 735 - jcvi/assembly/sim.py | 215 - jcvi/assembly/soap.py | 331 - jcvi/assembly/syntenypath.py | 553 - jcvi/compara/__init__.py | 0 jcvi/compara/__main__.py | 11 - jcvi/compara/base.py | 164 - jcvi/compara/blastfilter.py | 325 - jcvi/compara/catalog.py | 982 -- jcvi/compara/fractionation.py | 854 -- jcvi/compara/ks.py | 1176 -- jcvi/compara/pad.py | 314 - jcvi/compara/pedigree.py | 270 - jcvi/compara/phylogeny.py | 91 - jcvi/compara/quota.py | 288 - jcvi/compara/reconstruct.py | 379 - jcvi/compara/synfind.py | 279 - jcvi/compara/synteny.py | 1883 --- jcvi/formats/__init__.py | 0 jcvi/formats/__main__.py | 11 - jcvi/formats/agp.py | 2188 --- jcvi/formats/base.py | 1196 -- jcvi/formats/bed.py | 2504 ---- jcvi/formats/blast.py | 1543 -- jcvi/formats/cblast.c | 16862 ---------------------- jcvi/formats/cblast.pyx | 210 - jcvi/formats/cdt.py | 122 - jcvi/formats/chain.py | 311 - jcvi/formats/contig.py | 182 - jcvi/formats/coords.py | 612 - jcvi/formats/excel.py | 246 - jcvi/formats/fasta.py | 2642 ---- jcvi/formats/fastq.py | 1104 -- jcvi/formats/genbank.py | 522 - jcvi/formats/gff.py | 3768 ----- jcvi/formats/html.py | 158 - jcvi/formats/maf.py | 286 - jcvi/formats/obo.py | 106 - jcvi/formats/paf.py | 127 - jcvi/formats/pdf.py | 101 - jcvi/formats/psl.py | 395 - jcvi/formats/pyblast.py | 101 - jcvi/formats/sam.py | 1025 -- jcvi/formats/sizes.py | 289 - 
jcvi/formats/vcf.py | 849 -- jcvi/graphics/__init__.py | 0 jcvi/graphics/__main__.py | 11 - jcvi/graphics/align.py | 554 - jcvi/graphics/assembly.py | 516 - jcvi/graphics/base.py | 843 -- jcvi/graphics/blastplot.py | 345 - jcvi/graphics/chromosome.py | 730 - jcvi/graphics/coverage.py | 245 - jcvi/graphics/dotplot.py | 549 - jcvi/graphics/glyph.py | 761 - jcvi/graphics/grabseeds.py | 881 -- jcvi/graphics/heatmap.py | 176 - jcvi/graphics/histogram.py | 387 - jcvi/graphics/karyotype.py | 476 - jcvi/graphics/landscape.py | 1316 -- jcvi/graphics/mummerplot.py | 158 - jcvi/graphics/synteny.py | 736 - jcvi/graphics/table.py | 184 - jcvi/graphics/tree.py | 688 - jcvi/graphics/wheel.py | 225 - jcvi/projects/__init__.py | 0 jcvi/projects/__main__.py | 11 - jcvi/projects/age.py | 738 - jcvi/projects/allmaps.py | 532 - jcvi/projects/bites.py | 229 - jcvi/projects/ies.py | 426 - jcvi/projects/jcvi.py | 335 - jcvi/projects/misc.py | 777 - jcvi/projects/napus.py | 858 -- jcvi/projects/pineapple.py | 411 - jcvi/projects/str.py | 2271 --- jcvi/projects/sugarcane.py | 807 -- jcvi/projects/synfind.py | 860 -- jcvi/projects/tgbs.py | 696 - jcvi/projects/vanilla.py | 450 - jcvi/utils/__init__.py | 0 jcvi/utils/__main__.py | 11 - jcvi/utils/aws.py | 810 -- jcvi/utils/cbook.py | 465 - jcvi/utils/console.py | 19 - jcvi/utils/data/Airswing.ttf | Bin 16912 -> 0 bytes jcvi/utils/data/Collegia.ttf | Bin 103940 -> 0 bytes jcvi/utils/data/HookedUp.ttf | Bin 20468 -> 0 bytes jcvi/utils/data/Humor-Sans.ttf | Bin 25832 -> 0 bytes jcvi/utils/data/TREDs.meta.csv | 33 - jcvi/utils/data/__init__.py | 0 jcvi/utils/data/adapters.fasta | 38 - jcvi/utils/data/blosum80.mat | 40 - jcvi/utils/data/chrY.hg38.unique_ccn.gc | 300 - jcvi/utils/data/colorchecker.txt | 4 - jcvi/utils/data/hg38.band.txt | 1294 -- jcvi/utils/data/hg38.chrom.sizes | 455 - jcvi/utils/data/instance.json | 42 - jcvi/utils/db.py | 334 - jcvi/utils/ez_setup.py | 167 - jcvi/utils/grouper.py | 114 - jcvi/utils/orderedcollections.py | 297 - 
jcvi/utils/range.py | 529 - jcvi/utils/table.py | 145 - jcvi/utils/taxonomy.py | 200 - jcvi/utils/validator.py | 56 - jcvi/utils/webcolors.py | 54 - jcvi/variation/__init__.py | 0 jcvi/variation/__main__.py | 11 - jcvi/variation/cnv.py | 1509 -- jcvi/variation/deconvolute.py | 258 - jcvi/variation/delly.py | 343 - jcvi/variation/impute.py | 384 - jcvi/variation/phase.py | 132 - jcvi/variation/snp.py | 369 - jcvi/variation/str.py | 1568 -- 182 files changed, 118827 deletions(-) delete mode 100644 jcvi/__init__.py delete mode 100644 jcvi/_version.py delete mode 100644 jcvi/algorithms/__init__.py delete mode 100644 jcvi/algorithms/__main__.py delete mode 100644 jcvi/algorithms/ec.py delete mode 100644 jcvi/algorithms/formula.py delete mode 100644 jcvi/algorithms/graph.py delete mode 100755 jcvi/algorithms/lis.py delete mode 100755 jcvi/algorithms/lpsolve.py delete mode 100644 jcvi/algorithms/matrix.py delete mode 100644 jcvi/algorithms/maxsum.py delete mode 100755 jcvi/algorithms/supermap.py delete mode 100644 jcvi/algorithms/tsp.py delete mode 100644 jcvi/annotation/__init__.py delete mode 100644 jcvi/annotation/__main__.py delete mode 100644 jcvi/annotation/ahrd.py delete mode 100644 jcvi/annotation/automaton.py delete mode 100755 jcvi/annotation/depth.py delete mode 100644 jcvi/annotation/evm.py delete mode 100644 jcvi/annotation/maker.py delete mode 100644 jcvi/annotation/pasa.py delete mode 100644 jcvi/annotation/qc.py delete mode 100644 jcvi/annotation/reformat.py delete mode 100644 jcvi/annotation/stats.py delete mode 100644 jcvi/annotation/train.py delete mode 100644 jcvi/annotation/trinity.py delete mode 100644 jcvi/apps/__init__.py delete mode 100644 jcvi/apps/__main__.py delete mode 100644 jcvi/apps/align.py delete mode 100644 jcvi/apps/base.py delete mode 100644 jcvi/apps/biomart.py delete mode 100755 jcvi/apps/blastplus.py delete mode 100644 jcvi/apps/bowtie.py delete mode 100644 jcvi/apps/bwa.py delete mode 100644 jcvi/apps/cdhit.py delete mode 100644 
jcvi/apps/emboss.py delete mode 100644 jcvi/apps/fetch.py delete mode 100644 jcvi/apps/gbsubmit.py delete mode 100644 jcvi/apps/gmap.py delete mode 100644 jcvi/apps/grid.py delete mode 100755 jcvi/apps/lastz.py delete mode 100755 jcvi/apps/mask.py delete mode 100644 jcvi/apps/phylo.py delete mode 100644 jcvi/apps/r.py delete mode 100644 jcvi/apps/restriction.py delete mode 100644 jcvi/apps/softlink.py delete mode 100644 jcvi/apps/uclust.py delete mode 100644 jcvi/apps/uniprot.py delete mode 100644 jcvi/apps/vecscreen.py delete mode 100644 jcvi/assembly/__init__.py delete mode 100644 jcvi/assembly/__main__.py delete mode 100644 jcvi/assembly/allmaps.py delete mode 100644 jcvi/assembly/allpaths.py delete mode 100644 jcvi/assembly/automaton.py delete mode 100644 jcvi/assembly/base.py delete mode 100644 jcvi/assembly/chic.c delete mode 100644 jcvi/assembly/chic.pyx delete mode 100644 jcvi/assembly/coverage.py delete mode 100644 jcvi/assembly/gaps.py delete mode 100644 jcvi/assembly/geneticmap.py delete mode 100644 jcvi/assembly/goldenpath.py delete mode 100644 jcvi/assembly/hic.py delete mode 100644 jcvi/assembly/kmer.py delete mode 100644 jcvi/assembly/opticalmap.py delete mode 100644 jcvi/assembly/patch.py delete mode 100644 jcvi/assembly/postprocess.py delete mode 100644 jcvi/assembly/preprocess.py delete mode 100644 jcvi/assembly/sim.py delete mode 100644 jcvi/assembly/soap.py delete mode 100644 jcvi/assembly/syntenypath.py delete mode 100644 jcvi/compara/__init__.py delete mode 100644 jcvi/compara/__main__.py delete mode 100644 jcvi/compara/base.py delete mode 100755 jcvi/compara/blastfilter.py delete mode 100644 jcvi/compara/catalog.py delete mode 100644 jcvi/compara/fractionation.py delete mode 100644 jcvi/compara/ks.py delete mode 100644 jcvi/compara/pad.py delete mode 100644 jcvi/compara/pedigree.py delete mode 100644 jcvi/compara/phylogeny.py delete mode 100755 jcvi/compara/quota.py delete mode 100644 jcvi/compara/reconstruct.py delete mode 100755 
jcvi/compara/synfind.py delete mode 100755 jcvi/compara/synteny.py delete mode 100644 jcvi/formats/__init__.py delete mode 100644 jcvi/formats/__main__.py delete mode 100644 jcvi/formats/agp.py delete mode 100644 jcvi/formats/base.py delete mode 100755 jcvi/formats/bed.py delete mode 100644 jcvi/formats/blast.py delete mode 100644 jcvi/formats/cblast.c delete mode 100644 jcvi/formats/cblast.pyx delete mode 100644 jcvi/formats/cdt.py delete mode 100644 jcvi/formats/chain.py delete mode 100644 jcvi/formats/contig.py delete mode 100644 jcvi/formats/coords.py delete mode 100644 jcvi/formats/excel.py delete mode 100644 jcvi/formats/fasta.py delete mode 100644 jcvi/formats/fastq.py delete mode 100644 jcvi/formats/genbank.py delete mode 100644 jcvi/formats/gff.py delete mode 100644 jcvi/formats/html.py delete mode 100644 jcvi/formats/maf.py delete mode 100755 jcvi/formats/obo.py delete mode 100644 jcvi/formats/paf.py delete mode 100644 jcvi/formats/pdf.py delete mode 100755 jcvi/formats/psl.py delete mode 100644 jcvi/formats/pyblast.py delete mode 100644 jcvi/formats/sam.py delete mode 100644 jcvi/formats/sizes.py delete mode 100644 jcvi/formats/vcf.py delete mode 100644 jcvi/graphics/__init__.py delete mode 100644 jcvi/graphics/__main__.py delete mode 100644 jcvi/graphics/align.py delete mode 100644 jcvi/graphics/assembly.py delete mode 100644 jcvi/graphics/base.py delete mode 100755 jcvi/graphics/blastplot.py delete mode 100644 jcvi/graphics/chromosome.py delete mode 100644 jcvi/graphics/coverage.py delete mode 100755 jcvi/graphics/dotplot.py delete mode 100644 jcvi/graphics/glyph.py delete mode 100644 jcvi/graphics/grabseeds.py delete mode 100644 jcvi/graphics/heatmap.py delete mode 100644 jcvi/graphics/histogram.py delete mode 100644 jcvi/graphics/karyotype.py delete mode 100644 jcvi/graphics/landscape.py delete mode 100644 jcvi/graphics/mummerplot.py delete mode 100644 jcvi/graphics/synteny.py delete mode 100644 jcvi/graphics/table.py delete mode 100644 
jcvi/graphics/tree.py delete mode 100644 jcvi/graphics/wheel.py delete mode 100644 jcvi/projects/__init__.py delete mode 100644 jcvi/projects/__main__.py delete mode 100644 jcvi/projects/age.py delete mode 100644 jcvi/projects/allmaps.py delete mode 100644 jcvi/projects/bites.py delete mode 100644 jcvi/projects/ies.py delete mode 100644 jcvi/projects/jcvi.py delete mode 100644 jcvi/projects/misc.py delete mode 100644 jcvi/projects/napus.py delete mode 100644 jcvi/projects/pineapple.py delete mode 100644 jcvi/projects/str.py delete mode 100644 jcvi/projects/sugarcane.py delete mode 100644 jcvi/projects/synfind.py delete mode 100644 jcvi/projects/tgbs.py delete mode 100644 jcvi/projects/vanilla.py delete mode 100644 jcvi/utils/__init__.py delete mode 100644 jcvi/utils/__main__.py delete mode 100644 jcvi/utils/aws.py delete mode 100644 jcvi/utils/cbook.py delete mode 100644 jcvi/utils/console.py delete mode 100755 jcvi/utils/data/Airswing.ttf delete mode 100755 jcvi/utils/data/Collegia.ttf delete mode 100755 jcvi/utils/data/HookedUp.ttf delete mode 100644 jcvi/utils/data/Humor-Sans.ttf delete mode 100644 jcvi/utils/data/TREDs.meta.csv delete mode 100644 jcvi/utils/data/__init__.py delete mode 100644 jcvi/utils/data/adapters.fasta delete mode 100644 jcvi/utils/data/blosum80.mat delete mode 100644 jcvi/utils/data/chrY.hg38.unique_ccn.gc delete mode 100644 jcvi/utils/data/colorchecker.txt delete mode 100644 jcvi/utils/data/hg38.band.txt delete mode 100644 jcvi/utils/data/hg38.chrom.sizes delete mode 100644 jcvi/utils/data/instance.json delete mode 100644 jcvi/utils/db.py delete mode 100644 jcvi/utils/ez_setup.py delete mode 100755 jcvi/utils/grouper.py delete mode 100644 jcvi/utils/orderedcollections.py delete mode 100644 jcvi/utils/range.py delete mode 100644 jcvi/utils/table.py delete mode 100644 jcvi/utils/taxonomy.py delete mode 100644 jcvi/utils/validator.py delete mode 100755 jcvi/utils/webcolors.py delete mode 100644 jcvi/variation/__init__.py delete mode 100644 
jcvi/variation/__main__.py delete mode 100644 jcvi/variation/cnv.py delete mode 100644 jcvi/variation/deconvolute.py delete mode 100644 jcvi/variation/delly.py delete mode 100644 jcvi/variation/impute.py delete mode 100644 jcvi/variation/phase.py delete mode 100644 jcvi/variation/snp.py delete mode 100644 jcvi/variation/str.py diff --git a/jcvi/__init__.py b/jcvi/__init__.py deleted file mode 100644 index dbd33d8d..00000000 --- a/jcvi/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from datetime import datetime - -__author__ = ( - "Haibao Tang", - "Vivek Krishnakumar", - "Xingtan Zhang", - "Won Cheol Yim", -) -__copyright__ = f"Copyright (c) 2010-{datetime.now().year}, Haibao Tang" -__email__ = "tanghaibao@gmail.com" -__license__ = "BSD" -__status__ = "Development" - -try: - from ._version import __version__ # noqa -except ImportError as exc: # pragma: no cover - raise ImportError( - "Failed to find (autogenerated) version.py. " - "This might be because you are installing from GitHub's tarballs, " - "use the PyPI ones." - ) from exc diff --git a/jcvi/_version.py b/jcvi/_version.py deleted file mode 100644 index d9acf684..00000000 --- a/jcvi/_version.py +++ /dev/null @@ -1,16 +0,0 @@ -# file generated by setuptools_scm -# don't change, don't track in version control -TYPE_CHECKING = False -if TYPE_CHECKING: - from typing import Tuple, Union - VERSION_TUPLE = Tuple[Union[int, str], ...] 
-else: - VERSION_TUPLE = object - -version: str -__version__: str -__version_tuple__: VERSION_TUPLE -version_tuple: VERSION_TUPLE - -__version__ = version = '1.4.24.dev30+gf11b1a04' -__version_tuple__ = version_tuple = (1, 4, 24, 'dev30', 'gf11b1a04') diff --git a/jcvi/algorithms/__init__.py b/jcvi/algorithms/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/jcvi/algorithms/__main__.py b/jcvi/algorithms/__main__.py deleted file mode 100644 index baf6ccd7..00000000 --- a/jcvi/algorithms/__main__.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Implementations of several key algorithms, such as: TSP, Graph, SuperMap, Linear Programming, ML, etc. used by other modules. -""" - -from jcvi.apps.base import dmain - - -if __name__ == "__main__": - dmain(__file__) diff --git a/jcvi/algorithms/ec.py b/jcvi/algorithms/ec.py deleted file mode 100644 index 6106f877..00000000 --- a/jcvi/algorithms/ec.py +++ /dev/null @@ -1,213 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -This module contains methods to interface with DEAP evolutionary computation -framewor, including a Genetic Algorithm (GA) based method to solve scaffold -ordering and orientation problem. -""" - -import array -import random -import multiprocessing - -from deap import base, creator, tools -from deap.algorithms import varAnd - -from ..apps.base import logger -from ..utils.console import printf - -from .lis import longest_monotonic_subseq_length - - -# This has to be in global space, otherwise runs into error "creator.Individual -# not found" when runnning on macOS. 
See also: -# https://github.com/DEAP/deap/issues/268 -creator.create("FitnessMax", base.Fitness, weights=(1.0,)) -creator.create("Individual", array.array, typecode="i", fitness=creator.FitnessMax) - - -def make_data(POINTS, SCF): - seq = range(POINTS) - scaffolds = [] - batch = POINTS // SCF - for i in range(SCF): - p = seq[i * batch : (i + 1) * batch] - scaffolds.append(p) - return scaffolds - - -def colinear_evaluate(tour, scaffolds): - series = [] - for t in tour: - series.extend(scaffolds[t]) - score, diff = longest_monotonic_subseq_length(series) - return (score,) - - -def genome_mutation(candidate): - """Return the mutants created by inversion mutation on the candidates. - - This function performs inversion or insertion. It randomly chooses two - locations along the candidate and reverses the values within that - slice. Insertion is done by popping one item and insert it back at random - position. - """ - size = len(candidate) - prob = random.random() - if prob > 0.5: # Inversion - p = random.randint(0, size - 1) - q = random.randint(0, size - 1) - if p > q: - p, q = q, p - q += 1 - s = candidate[p:q] - x = candidate[:p] + s[::-1] + candidate[q:] - return (creator.Individual(x),) - else: # Insertion - p = random.randint(0, size - 1) - q = random.randint(0, size - 1) - cq = candidate.pop(q) - candidate.insert(p, cq) - return (candidate,) - - -def genome_mutation_orientation(candidate): - size = len(candidate) - prob = random.random() - if prob > 0.5: # Range flip - p = random.randint(0, size - 1) - q = random.randint(0, size - 1) - if p > q: - p, q = q, p - q += 1 - for x in range(p, q): - candidate[x] = -candidate[x] - else: # Single flip - p = random.randint(0, size - 1) - candidate[p] = -candidate[p] - return (candidate,) - - -def GA_setup(guess): - toolbox = base.Toolbox() - - toolbox.register("individual", creator.Individual, guess) - toolbox.register("population", tools.initRepeat, list, toolbox.individual) - toolbox.register("mate", 
tools.cxPartialyMatched) - toolbox.register("mutate", genome_mutation) - toolbox.register("select", tools.selTournament, tournsize=3) - return toolbox - - -def eaSimpleConverge( - population, - toolbox, - cxpb, - mutpb, - ngen, - stats=None, - halloffame=None, - callback=None, - verbose=True, -): - """This algorithm reproduce the simplest evolutionary algorithm as - presented in chapter 7 of [Back2000]_. - - Modified to allow checking if there is no change for ngen, as a simple - rule for convergence. Interface is similar to eaSimple(). However, in - eaSimple, ngen is total number of iterations; in eaSimpleConverge, we - terminate only when the best is NOT updated for ngen iterations. - """ - # Evaluate the individuals with an invalid fitness - invalid_ind = [ind for ind in population if not ind.fitness.valid] - fitnesses = toolbox.map(toolbox.evaluate, invalid_ind) - for ind, fit in zip(invalid_ind, fitnesses): - ind.fitness.values = fit - - if halloffame is not None: - halloffame.update(population) - - record = stats.compile(population) if stats else {} - - # Begin the generational process - gen = 1 - best = (0,) - while True: - # Select the next generation individuals - offspring = toolbox.select(population, len(population)) - - # Vary the pool of individuals - offspring = varAnd(offspring, toolbox, cxpb, mutpb) - - # Evaluate the individuals with an invalid fitness - invalid_ind = [ind for ind in offspring if not ind.fitness.valid] - fitnesses = toolbox.map(toolbox.evaluate, invalid_ind) - for ind, fit in zip(invalid_ind, fitnesses): - ind.fitness.values = fit - - # Update the hall of fame with the generated individuals - if halloffame is not None: - halloffame.update(offspring) - - if callback is not None: - callback(halloffame[0], gen) - - # Replace the current population by the offspring - population[:] = offspring - - # Append the current generation statistics to the logbook - record = stats.compile(population) if stats else {} - current_best = 
record["max"] - if gen % 20 == 0 and verbose: - printf( - "Current iteration {0}: max_score={1}".format(gen, current_best), - ) - - if current_best > best: - best = current_best - updated = gen - - gen += 1 - if gen - updated > ngen: - break - - return population - - -def GA_run(toolbox, ngen=500, npop=100, seed=666, cpus=1, callback=None): - logger.debug("GA setup: ngen=%d npop=%d cpus=%d seed=%d", ngen, npop, cpus, seed) - if cpus > 1: - pool = multiprocessing.Pool(cpus) - toolbox.register("map", pool.map) - random.seed(seed) - pop = toolbox.population(n=npop) - hof = tools.HallOfFame(1) - - stats = tools.Statistics(lambda ind: ind.fitness.values) - stats.register("max", max) - stats.register("min", min) - - eaSimpleConverge( - pop, toolbox, 0.7, 0.2, ngen, stats=stats, halloffame=hof, callback=callback - ) - tour = hof[0] - if cpus > 1: - pool.terminate() - return tour, tour.fitness - - -if __name__ == "__main__": - POINTS, SCF = 200, 20 - scaffolds = make_data(POINTS, SCF) - - # Demo case: scramble of the list - guess = list(range(SCF)) - guess[5:15] = guess[5:15][::-1] - guess[7:18] = guess[7:18][::-1] - printf(guess) - - toolbox = GA_setup(guess) - toolbox.register("evaluate", colinear_evaluate, scaffolds=scaffolds) - tour, tour.fitness = GA_run(toolbox, cpus=8) - printf(tour, tour.fitness) diff --git a/jcvi/algorithms/formula.py b/jcvi/algorithms/formula.py deleted file mode 100644 index 64384c26..00000000 --- a/jcvi/algorithms/formula.py +++ /dev/null @@ -1,255 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Some math formula for various calculations -""" -import sys - -from collections import Counter -from functools import lru_cache -from math import log, exp, sqrt - -import numpy as np -import scipy - -from ..utils.cbook import human_size - - -def mean_confidence_interval(data, confidence=0.95): - # Compute the confidence interval around the mean - - a = 1.0 * np.array(data) - n = len(a) - m, se = np.mean(a), scipy.stats.sem(a) - h = se * 
scipy.stats.t._ppf((1 + confidence) / 2.0, n - 1) - return m, m - h, m + h - - -def confidence_interval(data, confidence=0.95): - # Compute the confidence interval of the data - # Note the difference from mean_confidence_interval() - a = 1.0 * np.array(data) - n = len(a) - m, stdev = np.mean(a), np.std(a) - h = 1.96 * stdev - return m, m - h, m + h - - -def MAD_interval(data): - # Compute the MAD interval of the data - A = 1.0 * np.array(data) - M = np.median(A) - D = np.absolute(A - M) - MAD = np.median(D) - return M, M - MAD, M + MAD - - -def erf(x): - # save the sign of x - sign = 1 if x >= 0 else -1 - x = abs(x) - - # constants - a1 = 0.254829592 - a2 = -0.284496736 - a3 = 1.421413741 - a4 = -1.453152027 - a5 = 1.061405429 - p = 0.3275911 - - # A&S formula 7.1.26 - t = 1.0 / (1.0 + p * x) - y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) - return sign * y # erf(-x) = -erf(x) - - -def gaussian_prob_le(mu, sigma, x): - if sigma == 0: - return 1 if mu <= x else 0 - z = (x - mu) / (sigma * sqrt(2)) - return 0.5 + 0.5 * erf(z) - - -def choose_insertsize(readlen=150, step=20, cutoff=0.01): - """ - Calculate ratio of overlap for a range of insert sizes. Idea borrowed from - ALLPATHS code (`allpaths_cache/CacheToAllPathsInputs.pl`). - """ - print("Insert-size\tOverlap", file=sys.stderr) - for i in range(0, 3 * readlen, step): - p = gaussian_prob_le(i, i / 5, 2 * readlen) - if p < cutoff or p > 1 - cutoff: - continue - print("{0}bp\t{1}%".format(i, int(round(100 * p))), file=sys.stderr) - - -def get_kmeans(a, k, iter=100): - from scipy.cluster.vq import vq, kmeans - - a = np.array(a) - centroids, _ = kmeans(a, k, iter=iter) - centroids.sort() - idx, _ = vq(a, centroids) - return idx - - -def spearmanr(x, y): - """ - Michiel de Hoon's library (available in BioPython or standalone as - PyCluster) returns Spearman rsb which does include a tie correction. 
- - >>> x = [5.05, 6.75, 3.21, 2.66] - >>> y = [1.65, 26.5, -5.93, 7.96] - >>> z = [1.65, 2.64, 2.64, 6.95] - >>> round(spearmanr(x, y), 4) - 0.4 - >>> round(spearmanr(x, z), 4) - -0.6325 - """ - from scipy import stats - - if not x or not y: - return 0 - corr, pvalue = stats.spearmanr(x, y) - return corr - - -def reject_outliers(a, threshold=3.5): - """ - Iglewicz and Hoaglin's robust test for multiple outliers (two sided test). - - - See also: - - - >>> a = [0, 1, 2, 4, 12, 58, 188, 189] - >>> list(reject_outliers(a)) - [False, False, False, False, False, True, True, True] - """ - if len(a) < 3: - return np.zeros(len(a), dtype=bool) - - A = np.array(a, dtype=float) - lb, ub = outlier_cutoff(A, threshold=threshold) - return np.logical_or(A > ub, A < lb) - - -def outlier_cutoff(a, threshold=3.5): - """ - Iglewicz and Hoaglin's robust, returns the cutoff values - lower bound and - upper bound. - """ - A = np.array(a, dtype=float) - M = np.median(A) - D = np.absolute(A - M) - MAD = np.median(D) - C = threshold / 0.67449 * MAD - return M - C, M + C - - -def recomb_probability(cM, method="kosambi"): - """ - - - >>> recomb_probability(1) - 0.009998666879965463 - >>> recomb_probability(100) - 0.48201379003790845 - >>> recomb_probability(10000) - 0.5 - """ - assert method in ("kosambi", "haldane") - d = cM / 100.0 - if method == "kosambi": - e4d = exp(4 * d) - return (e4d - 1) / (e4d + 1) / 2 - elif method == "haldane": - return (1 - exp(-2 * d)) / 2 - - -def jukesCantorD(p, L=100): - """ - >>> jukesCantorD(.1) - (0.10732563273050497, 0.001198224852071006) - >>> jukesCantorD(.7) - (2.0310376508266565, 0.47249999999999864) - """ - assert 0 <= p < 0.75 - - rD = 1 - 4.0 / 3 * p - D = -0.75 * log(rD) - varD = p * (1 - p) / (rD**2 * L) - - return D, varD - - -def jukesCantorP(D): - """ - >>> jukesCantorP(.1) - 0.09362001071778939 - >>> jukesCantorP(2) - 0.6978874115828988 - """ - rD = exp(-4.0 / 3 * D) - p = 0.75 * (1 - rD) - return p - - -def velvet(readsize, genomesize, 
numreads, K): - """ - Calculate velvet memory requirement. - - - Ram required for velvetg = -109635 + 18977*ReadSize + 86326*GenomeSize + - 233353*NumReads - 51092*K - - Read size is in bases. - Genome size is in millions of bases (Mb) - Number of reads is in millions - K is the kmer hash value used in velveth - """ - ram = ( - -109635 + 18977 * readsize + 86326 * genomesize + 233353 * numreads - 51092 * K - ) - print("ReadSize: {0}".format(readsize), file=sys.stderr) - print("GenomeSize: {0}Mb".format(genomesize), file=sys.stderr) - print("NumReads: {0}M".format(numreads), file=sys.stderr) - print("K: {0}".format(K), file=sys.stderr) - - ram = human_size(ram * 1000, a_kilobyte_is_1024_bytes=True) - print("RAM usage: {0} (MAXKMERLENGTH=31)".format(ram), file=sys.stderr) - - -@lru_cache(maxsize=None) -def calc_ldscore(a: str, b: str) -> float: - """ - Calculate Linkage disequilibrium (r2) between two genotypes. - """ - assert len(a) == len(b), f"{a}\n{b}" - # Assumes markers as A/B - c = Counter(zip(a, b)) - c_aa = c[("A", "A")] - c_ab = c[("A", "B")] - c_ba = c[("B", "A")] - c_bb = c[("B", "B")] - n = c_aa + c_ab + c_ba + c_bb - if n == 0: - return 0 - - f = 1.0 / n - x_aa = c_aa * f - x_ab = c_ab * f - x_ba = c_ba * f - x_bb = c_bb * f - p_a = x_aa + x_ab - p_b = x_ba + x_bb - q_a = x_aa + x_ba - q_b = x_ab + x_bb - D = x_aa - p_a * q_a - denominator = p_a * p_b * q_a * q_b - if denominator == 0: - return 0 - - r2 = D * D / denominator - return r2 diff --git a/jcvi/algorithms/graph.py b/jcvi/algorithms/graph.py deleted file mode 100644 index ffa90dea..00000000 --- a/jcvi/algorithms/graph.py +++ /dev/null @@ -1,514 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Wrapper for the common graph algorithms. -""" -import sys - -from collections import deque - -import networkx as nx - -from more_itertools import pairwise - -from ..apps.base import logger -from ..formats.base import must_open - - -""" -Bidirectional graph. 
-""" -dirs = (">", "<") -trans = str.maketrans("+?-", ">><") - - -class BiNode(object): - def __init__(self, v): - self.v = v - self.ins = [] - self.outs = [] - - def get_next(self, tag="<"): - """ - This function is tricky and took me a while to figure out. - - The tag specifies the direction where the current edge came from. - - tag ntag - ---> V >----> U - cur next - - This means the next vertex should follow the outs since this tag is - inward '<'. Check if there are multiple branches if len(L) == 1, and - also check if the next it finds has multiple incoming edges though if - len(B) == 1. - """ - next, ntag = None, None - - L = self.outs if tag == "<" else self.ins - - if len(L) == 1: - (e,) = L - if e.v1.v == self.v: - next, ntag = e.v2, e.o2 - ntag = "<" if ntag == ">" else ">" # Flip tag if on other end - else: - next, ntag = e.v1, e.o1 - - if next: # Validate the next vertex - B = next.ins if ntag == "<" else next.outs - if len(B) > 1: - return None, None - - return next, ntag - - def __str__(self): - return str(self.v) - - __repr__ = __str__ - - -class BiEdge(object): - def __init__(self, v1, v2, o1, o2, color="black", length=None): - o1 = o1.translate(trans) - o2 = o2.translate(trans) - assert o1 in dirs and o2 in dirs - self.o1 = o1 - self.o2 = o2 - - self.color = color - self.length = length - - def __str__(self): - return "".join(str(x) for x in (self.v1, self.o1, "--", self.o2, self.v2)) - - def flip(self): - self.v2, self.v1 = self.v1, self.v2 - o1, o2 = self.o1, self.o2 - self.o1 = ">" if o2 == "<" else "<" - self.o2 = ">" if o1 == "<" else "<" - - -class BiGraph(object): - def __init__(self): - self.nodes = {} - self.edges = {} - - def __str__(self): - return "BiGraph with {0} nodes and {1} edges".format( - len(self.nodes), len(self.edges) - ) - - def add_node(self, v): - if v not in self.nodes: - self.nodes[v] = BiNode(v) - - def add_edge(self, v1, v2, o1, o2, color="black", length=None): - for v in (v1, v2): - self.add_node(v) - n1 = 
self.nodes.get(v1) - n2 = self.nodes.get(v2) - - if (v1, v2) in self.edges or (v2, v1) in self.edges: - return - - e = BiEdge(v1, v2, o1, o2, color=color, length=length) - l = n1.outs if e.o1 == ">" else n1.ins - r = n2.ins if e.o2 == ">" else n2.outs - l.append(e) - r.append(e) - e.v1, e.v2 = n1, n2 - if v1 > v2: - v1, v2 = v2, v1 - e.flip() - self.edges[(v1, v2)] = e - - def get_node(self, v): - return self.nodes[v] - - def get_edge(self, av, bv): - flip = False - if av > bv: - av, bv = bv, av - flip = True - e = self.edges[(av, bv)] - if flip: - e.flip() - return e - - def iter_paths(self): - - discovered = set() - for v, vv in self.nodes.items(): - if v in discovered: - continue - - path = deque([vv]) - - # print "cur", v - discovered.add(v) - prev, ptag = vv.get_next(tag=">") - while prev: - # print prev, ptag - if prev.v in discovered: - break - path.appendleft(prev) - discovered.add(prev.v) - prev, ptag = prev.get_next(tag=ptag) - - next, ntag = vv.get_next(tag="<") - while next: - # print next, ntag - if next.v in discovered: - break - path.append(next) - discovered.add(next.v) - next, ntag = next.get_next(tag=ntag) - - # discovered |= set(x.v for x in path) - yield path - - def path(self, path, flip=False): - oo = [] - if len(path) == 1: - m = "Singleton {0}".format(path[0]) - oo.append((path[0].v, True)) - return m, oo - - edges = [] - for a, b in pairwise(path): - av, bv = a.v, b.v - e = self.get_edge(av, bv) - - if not oo: # First edge imports two nodes - oo.append((e.v1.v, e.o1 == ">")) - last = oo[-1] - assert last == (e.v1.v, e.o1 == ">") - oo.append((e.v2.v, e.o2 == ">")) - - if flip: - se = str(e) - e.flip() - else: - se = str(e) - edges.append(se) - - return "|".join(edges), oo - - def read(self, filename, color="black"): - fp = open(filename) - nedges = 0 - for row in fp: - a, b = row.strip().split("--") - oa = a[-1] - ob = b[0] - a, b = a.strip("<>"), b.strip("<>") - self.add_edge(a, b, oa, ob, color=color) - nedges += 1 - logger.debug( - "A 
total of {0} edges imported from `{1}` (color={2}).".format( - nedges, filename, color - ) - ) - - def write(self, filename="stdout"): - - fw = must_open(filename, "w") - for e in self.edges.values(): - print(e, file=fw) - logger.debug("Graph written to `{0}`.".format(filename)) - - def draw( - self, - pngfile, - dpi=96, - verbose=False, - namestart=0, - nodehighlight=None, - prog="circo", - ): - import pygraphviz as pgv - - G = pgv.AGraph() - for e in self.edges.values(): - arrowhead = e.o1 == ">" - arrowtail = e.o2 == "<" - if e.o1 != e.o2: # Not sure why this is necessary - arrowhead = not arrowhead - arrowtail = not arrowtail - arrowhead = "normal" if arrowhead else "inv" - arrowtail = "normal" if arrowtail else "inv" - v1, v2 = e.v1, e.v2 - v1, v2 = str(v1)[namestart:], str(v2)[namestart:] - G.add_edge(v1, v2, color=e.color, arrowhead=arrowhead, arrowtail=arrowtail) - - if nodehighlight: - for n in nodehighlight: - n = n[namestart:] - n = G.get_node(n) - n.attr["shape"] = "box" - - G.graph_attr.update(dpi=str(dpi)) - if verbose: - G.write(sys.stderr) - G.draw(pngfile, prog=prog) - logger.debug("Graph written to `{0}`.".format(pngfile)) - - def get_next(self, node, tag="<"): - return self.get_node(node).get_next(tag) - - def get_path(self, n1, n2, tag="<"): - # return all intermediate nodes on path n1 -> n2 - path = deque() - next, ntag = self.get_next(n1, tag=tag) - while next: - if next.v == n2: - return path - path.append((next, ntag)) - next, ntag = next.get_next(tag=ntag) - return path if n2 is None else None - - -def graph_stats(G, diameter=False): - logger.debug("Graph stats: |V|={0}, |E|={1}".format(len(G), G.size())) - if diameter: - d = max(nx.diameter(H) for H in nx.connected_component_subgraphs(G)) - logger.debug("Graph diameter: {0}".format(d)) - - -def graph_local_neighborhood(G, query, maxdegree=10000, maxsize=10000): - c = [k for k, d in G.degree().iteritems() if d > maxdegree] - if c: - logger.debug("Remove {0} nodes with deg > 
{1}".format(len(c), maxdegree)) - G.remove_nodes_from(c) - - logger.debug("BFS search from {0}".format(query)) - - queue = set(query) - # BFS search of max depth - seen = set(query) - coresize = len(query) - depth = 0 - while True: - neighbors = set() - for q in queue: - if q not in G: - continue - neighbors |= set(G.neighbors(q)) - queue = neighbors - seen - if not queue: - break - - if len(seen | queue) > maxsize + coresize: - break - - seen |= queue - print( - "iter: {0}, graph size={1} ({2} excluding core)".format( - depth, len(seen), len(seen) - coresize - ), - file=sys.stderr, - ) - depth += 1 - - return G.subgraph(seen) - - -def graph_simplify(G): - """ - Simplify big graphs: remove spurs and contract unique paths. - """ - spurs = [] - path_nodes = [] - for k, d in G.degree().iteritems(): - if d == 1: - spurs.append(k) - elif d == 2: - path_nodes.append(k) - - logger.debug("Remove {0} spurs.".format(len(spurs))) - G.remove_nodes_from(spurs) - - SG = G.subgraph(path_nodes) - cc = nx.connected_components(SG) - for c in cc: - if len(c) == 1: - continue - c = set(c) - neighbors = set() - for x in c: - neighbors |= set(G.neighbors(x)) - neighbors -= c - newtag = list(c)[0] + "*" - for n in neighbors: - G.add_edge(newtag, n) - G.remove_nodes_from(c) - logger.debug( - "Contract {0} path nodes into {1} nodes.".format(len(path_nodes), len(cc)) - ) - - -def bigraph_test(): - g = BiGraph() - g.add_edge(1, 2, ">", "<") - g.add_edge(2, 3, "<", "<", color="red") - g.add_edge(2, 3, ">", ">", color="blue") - g.add_edge(5, 3, ">", ">") - g.add_edge(4, 3, "<", ">") - g.add_edge(4, 6, ">", ">") - g.add_edge(7, 1, ">", ">") - g.add_edge(7, 5, "<", ">") - g.add_edge(8, 6, ">", "<") - print(g) - g.write() - for path in g.iter_paths(): - p, oo = g.path(path) - print(p) - print(oo) - - # g.draw("demo.png", verbose=True) - - -def update_weight(G, a, b, w): - if G.has_edge(a, b): # Parallel edges found! 
- G[a][b]["weight"] += w - else: - G.add_edge(a, b, weight=w) - - -def make_paths(paths, weights=None): - """ - Zip together paths. Called by merge_paths(). - """ - npaths = len(paths) - weights = weights or [1] * npaths - assert len(paths) == len(weights) - - G = nx.DiGraph() - for path, w in zip(paths, weights): - for a, b in pairwise(path): - update_weight(G, a, b, w) - return G - - -def reduce_paths(G): - """ - Make graph into a directed acyclic graph (DAG). - """ - from jcvi.algorithms.lpsolve import min_feedback_arc_set - - while not nx.is_directed_acyclic_graph(G): - edges = [] - for a, b, w in G.edges_iter(data=True): - w = w["weight"] - edges.append((a, b, w)) - mf, mf_score = min_feedback_arc_set(edges) - for a, b, w in mf: - G.remove_edge(a, b) - - assert nx.is_directed_acyclic_graph(G) - G = transitive_reduction(G) - return G - - -def draw_graph(G, pngfile, prog="dot"): - G = nx.to_agraph(G) - G.draw(pngfile, prog=prog) - logger.debug("Graph written to `{0}`.".format(pngfile)) - - -def transitive_reduction(G): - """ - Returns a transitive reduction of a graph. The original graph - is not modified. - - A transitive reduction H of G has a path from x to y if and - only if there was a path from x to y in G. Deleting any edge - of H destroys this property. A transitive reduction is not - unique in general. A transitive reduction has the same - transitive closure as the original graph. - - A transitive reduction of a complete graph is a tree. A - transitive reduction of a tree is itself. - - >>> G = nx.DiGraph([(1, 2), (1, 3), (2, 3), (2, 4), (3, 4)]) - >>> H = transitive_reduction(G) - >>> H.edges() - [(1, 2), (2, 3), (3, 4)] - """ - H = G.copy() - for a, b, w in G.edges_iter(data=True): - # Try deleting the edge, see if we still have a path - # between the vertices - H.remove_edge(a, b) - if not nx.has_path(H, a, b): # we shouldn't have deleted it - H.add_edge(a, b, w) - return H - - -def merge_paths(paths, weights=None): - """ - Zip together sorted lists. 
- - >>> paths = [[1, 2, 3], [1, 3, 4], [2, 4, 5]] - >>> G = merge_paths(paths) - >>> nx.topological_sort(G) - [1, 2, 3, 4, 5] - >>> paths = [[1, 2, 3, 4], [1, 2, 3, 2, 4]] - >>> G = merge_paths(paths, weights=(1, 2)) - >>> nx.topological_sort(G) - [1, 2, 3, 4] - """ - G = make_paths(paths, weights=weights) - G = reduce_paths(G) - return G - - -def longest_path_weighted_nodes(G, source, target, weights=None): - """ - The longest path problem is the problem of finding a simple path of maximum - length in a given graph. While for general graph, this problem is NP-hard, - but if G is a directed acyclic graph (DAG), longest paths in G can be found - in linear time with dynamic programming. - - >>> G = nx.DiGraph([(1, 2), (1, 3), (2, "M"), (3, "M")]) - >>> longest_path_weighted_nodes(G, 1, "M", weights={1: 1, 2: 1, 3: 2, "M": 1}) - ([1, 3, 'M'], 4) - """ - assert nx.is_directed_acyclic_graph(G) - - tree = nx.topological_sort(G) - node_to_index = dict((t, i) for i, t in enumerate(tree)) - - nnodes = len(tree) - weights = [weights.get(x, 1) for x in tree] if weights else [1] * nnodes - score, fromc = weights[:], [-1] * nnodes - si = node_to_index[source] - ti = node_to_index[target] - for a in tree[si:ti]: - ai = node_to_index[a] - for b, w in G[a].items(): - bi = node_to_index[b] - w = w.get("weight", 1) - d = score[ai] + weights[bi] * w # Favor heavier edges - if d <= score[bi]: - continue - score[bi] = d # Update longest distance so far - fromc[bi] = ai - - # Backtracking - path = [] - while ti != -1: - path.append(ti) - ti = fromc[ti] - - path = [tree[x] for x in path[::-1]] - return path, score[ti] - - -if __name__ == "__main__": - import doctest - - doctest.testmod() - # bigraph_test() diff --git a/jcvi/algorithms/lis.py b/jcvi/algorithms/lis.py deleted file mode 100755 index 78b399a8..00000000 --- a/jcvi/algorithms/lis.py +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Longest increasing subsequence, code stolen from internet 
(thanks) -http://wordaligned.org/articles/patience-sort -""" -import bisect - -# We want a maximum function which accepts a default value -from functools import partial, reduce - -maximum = partial(reduce, max) - - -def patience_sort(xs): - """Patience sort an iterable, xs. - - This function generates a series of pairs (x, pile), where "pile" - is the 0-based index of the pile "x" should be placed on top of. - Elements of "xs" must be less-than comparable. - """ - pile_tops = list() - for x in xs: - pile = bisect.bisect_left(pile_tops, x) - if pile == len(pile_tops): - pile_tops.append(x) - else: - pile_tops[pile] = x - yield x, pile - - -def longest_monotonic_subseq_length(xs): - """Return the length of the longest monotonic subsequence of xs, second - return value is the difference between increasing and decreasing lengths. - - >>> longest_monotonic_subseq_length((4, 5, 1, 2, 3)) - (3, 1) - >>> longest_monotonic_subseq_length((1, 2, 3, 5, 4)) - (4, 2) - >>> longest_monotonic_subseq_length((1, 2, 1)) - (2, 0) - """ - li = longest_increasing_subseq_length(xs) - ld = longest_decreasing_subseq_length(xs) - return max(li, ld), li - ld - - -def longest_increasing_subseq_length(xs): - """Return the length of the longest increasing subsequence of xs. 
- - >>> longest_increasing_subseq_length(range(3)) - 3 - >>> longest_increasing_subseq_length([3, 1, 2, 0]) - 2 - """ - return 1 + maximum((pile for x, pile in patience_sort(xs)), -1) - - -def longest_decreasing_subseq_length(xs): - return longest_increasing_subseq_length(reversed(xs)) - - -def longest_monotonic_subseq_length_loose(xs): - li = longest_increasing_subseq_length_loose(xs) - ld = longest_decreasing_subseq_length_loose(xs) - return max(li, ld), li - ld - - -def longest_increasing_subseq_length_loose(xs): - xs = [(x, i) for (i, x) in enumerate(xs)] - return longest_increasing_subseq_length(xs) - - -def longest_decreasing_subseq_length_loose(xs): - xs = [(x, -i) for (i, x) in enumerate(xs)] - return longest_decreasing_subseq_length(xs) - - -def longest_increasing_subsequence(xs): - """Return a longest increasing subsequence of xs. - - (Note that there may be more than one such subsequence.) - >>> longest_increasing_subsequence(range(3)) - [0, 1, 2] - >>> longest_increasing_subsequence([3, 1, 2, 0]) - [1, 2] - """ - # Patience sort xs, stacking (x, prev_ix) pairs on the piles. - # Prev_ix indexes the element at the top of the previous pile, - # which has a lower x value than the current x value. 
- piles = [[]] # Create a dummy pile 0 - for x, p in patience_sort(xs): - if p + 1 == len(piles): - piles.append([]) - # backlink to the top of the previous pile - piles[p + 1].append((x, len(piles[p]) - 1)) - # Backtrack to find a longest increasing subsequence - npiles = len(piles) - 1 - prev = 0 - lis = list() - for pile in range(npiles, 0, -1): - x, prev = piles[pile][prev] - lis.append(x) - lis.reverse() - return lis - - -def longest_decreasing_subsequence(xs): - """ - Wrapper that calls longest_increasing_subsequence - >>> longest_decreasing_subsequence([23, 19, 97, 16, 37, 44, 88, 77, 26]) - [97, 88, 77, 26] - """ - return list(reversed(longest_increasing_subsequence(reversed(xs)))) - - -def longest_monotonic_subsequence(xs): - lis = longest_increasing_subsequence(xs) - lds = longest_decreasing_subsequence(xs) - if len(lis) >= len(lds): - return lis - return lds - - -def longest_monotonic_subsequence_loose(xs): - lis = longest_increasing_subsequence_loose(xs) - lds = longest_decreasing_subsequence_loose(xs) - if len(lis) >= len(lds): - return lis - return lds - - -def longest_increasing_subsequence_loose(xs): - xs = [(x, i) for (i, x) in enumerate(xs)] - ll = longest_increasing_subsequence(xs) - return [x for (x, i) in ll] - - -def longest_decreasing_subsequence_loose(xs): - xs = [(x, -i) for (i, x) in enumerate(xs)] - ll = longest_decreasing_subsequence(xs) - return [x for (x, i) in ll] - - -def backtracking(a, L, bestsofar): - """ - Start with the heaviest weight and emit index - """ - w, j = max(L.items()) - while j != -1: - yield j - w, j = bestsofar[j] - - -def heaviest_increasing_subsequence(a, debug=False): - """ - Returns the heaviest increasing subsequence for array a. Elements are (key, - weight) pairs. 
- - >>> heaviest_increasing_subsequence([(3, 3), (2, 2), (1, 1), (0, 5)]) - ([(0, 5)], 5) - """ - # Stores the smallest idx of last element of a subsequence of weight w - L = {0: -1} - bestsofar = [(0, -1)] * len(a) # (best weight, from_idx) - for i, (key, weight) in enumerate(a): - - for w, j in list(L.items()): - if j != -1 and a[j][0] >= key: - continue - - new_weight = w + weight - if new_weight in L and a[L[new_weight]][0] <= key: - continue - - L[new_weight] = i - newbest = (new_weight, j) - if newbest > bestsofar[i]: - bestsofar[i] = newbest - - if debug: - # print (key, weight), L - print((key, weight), bestsofar) - - tb = reversed(list(backtracking(a, L, bestsofar))) - return [a[x] for x in tb], max(L.items())[0] - - -if __name__ == "__main__": - import doctest - - doctest.testmod() - - import numpy as np - - LENGTH = 20 - A = [np.random.randint(0, 20) for x in range(LENGTH)] - A = list(A) - B = list(zip(A, [1] * LENGTH)) - print(A) - lis = longest_increasing_subsequence(A) - print("longest increasing:", lis) - lds = longest_decreasing_subsequence(A) - print("longest decreasing:", lds) - lisl = longest_increasing_subsequence_loose(A) - print("longest increasing loose:", lisl) - ldsl = longest_decreasing_subsequence_loose(A) - print("longest decreasing loose:", ldsl) - # this should be the same as longest_increasing_subsequence - his, his_dd = heaviest_increasing_subsequence(B) - hlis, wts = zip(*his) - print("heaviest increasing (weight 1, compare with lis):", hlis) - assert len(lis) == len(his) diff --git a/jcvi/algorithms/lpsolve.py b/jcvi/algorithms/lpsolve.py deleted file mode 100755 index 0c85b6cf..00000000 --- a/jcvi/algorithms/lpsolve.py +++ /dev/null @@ -1,802 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Implement a few MIP solvers, based on benchmark found on -SCIP solver is ~16x faster than GLPK solver. However, I found in rare cases -it will segfault. 
Therefore the default is SCIP, the program will switch to -GLPK solver for crashed cases. - -The input lp_data is assumed in .lp format, see below - ->>> lp_data = ''' -... Maximize -... 5 x1 + 3 x2 + 2 x3 -... Subject to -... x2 + x3 <= 1 -... Binary -... x1 -... x2 -... x3 -... End''' ->>> print SCIPSolver(lp_data).results -[0, 1] ->>> print GLPKSolver(lp_data).results -[0, 1] -""" -import os.path as op - -from dataclasses import dataclass -from io import StringIO -from more_itertools import pairwise - -import networkx as nx - -from ..apps.base import cleanup, logger, mkdir, sh -from ..formats.base import flexible_cast -from ..utils.cbook import fill - -from .tsp import populate_edge_weights, node_to_edge - - -Work_dir = "lpsolve_work" - -# CPLEX LP format -# -MAXIMIZE = "Maximize" -MINIMIZE = "Minimize" -SUBJECTTO = "Subject To" -BOUNDS = "Bounds" -BINARY = "Binary" -GENERNAL = "General" -END = "End" - - -@dataclass -class MIPDataModel: - """Data model for use with OR-tools. Modeled after the tutorial.""" - - constraint_coeffs: list # List of dict of coefficients - bounds: list # Maximum value for each constraint clause - obj_coeffs: list # Coefficient in the objective function - num_vars: int - num_constraints: int - - def format_lp(self) -> str: - """Format data dictionary into MIP formatted string. 
- - Returns: - str: MIP formatted string - """ - lp_handle = StringIO() - - lp_handle.write(f"{MAXIMIZE}\n ") - records = 0 - for i, score in enumerate(self.obj_coeffs): - lp_handle.write("+ %d x%d " % (score, i)) - # SCIP does not like really long string per row - records += 1 - if records % 10 == 0: - lp_handle.write("\n") - lp_handle.write("\n") - - lp_handle.write(f"{SUBJECTTO}\n") - for constraint, bound in zip(self.constraint_coeffs, self.bounds): - additions = " + ".join("x{}".format(i) for (i, x) in constraint.items()) - lp_handle.write(" %s <= %d\n" % (additions, bound)) - - self.log() - - lp_handle.write(f"{BINARY}\n") - for i in range(self.num_vars): - lp_handle.write(" x{}\n".format(i)) - - lp_handle.write(f"{END}\n") - - lp_data = lp_handle.getvalue() - lp_handle.close() - - return lp_data - - def create_solver(self, backend: str = "SCIP"): - """ - Create OR-tools solver instance. See also: - https://developers.google.com/optimization/mip/mip_var_array - - Args: - backend (str, optional): Backend for the MIP solver. Defaults to "SCIP". - - Returns: - OR-tools solver instance - """ - from ortools.linear_solver import pywraplp - - solver = pywraplp.Solver.CreateSolver(backend) - x = {} - for j in range(self.num_vars): - x[j] = solver.IntVar(0, 1, "x[%i]" % j) - - for bound, constraint_coeff in zip(self.bounds, self.constraint_coeffs): - constraint = solver.RowConstraint(0, bound, "") - for j, coeff in constraint_coeff.items(): - constraint.SetCoefficient(x[j], coeff) - - self.log() - - objective = solver.Objective() - for j, score in enumerate(self.obj_coeffs): - objective.SetCoefficient(x[j], score) - objective.SetMaximization() - - return solver, x - - def log(self): - """Log the size of the MIP instance""" - logger.info( - "Number of variables (%d), number of constraints (%d)", - self.num_vars, - self.num_constraints, - ) - - def solve(self, work_dir="work", verbose=False): - """Solve the MIP instance. 
This runs OR-tools as default solver, then - SCIP, GLPK in that order. - - Args: - work_dir (str, optional): Work directory, only used when OR-tools fail. Defaults to "work". - verbose (bool, optional): Verbosity level, only used when OR-tools fail. Defaults to False. - - Returns: - list[int]: List of indices that are selected - """ - filtered_list = [] - - if has_ortools(): - # Use OR-tools - from ortools.linear_solver import pywraplp - - solver, x = self.create_solver() - status = solver.Solve() - if status == pywraplp.Solver.OPTIMAL: - logger.info("Objective value = %d", solver.Objective().Value()) - filtered_list = [ - j for j in range(self.num_vars) if x[j].solution_value() == 1 - ] - logger.info("Problem solved in %d milliseconds", solver.wall_time()) - logger.info("Problem solved in %d iterations", solver.iterations()) - logger.info( - "Problem solved in %d branch-and-bound nodes", solver.nodes() - ) - - # Use custom formatter as a backup - if not filtered_list: - lp_data = self.format_lp() - filtered_list = SCIPSolver(lp_data, work_dir, verbose=verbose).results - if not filtered_list: - logger.error("SCIP fails... 
trying GLPK") - filtered_list = GLPKSolver(lp_data, work_dir, verbose=verbose).results - - return filtered_list - - -class AbstractMIPSolver(object): - """ - Base class for LP solvers - """ - - obj_val: float - - def __init__(self, lp_data, work_dir=Work_dir, clean=True, verbose=False): - - self.work_dir = work_dir - self.clean = clean - self.verbose = verbose - - mkdir(work_dir) - - lpfile = op.join(work_dir, "data.lp") # problem instance - logger.debug("Write MIP instance to `%s`", lpfile) - - fw = open(lpfile, "w") - fw.write(lp_data) - fw.close() - - retcode, outfile = self.run(lpfile) - if retcode < 0: - self.results = [] - else: - self.results = self.parse_output(outfile) - - if self.results: - logger.debug("Optimized objective value (%s)", self.obj_val) - - def run(self, lp_data): - raise NotImplementedError - - def parse_output(self): - raise NotImplementedError - - def cleanup(self): - cleanup(self.work_dir) - - -class GLPKSolver(AbstractMIPSolver): - """ - GNU Linear Programming Kit (GLPK) solver, wrapper for calling GLPSOL - """ - - def run(self, lpfile): - - outfile = op.join(self.work_dir, "data.lp.out") # verbose output - listfile = op.join(self.work_dir, "data.lp.list") # simple output - # cleanup in case something wrong happens - cleanup(outfile, listfile) - - cmd = "glpsol --cuts --fpump --lp {0} -o {1} -w {2}".format( - lpfile, outfile, listfile - ) - - outf = None if self.verbose else "/dev/null" - retcode = sh(cmd, outfile=outf) - - if retcode == 127: - logger.error( - "You need to install program `glpsol` [http://www.gnu.org/software/glpk/]" - ) - return -1, None - - return retcode, listfile - - def parse_output(self, listfile, clean=False): - """Extract results from the GLPK output. The GLPK output looks like - - c Problem: - c Rows: 306 - c Columns: 520 - c Non-zeros: 623 - c Status: INTEGER OPTIMAL - c Objective: obj = 23405 (MAXimum) - c - s mip 306 520 o 23405 - i 1 1 - i 2 1 - i 3 1 - i 4 1 - i 5 1 - i 6 1 - ... 
- """ - fp = open(listfile) - results = [] - expected_cols, observed_cols = 0, 0 - for row in fp: - if row[0] == "s": - s, mip, rows, cols, o, obj_val = row.split() - expected_cols = int(cols) - self.obj_val = int(obj_val) - if row[0] != "j": - continue - observed_cols += 1 - tag, row_id, value = row.split() - assert tag == "j", "Expecting the first field == j" - row_id, value = int(row_id), int(value) - if value == 1: - results.append(row_id - 1) - - assert ( - expected_cols == observed_cols - ), "Number of columns mismatch: expected {}, observed {}".format( - expected_cols, observed_cols - ) - - fp.close() - - if self.clean: - self.cleanup() - - return results - - -class SCIPSolver(AbstractMIPSolver): - """ - SCIP solver, wrapper for calling SCIP executable - """ - - def run(self, lpfile): - - outfile = self.work_dir + "/data.lp.out" # verbose output - cleanup(outfile) - - cmd = "scip -f {0} -l {1}".format(lpfile, outfile) - - outf = None if self.verbose else "/dev/null" - retcode = sh(cmd, outfile=outf) - - if retcode == 127: - logger.error("You need to install program `scip` [http://scip.zib.de/]") - return -1, None - - return retcode, outfile - - def parse_output(self, outfile): - - fp = open(outfile) - for row in fp: - if row.startswith("objective value"): - obj_row = row - break - - results = [] - for row in fp: - """ - objective value: 8 - x1 1 (obj:5) - x2 1 (obj:3) - """ - if row.strip() == "": # blank line ends the section - break - x = row.split()[0] - results.append(int(x[1:]) - 1) # 0-based indexing - - if results: - self.obj_val = flexible_cast(obj_row.split(":")[1].strip()) - - fp.close() - - if self.clean: - self.cleanup() - - return results - - -class LPInstance(object): - """ - CPLEX LP format commonly contains three blocks: - objective, constraints, vars - spec - """ - - def __init__(self): - self.objective = MAXIMIZE - self.sum = "" - self.constraints = [] - self.bounds = [] - self.binaryvars = [] - self.generalvars = [] - - def 
print_instance(self): - self.handle = fw = StringIO() - print(self.objective, file=fw) - print(self.sum, file=fw) - print(SUBJECTTO, file=fw) - assert self.constraints, "Must contain constraints" - print("\n".join(self.constraints), file=fw) - if self.bounds: - print(BOUNDS, file=fw) - print("\n".join(self.bounds), file=fw) - if self.binaryvars: - print(BINARY, file=fw) - print("\n".join(self.binaryvars), file=fw) - if self.generalvars: - print(GENERNAL, file=fw) - print("\n".join(self.generalvars), file=fw) - print(END, file=fw) - - def add_objective(self, edges, objective=MAXIMIZE): - assert edges, "Edges must be non-empty" - self.objective = objective - items = [ - " + {0}x{1}".format(w, i + 1) for i, (a, b, w) in enumerate(edges) if w - ] - sums = fill(items, width=10) - self.sum = sums - - def add_vars(self, nedges, offset=1, binary=True): - vars = [" x{0}".format(i + offset) for i in range(nedges)] - if binary: - self.binaryvars = vars - else: - self.generalvars = vars - - def lpsolve(self, solver="scip", clean=True): - self.print_instance() - - solver = SCIPSolver if solver == "scip" else GLPKSolver - lp_data = self.handle.getvalue() - self.handle.close() - - g = solver(lp_data, clean=clean) - selected = set(g.results) - try: - obj_val = g.obj_val - except AttributeError: # No solution! - return None, None - return selected, obj_val - - -def has_ortools() -> bool: - """Do we have an installation of OR-tools? - - Returns: - bool: True if installed - """ - try: - from ortools.linear_solver import pywraplp - - return True - except ImportError: - return False - - -def summation(incident_edges): - s = "".join(" + x{0}".format(i + 1) for i in incident_edges) - return s - - -def edges_to_graph(edges): - G = nx.DiGraph() - for e in edges: - a, b = e[:2] - G.add_edge(a, b) - return G - - -def edges_to_path(edges): - """ - Connect edges and return a path. 
- """ - if not edges: - return None - - G = edges_to_graph(edges) - path = nx.topological_sort(G) - return path - - -def hamiltonian(edges, directed=False): - """ - Calculates shortest path that traverses each node exactly once. Convert - Hamiltonian path problem to TSP by adding one dummy point that has a distance - of zero to all your other points. Solve the TSP and get rid of the dummy - point - what remains is the Hamiltonian Path. - - >>> g = [(1,2), (2,3), (3,4), (4,2), (3,5)] - >>> hamiltonian(g) - [1, 2, 4, 3, 5] - >>> g = [(1,2), (2,3), (1,4), (2,5), (3,6)] - >>> hamiltonian(g) - """ - edges = populate_edge_weights(edges) - _, _, nodes = node_to_edge(edges, directed=False) - if not directed: # Make graph symmetric - dual_edges = edges[:] - for a, b, w in edges: - dual_edges.append((b, a, w)) - edges = dual_edges - - DUMMY = "DUMMY" - dummy_edges = ( - edges + [(DUMMY, x, 0) for x in nodes] + [(x, DUMMY, 0) for x in nodes] - ) - - results = tsp_gurobi(dummy_edges) - if results: - results = [x for x in results if DUMMY not in x] - results = edges_to_path(results) - if not directed: - results = min(results, results[::-1]) - return results - - -def tsp_gurobi(edges): - """ - Modeled using GUROBI python example. 
- """ - from gurobipy import Model, GRB, quicksum - - edges = populate_edge_weights(edges) - incoming, outgoing, nodes = node_to_edge(edges) - idx = dict((n, i) for i, n in enumerate(nodes)) - nedges = len(edges) - n = len(nodes) - - m = Model() - - def step(x): - return "u_{0}".format(x) - - # Create variables - vars = {} - for i, (a, b, w) in enumerate(edges): - vars[i] = m.addVar(obj=w, vtype=GRB.BINARY, name=str(i)) - for u in nodes[1:]: - u = step(u) - vars[u] = m.addVar(obj=0, vtype=GRB.INTEGER, name=u) - m.update() - - # Bounds for step variables - for u in nodes[1:]: - u = step(u) - vars[u].lb = 1 - vars[u].ub = n - 1 - - # Add degree constraint - for v in nodes: - incoming_edges = incoming[v] - outgoing_edges = outgoing[v] - m.addConstr(quicksum(vars[x] for x in incoming_edges) == 1) - m.addConstr(quicksum(vars[x] for x in outgoing_edges) == 1) - - # Subtour elimination - edge_store = dict(((idx[a], idx[b]), i) for i, (a, b, w) in enumerate(edges)) - - # Given a list of edges, finds the shortest subtour - def subtour(s_edges): - visited = [False] * n - cycles = [] - lengths = [] - selected = [[] for i in range(n)] - for x, y in s_edges: - selected[x].append(y) - while True: - current = visited.index(False) - thiscycle = [current] - while True: - visited[current] = True - neighbors = [x for x in selected[current] if not visited[x]] - if len(neighbors) == 0: - break - current = neighbors[0] - thiscycle.append(current) - cycles.append(thiscycle) - lengths.append(len(thiscycle)) - if sum(lengths) == n: - break - return cycles[lengths.index(min(lengths))] - - def subtourelim(model, where): - if where != GRB.callback.MIPSOL: - return - selected = [] - # make a list of edges selected in the solution - sol = model.cbGetSolution([model._vars[i] for i in range(nedges)]) - selected = [edges[i] for i, x in enumerate(sol) if x > 0.5] - selected = [(idx[a], idx[b]) for a, b, w in selected] - # find the shortest cycle in the selected edge list - tour = subtour(selected) 
- if len(tour) == n: - return - # add a subtour elimination constraint - c = tour - incident = [edge_store[a, b] for a, b in pairwise(c + [c[0]])] - model.cbLazy(quicksum(model._vars[x] for x in incident) <= len(tour) - 1) - - m.update() - - m._vars = vars - m.params.LazyConstraints = 1 - m.optimize(subtourelim) - - selected = [v.varName for v in m.getVars() if v.x > 0.5] - selected = [int(x) for x in selected if x[:2] != "u_"] - results = ( - sorted(x for i, x in enumerate(edges) if i in selected) if selected else None - ) - return results - - -def tsp(edges, constraint_generation=False): - """ - Calculates shortest cycle that traverses each node exactly once. Also known - as the Traveling Salesman Problem (TSP). - """ - edges = populate_edge_weights(edges) - incoming, outgoing, nodes = node_to_edge(edges) - - nedges, nnodes = len(edges), len(nodes) - L = LPInstance() - - L.add_objective(edges, objective=MINIMIZE) - balance = [] - # For each node, select exactly 1 incoming and 1 outgoing edge - for v in nodes: - incoming_edges = incoming[v] - outgoing_edges = outgoing[v] - icc = summation(incoming_edges) - occ = summation(outgoing_edges) - balance.append("{0} = 1".format(icc)) - balance.append("{0} = 1".format(occ)) - - # Subtour elimination - Miller-Tucker-Zemlin (MTZ) formulation - # - # Desrochers and laporte, 1991 (DFJ) has a stronger constraint - # See also: - # G. 
Laporte / The traveling salesman problem: Overview of algorithms - start_step = nedges + 1 - u0 = nodes[0] - nodes_to_steps = dict((n, start_step + i) for i, n in enumerate(nodes[1:])) - edge_store = dict((e[:2], i) for i, e in enumerate(edges)) - mtz = [] - for i, e in enumerate(edges): - a, b = e[:2] - if u0 in (a, b): - continue - na, nb = nodes_to_steps[a], nodes_to_steps[b] - con_ab = " x{0} - x{1} + {2}x{3}".format(na, nb, nnodes - 1, i + 1) - if (b, a) in edge_store: # This extra term is the stronger DFJ formulation - j = edge_store[(b, a)] - con_ab += " + {0}x{1}".format(nnodes - 3, j + 1) - con_ab += " <= {0}".format(nnodes - 2) - mtz.append(con_ab) - - # Step variables u_i bound between 1 and n, as additional variables - bounds = [] - for i in range(start_step, nedges + nnodes): - bounds.append(" 1 <= x{0} <= {1}".format(i, nnodes - 1)) - - L.add_vars(nedges) - - """ - Constraint generation seek to find 'cuts' in the LP problem, by solving the - relaxed form. The subtours were then incrementally added to the constraints. 
- """ - if constraint_generation: - L.constraints = balance - subtours = [] - while True: - selected, obj_val = L.lpsolve() - results = ( - sorted(x for i, x in enumerate(edges) if i in selected) - if selected - else None - ) - if not results: - break - G = edges_to_graph(results) - cycles = list(nx.simple_cycles(G)) - if len(cycles) == 1: - break - for c in cycles: - incident = [edge_store[a, b] for a, b in pairwise(c + [c[0]])] - icc = summation(incident) - subtours.append("{0} <= {1}".format(icc, len(incident) - 1)) - L.constraints = balance + subtours - else: - L.constraints = balance + mtz - L.add_vars(nnodes - 1, offset=start_step, binary=False) - L.bounds = bounds - selected, obj_val = L.lpsolve() - results = ( - sorted(x for i, x in enumerate(edges) if i in selected) - if selected - else None - ) - - return results - - -def path(edges, source, sink, flavor="longest"): - """ - Calculates shortest/longest path from list of edges in a graph - - >>> g = [(1,2,1),(2,3,9),(2,4,3),(2,5,2),(3,6,8),(4,6,10),(4,7,4)] - >>> g += [(6,8,7),(7,9,5),(8,9,6),(9,10,11)] - >>> path(g, 1, 8, flavor="shortest") - ([1, 2, 4, 6, 8], 21) - >>> path(g, 1, 8, flavor="longest") - ([1, 2, 3, 6, 8], 25) - """ - outgoing, incoming, nodes = node_to_edge(edges) - - nedges = len(edges) - L = LPInstance() - - assert flavor in ("longest", "shortest") - - objective = MAXIMIZE if flavor == "longest" else MINIMIZE - L.add_objective(edges, objective=objective) - - # Balancing constraint, incoming edges equal to outgoing edges except - # source and sink - - constraints = [] - for v in nodes: - incoming_edges = incoming[v] - outgoing_edges = outgoing[v] - icc = summation(incoming_edges) - occ = summation(outgoing_edges) - - if v == source: - if not outgoing_edges: - return None - constraints.append("{0} = 1".format(occ)) - elif v == sink: - if not incoming_edges: - return None - constraints.append("{0} = 1".format(icc)) - else: - # Balancing - constraints.append("{0}{1} = 0".format(icc, 
occ.replace("+", "-"))) - # Simple path - if incoming_edges: - constraints.append("{0} <= 1".format(icc)) - if outgoing_edges: - constraints.append("{0} <= 1".format(occ)) - - L.constraints = constraints - L.add_vars(nedges) - - selected, obj_val = L.lpsolve() - results = ( - sorted(x for i, x in enumerate(edges) if i in selected) if selected else None - ) - results = edges_to_path(results) - - return results, obj_val - - -def min_feedback_arc_set(edges, remove=False, maxcycles=20000): - """ - A directed graph may contain directed cycles, when such cycles are - undesirable, we wish to eliminate them and obtain a directed acyclic graph - (DAG). A feedback arc set has the property that it has at least one edge - of every cycle in the graph. A minimum feedback arc set is the set that - minimizes the total weight of the removed edges; or alternatively maximize - the remaining edges. See: . - - The MIP formulation proceeds as follows: use 0/1 indicator variable to - select whether an edge is in the set, subject to constraint that each cycle - must pick at least one such edge. 
- - >>> g = [(1, 2, 2), (2, 3, 2), (3, 4, 2)] + [(1, 3, 1), (3, 2, 1), (2, 4, 1)] - >>> min_feedback_arc_set(g) - ([(3, 2, 1)], 1) - >>> min_feedback_arc_set(g, remove=True) # Return DAG - ([(1, 2, 2), (2, 3, 2), (3, 4, 2), (1, 3, 1), (2, 4, 1)], 1) - """ - G = nx.DiGraph() - edge_to_index = {} - for i, (a, b, w) in enumerate(edges): - G.add_edge(a, b) - edge_to_index[a, b] = i - - nedges = len(edges) - L = LPInstance() - - L.add_objective(edges, objective=MINIMIZE) - - constraints = [] - ncycles = 0 - for c in nx.simple_cycles(G): - cycle_edges = [] - rc = c + [c[0]] # Rotate the cycle - for a, b in pairwise(rc): - cycle_edges.append(edge_to_index[a, b]) - cc = summation(cycle_edges) - constraints.append("{0} >= 1".format(cc)) - ncycles += 1 - if ncycles == maxcycles: - break - logger.debug("A total of %d cycles found.", ncycles) - - L.constraints = constraints - L.add_vars(nedges) - - selected, obj_val = L.lpsolve(clean=False) - if remove: - results = ( - [x for i, x in enumerate(edges) if i not in selected] if selected else None - ) - else: - results = ( - [x for i, x in enumerate(edges) if i in selected] if selected else None - ) - - return results, obj_val - - -if __name__ == "__main__": - - import doctest - - doctest.testmod() diff --git a/jcvi/algorithms/matrix.py b/jcvi/algorithms/matrix.py deleted file mode 100644 index 2f845af4..00000000 --- a/jcvi/algorithms/matrix.py +++ /dev/null @@ -1,209 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Matrix related subroutines -""" - -import math -import numpy as np - - -is_symmetric = lambda M: (M.T == M).all() - - -def compact(A, factor=2): - """Make a matrix compact by a compact_factor. - Reference: - https://stackoverflow.com/questions/36383107/how-to-evaluate-the-sum-of-values-within-array-blocks - - Args: - A (numpy.ndarray): 2D matrix - factor (int, optional): Compact factor. Defaults to 2. 
- - Example: - >>> A = np.arange(16, dtype=int).reshape(4, 4); A - array([[ 0, 1, 2, 3], - [ 4, 5, 6, 7], - [ 8, 9, 10, 11], - [12, 13, 14, 15]]) - >>> compact(A, factor=2) - array([[10, 18], - [42, 50]]) - >>> compact(A, factor=4) - array([[120]]) - """ - assert len(A.shape) == 2, "Input matrix must be 2D" - rows, cols = A.shape - new_rows = rows // factor * factor - new_cols = cols // factor * factor - if (new_rows, new_cols) != A.shape: - A = A[:new_rows, :new_cols] - A_reshaped = A.reshape(rows // factor, factor, cols // factor, factor) - return np.einsum("ijkl->ik", A_reshaped) - - -def moving_sum(a, window=10): - kernel = np.repeat(1, window) - return np.convolve(a, kernel, mode="same") - - -def moving_average(a, window=10): - kernel = np.repeat(1.0, window) / window - return np.convolve(a, kernel) - - -def chunk_average(a, window=10, offset=None): - # Fixed size window, take average within the window - offset = offset or window - - bins = int(math.ceil((a.size - window) * 1.0 / offset)) + 1 - r = np.zeros((bins,), dtype=np.float) - start = 0 - for i in range(bins): - r[i] = np.average(a[start : start + window]) - start += offset - return r - - -def determine_positions(nodes, edges): - """ - Construct the problem instance to solve the positions of contigs. - - The input for spring_system() is A, K, L, which looks like the following. 
- A = np.array([[1, -1, 0], [0, 1, -1], [1, 0, -1]]) - K = np.eye(3, dtype=int) - L = np.array([1, 2, 3]) - - For example, A-B distance 1, B-C distance 2, A-C distance 3, solve positions - - >>> determine_positions([0, 1, 2], [(0, 1, 1), (1, 2, 2), (0, 2, 3)]) - array([0, 1, 3]) - """ - N = len(nodes) - E = len(edges) - - A = np.zeros((E, N), dtype=int) - for i, (a, b, distance) in enumerate(edges): - A[i, a] = 1 - A[i, b] = -1 - - K = np.eye(E, dtype=int) - L = np.array([x[-1] for x in edges]) - - s = spring_system(A, K, L) - return np.array([0] + [int(round(x, 0)) for x in s]) - - -def determine_signs(nodes, edges, cutoff=1e-10): - """ - Construct the orientation matrix for the pairs on N molecules. - - >>> determine_signs([0, 1, 2], [(0, 1, 1), (0, 2, -1), (1, 2, -1)]) - array([ 1, 1, -1]) - """ - N = len(nodes) - M = np.zeros((N, N), dtype=float) - for a, b, w in edges: - M[a, b] += w - M = symmetrize(M) - - return get_signs(M, cutoff=cutoff, validate=False) - - -def symmetrize(M): - """ - If M only has a triangle filled with values, all the rest are zeroes, - this function will copy stuff to the other triangle - """ - return M + M.T - np.diag(M.diagonal()) - - -def get_signs(M, cutoff=1e-10, validate=True, ambiguous=True): - """ - Given a numpy array M that contains pairwise orientations, find the largest - eigenvalue and associated eigenvector and return the signs for the - eigenvector. This should correspond to the original orientations for the - individual molecule. In the first example below, let's say 3 molecules A, B - and C, A-B:same direction, A-C:opposite direction, B-C:opposite - direction. The final solution is to flip C. - - >>> M = np.array([[0,1,-1],[1,0,-1],[-1,-1,0]]) - >>> get_signs(M) - array([ 1, 1, -1]) - >>> M = np.array([[0,1,-1],[1,0,0],[-1,0,0]]) - >>> get_signs(M) - array([ 1, 1, -1]) - """ - # Is this a symmetric matrix? 
- assert is_symmetric(M), "the matrix is not symmetric:\n{0}".format(str(M)) - N, x = M.shape - - # eigh() works on symmetric matrix (Hermitian) - w, v = np.linalg.eigh(M) - m = np.argmax(w) - mv = v[:, m] - f = lambda x: (x if abs(x) > cutoff else 0) - mv = [f(x) for x in mv] - - sign_array = np.array(np.sign(mv), dtype=int) - - # it does not really matter, but we prefer as few flippings as possible - if np.sum(sign_array) < 0: - sign_array = -sign_array - - if validate: - diag = np.eye(N, dtype=int) * sign_array - # final = diag @ M @ diag - final = diag.dot(M).dot(diag) # Python2.7 compatible - # The final result should have all pairwise in the same direction - assert (final >= 0).all(), "result check fails:\n{0}".format(final) - - if not ambiguous: # Do we allow ambiguous orientation (=0) ? - sign_array[sign_array == 0] = 1 - - return sign_array - - -def spring_system(A, K, L): - """ - Solving the equilibrium positions of the objects, linked by springs of - length L, stiffness of K, and connectivity matrix A. Then solving: - - F_nodes = -A'KAx - A'KL = 0 - - In the context of scaffolding, lengths (L) are inferred by mate inserts, - stiffness (K) is inferred via the number of links, connectivity (A) is the - contigs they connect. The mate pairs form the linkages between the contigs, - and can be considered as "springs" of certain lengths. The "springs" are - stretched or compressed if the distance deviates from the expected insert size. - - See derivation from Dayarian et al. 2010. SOPRA paper. 
- - o---------o--------------o - x0 x1 x2 - |~~~~L1~~~|~~~~~~L2~~~~~~| - |~~~~~~~~~~L3~~~~~~~~~~~~| - - >>> A = np.array([[1, -1, 0], [0, 1, -1], [1, 0, -1]]) - >>> K = np.eye(3, dtype=int) - >>> L = np.array([1, 2, 3]) - >>> spring_system(A, K, L) - array([1., 3.]) - """ - # Linear equation is A'KAx = -A'KL - C = np.dot(A.T, K) - left = np.dot(C, A) - right = -np.dot(C, L) - - left = left[1:, 1:] - right = right[1:] - x = np.linalg.solve(left, right) - - return x - - -if __name__ == "__main__": - import doctest - - doctest.testmod() diff --git a/jcvi/algorithms/maxsum.py b/jcvi/algorithms/maxsum.py deleted file mode 100644 index 8843a0af..00000000 --- a/jcvi/algorithms/maxsum.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Implements the max sum segment algorithm, using Kadane's algorithm, see - - -""" -Infinity = 1e10000 - - -def max_sum(a): - """ - For an input array a, output the range that gives the largest sum - - >>> max_sum([4, 4, 9, -5, -6, -1, 5, -6, -8, 9]) - (17, 0, 2) - >>> max_sum([8, -10, 10, -9, -6, 9, -7, -4, -10, -8]) - (10, 2, 2) - >>> max_sum([10, 1, -10, -8, 6, 10, -10, 6, -3, 10]) - (19, 4, 9) - """ - - max_sum, max_start_index, max_end_index = -Infinity, 0, 0 - current_max_sum = 0 - current_start_index = 0 - for current_end_index, x in enumerate(a): - current_max_sum += x - if current_max_sum > max_sum: - max_sum, max_start_index, max_end_index = ( - current_max_sum, - current_start_index, - current_end_index, - ) - if current_max_sum < 0: - current_max_sum = 0 - current_start_index = current_end_index + 1 - - return max_sum, max_start_index, max_end_index - - -if __name__ == "__main__": - import doctest - - doctest.testmod() - - import numpy as np - - A = np.random.random_integers(-10, 10, 10) - print("max_sum(%s)" % list(A)) - print(max_sum(A)) diff --git a/jcvi/algorithms/supermap.py b/jcvi/algorithms/supermap.py deleted file mode 100755 index e4f54bad..00000000 --- a/jcvi/algorithms/supermap.py 
+++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog infile [options] - -This script combines pairwise alignments, sort and filter the alignments. -Infile expect BLAST tabular format (-m8) or nucmer .coords file. - -In order to handle dups, we have to run two monotonic chains in both genomes, -first chain using ref, and a second chain using query and we will have options -to keep either the union or the intersection of retain chained alignments from -both genomes, similar to the SUPERMAP algorithm. This operation is symmetrical. -""" -import sys - -from ..apps.base import OptionParser, logger -from ..formats.blast import BlastLine -from ..formats.coords import CoordsLine -from ..utils.range import Range, range_chain - - -def BlastOrCoordsLine(filename, filter="ref", dialect="blast", clip=0): - allowed_filters = ("ref", "query") - REF, QUERY = range(len(allowed_filters)) - - allowed_dialects = ("blast", "coords") - BLAST, COORDS = range(len(allowed_dialects)) - - assert filter in allowed_filters - filter = allowed_filters.index(filter) - - assert dialect in allowed_dialects - dialect = allowed_dialects.index(dialect) - - fp = open(filename) - for i, row in enumerate(fp): - if row[0] == "#": - continue - if dialect == BLAST: - b = BlastLine(row) - if filter == QUERY: - query, start, end = b.query, b.qstart, b.qstop - else: - query, start, end = b.subject, b.sstart, b.sstop - else: - try: - b = CoordsLine(row) - except AssertionError: - continue - - if filter == QUERY: - query, start, end = b.query, b.start2, b.end2 - else: - query, start, end = b.ref, b.start1, b.end1 - - if start > end: - start, end = end, start - - if clip: - # clip cannot be more than 5% of the range - r = end - start + 1 - cc = min(0.05 * r, clip) - start = start + cc - end = end - cc - - yield Range(query, start, end, b.score, i) - - -def supermap(blast_file, filter="intersection", dialect="blast", clip=0): - # filter by query - if filter != "ref": - 
logger.debug("filter by query") - ranges = list( - BlastOrCoordsLine(blast_file, filter="query", dialect=dialect, clip=clip) - ) - - query_selected, query_score = range_chain(ranges) - query_idx = set(x.id for x in query_selected) - - # filter by ref - if filter != "query": - logger.debug("filter by ref") - ranges = list( - BlastOrCoordsLine(blast_file, filter="ref", dialect=dialect, clip=clip) - ) - - ref_selected, ref_score = range_chain(ranges) - ref_idx = set(x.id for x in ref_selected) - - if filter == "ref": - selected_idx = ref_idx - - elif filter == "query": - selected_idx = query_idx - - elif filter == "intersection": - logger.debug("perform intersection") - selected_idx = ref_idx & query_idx - - elif filter == "union": - logger.debug("perform union") - selected_idx = ref_idx | query_idx - - assert len(selected_idx) != 0 - - # selected_idx is in fact the lineno in the BLAST file - fp = open(blast_file) - - if filter == "intersection": - tag = "" - else: - tag = "." + filter - supermapfile = blast_file + tag + ".supermap" - fw = open(supermapfile, "w") - - selected_idx = iter(sorted(selected_idx)) - selected = next(selected_idx) - for i, row in enumerate(fp): - if i < selected: - continue - print(row.rstrip(), file=fw) - try: - selected = next(selected_idx) - except StopIteration: - break - - logger.debug("Write output file to `{0}`".format(supermapfile)) - fw.close() - - from jcvi.formats.blast import sort - - ofilter = "ref" if filter == "ref" else "query" - args = [supermapfile, "--" + ofilter] - if dialect == "coords": - args += ["--coords"] - - sort(args) - - return supermapfile - - -if __name__ == "__main__": - - p = OptionParser(__doc__) - - filter_choices = ("ref", "query", "intersection", "union") - dialect_choices = ("blast", "coords") - p.add_argument( - "--filter", - choices=filter_choices, - default="intersection", - help="Available filters", - ) - p.add_argument("--dialect", choices=dialect_choices, help="Input format") - p.add_argument( - 
"--clip", - default=0, - type=int, - help="Clip ranges so that to allow minor overlaps", - ) - - opts, args = p.parse_args() - - if len(args) != 1: - sys.exit(p.print_help()) - - (blast_file,) = args - - dialect = opts.dialect - if not dialect: - # guess from the suffix - dialect = "coords" if blast_file.endswith(".coords") else "blast" - logger.debug("dialect is %s" % dialect) - - supermap(blast_file, filter=opts.filter, dialect=dialect, clip=opts.clip) diff --git a/jcvi/algorithms/tsp.py b/jcvi/algorithms/tsp.py deleted file mode 100644 index 2288d25d..00000000 --- a/jcvi/algorithms/tsp.py +++ /dev/null @@ -1,393 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -TSP solver using Concorde or OR-tools. This is much faster than the LP-formulation in -algorithms.lpsolve.tsp(). See also: -https://developers.google.com/optimization/routing/tsp -""" -import os.path as op - -from collections import defaultdict -from dataclasses import dataclass -from itertools import combinations - -import numpy as np - -from more_itertools import pairwise - -from jcvi.formats.base import must_open -from jcvi.apps.base import cleanup, logger, mkdir, sh, which - - -INF = 10000 -NEG_INF = -INF -Work_dir = "tsp_work" - - -@dataclass -class TSPDataModel: - edges: list # List of tuple (source, target, weight) - - def distance_matrix(self, precision=0) -> tuple: - """Compute the distance matrix - - Returns: - np.array: Numpy square matrix with integer entries as distance - """ - _, _, nodes = node_to_edge(self.edges, directed=False) - nodes_indices = dict((n, i) for i, n in enumerate(nodes)) - nnodes = len(nodes) - - # TSPLIB requires explicit weights to be integral, and non-negative - weights = [x[-1] for x in self.edges] - max_x, min_x = max(weights), min(weights) - inf = 2 * max(abs(max_x), abs(min_x)) - factor = 10**precision - logger.debug( - "TSP rescale: max_x=%d, min_x=%d, inf=%d, factor=%d", - max_x, - min_x, - inf, - factor, - ) - - D = np.ones((nnodes, nnodes), 
dtype=float) * inf - for a, b, w in self.edges: - ia, ib = nodes_indices[a], nodes_indices[b] - D[ia, ib] = D[ib, ia] = w - D = (D - min_x) * factor - D = D.astype(int) - return D, nodes - - def solve(self, time_limit=5, concorde=False, precision=0) -> list: - """Solve the TSP instance. - - Args: - time_limit (int, optional): Time limit to run. Default to 5 seconds. - concorde (bool, optional): Shall we run concorde? Defaults to False. - precision (int, optional): Float precision of distance. Defaults to 0. - - Returns: - list: Ordered list of node indices to visit - """ - if concorde: - return Concorde(self, precision=precision).tour - - # Use OR-tools - from ortools.constraint_solver import routing_enums_pb2 - from ortools.constraint_solver import pywrapcp - - D, nodes = self.distance_matrix(precision) - nnodes = len(nodes) - - # Create the routing index manager - manager = pywrapcp.RoutingIndexManager(nnodes, 1, 0) - - # Create routing model - routing = pywrapcp.RoutingModel(manager) - - def distance_callback(from_index, to_index): - """Returns the distance between the two nodes.""" - from_node = manager.IndexToNode(from_index) - to_node = manager.IndexToNode(to_index) - return D[from_node, to_node] - - transit_callback_index = routing.RegisterTransitCallback(distance_callback) - - # Define cost of each arc - routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index) - - # Search strategy - search_parameters = pywrapcp.DefaultRoutingSearchParameters() - search_parameters.local_search_metaheuristic = ( - routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH - ) - search_parameters.time_limit.seconds = time_limit - - # Solve the problem - solution = routing.SolveWithParameters(search_parameters) - - tour = [] - logger.info("Objective: %d", solution.ObjectiveValue()) - index = routing.Start(0) - route_distance = 0 - while not routing.IsEnd(index): - tour.append(manager.IndexToNode(index)) - previous_index = index - index = 
solution.Value(routing.NextVar(index)) - route_distance = routing.GetArcCostForVehicle(previous_index, index, 0) - logger.info("Route distance: %d", route_distance) - - return [nodes[x] for x in tour] - - -class Concorde(object): - def __init__( - self, - data: TSPDataModel, - work_dir=Work_dir, - clean=True, - verbose=False, - precision=0, - seed=666, - ): - """Run concorde on TSP instance - - Args: - data (TSPDataModel): TSP instance with edge weights - work_dir ([type], optional): Path to the work dir. Defaults to Work_dir. - clean (bool, optional): Clean up intermediate results. Defaults to True. - verbose (bool, optional): Show verbose messages. Defaults to False. - precision (int, optional): Float precision of distance. Defaults to 0. - seed (int, optional): Random seed. Defaults to 666. - """ - self.data = data - self.work_dir = work_dir - self.clean = clean - self.verbose = verbose - - mkdir(work_dir) - tspfile = op.join(work_dir, "data.tsp") - self.print_to_tsplib(tspfile, precision=precision) - _, outfile = self.run_concorde(tspfile, seed=seed) - self.tour = self.parse_output(outfile) - - if clean: - cleanup(work_dir) - residual_output = ["data.sol", "data.res", "Odata.res"] - cleanup(residual_output) - - def print_to_tsplib(self, tspfile, precision=0): - """ - See TSPlib format: - - - NAME: bayg29 - TYPE: TSP - COMMENT: 29 Cities in Bavaria, geographical distances - DIMENSION: 29 - EDGE_WEIGHT_TYPE: EXPLICIT - EDGE_WEIGHT_FORMAT: UPPER_ROW - DISPLAY_DATA_TYPE: TWOD_DISPLAY - EDGE_WEIGHT_SECTION - (... numbers ...) 
- """ - fw = must_open(tspfile, "w") - D, nodes = self.data.distance_matrix(precision) - self.nodes = nodes - self.nnodes = len(nodes) - - print("NAME: data", file=fw) - print("TYPE: TSP", file=fw) - print("DIMENSION: {}".format(self.nnodes), file=fw) - print("EDGE_WEIGHT_TYPE: EXPLICIT", file=fw) - print("EDGE_WEIGHT_FORMAT: FULL_MATRIX", file=fw) - print("EDGE_WEIGHT_SECTION", file=fw) - - for row in D: # Dump the full matrix - print(" " + " ".join(str(x) for x in row), file=fw) - - print("EOF", file=fw) - fw.close() - logger.debug("Write TSP instance to `%s`", tspfile) - - def run_concorde(self, tspfile, seed=666): - outfile = op.join(self.work_dir, "data.sol") - cleanup(outfile) - - cc = "concorde" - assert which(cc), ( - "You must install `concorde` on your PATH" - + " [http://www.math.uwaterloo.ca/tsp/concorde.html]" - ) - cmd = "{0} -s {1} -x -o {2} {3}".format(cc, seed, outfile, tspfile) - - outf = None if self.verbose else "/dev/null" - retcode = sh(cmd, outfile=outf, errfile=outf) - return retcode, outfile - - def parse_output(self, outfile): - fp = open(outfile) - dimension = int(next(fp).strip()) # header - assert dimension == self.nnodes - tour = [] - for row in fp: - tour += [int(x) for x in row.split()] - tour = [self.nodes[x] for x in tour] - return tour - - -def node_to_edge(edges, directed=True): - """ - From list of edges, record per node, incoming and outgoing edges - """ - outgoing = defaultdict(set) - incoming = defaultdict(set) if directed else outgoing - nodes = set() - for i, edge in enumerate(edges): - ( - a, - b, - ) = edge[:2] - outgoing[a].add(i) - incoming[b].add(i) - nodes.add(a) - nodes.add(b) - nodes = list(nodes) - return outgoing, incoming, nodes - - -def populate_edge_weights(edges): - # assume weight is 1 if not specified - new_edges = [] - for e in edges: - assert len(e) in (2, 3) - if len(e) == 2: - a, b = e - w = 1 - else: - a, b, w = e - new_edges.append((a, b, w)) - return new_edges - - -def hamiltonian(edges, 
directed=False, time_limit=5, concorde=False, precision=0): - """ - Calculates shortest path that traverses each node exactly once. Convert - Hamiltonian path problem to TSP by adding one dummy point that has a distance - of zero to all your other points. Solve the TSP and get rid of the dummy - point - what remains is the Hamiltonian Path. - - >>> g = [(1,2), (2,3), (3,4), (4,2), (3,5)] - >>> hamiltonian(g) - [1, 2, 4, 3, 5] - >>> hamiltonian([(1, 2), (2, 3)], directed=True) - [1, 2, 3] - """ - edges = populate_edge_weights(edges) - _, _, nodes = node_to_edge(edges, directed=False) - DUMMY = "DUMMY" - dummy_edges = edges + [(DUMMY, x, 0) for x in nodes] - if directed: - dummy_edges += [(x, DUMMY, 0) for x in nodes] - dummy_edges = reformulate_atsp_as_tsp(dummy_edges) - - tour = tsp( - dummy_edges, time_limit=time_limit, concorde=concorde, precision=precision - ) - - dummy_index = tour.index(DUMMY) - tour = tour[dummy_index:] + tour[:dummy_index] - if directed: - dummy_star_index = tour.index((DUMMY, "*")) - assert dummy_star_index in (1, len(tour) - 1), tour - if dummy_star_index == len(tour) - 1: # need to flip - tour = tour[1:] + tour[:1] - tour = tour[::-1] - path = tour[1:] - path = [x for x in path if not isinstance(x, tuple)] - else: - path = tour[1:] - - return path - - -def tsp(edges, time_limit=5, concorde=False, precision=0) -> list: - """Compute TSP solution - - Args: - edges (list): List of tuple (source, target, weight) - time_limit (int, optional): Time limit to run. Default to 5 seconds. - concorde (bool, optional): Shall we run concorde? Defaults to False. - precision (int, optional): Float precision of distance. Defaults to 0. - - Returns: - list: List of nodes to visit - """ - data = TSPDataModel(edges) - return data.solve(time_limit=time_limit, concorde=concorde, precision=precision) - - -def reformulate_atsp_as_tsp(edges): - """ - To reformulate the ATSP as a TSP, for each city a dummy city (e.g, for New - York, a dummy city New York* is added. 
Between each city and its - corresponding dummy city a negative or very small distance with value cheap - is used. This makes sure that each cities always occurs in the solution - together with its dummy city. The original distances are used between the - cities and the dummy cities, where each city is responsible for the distance - going to the city and the dummy city is responsible for the distance coming - from the city. The distances between all cities and the distances between - all dummy cities are set to infeasible. - """ - _, _, nodes = node_to_edge(edges, directed=False) - new_edges = [] - for a, b, w in edges: - new_edges.append(((a, "*"), b, w)) - for n in nodes: - new_edges.append((n, (n, "*"), NEG_INF)) # A negative weight - return new_edges - - -def make_data(N, directed=False): - x = np.random.randn(N) - y = np.random.randn(N) - xy = list(zip(x, y)) - M = np.zeros((N, N), dtype=float) - for ia, ib in combinations(range(N), 2): - ax, ay = xy[ia] - bx, by = xy[ib] - d = ((ax - bx) ** 2 + (ay - by) ** 2) ** 0.5 - M[ia, ib] = M[ib, ia] = d - - edges = [] - for ia, ib in combinations(range(N), 2): - edges.append((ia, ib, M[ia, ib])) - if directed: - edges.append((ib, ia, M[ib, ia])) - - return x, y, M, edges - - -def evaluate(tour, M): - score = 0 - for ia, ib in pairwise(tour): - score += M[ia, ib] - return score - - -def plot_data(x, y, tour, M): - from jcvi.graphics.base import plt, savefig - - plt.plot(x, y, "ro") - for ia, ib in pairwise(tour): - plt.plot((x[ia], x[ib]), (y[ia], y[ib]), "r-") - - score = evaluate(tour, M) - plt.title("Score={0:.2f}".format(score)) - - savefig("demo.pdf") - - -def concorde_demo(POINTS=100): - x, y, M, edges = make_data(POINTS) - ctour = hamiltonian(edges, precision=3) - plot_data(x, y, ctour, M) - - -def compare_lpsolve_to_concorde(POINTS=80, directed=False): - from jcvi.algorithms.lpsolve import hamiltonian as lhamiltonian - - _, _, M, edges = make_data(POINTS, directed=directed) - ltour = lhamiltonian(edges, 
directed=directed) - print(ltour, evaluate(ltour, M)) - - ctour = hamiltonian(edges, directed=directed, precision=3) - print(ctour, evaluate(ctour, M)) - - -if __name__ == "__main__": - import doctest - - doctest.testmod() diff --git a/jcvi/annotation/__init__.py b/jcvi/annotation/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/jcvi/annotation/__main__.py b/jcvi/annotation/__main__.py deleted file mode 100644 index 74cb9a18..00000000 --- a/jcvi/annotation/__main__.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Collection of scripts to run gene finders, execute annotation pipelines, perform QC checks and generate summary statistics -""" - - -from ..apps.base import dmain - - -if __name__ == "__main__": - dmain(__file__) diff --git a/jcvi/annotation/ahrd.py b/jcvi/annotation/ahrd.py deleted file mode 100644 index b8b28208..00000000 --- a/jcvi/annotation/ahrd.py +++ /dev/null @@ -1,708 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Utility to run Automated Human Readable Description (AHRD) pipeline. - - -""" -import os.path as op -import sys -import re - -from os import symlink - -from ..apps.base import ActionDispatcher, OptionParser, logger, mkdir, glob -from ..formats.base import must_open - - -# --- Compiled RegExps ---- -# Cellular locations -loc_pat = re.compile(r",\s*(chloroplastic|cytoplasmic|mitochondrial).*?\s$", re.I) -# Any word that matches e.g. Os02g0234800 -osg_pat = re.compile(r"\bOs\d{2}g\d{7}.*?\s", re.I) -# (fragment) -frag_pat = re.compile(r"\(fragment[s]?\)", re.I) -# Trailing protein numeric copy (e.g. Myb 1) -trail_pat = re.compile(r"(? 
' -apos_pat = re.compile(r"'?") - -# > => none -gt_pat = re.compile(r">") - -# -like to -like protein -like_pat = re.compile(r"[-]like$", re.I) - -# 'repeat$' to 'repeat protein' -repeat_pat = re.compile(r"repeat$", re.I) - -# re used by the following 3 cases -Protein_pat = re.compile(r"Protein\s+", re.I) - -# 'binding$' to 'binding protein' -binding_pat = re.compile(r"binding$", re.I) - -# 'domain$' to 'domain-containing protein' -domain_pat = re.compile(r"domain$", re.I) - -# 'related$' to '-like protein' -related_pat = re.compile(r"[,\s+]*[\s+|-]*related$", re.I) - -# '[0-9]+ homolog' to '-like protein' -homolog_pat1 = re.compile(r"(? sulfur -# sulph -> sulf -sulfer_pat = re.compile(r"sulfer") -sulph_pat = re.compile(r"sulph") - -# monoxy to monooxy -monoxy_pat = re.compile(r"monoxy") - -# proteine to protein -proteine_pat = re.compile(r"proteine") - -# signalling to signaling -signalling_pat = re.compile(r"signalling") - -# aluminium to aluminum -aluminium_pat = re.compile(r"aluminium", re.I) - -# haem to heme -# haemo to hemo -haem_pat = re.compile(r"\bhaem\b", re.I) -haemo_pat = re.compile(r"haemo", re.I) - -# assessory -> accessory -assessory_pat = re.compile(r"assessory") - -# british to american spelling conversion -# -ise -> -ize -# -ised -> -ized -# -isation -> -ization -# -bre -> -ber -ise_pat = re.compile(r"\b([A-z]+)ise([d]?)\b") -isation_pat = re.compile(r"\b([A-z]+)isation\b") -bre_pat = re.compile(r"\b([A-z]+)bre\b") - -# /with \S+ and \S+/ pattern -# /, and \S+/ pattern -# identify names with two domains -with_and_pat = re.compile(r"[with|,]\s*\S+and\S+") - -Template = """ -proteins_fasta: {2} -token_score_bit_score_weight: {4} -token_score_database_score_weight: {5} -token_score_overlap_score_weight: {6} -description_score_relative_description_frequency_weight: 0.6 -output: {3} -blast_dbs: - swissprot: - weight: 100 - file: ./swissprot/{1}.swissprot.tab - database: ./dbs/swissprot.fasta - blacklist: {0}/blacklist_descline.txt - filter: 
{0}/filter_descline_sprot.txt - token_blacklist: {0}/blacklist_token.txt - description_score_bit_score_weight: 0.2 - - tair: - weight: 50 - file: ./tair/{1}.tair.tab - database: ./dbs/tair.fasta - blacklist: {0}/blacklist_descline.txt - filter: {0}/filter_descline_tair.txt - fasta_header_regex: "^>(?[aA][tT][0-9mMcC][gG]\\\\d+(\\\\.\\\\d+)?)\\\\s+\\\\|[^\\\\|]+\\\\|\\\\s+(?[^\\\\|]+)(\\\\s*\\\\|.*)?$" - short_accession_regex: "^(?.+)$" - token_blacklist: {0}/blacklist_token.txt - description_score_bit_score_weight: 0.4 - - trembl: - weight: 10 - file: ./trembl/{1}.trembl.tab - database: ./dbs/trembl.fasta - blacklist: {0}/blacklist_descline.txt - filter: {0}/filter_descline_trembl.txt - token_blacklist: {0}/blacklist_token.txt - description_score_bit_score_weight: 0.4 -{7} -""" - -iprscanTemplate = """ -interpro_database: ./interpro.xml -interpro_result: {0} -""" - -# Necessary for the script to know the location of `interpro.xml` and `interpro.dtd` -iprscan_datadir = "/usr/local/devel/ANNOTATION/iprscan/iprscan_v4.7/data" - - -def main(): - - actions = ( - ("batch", "batch run AHRD"), - ("merge", "merge AHRD run results"), - ("fix", "fix AHRD names"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -Unknown = "Unknown protein" -Hypothetical = "hypothetical protein" - - -def read_interpro(ipr): - store = {} - fp = open(ipr) - # Aco000343.1 0d98a55eb3399a408e06252a2e24efcf 2083 Pfam - # PF00476 DNA polymerase family A 1685 2075 1.70E-55 T - # 10-10-2014 IPR001098 "DNA-directed DNA polymerase, family A, - # palm domain" GO:0003677|GO:0003887|GO:0006260 KEGG: - # 00230+2.7.7.7|KEGG: 00240+2.7.7.7 - for row in fp: - ( - accession, - md5, - seqlen, - analysis, - signature, - signature_description, - start, - stop, - score, - status, - date, - interpro, - interpro_description, - GO, - pathway, - ) = row.split("\t") - accession = accession.split(".")[0] - interpro_description = interpro_description.replace('"', "") - pathway = pathway.strip() - if 
accession not in ipr: - store[accession] = (interpro, interpro_description, GO, pathway) - return store - - -def fix_text(s, ignore_sym_pat=False): - - if not ignore_sym_pat: - # Fix descriptions like D7TDB1 ( - s = re.sub(r"([A-Z0-9]){6} \(", "", s) - s = s.split(";")[0] - - # Fix parantheses containing names - s = s.strip("[]") - s = s.replace("(-)", "[-]") - s = s.replace("(+)", "[+]") - s = s.replace("(Uncharacterized protein)", "") - if not ignore_sym_pat: - s = s.strip("()") - - # fix minor typos, seen in `autonaming` output - # change 'protei ' to 'protein ' - # change 'hypthetical' to 'hypothetical' - # fix string starting with 'ytochrome' - if "protei " in s: - s = s.replace("protei ", "protein ") - if "hypthetical" in s: - s = s.replace("hypthetical", "hypothetical") - if s.startswith("ytochrome"): - s = s.replace("ytochrome", "cytochrome") - - # before trimming off at the first ";", check if name has glycosidic - # linkage information (e.g 1,3 or 1,4). If so, also check if multiple - # linkages are separated by ";". If so, replace ";" by "-" - m = re.findall(glycosidic_link_pat, s) - if m and ";" in s: - s = re.sub(r";\s*", "-", s) - - # remove underscore from description - s = re.sub("_", " ", s) - - # Cellular locations - # Any word that matches e.g. AT5G54690 - # Any word that matches e.g. Os02g0234800 - # (fragment) - # UPF - # Remove 'DDB_G\d+' ID - # '_At[0-9]+g[0-9]+' to '' - for pat in (loc_pat, osg_pat, frag_pat, upf_pat, ddb_pat): - # below is a hack since word boundaries don't work on / - s = s.strip() + " " - s = re.sub(pat, "", s) - - # '? 
=> ' - s = re.sub(apos_pat, "'", s) - # > => none - s = re.sub(gt_pat, "", s) - # reduce runs such as -- ''' - s = re.sub(r"[-]+", "-", s) - s = re.sub(r"[']+", "'", s) - - s = s.strip() - - # -like to -like protein - s = re.sub(like_pat, "-like protein", s) - - # 'repeat$' to 'repeat protein' - if re.search(repeat_pat, s): - s += "-containing protein" - - # 'binding$' to 'binding protein' - if re.search(binding_pat, s): - s += " protein" - if re.match(Protein_pat, s): - s = re.sub(Protein_pat, "", s) - - # 'domain$' to 'domain-containing protein' - if re.search(domain_pat, s): - s += "-containing protein" - if re.search(r"-domain", s): - s = re.sub(r"-domain", " domain", s) - if re.match(Protein_pat, s): - s = re.sub(Protein_pat, "", s) - - # 'related$' to '-like protein' - if re.search(related_pat, s): - s = re.sub(related_pat, "-like protein", s) - if re.match(Protein_pat, s) and not re.match(r"Protein kinase", s): - s = re.sub(Protein_pat, "", s) - - # '[0-9]+ homolog' to '-like protein' - if re.search(homolog_pat1, s): - s = re.sub(homolog_pat1, "-like protein", s) - if re.match(Protein_pat, s): - s = re.sub(Protein_pat, "", s) - - # 'Protein\s+(.*)\s+homolog' to '$1-like protein' - match = re.search(homolog_pat2, s) - if match and not re.match(r"Protein kinase", s): - ret = match.group(1) - s = re.sub(homolog_pat2, ret + "-like protein", s) - s = re.sub(r"^\s+", "", s) - s = s.capitalize() - - # 'homolog protein' to '-like protein' - # 'homologue$' to '-like protein' - # 'homolog$' to '-like protein' - for pat in (homolog_pat3, homolog_pat5, homolog_pat6): - if re.search(pat, s): - s = re.sub(pat, "-like protein", s) - - # 'Agenet domain-containing protein / bromo-adjacent homology (BAH) domain-containing protein' - # to 'Agenet and bromo-adjacent homology (BAH) domain-containing protein' - if re.search(agenet_pat, s): - s = re.sub(agenet_pat, "Agenet and ", s) - - # plural to singular - if re.search(plural_pat, s): - if (s.find("biogenesis") == -1 and 
s.find("Topors") == -1) or ( - not re.search(with_and_pat, s) - ): - s = re.sub(r"s$", "", s) - - # 'like_TBP' or 'likeTBP' to 'like TBP' - if re.search(tbp_pat, s): - s = re.sub(tbp_pat, "like TBP", s) - - # 'protein protein' to 'protein' - if re.search(prot_pat, s): - s = re.sub(prot_pat, "protein", s) - - # 'dimerisation' to 'dimerization' - if re.search(dimer_pat, s): - s = re.sub(dimer_pat, "dimerization", s) - - # Any AHRD that matches e.g. "AT5G54690-like protein" - # Any AHRD that contains the words '^Belongs|^Encoded|^Expression|^highly' - for pat in (atg_pat, athila_pat1): - if re.search(pat, s): - s = Unknown - - # remove 'arabidopsis[ thaliana]' and/or embedded Atg IDs - for pat in (atg_id_pat, athila_pat2, athila_pat3, athila_pat4): - # below is a hack since word boundaries don't work on / - s = s.strip() + " " - s = re.sub(pat, "", s) - - # remove "\s+LENGTH=\d+" from TAIR deflines - if re.search(length_pat, s): - s = re.sub(length_pat, "", s) - - # if name has a dot followed by a space (". ") in it and contains multiple - # parts separated by a comma, strip name starting from first occurrence of "," - if re.search(r"\. ", s): - if re.search(r",", s): - s = s.split(",")[0] - - # if name contains any of the disallowed words, - # remove word occurrence from name - # if name contains references to any other organism, trim name upto - # that occurrence - for pat in (disallow_pat, organism_pat): - if re.search(pat, s): - s = re.sub(pat, "", s) - - s = s.strip() - - if not ignore_sym_pat: - # 'homolog \d+' to '-like protein' - if re.search(homolog_pat4, s): - s = re.sub(homolog_pat4, "", s) - - # Trailing protein numeric copy (e.g. 
Myb 1) - if re.search(trail_pat, s): - s = re.sub(trail_pat, "", s) - - # if name is entirely a gene symbol-like (all capital letters, maybe followed by numbers) - # add a "-like protein" at the end - if (re.search(sym_pat, s) or re.search(lc_sym_pat, s)) and not re.search( - spada_pat, s - ): - s = s + "-like protein" - - # if gene symbol in parantheses at EOL, remove symbol - if re.search(eol_sym_pat, s): - s = re.sub(eol_sym_pat, "", s) - - # if name terminates at a symbol([^A-Za-z0-9_]), trim it off - if re.search(r"\W+$", s) and not re.search(r"\)$", s): - s = re.sub(r"\W+$", "", s) - - if "uncharacterized" in s: - s = "uncharacterized protein" - - # change sulfer to sulfur - if re.search(sulfer_pat, s): - s = re.sub(sulfer_pat, "sulfur", s) - - # change sulph to sulf - if re.search(sulph_pat, s): - s = re.sub(sulph_pat, "sulf", s) - - # change monoxy to monooxy - if re.search(monoxy_pat, s): - s = re.sub(monoxy_pat, "monooxy", s) - - # change proteine to protein - if re.search(proteine_pat, s): - s = re.sub(proteine_pat, "protein", s) - - # change signalling to signaling - if re.search(signalling_pat, s): - s = re.sub(signalling_pat, "signaling", s) - - # change aluminium to aluminum - if re.search(aluminium_pat, s): - s = re.sub(aluminium_pat, "aluminum", s) - - # change haem to heme - if re.search(haem_pat, s): - s = re.sub(haem_pat, "heme", s) - - # chage haemo to hemo - if re.search(haemo_pat, s): - s = re.sub(haemo_pat, "hemo", s) - - # change assessory to accessory - if re.search(assessory_pat, s): - s = re.sub(assessory_pat, "accessory", s) - - # change -ise/-ised/-isation to -ize/-ized/-ization - match = re.search(ise_pat, s) - if match: - ret = match.group(1) - if match.group(2): - suff = match.group(2) - s = re.sub(ise_pat, "{0}ize{1}".format(ret, suff), s) - else: - s = re.sub(ise_pat, "{0}ize".format(ret), s) - - match = re.search(isation_pat, s) - if match: - ret = match.group(1) - s = re.sub(isation_pat, "{0}ization".format(ret), s) - - # change 
-bre to -ber - match = re.search(bre_pat, s) - if match: - ret = match.group(1) - s = re.sub(bre_pat, "{0}ber".format(ret), s) - - if not s.startswith(Hypothetical): - # 'Candidate|Hypothetical|Novel|Predicted|Possible|Probable|Uncharacterized' to 'Putative' - if s.startswith("Uncharacterized") and any( - pat in s for pat in ("UCP", "UPF", "protein") - ): - pass - else: - if re.search(put_pat, s): - s = re.sub(put_pat, "Putative", s) - - sl = s.lower() - - # Any mention of `clone` or `contig` is not informative - if "clone" in sl or "contig" in sl: - s = Unknown - - # All that's left is `protein` is not informative - if sl in ("protein", "protein, putative", ""): - s = Unknown - - if Unknown.lower() in sl: - s = Unknown - - if "FUNCTIONS IN".lower() in sl and "unknown" in sl: - s = Unknown - - if "LOCATED IN".lower() in sl: - s = Unknown - - s = re.sub(r"[,]*\s+putative$", "", s) - - if s == Unknown or s.strip() == "protein": - s = Hypothetical - - # Compact all spaces - s = " ".join(s.split()) - - assert s.strip() - - return s - - -def fix(args): - """ - %prog fix ahrd.csv > ahrd.fixed.csv - - Fix ugly names from Uniprot. - """ - p = OptionParser(fix.__doc__) - p.add_argument( - "--ignore_sym_pat", - default=False, - action="store_true", - help="Do not fix names matching symbol patterns i.e." - + " names beginning or ending with gene symbols or a series of numbers." - + " e.g. 
`ARM repeat superfamily protein`, `beta-hexosaminidase 3`," - + " `CYCLIN A3;4`, `WALL ASSOCIATED KINASE (WAK)-LIKE 10`", - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - (csvfile,) = args - fp = open(csvfile) - fw = must_open(opts.outfile, "w") - for row in fp: - if row[0] == "#": - continue - if row.strip() == "": - continue - atoms = row.rstrip("\r\n").split("\t") - name, hit, ahrd_code, desc = ( - atoms[:4] if len(atoms) > 2 else (atoms[0], None, None, atoms[-1]) - ) - - newdesc = fix_text(desc, ignore_sym_pat=opts.ignore_sym_pat) - if hit and hit.strip() != "" and newdesc == Hypothetical: - newdesc = "conserved " + newdesc - print("\t".join(atoms[:4] + [newdesc] + atoms[4:]), file=fw) - - -def merge(args): - """ - %prog merge output/*.csv > ahrd.csv - - Merge AHRD results, remove redundant headers, empty lines, etc. If there are - multiple lines containing the same ID (first column). Then whatever comes - the first will get retained. - """ - p = OptionParser(merge.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - csvfiles = args - cf = csvfiles[0] - fp = open(cf) - for row in fp: - if row.startswith("Protein"): - break - header = row.rstrip() - print(header) - - seen = set() - for cf in csvfiles: - fp = open(cf) - for row in fp: - if row[0] == "#": - continue - if row.strip() == "": - continue - if row.strip() == header: - continue - - atoms = row.rstrip().split("\t") - id = atoms[0] - if id in seen: - logger.error("ID `%s` ignored.", id) - continue - - seen.add(id) - print(row.strip()) - - -def batch(args): - """ - %prog batch splits output - - The arguments are two folders. - Input FASTA sequences are in splits/. - Output csv files are in output/. - - Must have folders swissprot/, tair/, trembl/ that contains the respective - BLAST output. 
Once finished, you can run, for example: - - $ parallel java -Xmx2g -jar ~/code/AHRD/dist/ahrd.jar {} ::: output/*.yml - """ - p = OptionParser(batch.__doc__) - - ahrd_weights = {"blastp": [0.5, 0.3, 0.2], "blastx": [0.6, 0.4, 0.0]} - blast_progs = tuple(ahrd_weights.keys()) - - p.add_argument( - "--path", - default="~/code/AHRD/", - help="Path where AHRD is installed", - ) - p.add_argument( - "--blastprog", - default="blastp", - choices=blast_progs, - help="Specify the blast program being run. Based on this option," - + " the AHRD parameters (score_weights) will be modified", - ) - p.add_argument( - "--iprscan", - default=None, - help="Specify path to InterProScan results file if available." - + " If specified, the yml conf file will be modified" - + " appropriately", - ) - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - splits, output = args - mkdir(output) - - bit_score, db_score, ovl_score = ahrd_weights[opts.blastprog] - - for f in glob("{0}/*.fa*".format(splits)): - fb = op.basename(f).rsplit(".", 1)[0] - fw = open(op.join(output, fb + ".yml"), "w") - - path = op.expanduser(opts.path) - dir = op.join(path, "test/resources") - outfile = op.join(output, fb + ".csv") - interpro = iprscanTemplate.format(opts.iprscan) if opts.iprscan else "" - - print( - Template.format( - dir, fb, f, outfile, bit_score, db_score, ovl_score, interpro - ), - file=fw, - ) - - if opts.iprscan: - if not op.lexists("interpro.xml"): - symlink(op.join(iprscan_datadir, "interpro.xml"), "interpro.xml") - - if not op.lexists("interpro.dtd"): - symlink(op.join(iprscan_datadir, "interpro.dtd"), "interpro.dtd") - - -if __name__ == "__main__": - main() diff --git a/jcvi/annotation/automaton.py b/jcvi/annotation/automaton.py deleted file mode 100644 index 98782db1..00000000 --- a/jcvi/annotation/automaton.py +++ /dev/null @@ -1,287 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Automate genome annotation by iterating processing a set 
of files, individually. -""" - -import os.path as op -import sys - -from functools import partial -from tempfile import mkdtemp - -from ..assembly.automaton import iter_project -from ..apps.grid import Jobs, MakeManager -from ..formats.base import FileMerger, split -from ..apps.base import ( - ActionDispatcher, - OptionParser, - cleanup, - iglob, - logger, - mkdir, - need_update, - sh, -) - - -def main(): - - actions = ( - ("augustus", "run parallel AUGUSTUS"), - ("cufflinks", "run cufflinks following tophat"), - ("star", "run star alignment"), - ("tophat", "run tophat on a list of inputs"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def augustuswrap(fastafile, species="maize", gff3=True, cfgfile=None, hintsfile=None): - cmd = "augustus {0}".format(fastafile) - if gff3: - cmd += " --gff3=on" - cmd += " --species={0}".format(species) - if cfgfile: - cmd += " --extrinsicCfgFile={0}".format(cfgfile) - if hintsfile: - cmd += " --alternatives-from-evidence=true" - cmd += " --hintsfile={0} --allow_hinted_splicesites=atac".format(hintsfile) - cmd += " --introns=on --genemodel=complete" - suffix = ".gff3" if gff3 else ".out" - outfile = fastafile.rsplit(".", 1)[0] + suffix - sh(cmd, outfile=outfile) - return outfile - - -def augustus(args): - """ - %prog augustus fastafile - - Run parallel AUGUSTUS. Final results can be reformatted using - annotation.reformat.augustus(). 
- """ - p = OptionParser(augustus.__doc__) - p.add_argument( - "--species", default="maize", help="Use species model for prediction" - ) - p.add_argument("--hintsfile", help="Hint-guided AUGUSTUS") - p.add_argument( - "--nogff3", default=False, action="store_true", help="Turn --gff3=off" - ) - p.set_home("augustus") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - cpus = opts.cpus - mhome = opts.augustus_home - gff3 = not opts.nogff3 - suffix = ".gff3" if gff3 else ".out" - cfgfile = op.join(mhome, "config/extrinsic/extrinsic.M.RM.E.W.cfg") - - outdir = mkdtemp(dir=".") - fs = split([fastafile, outdir, str(cpus)]) - - augustuswrap_params = partial( - augustuswrap, - species=opts.species, - gff3=gff3, - cfgfile=cfgfile, - hintsfile=opts.hintsfile, - ) - g = Jobs(augustuswrap_params, fs.names) - g.run() - - gff3files = [x.rsplit(".", 1)[0] + suffix for x in fs.names] - outfile = fastafile.rsplit(".", 1)[0] + suffix - FileMerger(gff3files, outfile=outfile).merge() - cleanup(outdir) - - if gff3: - from jcvi.annotation.reformat import augustus as reformat_augustus - - reformat_outfile = outfile.replace(".gff3", ".reformat.gff3") - reformat_augustus([outfile, "--outfile={0}".format(reformat_outfile)]) - - -def star(args): - """ - %prog star folder reference - - Run star on a folder with reads. 
- """ - p = OptionParser(star.__doc__) - p.add_argument( - "--single", default=False, action="store_true", help="Single end mapping" - ) - p.set_fastq_names() - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - cpus = opts.cpus - mm = MakeManager() - - num = 1 if opts.single else 2 - folder, reference = args - gd = "GenomeDir" - mkdir(gd) - STAR = "STAR --runThreadN {0} --genomeDir {1}".format(cpus, gd) - - # Step 0: build genome index - genomeidx = op.join(gd, "Genome") - if need_update(reference, genomeidx): - cmd = STAR + " --runMode genomeGenerate" - cmd += " --genomeFastaFiles {0}".format(reference) - mm.add(reference, genomeidx, cmd) - - # Step 1: align - for p, prefix in iter_project(folder, opts.names, num): - pf = "{0}_star".format(prefix) - bamfile = pf + "Aligned.sortedByCoord.out.bam" - cmd = STAR + " --readFilesIn {0}".format(" ".join(p)) - if p[0].endswith(".gz"): - cmd += " --readFilesCommand zcat" - cmd += " --outSAMtype BAM SortedByCoordinate" - cmd += " --outFileNamePrefix {0}".format(pf) - cmd += " --twopassMode Basic" - # Compatibility for cufflinks - cmd += " --outSAMstrandField intronMotif" - cmd += " --outFilterIntronMotifs RemoveNoncanonical" - mm.add(p, bamfile, cmd) - - mm.write() - - -def cufflinks(args): - """ - %prog cufflinks folder reference - - Run cufflinks on a folder containing tophat results. 
- """ - p = OptionParser(cufflinks.__doc__) - p.add_argument("--gtf", help="Reference annotation") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - folder, reference = args - cpus = opts.cpus - gtf = opts.gtf - transcripts = "transcripts.gtf" - - mm = MakeManager() - gtfs = [] - for bam in iglob(folder, "*.bam"): - pf = op.basename(bam).split(".")[0] - outdir = pf + "_cufflinks" - cmd = "cufflinks" - cmd += " -o {0}".format(outdir) - cmd += " -p {0}".format(cpus) - if gtf: - cmd += " -g {0}".format(gtf) - cmd += " --frag-bias-correct {0}".format(reference) - cmd += " --multi-read-correct" - cmd += " {0}".format(bam) - cgtf = op.join(outdir, transcripts) - mm.add(bam, cgtf, cmd) - gtfs.append(cgtf) - - assemblylist = "assembly_list.txt" - cmd = 'find . -name "{0}" > {1}'.format(transcripts, assemblylist) - mm.add(gtfs, assemblylist, cmd) - - mergedgtf = "merged/merged.gtf" - cmd = "cuffmerge" - cmd += " -o merged" - cmd += " -p {0}".format(cpus) - if gtf: - cmd += " -g {0}".format(gtf) - cmd += " -s {0}".format(reference) - cmd += " {0}".format(assemblylist) - mm.add(assemblylist, mergedgtf, cmd) - - mm.write() - - -def tophat(args): - """ - %prog tophat folder reference - - Run tophat on a folder of reads. 
- """ - from jcvi.apps.bowtie import check_index - from jcvi.formats.fastq import guessoffset - - p = OptionParser(tophat.__doc__) - p.add_argument("--gtf", help="Reference annotation") - p.add_argument( - "--single", default=False, action="store_true", help="Single end mapping" - ) - p.add_argument( - "--intron", - default=15000, - type=int, - help="Max intron size", - ) - p.add_argument( - "--dist", - default=-50, - type=int, - help="Mate inner distance", - ) - p.add_argument( - "--stdev", - default=50, - type=int, - help="Mate standard deviation", - ) - p.set_phred() - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - num = 1 if opts.single else 2 - folder, reference = args - reference = check_index(reference) - for p, prefix in iter_project(folder, n=num): - outdir = "{0}_tophat".format(prefix) - outfile = op.join(outdir, "accepted_hits.bam") - if op.exists(outfile): - logger.debug("File `%s` found. Skipping.", outfile) - continue - - cmd = "tophat -p {0}".format(opts.cpus) - if opts.gtf: - cmd += " -G {0}".format(opts.gtf) - cmd += " -o {0}".format(outdir) - - if num == 1: # Single-end - (a,) = p - else: # Paired-end - a, b = p - cmd += " --max-intron-length {0}".format(opts.intron) - cmd += " --mate-inner-dist {0}".format(opts.dist) - cmd += " --mate-std-dev {0}".format(opts.stdev) - - phred = opts.phred or str(guessoffset([a])) - if phred == "64": - cmd += " --phred64-quals" - cmd += " {0} {1}".format(reference, " ".join(p)) - - sh(cmd) - - -if __name__ == "__main__": - main() diff --git a/jcvi/annotation/depth.py b/jcvi/annotation/depth.py deleted file mode 100755 index 2fbd8971..00000000 --- a/jcvi/annotation/depth.py +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -From genomeCovergeBed results, initialize the count array, set cutoffs -and optimize against the truth, to determine the cutoff for incorporating -RNA-seq into annotation pipelines. 
-""" -import sys -import os.path as op - -from itertools import groupby - -import numpy as np - -from ..apps.base import ActionDispatcher, OptionParser, logger -from ..formats.base import BaseFile, must_open -from ..formats.sizes import Sizes - - -class BinFile(BaseFile): - """ - The binfile contains per base count, fastafile provides the coordinate - system. - """ - - def __init__(self, binfile, dtype=np.uint8): - super().__init__(binfile) - assert op.exists( - binfile - ), "Binary file `{0}` not found. Rerun depth.count().".format(binfile) - self.dtype = dtype - - @property - def array(self): - binfile = self.filename - return np.fromfile(binfile, dtype=self.dtype) - - @property - def mmarray(self): - binfile = self.filename - return np.memmap(binfile, dtype=self.dtype, mode="r") - - -def main(): - - actions = ( - ("count", "initialize the count array"), - ("query", "query the count array to get depth at particular site"), - ("merge", "merge several count arrays into one"), - ( - "bed", - "write bed files where the bases have at least certain depth", - ), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def bed(args): - """ - %prog bed binfile fastafile - - Write bed files where the bases have at least certain depth. 
- """ - p = OptionParser(bed.__doc__) - p.add_argument( - "-o", - dest="output", - default="stdout", - help="Output file name", - ) - p.add_argument( - "--cutoff", - dest="cutoff", - default=10, - type=int, - help="Minimum read depth to report intervals", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - binfile, fastafile = args - fw = must_open(opts.output, "w") - cutoff = opts.cutoff - assert cutoff >= 0, "Need non-negative cutoff" - - b = BinFile(binfile) - ar = b.array - - fastasize, sizes, offsets = get_offsets(fastafile) - s = Sizes(fastafile) - for ctg, ctglen in s.iter_sizes(): - offset = offsets[ctg] - subarray = ar[offset : offset + ctglen] - key = lambda x: x[1] >= cutoff - for tf, array_elements in groupby(enumerate(subarray), key=key): - array_elements = list(array_elements) - if not tf: - continue - - # 0-based system => 1-based system - start = array_elements[0][0] + 1 - end = array_elements[-1][0] + 1 - - mean_depth = sum([x[1] for x in array_elements]) / len(array_elements) - mean_depth = int(mean_depth) - - name = "na" - print( - "\t".join(str(x) for x in (ctg, start - 1, end, name, mean_depth)), - file=fw, - ) - - -def merge(args): - """ - %prog merge *.bin merged.bin - - Merge several count arrays into one. Overflows will be capped at uint8_max - (255). - """ - p = OptionParser(merge.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - binfiles = args[:-1] - mergedbin = args[-1] - if op.exists(mergedbin): - logger.error("`{0}` file exists. 
Remove before proceed.".format(mergedbin)) - return - - b = BinFile(binfiles[0]) - ar = b.mmarray - (fastasize,) = ar.shape - logger.debug("Initialize array of uint16 with size {0}".format(fastasize)) - - merged_ar = np.zeros(fastasize, dtype=np.uint16) - for binfile in binfiles: - b = BinFile(binfile) - merged_ar += b.array - - logger.debug("Resetting the count max to 255.") - merged_ar[merged_ar > 255] = 255 - - logger.debug("Compact array back to uint8 with size {0}".format(fastasize)) - merged_ar = np.array(merged_ar, dtype=np.uint8) - merged_ar.tofile(mergedbin) - logger.debug("Merged array written to `{0}`".format(mergedbin)) - - -def query(args): - """ - %prog query binfile fastafile ctgID baseID - - Get the depth at a particular base. - """ - p = OptionParser(query.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 4: - sys.exit(not p.print_help()) - - binfile, fastafile, ctgID, baseID = args - b = BinFile(binfile) - ar = b.mmarray - - fastasize, sizes, offsets = get_offsets(fastafile) - oi = offsets[ctgID] + int(baseID) - 1 - print("\t".join((ctgID, baseID, str(ar[oi])))) - - -def update_array(ar, coveragefile, offsets): - fp = open(coveragefile) - logger.debug("Parse file `{0}`".format(coveragefile)) - for k, rows in groupby(fp, key=(lambda x: x.split()[0])): - rows = list(rows) - offset = offsets[k] - ctglen = len(rows) - - if ctglen < 100000: - sys.stdout.write(".") - else: - print(k, offset) - - # assert ctglen == sizes[k] - for i, row in enumerate(rows): - ctgID, baseID, count = row.split() - oi = offset + i - newcount = ar[oi] + int(count) - if newcount > 255: - newcount = 255 - - ar[oi] = newcount - - -def get_offsets(fastafile): - s = Sizes(fastafile) - fastasize = s.totalsize - sizes = s.mapping - offsets = s.cumsizes_mapping - return fastasize, sizes, offsets - - -def count(args): - """ - %prog count t.coveragePerBase fastafile - - Serialize the genomeCoverage results. 
The coordinate system of the count array - will be based on the fastafile. - """ - p = OptionParser(count.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - coveragefile, fastafile = args - - countsfile = coveragefile.split(".")[0] + ".bin" - if op.exists(countsfile): - logger.error("`{0}` file exists. Remove before proceed.".format(countsfile)) - return - - fastasize, sizes, offsets = get_offsets(fastafile) - logger.debug("Initialize array of uint8 with size {0}".format(fastasize)) - ar = np.zeros(fastasize, dtype=np.uint8) - - update_array(ar, coveragefile, offsets) - - ar.tofile(countsfile) - logger.debug("Array written to `{0}`".format(countsfile)) - - -if __name__ == "__main__": - main() diff --git a/jcvi/annotation/evm.py b/jcvi/annotation/evm.py deleted file mode 100644 index 6764470d..00000000 --- a/jcvi/annotation/evm.py +++ /dev/null @@ -1,268 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Wrapper for running series of EVM commands. There are two flavors of running -EVM - TIGR only mode which communicates with the Sybase db; evm mode which -communicates with GFF file. 
-""" -import os.path as op -import sys - -from collections import defaultdict - -from ..apps.base import ActionDispatcher, OptionParser, need_update, sh -from ..formats.base import write_file -from ..formats.fasta import ids - - -EVMRUN = r""" -W=`pwd`/weights.txt - -$EVM/EvmUtils/write_EVM_commands.pl --genome genome.fasta --weights $W \ - --gene_predictions {0} \ - --transcript_alignments {1} \ - --protein_alignments {2} \ - --terminalExons pasa.terminal_exons.gff3 \ - --output_file_name evm.out --partitions partitions_list.out > commands.list - -$EGC_SCRIPTS/run_cmds_on_grid.pl commands.list 0372 - -#$EVM/EvmUtils/execute_EVM_commands.pl commands.list -""" - -EVMLOAD = r""" -$EVM/EvmUtils/recombine_EVM_partial_outputs.pl \ - --partitions partitions_list.out \ - --output_file_name evm.out - -$EVM/TIGR-only/TIGR_EVM_loader.pl --db {0} \ - --partitions partitions_list.out \ - --output_file_name evm.out \ - --ev_type {1} - -#$EVM/EvmUtils/convert_EVM_outputs_to_GFF3.pl \ -# --partitions partitions_list.out \ -# --output evm.out -""" - - -def main(): - - actions = ( - ("pasa", "extract terminal exons"), - ("tigrprepare", "run EVM in TIGR-only mode"), - ("tigrload", "load EVM results into TIGR db"), - ("maker", "run EVM based on MAKER output"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def partition(evs): - partition_list = "partitions_list.out" - A, T, P = evs - if not need_update(evs, partition_list): - return - - cmd = "$EVM/EvmUtils/partition_EVM_inputs.pl --genome genome.fasta" - cmd += " --gene_predictions {0}".format(A) - cmd += " --transcript_alignments {0}".format(T) - cmd += " --protein_alignments {0}".format(P) - cmd += " --segmentSize 500000 --overlapSize 10000 " - cmd += " --partition_listing partitions_list.out" - - termexons = "pasa.terminal_exons.gff3" - if op.exists(termexons): - cmd += " --pasaTerminalExons {0}".format(termexons) - - sh(cmd) - - -def maker(args): - """ - %prog maker maker.gff3 genome.fasta - - Prepare EVM 
inputs by separating tracks from MAKER. - """ - from jcvi.formats.base import SetFile - from jcvi.apps.base import cleanup - - A, T, P = "ABINITIO_PREDICTION", "TRANSCRIPT", "PROTEIN" - # Stores default weights and types - Registry = { - "maker": (A, 5), - "augustus_masked": (A, 1), - "snap_masked": (A, 1), - "genemark": (A, 1), - "est2genome": (T, 5), - "est_gff": (T, 5), - "protein2genome": (P, 5), - "blastx": (P, 1), - } - - p = OptionParser(maker.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - gffile, fastafile = args - - types = "type.ids" - if need_update(gffile, types): - cmd = "cut -f2 -s {0} | sort -u".format(gffile) - sh(cmd, outfile=types) - - types = SetFile(types) - reg = defaultdict(list) - weightsfile = "weights.txt" - contents = [] - for s in types: - rs = s.split(":")[0] - if rs not in Registry: - continue - - type, weight = Registry[rs] - reg[type].append(s) - contents.append("\t".join(str(x) for x in (type, s, weight))) - - contents = "\n".join(sorted(contents)) - write_file(weightsfile, contents) - - evs = [x + ".gff" for x in (A, T, P)] - cleanup(evs) - - for type, tracks in reg.items(): - for t in tracks: - cmd = "grep '\t{0}' {1} | grep -v '_match\t' >> {2}.gff".format( - t, gffile, type - ) - sh(cmd) - - partition(evs) - runfile = "run.sh" - contents = EVMRUN.format(*evs) - write_file(runfile, contents) - - -def tigrload(args): - """ - %prog tigrload db ev_type - - Load EVM results into TIGR db. Actually, just write a load.sh script. The - ev_type should be set, e.g. "EVM1", "EVM2", etc. - """ - p = OptionParser(tigrload.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - db, ev_type = args - - runfile = "load.sh" - contents = EVMLOAD.format(db, ev_type) - write_file(runfile, contents) - - -def pasa(args): - """ - %prog pasa pasa_db fastafile - - Run EVM in TIGR-only mode. 
- """ - p = OptionParser(pasa.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - pasa_db, fastafile = args - - termexons = "pasa.terminal_exons.gff3" - if need_update(fastafile, termexons): - cmd = "$ANNOT_DEVEL/PASA2/scripts/pasa_asmbls_to_training_set.dbi" - cmd += ' -M "{0}:mysql.tigr.org" -p "access:access"'.format(pasa_db) - cmd += " -g {0}".format(fastafile) - sh(cmd) - - cmd = "$EVM/PasaUtils/retrieve_terminal_CDS_exons.pl" - cmd += " trainingSetCandidates.fasta trainingSetCandidates.gff" - sh(cmd, outfile=termexons) - - return termexons - - -def fix_transcript(): - # Fix `transcript_alignments.gff3` - transcript = "transcript_alignments.gff3" - fixedtranscript = "transcript_alignments.fixed.gff3" - if need_update(transcript, fixedtranscript): - fp = open(transcript) - fw = open(fixedtranscript, "w") - stack = "" - for row in fp: - row = row.rstrip() - goodline = len(row.split()) == 9 - if goodline: - if stack: - print(stack, file=fw) - stack = row - else: - print(stack + row, file=fw) - stack = "" - - fw.close() - - return fixedtranscript - - -def tigrprepare(args): - """ - %prog tigrprepare asmbl.fasta asmbl.ids db pasa.terminal_exons.gff3 - - Run EVM in TIGR-only mode. 
- """ - p = OptionParser(tigrprepare.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 4: - sys.exit(not p.print_help()) - - fastafile, asmbl_id, db, pasa_db = args - if asmbl_id == "all": - idsfile = fastafile + ".ids" - if need_update(fastafile, idsfile): - ids([fastafile, "-o", idsfile]) - else: - idsfile = asmbl_id - - oneid = next(open(idsfile)).strip() - - weightsfile = "weights.txt" - if need_update(idsfile, weightsfile): - cmd = "$EVM/TIGR-only/create_sample_weights_file.dbi" - cmd += " {0} {1} | tee weights.txt".format(db, oneid) - sh(cmd) - - evs = [ - "gene_predictions.gff3", - "transcript_alignments.gff3", - "protein_alignments.gff3", - ] - if need_update(weightsfile, evs): - cmd = "$EVM/TIGR-only/write_GFF3_files.dbi" - cmd += " --db {0} --asmbl_id {1} --weights {2}".format(db, idsfile, weightsfile) - sh(cmd) - - evs[1] = fix_transcript() - - partition(evs) - runfile = "run.sh" - contents = EVMRUN.format(*evs) - write_file(runfile, contents) - - -if __name__ == "__main__": - main() diff --git a/jcvi/annotation/maker.py b/jcvi/annotation/maker.py deleted file mode 100644 index bdb7c139..00000000 --- a/jcvi/annotation/maker.py +++ /dev/null @@ -1,537 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Utility script for annotations based on MAKER. - -Many of the routines in this script is to select among a set of conflicting -models, either through accuracy (batcheval) or simply the length (longest). 
-""" - -import os -import os.path as op -import sys - -from collections import Counter, defaultdict - -from ..apps.grid import GridProcess, get_grid_engine, PBS_STANZA -from ..apps.base import ( - ActionDispatcher, - OptionParser, - logger, - need_update, - popen, - sh, - mkdir, - glob, - get_abs_path, -) -from ..formats.base import BaseFile, LineFile, write_file - - -class CTLine(object): - def __init__(self, row): - row = row.strip() - tag = value = real = comment = "" - if "#" in row: - real, comment = row.split("#", 1) - if "=" in real: - tag, value = real.split("=", 1) - - self.tag = tag.strip() - self.value = value.strip() - self.comment = comment.strip() - - def __str__(self): - tag = self.tag - value = self.value - comment = self.comment - - s = "=".join(str(x) for x in (tag, value)) if tag else "" - if s: - if comment: - s += " # " + comment - else: - if comment: - s += "# " + comment - return s - - -class CTLFile(LineFile): - def __init__(self, filename): - super().__init__(filename) - fp = open(filename) - for row in fp: - self.append(CTLine(row)) - fp.close() - - def update_abs_path(self): - for r in self: - path = r.value - if path and op.exists(path): - npath = get_abs_path(path) - logger.debug("{0}={1} => {2}".format(r.tag, path, npath)) - r.value = npath - - def update_tag(self, key, value): - for r in self: - if r.tag == key: - logger.debug("{0}={1} => {2}".format(r.tag, r.value, value)) - r.value = value - break - - def write_file(self, filename): - fw = open(filename, "w") - for r in self: - print(r, file=fw) - fw.close() - logger.debug("File written to `%s`.", filename) - - -class DatastoreIndexFile(BaseFile): - def __init__(self, filename): - super().__init__(filename) - scaffold_status = {} - failed = [] - - fp = open(filename) - for row in fp: - scaffold, dir, status = row.strip().split("\t") - scaffold_status[scaffold] = status - for scaffold, status in scaffold_status.items(): - if status != "FINISHED": - failed.append(scaffold) - - 
self.scaffold_status = scaffold_status - self.failed = failed - - -def main(): - - actions = ( - ("parallel", "partition the genome into parts and run separately"), - ("merge", "generate the gff files after parallel"), - ("validate", "validate after MAKER run to check for failures"), - ("datastore", "generate a list of gff filenames to merge"), - ("split", "split MAKER models by checking against evidences"), - ("batcheval", "calls bed.evaluate() in batch"), - ("longest", "pick the longest model per group"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -arraysh = """ -DIR=`awk "NR==$SGE_TASK_ID" {0}` -cd $DIR -{1} --ignore_nfs_tmp""" - -arraysh_ua = ( - PBS_STANZA - + """ -cd $PBS_O_WORKDIR -DIR=`awk "NR==$PBS_ARRAY_INDEX" {2}` -cd $DIR -{3} --ignore_nfs_tmp > ../maker.$PBS_ARRAY_INDEX.out 2>&1 -""" -) - - -def parallel(args): - """ - %prog parallel genome.fasta N - - Partition the genome into parts and run separately. This is useful if MAKER - is to be run on the grid. - """ - from jcvi.formats.base import split - - p = OptionParser(parallel.__doc__) - p.set_home("maker") - p.set_tmpdir(tmpdir="tmp") - p.set_grid_opts(array=True) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - genome, NN = args - threaded = opts.threaded or 1 - tmpdir = opts.tmpdir - - mkdir(tmpdir) - tmpdir = get_abs_path(tmpdir) - - N = int(NN) - assert 1 <= N < 1000, "Required: 1 < N < 1000!" 
- - outdir = "outdir" - fs = split([genome, outdir, NN]) - - c = CTLFile("maker_opts.ctl") - c.update_abs_path() - if threaded > 1: - c.update_tag("cpus", threaded) - - cwd = os.getcwd() - dirs = [] - for name in fs.names: - fn = get_abs_path(name) - bn = op.basename(name) - dirs.append(bn) - c.update_tag("genome", fn) - mkdir(bn) - sh("cp *.ctl {0}".format(bn)) - - os.chdir(bn) - c.write_file("maker_opts.ctl") - os.chdir(cwd) - - jobs = "jobs" - fw = open(jobs, "w") - print("\n".join(dirs), file=fw) - fw.close() - - # Submit to grid - ncmds = len(dirs) - runfile = "array.sh" - cmd = op.join(opts.maker_home, "bin/maker") - if tmpdir: - cmd += " -TMP {0}".format(tmpdir) - - engine = get_grid_engine() - contents = ( - arraysh.format(jobs, cmd) - if engine == "SGE" - else arraysh_ua.format(N, threaded, jobs, cmd) - ) - write_file(runfile, contents) - - if engine == "PBS": - return - - # qsub script - outfile = r"maker.\$TASK_ID.out" - p = GridProcess( - runfile, outfile=outfile, errfile=outfile, arr=ncmds, grid_opts=opts - ) - qsubfile = "qsub.sh" - qsub = p.build() - write_file(qsubfile, qsub) - - -mergesh = """ -BASE=$1 -cd $1{0}/$1.maker.output -{1} -n -d $1_master_datastore_index.log -mv $1.all.gff ../../ -""" - - -def get_fsnames(outdir): - fnames = glob(op.join(outdir, "*.fa*")) - suffix = "." + fnames[0].split(".")[-1] - fsnames = [op.basename(x).rsplit(".", 1)[0] for x in fnames] - - return fsnames, suffix - - -def merge(args): - """ - %prog merge outdir output.gff - - Follow-up command after grid jobs are completed after parallel(). 
- """ - from jcvi.formats.gff import merge as gmerge - - p = OptionParser(merge.__doc__) - p.set_home("maker") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - outdir, outputgff = args - fsnames, suffix = get_fsnames(outdir) - nfs = len(fsnames) - cmd = op.join(opts.maker_home, "bin/gff3_merge") - - outfile = "merge.sh" - write_file(outfile, mergesh.format(suffix, cmd)) - - # Generate per split directory - # Note that gff3_merge write to /tmp, so I limit processes here to avoid - # filling up disk space - sh("parallel -j 8 merge.sh {} ::: " + " ".join(fsnames)) - - # One final output - gffnames = glob("*.all.gff") - assert len(gffnames) == nfs - - # Again, DO NOT USE gff3_merge to merge with a smallish /tmp/ area - gfflist = "gfflist" - fw = open(gfflist, "w") - print("\n".join(gffnames), file=fw) - fw.close() - - nlines = sum(1 for _ in open(gfflist)) - assert nlines == nfs # Be extra, extra careful to include all results - gmerge([gfflist, "-o", outputgff]) - logger.debug("Merged GFF file written to `{0}`".format(outputgff)) - - -def validate(args): - """ - %prog validate outdir genome.fasta - - Validate current folder after MAKER run and check for failures. Failed batch - will be written to a directory for additional work. 
- """ - p = OptionParser(validate.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - outdir, genome = args - counter = Counter() - - fsnames, suffix = get_fsnames(outdir) - dsfile = "{0}{1}/{0}.maker.output/{0}_master_datastore_index.log" - dslogs = [dsfile.format(x, suffix) for x in fsnames] - all_failed = [] - for f, d in zip(fsnames, dslogs): - dslog = DatastoreIndexFile(d) - counter.update(dslog.scaffold_status.values()) - all_failed.extend([(f, x) for x in dslog.failed]) - - cmd = 'tail maker.*.out | grep -c "now finished"' - n = int(popen(cmd).read()) - assert len(fsnames) == n - print("ALL jobs have been finished", file=sys.stderr) - - nfailed = len(all_failed) - if nfailed == 0: - print("ALL scaffolds are completed with no errors", file=sys.stderr) - return - - print("Scaffold status:", file=sys.stderr) - print(counter, file=sys.stderr) - failed = "FAILED" - fw = open(failed, "w") - print("\n".join(["\t".join((f, x)) for f, x in all_failed]), file=fw) - fw.close() - - nlines = sum(1 for _ in open("FAILED")) - assert nlines == nfailed - print("FAILED !! {0} instances.".format(nfailed), file=sys.stderr) - - # Rebuild the failed batch - failed_ids = failed + ".ids" - failed_fasta = failed + ".fasta" - cmd = "cut -f2 {0}".format(failed) - sh(cmd, outfile=failed_ids) - if need_update((genome, failed_ids), failed_fasta): - cmd = "faSomeRecords {} {} {}".format(genome, failed_ids, failed_fasta) - sh(cmd) - - -def batcheval(args): - """ - %prog batcheval model.ids gff_file evidences.bed fastafile - - Get the accuracy for a list of models against evidences in the range of the - genes. 
For example: - - $ %prog batcheval all.gff3 isoforms.ids proteins.bed scaffolds.fasta - - Outfile contains the scores for the models can be found in models.scores - """ - from jcvi.formats.bed import evaluate - from jcvi.formats.gff import make_index - - p = OptionParser(evaluate.__doc__) - p.add_argument( - "--type", - default="CDS", - help="list of features to extract, use comma to separate (e.g." - "'five_prime_UTR,CDS,three_prime_UTR')", - ) - opts, args = p.parse_args(args) - - if len(args) != 4: - sys.exit(not p.print_help()) - - model_ids, gff_file, evidences_bed, fastafile = args - type = set(opts.type.split(",")) - - g = make_index(gff_file) - fp = open(model_ids) - prefix = model_ids.rsplit(".", 1)[0] - fwscores = open(prefix + ".scores", "w") - - for row in fp: - cid = row.strip() - b = next(g.parents(cid, 1)) - query = "{0}:{1}-{2}".format(b.chrom, b.start, b.stop) - children = [c for c in g.children(cid, 1)] - - cidbed = prefix + ".bed" - fw = open(cidbed, "w") - for c in children: - if c.featuretype not in type: - continue - - fw.write(c.to_bed()) - - fw.close() - - b = evaluate([cidbed, evidences_bed, fastafile, "--query={0}".format(query)]) - print("\t".join((cid, b.score)), file=fwscores) - fwscores.flush() - - -def get_bed_file(gff_file, stype, key): - - from jcvi.formats.gff import bed - - opr = stype.replace(",", "") + ".bed" - bed_opts = ["--type=" + stype, "--key=" + key] - bed_file = ".".join((gff_file.split(".")[0], opr)) - - if need_update(gff_file, bed_file): - bed([gff_file, "--outfile={0}".format(bed_file)] + bed_opts) - - return bed_file - - -def get_splits(split_bed, gff_file, stype, key): - """ - Use intersectBed to find the fused gene => split genes mappings. 
- """ - bed_file = get_bed_file(gff_file, stype, key) - cmd = "intersectBed -a {0} -b {1} -wao".format(split_bed, bed_file) - cmd += " | cut -f4,10" - p = popen(cmd) - splits = defaultdict(set) - for row in p: - a, b = row.split() - splits[a].add(b) - - return splits - - -def get_accuracy(query, gff_file, evidences_bed, sizesfile, type, key): - """ - Get sensitivity, specificity and accuracy given gff_file, and a query range - that look like "chr1:1-10000". - """ - from jcvi.formats.bed import evaluate - - bed_file = get_bed_file(gff_file, type, key) - b = evaluate([bed_file, evidences_bed, sizesfile, "--query={0}".format(query)]) - - return b - - -def split(args): - """ - %prog split split.bed evidences.bed predictor1.gff predictor2.gff fastafile - - Split MAKER models by checking against predictors (such as AUGUSTUS and - FGENESH). For each region covered by a working model. Find out the - combination of predictors that gives the best accuracy against evidences - (such as PASA). - - `split.bed` can be generated by pulling out subset from a list of ids - $ python -m jcvi.formats.base join split.ids working.bed - --column=0,3 --noheader | cut -f2-7 > split.bed - """ - from jcvi.formats.bed import Bed - - p = OptionParser(split.__doc__) - p.add_argument( - "--key", - default="Name", - help="Key in the attributes to extract predictor.gff", - ) - p.add_argument( - "--parents", - default="match", - help="list of features to extract, use comma to separate (e.g.'gene,mRNA')", - ) - p.add_argument( - "--children", - default="match_part", - help="list of features to extract, use comma to separate (e.g." 
- "'five_prime_UTR,CDS,three_prime_UTR')", - ) - opts, args = p.parse_args(args) - - if len(args) != 5: - sys.exit(not p.print_help()) - - split_bed, evidences_bed, p1_gff, p2_gff, fastafile = args - parents = opts.parents - children = opts.children - key = opts.key - - bed = Bed(split_bed) - - s1 = get_splits(split_bed, p1_gff, parents, key) - s2 = get_splits(split_bed, p2_gff, parents, key) - - for b in bed: - query = "{0}:{1}-{2}".format(b.seqid, b.start, b.end) - b1 = get_accuracy(query, p1_gff, evidences_bed, fastafile, children, key) - b2 = get_accuracy(query, p2_gff, evidences_bed, fastafile, children, key) - accn = b.accn - c1 = "|".join(s1[accn]) - c2 = "|".join(s2[accn]) - ac1 = b1.accuracy - ac2 = b2.accuracy - tag = p1_gff if ac1 >= ac2 else p2_gff - tag = tag.split(".")[0] - - ac1 = "{0:.3f}".format(ac1) - ac2 = "{0:.3f}".format(ac2) - - print("\t".join((accn, tag, ac1, ac2, c1, c2))) - - -def datastore(args): - """ - %prog datastore datastore.log > gfflist.log - - Generate a list of gff filenames to merge. 
The `datastore.log` file can be - generated by something like: - - $ find - /usr/local/scratch/htang/EVM_test/gannotation/maker/1132350111853_default/i1/ - -maxdepth 4 -name "*datastore*.log" > datastore.log - """ - p = OptionParser(datastore.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (ds,) = args - fp = open(ds) - for row in fp: - fn = row.strip() - assert op.exists(fn) - pp, logfile = op.split(fn) - flog = open(fn) - for inner_row in flog: - ctg, folder, status = inner_row.split() - if status != "FINISHED": - continue - - gff_file = op.join(pp, folder, ctg + ".gff") - assert op.exists(gff_file) - print(gff_file) - - -if __name__ == "__main__": - main() diff --git a/jcvi/annotation/pasa.py b/jcvi/annotation/pasa.py deleted file mode 100644 index 34c6358d..00000000 --- a/jcvi/annotation/pasa.py +++ /dev/null @@ -1,595 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Utilities for submitting PASA jobs and processing PASA results. 
-""" -import os -import os.path as op -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger, sh, symlink, which -from ..formats.base import write_file, must_open, FileMerger - - -alignAssembly_conf = """ -# MySQL settings -MYSQLDB={0} - -#script validate_alignments_in_db.dbi -validate_alignments_in_db.dbi:--MIN_PERCENT_ALIGNED={1} -validate_alignments_in_db.dbi:--MIN_AVG_PER_ID={2} -validate_alignments_in_db.dbi:--NUM_BP_PERFECT_SPLICE_BOUNDARY={3} - -#script subcluster_builder.dbi -subcluster_builder.dbi:-m=50 -""" - -annotCompare_conf = """ -# MySQL settings -MYSQLDB={0} - -#script cDNA_annotation_comparer.dbi -cDNA_annotation_comparer.dbi:--MIN_PERCENT_OVERLAP={1} -cDNA_annotation_comparer.dbi:--MIN_PERCENT_PROT_CODING={2} -cDNA_annotation_comparer.dbi:--MIN_PERID_PROT_COMPARE={3} -cDNA_annotation_comparer.dbi:--MIN_PERCENT_LENGTH_FL_COMPARE={4} -cDNA_annotation_comparer.dbi:--MIN_PERCENT_LENGTH_NONFL_COMPARE={5} -cDNA_annotation_comparer.dbi:--MIN_FL_ORF_SIZE={6} -cDNA_annotation_comparer.dbi:--MIN_PERCENT_ALIGN_LENGTH={7} -cDNA_annotation_comparer.dbi:--MIN_PERCENT_OVERLAP_GENE_REPLACE={8} -cDNA_annotation_comparer.dbi:--STOMP_HIGH_PERCENTAGE_OVERLAPPING_GENE={9} -cDNA_annotation_comparer.dbi:--TRUST_FL_STATUS={10} -cDNA_annotation_comparer.dbi:--MAX_UTR_EXONS={11} -""" - -annotation = "annotation.gff3" -tdn, flaccs = "tdn.accs", "FL_accs.txt" -tfasta, gfasta = "transcripts.fasta", "genome.fasta" -aaconf, acconf = "alignAssembly.conf", "annotCompare.conf" -ALLOWED_ALIGNERS = ("blat", "gmap") - - -def main(): - - actions = ( - ("assemble", "run pasa alignment assembly pipeline"), - ("compare", "run pasa annotation comparison pipeline"), - ("longest", "label longest transcript per gene as full-length"), - ( - "consolidate", - "generate consolidated annotation set from 2 or more annot compare results", - ), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def assemble(args): - """ - %prog assemble pasa_db_name genome.fasta 
transcripts-dn.fasta [transcript-gg.fasta] - - Run the PASA alignment assembly pipeline - - If two transcript fasta files (Trinity denovo and genome guided) are provided - and the `--compreh` param is enabled, the PASA Comprehensive Transcriptome DB - protocol is followed - - Using the `--prepare` option creates a shell script with the run commands without - executing the pipeline - """ - p = OptionParser(assemble.__doc__) - p.set_pasa_opts() - p.add_argument( - "--prepare", - default=False, - action="store_true", - help="Prepare PASA run script with commands", - ) - p.set_grid() - p.set_grid_opts() - opts, args = p.parse_args(args) - - if len(args) not in (3, 4): - sys.exit(not p.print_help()) - - ( - pasa_db, - genome, - dnfasta, - ) = args[:3] - ggfasta = args[3] if len(args) == 4 else None - - PASA_HOME = opts.pasa_home - if not op.isdir(PASA_HOME): - logger.error("PASA_HOME={0} directory does not exist".format(PASA_HOME)) - sys.exit() - - aligners = opts.aligners.split(",") - for aligner in aligners: - if aligner not in ALLOWED_ALIGNERS: - logger.error("Error: Unknown aligner `{0}`".format(aligner)) - logger.error( - "Can be any of {0}, ".format("|".join(ALLOWED_ALIGNERS)) - + "combine multiple aligners in list separated by comma" - ) - sys.exit() - - clean = opts.clean - seqclean = op.join(opts.tgi_home, "seqclean") - - accn_extract = which(op.join(PASA_HOME, "misc_utilities", "accession_extractor.pl")) - launch_pasa = which(op.join(PASA_HOME, "scripts", "Launch_PASA_pipeline.pl")) - build_compreh_trans = which( - op.join(PASA_HOME, "scripts", "build_comprehensive_transcriptome.dbi") - ) - - fl_accs = opts.fl_accs - cpus = opts.cpus - grid = opts.grid - prepare, runfile = opts.prepare, "run.sh" - pctcov, pctid = opts.pctcov, opts.pctid - compreh_pctid = opts.compreh_pctid - compreh_pctcov, bpsplice = opts.compreh_pctcov, opts.bpsplice - - cmds = [] - - # set PASAHOME env variable if preparing shell script - if prepare: - env_cmd = 'export 
PASAHOME="{0}"'.format(PASA_HOME) - cmds.append(env_cmd) - - if ggfasta: - transcripts = FileMerger([dnfasta, ggfasta], tfasta).merge() - accn_extract_cmd = "cat {0} | {1} > {2}".format(dnfasta, accn_extract, tdn) - cmds.append(accn_extract_cmd) - if not prepare: - sh(accn_extract_cmd) - else: - symlink(dnfasta, tfasta) - transcripts = tfasta - - if opts.grid and not opts.threaded: - opts.threaded = opts.cpus - - prjobid = None - if clean: - ccpus = 16 if cpus >= 16 else cpus - cleancmd = "{0} {1} -c {2} -l 60".format(seqclean, transcripts, ccpus) - if prepare: - cmds.append(cleancmd) - else: - prjobid = sh(cleancmd, grid=grid, grid_opts=opts) - - aafw = must_open(aaconf, "w") - print( - alignAssembly_conf.format("{0}_pasa".format(pasa_db), pctcov, pctid, bpsplice), - file=aafw, - ) - aafw.close() - - symlink(genome, gfasta) - - aacmd = "{0} -c {1} -C -R -g {2}".format(launch_pasa, aaconf, gfasta) - aacmd += ( - " -t {0}.clean -T -u {0}".format(transcripts) - if clean - else " -t {0}".format(transcripts) - ) - if fl_accs: - symlink(fl_accs, flaccs) - aacmd += " -f {0}".format(flaccs) - if ggfasta: - aacmd += " --TDN {0}".format(tdn) - aacmd += " --ALIGNERS {0} -I {1} --CPU {2}".format( - ",".join(aligners), opts.intron, cpus - ) - - if prepare: - cmds.append(aacmd) - else: - opts.hold_jid = prjobid - prjobid = sh(aacmd, grid=grid, grid_opts=opts) - - if opts.compreh and ggfasta: - comprehcmd = "{0} -c {1} -t {2}".format( - build_compreh_trans, aaconf, transcripts - ) - comprehcmd += " --min_per_ID {0} --min_per_aligned {1}".format( - compreh_pctid, compreh_pctcov - ) - - if prepare: - cmds.append(comprehcmd) - else: - opts.hold_jid = prjobid - prjobid = sh(comprehcmd, grid=grid, grid_opts=opts) - - if prepare: - write_file(runfile, "\n".join(cmds)) # initialize run script - - -def compare(args): - """ - %prog compare pasa_db_name [--annots_gff3=annotation.gff3] - - Run the PASA annotation comparison pipeline - - This assumes that PASA alignment assembly has alredy 
been completed and - run directory contains `genome.fasta` and `transcript.fasta` files. - - If `--annots_gff3` is specified, the PASA database is loaded with the annotations - first before starting annotation comparison. Otherwise, it uses previously - loaded annotation data. - - Using the `--prepare` option creates a shell script with the run commands without - executing the pipeline - """ - p = OptionParser(compare.__doc__) - p.set_pasa_opts(action="compare") - p.add_argument( - "--prepare", - default=False, - action="store_true", - help="Prepare PASA run script with commands", - ) - p.set_grid() - p.set_grid_opts() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - (pasa_db,) = args - - PASA_HOME = opts.pasa_home - if not op.isdir(PASA_HOME): - logger.error("PASA_HOME={0} directory does not exist".format(PASA_HOME)) - sys.exit() - - launch_pasa = which(op.join(PASA_HOME, "scripts", "Launch_PASA_pipeline.pl")) - - annots_gff3 = opts.annots_gff3 - grid = opts.grid - prepare, runfile = opts.prepare, "run.sh" - - os.chdir(pasa_db) - - if prepare: - write_file(runfile, "", append=True, skipcheck=True) # initialize run script - - acfw = must_open(acconf, "w") - print( - annotCompare_conf.format( - "{0}_pasa".format(pasa_db), - opts.pctovl, - opts.pct_coding, - opts.pctid_prot, - opts.pctlen_FL, - opts.pctlen_nonFL, - opts.orf_size, - opts.pct_aln, - opts.pctovl_gene, - opts.stompovl, - opts.trust_FL, - opts.utr_exons, - ), - file=acfw, - ) - acfw.close() - - if not op.exists(gfasta): - sys.exit("Genome fasta file `{0}` does not exist".format(gfasta)) - - transcripts = tfasta - if not op.exists(transcripts): - sys.exit("Transcript fasta file `{0}` does not exist".format(transcripts)) - - if op.exists("{0}.clean".format(transcripts)): - transcripts = "{0}.clean".format(transcripts) - - accmd = "{0} -c {1} -A -g {2} -t {3} --GENETIC_CODE {4}".format( - launch_pasa, acconf, gfasta, transcripts, opts.genetic_code - ) - - if 
annots_gff3: - if not op.exists(annots_gff3): - sys.exit("Annotation gff3 file `{0}` does not exist".format(annots_gff3)) - symlink(annots_gff3, annotation) - accmd += " -L --annots_gff3 {0}".format(annotation) - - if prepare: - write_file(runfile, accmd, append=True) - else: - sh(accmd, grid=grid, grid_opts=opts) - - -def longest(args): - """ - %prog longest pasa.fasta output.subclusters.out - - Find the longest PASA assembly and label it as full-length. Also removes - transcripts shorter than half the length of the longest, or shorter than - 200bp. The assemblies for the same locus is found in - `output.subclusters.out`. In particular the lines that look like: - - sub-cluster: asmbl_25 asmbl_26 asmbl_27 - """ - from jcvi.formats.fasta import Fasta, SeqIO - from jcvi.formats.sizes import Sizes - - p = OptionParser(longest.__doc__) - p.add_argument( - "--prefix", - default="pasa", - help="Replace asmbl_ with prefix", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, subclusters = args - prefix = fastafile.rsplit(".", 1)[0] - - idsfile = prefix + ".fl.ids" - fw = open(idsfile, "w") - sizes = Sizes(fastafile).mapping - - name_convert = lambda x: x.replace("asmbl", opts.prefix) - - keep = set() # List of IDs to write - fp = open(subclusters) - nrecs = 0 - for row in fp: - if not row.startswith("sub-cluster:"): - continue - asmbls = row.split()[1:] - longest_asmbl = max(asmbls, key=lambda x: sizes[x]) - longest_size = sizes[longest_asmbl] - print(name_convert(longest_asmbl), file=fw) - nrecs += 1 - cutoff = max(longest_size / 2, 200) - keep.update(set(x for x in asmbls if sizes[x] >= cutoff)) - - fw.close() - logger.debug("{0} fl-cDNA records written to `{1}`.".format(nrecs, idsfile)) - - f = Fasta(fastafile, lazy=True) - newfastafile = prefix + ".clean.fasta" - fw = open(newfastafile, "w") - nrecs = 0 - for name, rec in f.iteritems_ordered(): - if name not in keep: - continue - - rec.id = name_convert(name) - 
rec.description = "" - SeqIO.write([rec], fw, "fasta") - nrecs += 1 - - fw.close() - logger.debug("{0} valid records written to `{1}`.".format(nrecs, newfastafile)) - - -def consolidate(args): - """ - %prog consolidate gffile1 gffile2 ... > consolidated.out - - Given 2 or more gff files generated by pasa annotation comparison, - iterate through each locus (shared locus name or overlapping CDS) - and identify same/different isoforms (shared splicing structure) - across the input datasets. - - If `slop` is enabled, consolidation will collapse any variation - in terminal UTR lengths, keeping the longest as representative. - """ - from jcvi.formats.base import longest_unique_prefix - from jcvi.formats.gff import make_index, match_subfeats - from jcvi.utils.cbook import AutoVivification - from jcvi.utils.grouper import Grouper - from itertools import combinations, product - - supported_modes = ["name", "coords"] - p = OptionParser(consolidate.__doc__) - p.add_argument( - "--slop", - default=False, - action="store_true", - help="allow minor variation in terminal 5'/3' UTR start/stop position", - ) - p.add_argument( - "--inferUTR", - default=False, - action="store_true", - help="infer presence of UTRs from exon coordinates", - ) - p.add_argument( - "--mode", - default="name", - choices=supported_modes, - help="method used to determine overlapping loci", - ) - p.add_argument( - "--summary", - default=False, - action="store_true", - help="Generate summary table of consolidation process", - ) - p.add_argument( - "--clusters", - default=False, - action="store_true", - help="Generate table of cluster members after consolidation", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - slop = opts.slop - inferUTR = opts.inferUTR - mode = opts.mode - - if len(args) < 2: - sys.exit(not p.print_help()) - - gffdbx = {} - for gffile in args: - dbn = longest_unique_prefix(gffile, args) - gffdbx[dbn] = make_index(gffile) - - loci = Grouper() - for dbn in gffdbx: - odbns = [odbn 
for odbn in gffdbx if dbn != odbn] - for gene in gffdbx[dbn].features_of_type("gene", order_by=("seqid", "start")): - if mode == "name": - loci.join(gene.id, (gene.id, dbn)) - else: - if (gene.id, dbn) not in loci: - loci.join((gene.id, dbn)) - gene_cds = list( - gffdbx[dbn].children(gene, featuretype="CDS", order_by="start") - ) - gene_cds_start, gene_cds_stop = gene_cds[0].start, gene_cds[-1].stop - for odbn in odbns: - for ogene_cds in gffdbx[odbn].region( - seqid=gene.seqid, - start=gene_cds_start, - end=gene_cds_stop, - strand=gene.strand, - featuretype="CDS", - ): - for ogene in gffdbx[odbn].parents( - ogene_cds, featuretype="gene" - ): - loci.join((gene.id, dbn), (ogene.id, odbn)) - - gfeats = {} - mrna = AutoVivification() - for i, locus in enumerate(loci): - gene = "gene_{0:0{pad}}".format(i, pad=6) if mode == "coords" else None - - for elem in locus: - if type(elem) == tuple: - _gene, dbn = elem - if gene is None: - gene = _gene - - g = gffdbx[dbn][_gene] - if gene not in gfeats: - gfeats[gene] = g - gfeats[gene].attributes["ID"] = [gene] - else: - if g.start < gfeats[gene].start: - gfeats[gene].start = g.start - if g.stop > gfeats[gene].stop: - gfeats[gene].stop = g.stop - - c = list( - gffdbx[dbn].children(_gene, featuretype="mRNA", order_by="start") - ) - if len(c) > 0: - mrna[gene][dbn] = c - - fw = must_open(opts.outfile, "w") - print("##gff-version 3", file=fw) - seen = {} - if opts.summary: - summaryfile = "{0}.summary.txt".format(opts.outfile.rsplit(".")[0]) - sfw = must_open(summaryfile, "w") - summary = ["id"] - summary.extend(gffdbx.keys()) - print("\t".join(str(x) for x in summary), file=sfw) - if opts.clusters: - clustersfile = "{0}.clusters.txt".format(opts.outfile.rsplit(".")[0]) - cfw = must_open(clustersfile, "w") - clusters = ["id", "dbns", "members", "trlens"] - print("\t".join(str(x) for x in clusters), file=cfw) - for gene in mrna: - g = Grouper() - dbns = list(combinations(mrna[gene], 2)) - if len(dbns) > 0: - for dbn1, dbn2 in dbns: 
- dbx1, dbx2 = gffdbx[dbn1], gffdbx[dbn2] - for mrna1, mrna2 in product(mrna[gene][dbn1], mrna[gene][dbn2]): - mrna1s, mrna2s = ( - mrna1.stop - mrna1.start + 1, - mrna2.stop - mrna2.start + 1, - ) - g.join((dbn1, mrna1.id, mrna1s)) - g.join((dbn2, mrna2.id, mrna2s)) - - if match_subfeats(mrna1, mrna2, dbx1, dbx2, featuretype="CDS"): - res = [] - ftypes = ( - ["exon"] - if inferUTR - else ["five_prime_UTR", "three_prime_UTR"] - ) - for ftype in ftypes: - res.append( - match_subfeats( - mrna1, - mrna2, - dbx1, - dbx2, - featuretype=ftype, - slop=slop, - ) - ) - - if all(res): - g.join((dbn1, mrna1.id, mrna1s), (dbn2, mrna2.id, mrna2s)) - else: - for dbn1 in mrna[gene]: - for mrna1 in mrna[gene][dbn1]: - g.join((dbn1, mrna1.id, mrna1.stop - mrna1.start + 1)) - - print(gfeats[gene], file=fw) - - for group in g: - group.sort(key=lambda x: x[2], reverse=True) - dbs, mrnas = [el[0] for el in group], [el[1] for el in group] - d, m = dbs[0], mrnas[0] - - dbid, _mrnaid = "|".join(str(x) for x in set(dbs)), [] - for x in mrnas: - if x not in _mrnaid: - _mrnaid.append(x) - mrnaid = "{0}|{1}".format(dbid, "-".join(_mrnaid)) - if mrnaid not in seen: - seen[mrnaid] = 0 - else: - seen[mrnaid] += 1 - mrnaid = "{0}-{1}".format(mrnaid, seen[mrnaid]) - - _mrna = gffdbx[d][m] - _mrna.attributes["ID"] = [mrnaid] - _mrna.attributes["Parent"] = [gene] - children = gffdbx[d].children(m, order_by="start") - print(_mrna, file=fw) - for child in children: - child.attributes["ID"] = ["{0}|{1}".format(dbid, child.id)] - child.attributes["Parent"] = [mrnaid] - print(child, file=fw) - - if opts.summary: - summary = [mrnaid] - summary.extend(["Y" if db in set(dbs) else "N" for db in gffdbx]) - print("\t".join(str(x) for x in summary), file=sfw) - - if opts.clusters: - clusters = [mrnaid] - clusters.append(",".join(str(el[0]) for el in group)) - clusters.append(",".join(str(el[1]) for el in group)) - clusters.append(",".join(str(el[2]) for el in group)) - print("\t".join(str(x) for x in clusters), 
file=cfw) - - fw.close() - if opts.summary: - sfw.close() - if opts.clusters: - cfw.close() - - -if __name__ == "__main__": - main() diff --git a/jcvi/annotation/qc.py b/jcvi/annotation/qc.py deleted file mode 100644 index 72d8b1d1..00000000 --- a/jcvi/annotation/qc.py +++ /dev/null @@ -1,376 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Run quality control (QC) on gene annotation. MAKER output was used during -testing. Several aspects of annotation QC are implemented in this script. - -- Trim UTRs. MAKER sometimes predict UTRs that extend into other genes. -- Remove overlapping models. -""" -import sys - -from ..apps.base import ActionDispatcher, OptionParser -from ..formats.gff import ( - Gff, - get_piles, - make_index, - import_feats, - populate_children, - to_range, -) -from ..formats.base import must_open -from ..formats.sizes import Sizes -from ..utils.range import range_chain, range_minmax, range_overlap - - -def main(): - - actions = ( - ("trimUTR", "remove UTRs in the annotation set"), - ("uniq", "remove overlapping gene models"), - ("nmd", "identify transcript variant candidates for nonsense-mediated decay"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def uniq(args): - """ - %prog uniq gffile cdsfasta - - Remove overlapping gene models. Similar to formats.gff.uniq(), overlapping - 'piles' are processed, one by one. - - Here, we use a different algorithm, that retains the best non-overlapping - subset witin each pile, rather than single best model. Scoring function is - also different, rather than based on score or span, we optimize for the - subset that show the best combined score. 
Score is defined by: - - score = (1 - AED) * length - """ - - p = OptionParser(uniq.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - gffile, cdsfasta = args - gff = Gff(gffile) - sizes = Sizes(cdsfasta).mapping - gene_register = {} - for g in gff: - if g.type != "mRNA": - continue - aed = float(g.attributes["_AED"][0]) - gene_register[g.parent] = (1 - aed) * sizes[g.accn] - - allgenes = import_feats(gffile) - g = get_piles(allgenes) - - bestids = set() - for group in g: - ranges = [to_range(x, score=gene_register[x.accn], id=x.accn) for x in group] - selected_chain, score = range_chain(ranges) - bestids |= set(x.id for x in selected_chain) - - removed = set(x.accn for x in allgenes) - bestids - fw = open("removed.ids", "w") - print("\n".join(sorted(removed)), file=fw) - fw.close() - populate_children(opts.outfile, bestids, gffile, "gene") - - -def get_cds_minmax(g, cid, level=2): - cds = [x for x in g.children(cid, level) if x.featuretype == "CDS"] - cdsranges = [(x.start, x.end) for x in cds] - return range_minmax(cdsranges) - - -def trim(c, start, end, trim5=False, trim3=False, both=True): - cstart, cend = c.start, c.end - # Trim coordinates for feature c based on overlap to start and end - if ((trim5 or both) and c.strand == "+") or ((trim3 or both) and c.strand == "-"): - c.start = max(cstart, start) - if ((trim3 or both) and c.strand == "+") or ((trim5 or both) and c.strand == "-"): - c.end = min(cend, end) - - if c.start != cstart or c.end != cend: - print( - c.id, - "trimmed [{0}, {1}] => [{2}, {3}]".format(cstart, cend, c.start, c.end), - file=sys.stderr, - ) - else: - print(c.id, "no change", file=sys.stderr) - - -def reinstate(c, rc, trim5=False, trim3=False, both=True): - cstart, cend = c.start, c.end - # reinstate coordinates for feature `c` based on reference feature `refc` - if ((trim5 or both) and c.strand == "+") or ((trim3 or both) and c.strand == "-"): - c.start = rc.start - if 
((trim3 or both) and c.strand == "+") or ((trim5 or both) and c.strand == "-"): - c.end = rc.end - - if c.start != cstart or c.end != cend: - print( - c.id, - "reinstated [{0}, {1}] => [{2}, {3}]".format(cstart, cend, c.start, c.end), - file=sys.stderr, - ) - else: - print(c.id, "no change", file=sys.stderr) - - -def cmp_children(cid, gff, refgff, cftype="CDS"): - start, end = get_cds_minmax(gff, cid, level=1) - rstart, rend = get_cds_minmax(refgff, cid, level=1) - return ( - ((start == rstart) and (end == rend)) - and ( - len(list(gff.children(cid, featuretype=cftype))) - == len(list(refgff.children(cid, featuretype=cftype))) - ) - and ( - gff.children_bp(cid, child_featuretype=cftype) - == refgff.children_bp(cid, child_featuretype=cftype) - ) - ) - - -def fprint(c, fw): - if c.start > c.end: - print(c.id, "destroyed [{0} > {1}]".format(c.start, c.end), file=sys.stderr) - else: - print(c, file=fw) - - -def trimUTR(args): - """ - %prog trimUTR gffile - - Remove UTRs in the annotation set. - - If reference GFF3 is provided, reinstate UTRs from reference - transcripts after trimming. 
- - Note: After running trimUTR, it is advised to also run - `python -m jcvi.formats.gff fixboundaries` on the resultant GFF3 - to adjust the boundaries of all parent 'gene' features - """ - import gffutils - from jcvi.formats.base import SetFile - - p = OptionParser(trimUTR.__doc__) - p.add_argument( - "--trim5", - default=None, - type=str, - help="File containing gene list for 5' UTR trimming", - ) - p.add_argument( - "--trim3", - default=None, - type=str, - help="File containing gene list for 3' UTR trimming", - ) - p.add_argument( - "--trimrange", - default=None, - type=str, - help="File containing gene list for UTR trim back" - + "based on suggested (start, stop) coordinate range", - ) - p.add_argument( - "--refgff", - default=None, - type=str, - help="Reference GFF3 used as fallback to replace UTRs", - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - gff = make_index(gffile) - - trim_both = False if (opts.trim5 or opts.trim3) else True - trim5 = SetFile(opts.trim5) if opts.trim5 else set() - trim3 = SetFile(opts.trim3) if opts.trim3 else set() - trimrange = dict() - if opts.trimrange: - trf = must_open(opts.trimrange) - for tr in trf: - assert ( - len(tr.split("\t")) == 3 - ), "Must specify (start, stop) coordinate range" - id, start, stop = tr.split("\t") - trimrange[id] = (int(start), int(stop)) - trf.close() - - refgff = make_index(opts.refgff) if opts.refgff else None - - fw = must_open(opts.outfile, "w") - for feat in gff.iter_by_parent_childs( - featuretype="gene", order_by=("seqid", "start"), level=1 - ): - for c in feat: - cid, ctype, cparent = ( - c.id, - c.featuretype, - c.attributes.get("Parent", [None])[0], - ) - t5, t3 = False, False - if ctype == "gene": - t5 = True if cid in trim5 else False - t3 = True if cid in trim3 else False - start, end = get_cds_minmax(gff, cid) - trim(c, start, end, trim5=t5, trim3=t3, both=trim_both) - fprint(c, fw) - elif ctype == 
"mRNA": - utr_types, extras = [], set() - if any(id in trim5 for id in (cid, cparent)): - t5 = True - trim5.add(cid) - if any(id in trim3 for id in (cid, cparent)): - t3 = True - trim3.add(cid) - refc = None - if refgff: - try: - refc = refgff[cid] - refctype = refc.featuretype - refptype = refgff[refc.attributes["Parent"][0]].featuretype - if refctype == "mRNA" and refptype == "gene": - if cmp_children(cid, gff, refgff, cftype="CDS"): - reinstate(c, refc, trim5=t5, trim3=t3, both=trim_both) - if t5: - utr_types.append("five_prime_UTR") - if t3: - utr_types.append("three_prime_UTR") - for utr_type in utr_types: - for utr in refgff.children( - refc, featuretype=utr_type - ): - extras.add(utr) - for exon in refgff.region( - region=utr, featuretype="exon" - ): - if exon.attributes["Parent"][0] == cid: - extras.add(exon) - else: - refc = None - except gffutils.exceptions.FeatureNotFoundError: - pass - start, end = get_cds_minmax(gff, cid, level=1) - if cid in trimrange: - start, end = range_minmax([trimrange[cid], (start, end)]) - if not refc: - trim(c, start, end, trim5=t5, trim3=t3, both=trim_both) - fprint(c, fw) - for cc in gff.children(cid, order_by="start"): - _ctype = cc.featuretype - if _ctype not in utr_types: - if _ctype != "CDS": - if _ctype == "exon": - eskip = [ - range_overlap(to_range(cc), to_range(x)) - for x in extras - if x.featuretype == "exon" - ] - if any(eskip): - continue - trim(cc, start, end, trim5=t5, trim3=t3, both=trim_both) - fprint(cc, fw) - else: - fprint(cc, fw) - for x in extras: - fprint(x, fw) - fw.close() - - -def nmd(args): - """ - %prog nmd gffile - - Identify transcript variants which might be candidates for nonsense - mediated decay (NMD) - - A transcript is considered to be a candidate for NMD when the CDS stop - codon is located more than 50nt upstream of terminal splice site donor - - References: - http://www.nature.com/horizon/rna/highlights/figures/s2_spec1_f3.html - http://www.biomedcentral.com/1741-7007/7/23/figure/F1 - 
""" - from jcvi.utils.cbook import enumerate_reversed - - p = OptionParser(nmd.__doc__) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - gff = make_index(gffile) - - fw = must_open(opts.outfile, "w") - for gene in gff.features_of_type("gene", order_by=("seqid", "start")): - _enumerate = enumerate if gene.strand == "-" else enumerate_reversed - for mrna in gff.children(gene, featuretype="mRNA", order_by="start"): - tracker = dict() - tracker["exon"] = list( - gff.children(mrna, featuretype="exon", order_by="start") - ) - tracker["cds"] = [None] * len(tracker["exon"]) - - tcds_pos = None - for i, exon in _enumerate(tracker["exon"]): - for cds in gff.region( - region=exon, featuretype="CDS", completely_within=True - ): - if mrna.id in cds["Parent"]: - tracker["cds"][i] = cds - tcds_pos = i - break - if tcds_pos: - break - - NMD, distance = False, 0 - if (mrna.strand == "+" and tcds_pos + 1 < len(tracker["exon"])) or ( - mrna.strand == "-" and tcds_pos - 1 >= 0 - ): - tcds = tracker["cds"][tcds_pos] - texon = tracker["exon"][tcds_pos] - - PTC = tcds.end if mrna.strand == "+" else tcds.start - TDSS = texon.end if mrna.strand == "+" else texon.start - distance = abs(TDSS - PTC) - NMD = True if distance > 50 else False - - print( - "\t".join( - str(x) - for x in ( - gene.id, - mrna.id, - gff.children_bp(mrna, child_featuretype="CDS"), - distance, - NMD, - ) - ), - file=fw, - ) - - fw.close() - - -if __name__ == "__main__": - main() diff --git a/jcvi/annotation/reformat.py b/jcvi/annotation/reformat.py deleted file mode 100644 index b7139a98..00000000 --- a/jcvi/annotation/reformat.py +++ /dev/null @@ -1,1398 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Convert common output files from gene prediction software into gff3 format. - -Similar to the utilities in DAWGPAWS. 
- -""" -import os -import sys -import re - -from collections import defaultdict -from itertools import groupby, product - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - cleanup, - logger, - need_update, - popen, - sh, -) -from ..utils.cbook import AutoVivification -from ..utils.grouper import Grouper -from ..formats.bed import Bed, BedLine, sort -from ..formats.base import SetFile, flexible_cast, get_number, must_open - - -FRAME, RETAIN, OVERLAP, NEW = "FRAME", "RETAIN", "OVERLAP", "NEW" -PRIORITY = (FRAME, RETAIN, OVERLAP, NEW) - -new_id_pat = re.compile(r"^\d+\.[cemtx]+\S+") -atg_name_pat = re.compile( - r""" - ^(?P - (?:(?P\w+[\D\d\D])\.?)(?P[\d|C|M]+)(?P[A-z]+)(?P\d+) - ) - \.?(?P\d+)? - """, - re.VERBOSE, -) - - -class Stride(object): - """ - Allows four basic strides and three extended strides: - __. - 0 10 | - 0 5 10 | basic set of strides - 0 3 7 10 | - 0 2 5 8 10 __| - 0 2 4 6 8 10 | - 0 1 3 5 7 9 10 | extended set of strides - 01 23 45 67 89 10 __| - - We have main parameters, # we need, # available go through all possible - numbers excluding everything in black. 
- """ - - def __init__(self, needed, available, extended=False): - configurations = ("0", "05", "037", "0258") - if extended: - configurations += ("02468", "013579", "0123456789") - nneeded = len(needed) - self.conf = None - self.available = None - for c in configurations: - a = [x for x in available if str(x)[-1] in c] - if len(a) >= nneeded: - self.conf = c - self.available = a - break - - -class NameRegister(object): - def __init__(self, prefix="Medtr", pad0=6, uc=False): - self.black = set() - self.gaps = [] - self.prefix = prefix - self.pad0 = pad0 - self.uc = uc - - def get_blacklist(self, filename): - black = SetFile(filename) - for x in black: - chr, rank = atg_name(x) - self.black.add((chr, rank)) - - def get_gaps(self, filename): - self.gapfile = filename - - def allocate(self, info, chr, start_id, end_id, id_table, extended_stride=False): - - start_bp = info[0].start - end_bp = info[-1].end - - current_chr = chr_number(chr) - needed = info - assert end_id > start_id, "end ({0}) > start ({1})".format(end_id, start_id) - - spots = end_id - start_id - 1 - available = [ - x for x in range(start_id + 1, end_id) if (current_chr, x) not in self.black - ] - - message = "{0} need {1} ids, has {2} spots ({3} available)".format( - chr, len(needed), spots, len(available) - ) - - start_gene = gene_name( - current_chr, start_id, prefix=self.prefix, pad0=self.pad0, uc=self.uc - ) - end_gene = gene_name( - current_chr, end_id, prefix=self.prefix, pad0=self.pad0, uc=self.uc - ) - message += " between {0} - {1}\n".format(start_gene, end_gene) - - assert end_bp > start_bp - - b = "\t".join(str(x) for x in (chr, start_bp - 1, end_bp)) - cmd = "echo '{0}' |".format(b) - cmd += " intersectBed -a {0} -b stdin".format(self.gapfile) - gaps = list(BedLine(x) for x in popen(cmd, debug=False)) - ngaps = len(gaps) - - gapsexpanded = [] - GeneDensity = 10000.0 # assume 10Kb per gene - for gap in gaps: - gap_bp = int(gap.score) - gap_ids = int(round(gap_bp / GeneDensity)) - 
gapsexpanded += [gap] * gap_ids - - lines = sorted(info + gapsexpanded, key=lambda x: x.start) - - message += "between bp: {0} - {1}, there are {2} gaps (total {3} ids)".format( - start_bp, end_bp, ngaps, len(lines) - ) - - needed = lines - stride = Stride(needed, available, extended=extended_stride) - conf = stride.conf - message += " stride: {0}".format(conf) - print(message, file=sys.stderr) - - nneeded = len(needed) - if conf is None: # prefix rule - prepend version number for spills - magic = 400000 # version 4 - firstdigit = 100000 - step = 10 # stride for the prefixed ids - rank = start_id + magic - if rank > magic + firstdigit: - rank -= firstdigit - available = [] - while len(available) != nneeded: - rank += step - if (current_chr, rank) in self.black: # avoid blacklisted ids - continue - available.append(rank) - - else: # follow the best stride - available = stride.available - if start_id == 0: # follow right flank at start of chr - available = available[-nneeded:] - else: # follow left flank otherwise - available = available[:nneeded] - - # Finally assign the ids - assert len(needed) == len(available) - for b, rank in zip(needed, available): - name = gene_name( - current_chr, rank, prefix=self.prefix, pad0=self.pad0, uc=self.uc - ) - print("\t".join((str(b), name)), file=sys.stderr) - id_table[b.accn] = name - self.black.add((current_chr, rank)) - print(file=sys.stderr) - - -def main(): - - actions = ( - ("rename", "rename genes for annotation release"), - # perform following actions on list files - ("reindex", "reindex isoforms per gene locus"), - ("publocus", "create pub_locus identifiers according to GenBank specs"), - # Medicago gene renumbering - ("annotate", "annotation new bed file with features from old"), - ("renumber", "renumber genes for annotation updates"), - ("instantiate", "instantiate NEW genes tagged by renumber"), - ("plot", "plot gene identifiers along certain chromosome"), - # External gene prediction programs - ("augustus", "convert 
augustus output into gff3"), - ("tRNAscan", "convert tRNAscan-SE output into gff3"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def plot(args): - """ - %prog plot tagged.new.bed chr1 - - Plot gene identifiers along a particular chromosome, often to illustrate the - gene id assignment procedure. - """ - from jcvi.graphics.base import plt, savefig - from jcvi.graphics.chromosome import ChromosomeMap - - p = OptionParser(plot.__doc__) - p.add_argument("--firstn", type=int, help="Only plot the first N genes") - p.add_argument("--ymax", type=int, help="Y-axis max value") - p.add_argument("--log", action="store_true", help="Write plotting data") - opts, args, iopts = p.set_image_options(args, figsize="6x4") - - if len(args) != 2: - sys.exit(not p.print_help()) - - taggedbed, chr = args - bed = Bed(taggedbed) - beds = list(bed.sub_bed(chr)) - old, new = [], [] - i = 0 - for b in beds: - accn = b.extra[0] - if "te" in accn: - continue - - accn, tag = accn.split("|") - if tag == "OVERLAP": - continue - - c, r = atg_name(accn) - if tag == "NEW": - new.append((i, r)) - else: - old.append((i, r)) - i += 1 - - ngenes = i - assert ngenes == len(new) + len(old) - - logger.debug("Imported {0} ranks on {1}.".format(ngenes, chr)) - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - xstart, xend = 0.2, 0.8 - ystart, yend = 0.2, 0.8 - pad = 0.02 - - ngenes = opts.firstn or ngenes - ymax = opts.ymax or 500000 - - title = "Assignment of Medtr identifiers" - if opts.ymax: - subtitle = "{0}, first {1} genes".format(chr, ngenes) - else: - subtitle = "{0}, {1} genes ({2} new)".format(chr, ngenes, len(new)) - - chr_map = ChromosomeMap( - fig, root, xstart, xend, ystart, yend, pad, 0, ymax, 5, title, subtitle - ) - - ax = chr_map.axes - - if opts.log: - from jcvi.utils.table import write_csv - - header = ["x", "y"] - write_csv(header, new, filename=chr + ".new") - write_csv(header, old, filename=chr + ".old") - - x, y = zip(*new) - ax.plot(x, 
y, "b,") - x, y = zip(*old) - ax.plot(x, y, "r,") - - # Legends - ymid = (ystart + yend) / 2 - y = ymid + pad - root.plot([0.2], [y], "r.", lw=2) - root.text(0.2 + pad, y, "Existing Medtr ids", va="center", size=10) - y = ymid - pad - root.plot([0.2], [y], "b.", lw=2) - root.text(0.2 + pad, y, "Newly instantiated ids", va="center", size=10) - - ax.set_xlim(0, ngenes) - ax.set_ylim(0, ymax) - ax.set_axis_off() - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - image_name = chr + ".identifiers." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def instantiate(args): - """ - %prog instantiate tagged.bed blacklist.ids big_gaps.bed - - instantiate NEW genes tagged by renumber. - """ - p = OptionParser(instantiate.__doc__) - p.set_annot_reformat_opts() - p.add_argument( - "--extended_stride", - default=False, - action="store_true", - help="Toggle extended strides for gene numbering", - ) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - taggedbed, blacklist, gapsbed = args - r = NameRegister(prefix=opts.prefix, pad0=opts.pad0, uc=opts.uc) - r.get_blacklist(blacklist) - r.get_gaps(gapsbed) - - # Run through the bed, identify stretch of NEW ids to instantiate, - # identify the flanking FRAMEs, interpolate! 
- bed = Bed(taggedbed) - outputbed = taggedbed.rsplit(".", 1)[0] + ".new.bed" - fw = open(outputbed, "w") - - tagkey = lambda x: x.rsplit("|", 1)[-1] - for chr, sbed in bed.sub_beds(): - current_chr = chr_number(chr) - if not current_chr: - continue - - sbed = list(sbed) - - ranks = [] - for i, s in enumerate(sbed): - nametag = s.extra[0] - tag = tagkey(nametag) - - if tag in (NEW, FRAME): - ranks.append((i, nametag)) - - blocks = [] - for tag, names in groupby(ranks, key=lambda x: tagkey(x[-1])): - names = list(names) - if tag == NEW: - blocks.append((tag, [sbed[x[0]] for x in names])) - else: - start, end = names[0][-1], names[-1][-1] - start, end = ( - atg_name(start, retval="rank"), - atg_name(end, retval="rank"), - ) - blocks.append((tag, [start, end])) - - id_table = {} # old to new name conversion - for i, (tag, info) in enumerate(blocks): - if tag != NEW: - continue - - start_id = 0 if i == 0 else blocks[i - 1][1][-1] - end_id = start_id + 10000 if i == len(blocks) - 1 else blocks[i + 1][1][0] - - r.allocate( - info, - chr, - start_id, - end_id, - id_table, - extended_stride=opts.extended_stride, - ) - - # Output new names - for i, s in enumerate(sbed): - nametag = s.extra[0] - name, tag = nametag.split("|") - - if tag == NEW: - assert name == "." 
- name = id_table[s.accn] - elif tag == OVERLAP: - if name in id_table: - name = id_table[name] - - s.extra[0] = "|".join((name, tag)) - print(s, file=fw) - - fw.close() - - -def atg_name(name, retval="chr,rank", trimpad0=True): - seps = ["g", "te", "trna", "s", "u", "nc"] - pad0s = ["rank"] - - if name is not None: - m = re.match(atg_name_pat, name) - if m is not None and m.group("sep").lower() in seps: - retvals = [] - for grp in retval.split(","): - if grp == "chr": - val = chr_number(m.group(grp)) - else: - val = ( - get_number(m.group(grp)) - if trimpad0 and grp in pad0s - else m.group(grp) - ) - retvals.append(val) - - return (x for x in retvals) if len(retvals) > 1 else retvals[0] - - return (None for _ in retval.split(",")) - - -def gene_name(current_chr, x, prefix="Medtr", sep="g", pad0=6, uc=False): - identifier = "{0}{1}{2}{3:0{4}}".format(prefix, current_chr, sep, x, pad0) - if uc: - identifier = identifier.upper() - return identifier - - -def chr_number(chr): - chr_pat = re.compile( - r"(?P\D*)(?P[\d|C|M]+)$", re.VERBOSE | re.IGNORECASE - ) - - if chr is not None: - m = re.match(chr_pat, chr) - if m is not None: - return flexible_cast(m.group("chr")) - - return None - - -def prepare(bedfile): - """ - Remove prepended tags in gene names. - """ - pf = bedfile.rsplit(".", 1)[0] - abedfile = pf + ".a.bed" - bbedfile = pf + ".b.bed" - fwa = open(abedfile, "w") - fwb = open(bbedfile, "w") - - bed = Bed(bedfile) - seen = set() - for b in bed: - accns = b.accn.split(";") - new_accns = [] - for accn in accns: - if ":" in accn: - method, a = accn.split(":", 1) - if method in ("liftOver", "GMAP", ""): - accn = a - if accn in seen: - logger.error("Duplicate id {0} found. 
Ignored.".format(accn)) - continue - - new_accns.append(accn) - b.accn = accn - print(b, file=fwa) - seen.add(accn) - - b.accn = ";".join(new_accns) - print(b, file=fwb) - fwa.close() - fwb.close() - - -def renumber(args): - """ - %prog renumber Mt35.consolidated.bed > tagged.bed - - Renumber genes for annotation updates. - """ - from jcvi.algorithms.lis import longest_increasing_subsequence - - p = OptionParser(renumber.__doc__) - p.set_annot_reformat_opts() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - - pf = bedfile.rsplit(".", 1)[0] - abedfile = pf + ".a.bed" - bbedfile = pf + ".b.bed" - if need_update(bedfile, (abedfile, bbedfile)): - prepare(bedfile) - - mbed = Bed(bbedfile) - g = Grouper() - for s in mbed: - accn = s.accn - g.join(*accn.split(";")) - - bed = Bed(abedfile) - for chr, sbed in bed.sub_beds(): - current_chr = chr_number(chr) - if not current_chr: - continue - - ranks = [] - gg = set() - for s in sbed: - accn = s.accn - achr, arank = atg_name(accn) - if achr != current_chr: - continue - ranks.append(arank) - gg.add(accn) - - lranks = longest_increasing_subsequence(ranks) - print( - current_chr, - len(sbed), - "==>", - len(ranks), - "==>", - len(lranks), - file=sys.stderr, - ) - - granks = set( - gene_name(current_chr, x, prefix=opts.prefix, pad0=opts.pad0, uc=opts.uc) - for x in lranks - ) | set( - gene_name( - current_chr, x, prefix=opts.prefix, pad0=opts.pad0, sep="te", uc=opts.uc - ) - for x in lranks - ) - - tagstore = {} - for s in sbed: - achr, arank = atg_name(s.accn) - accn = s.accn - if accn in granks: - tag = (accn, FRAME) - elif accn in gg: - tag = (accn, RETAIN) - else: - tag = (".", NEW) - - tagstore[accn] = tag - - # Find cases where genes overlap - for s in sbed: - accn = s.accn - gaccn = g[accn] - tags = [((tagstore[x][-1] if x in tagstore else NEW), x) for x in gaccn] - group = [(PRIORITY.index(tag), x) for tag, x in tags] - best = min(group)[-1] - - if accn != 
best: - tag = (best, OVERLAP) - else: - tag = tagstore[accn] - - print("\t".join((str(s), "|".join(tag)))) - - -def annotate(args): - r""" - %prog annotate new.bed old.bed 2> log - - Annotate the `new.bed` with features from `old.bed` for the purpose of - gene numbering. - - Ambiguity in ID assignment can be resolved by either of the following 2 methods: - - `alignment`: make use of global sequence alignment score (calculated by `needle`) - - `overlap`: make use of overlap length (calculated by `intersectBed`) - - Transfer over as many identifiers as possible while following guidelines: - http://www.arabidopsis.org/portals/nomenclature/guidelines.jsp#editing - - Note: Following RegExp pattern describes the structure of the identifier - assigned to features in the `new.bed` file. - - new_id_pat = re.compile(r"^\d+\.[cemtx]+\S+") - - Examples: 23231.m312389, 23231.t004898, 23231.tRNA.144 - Adjust the value of `new_id_pat` manually as per your ID naming conventions. - """ - valid_resolve_choices = ["alignment", "overlap"] - - p = OptionParser(annotate.__doc__) - p.add_argument( - "--resolve", - default="alignment", - choices=valid_resolve_choices, - help="Resolve ID assignment based on a certain metric", - ) - p.add_argument( - "--atg_name", - default=False, - action="store_true", - help="Specify is locus IDs in `new.bed` file follow ATG nomenclature", - ) - - g1 = p.add_argument_group( - "Optional parameters (alignment):\n" - + "Use if resolving ambiguities based on sequence `alignment`", - ) - g1.add_argument( - "--pid", - dest="pid", - default=35.0, - type=float, - help="Percent identity cutoff", - ) - g1.add_argument( - "--score", - dest="score", - default=250.0, - type=float, - help="Alignment score cutoff", - ) - - g2 = p.add_argument_group( - "Optional parameters (overlap):\n" - + "Use if resolving ambiguities based on `overlap` length\n" - + "Parameters equivalent to `intersectBed`", - ) - g2.add_argument( - "-f", - dest="f", - default=0.5, - type=float, - 
help="Minimum overlap fraction (0.0 - 1.0)", - ) - g2.add_argument( - "-r", - dest="r", - default=False, - action="store_true", - help="Require fraction overlap to be reciprocal", - ) - g2.add_argument( - "-s", - dest="s", - default=True, - action="store_true", - help="Require same strandedness", - ) - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - nbedfile, obedfile = args - npf, opf = nbedfile.rsplit(".", 1)[0], obedfile.rsplit(".", 1)[0] - - # Make consolidated.bed - cbedfile = "consolidated.bed" - if not os.path.isfile(cbedfile): - consolidate(nbedfile, obedfile, cbedfile) - else: - logger.warning("`{0}` already exists. Skipping step".format(cbedfile)) - - logger.warning( - "Resolving ID assignment ambiguity based on `{0}`".format(opts.resolve) - ) - - if opts.resolve == "alignment": - # Get pairs and prompt to run needle - pairsfile = "nw.pairs" - scoresfile = "nw.scores" - if not os.path.isfile(pairsfile): - get_pairs(cbedfile, pairsfile) - else: - logger.warning( - "`{0}` already exists. Checking for needle output".format(pairsfile) - ) - - # If needle scores do not exist, prompt user to run needle - if not os.path.isfile(scoresfile): - logger.error( - "`{0}` does not exist. Please process {1} using `needle`".format( - scoresfile, pairsfile - ) - ) - sys.exit() - else: - scoresfile = "ovl.scores" - # Calculate overlap length using intersectBed - calculate_ovl(nbedfile, obedfile, opts, scoresfile) - - logger.warning("`{0}' exists. 
Storing scores in memory".format(scoresfile)) - scores = read_scores(scoresfile, opts) - - # Iterate through consolidated bed and - # filter piles based on score - abedline = {} - - cbed = Bed(cbedfile) - g = Grouper() - for c in cbed: - accn = c.accn - g.join(*accn.split(";")) - - nbedline = {} - nbed = Bed(nbedfile) - for line in nbed: - nbedline[line.accn] = line - - splits = set() - for chr, chrbed in nbed.sub_beds(): - abedline, splits = annotate_chr(chr, chrbed, g, scores, abedline, opts, splits) - - if splits is not None: - abedline = process_splits(splits, scores, nbedline, abedline) - - abedfile = npf + ".annotated.bed" - afh = open(abedfile, "w") - for accn in abedline: - print(abedline[accn], file=afh) - afh.close() - - sort([abedfile, "-i"]) - - -def calculate_ovl(nbedfile, obedfile, opts, scoresfile): - from pybedtools import BedTool - - nbedtool = BedTool(nbedfile) - obedtool = BedTool(obedfile) - - ab = nbedtool.intersect(obedtool, wao=True, f=opts.f, r=opts.r, s=opts.s) - cmd = """cut -f4,5,10,13 | awk -F $'\t' 'BEGIN { OFS = FS } ($3 != "."){ print $1,$3,$2,$4; }'""" - sh(cmd, infile=ab.fn, outfile=scoresfile) - - -def read_scores(scoresfile, opts=None, sort=False, trimsuffix=True): - scores = {} - _pid, _score, resolve = ( - (0.0, 0.0, "alignment") - if opts is None - else (opts.pid, opts.score, opts.resolve) - ) - - fp = must_open(scoresfile) - logger.debug("Load scores file `{0}`".format(scoresfile)) - for row in fp: - (new, old, identity, score) = row.strip().split("\t") - if trimsuffix: - old = re.sub(r"\.\d+$", "", old) - if resolve == "alignment": - match = re.search(r"\d+/\d+\s+\(\s*(\d+\.\d+)%\)", identity) - pid = match.group(1) - if float(pid) < _pid or float(score) < _score: - continue - else: - pid = identity - - if new not in scores: - scores[new] = [] - - scores[new].append((new, old, float(pid), float(score))) - - if sort: - for new in scores: - scores[new].sort(key=lambda k: (-k[2], -k[3])) - - return scores - - -def 
annotate_chr(chr, chrbed, g, scores, abedline, opts, splits): - current_chr = chr_number(chr) - - for line in chrbed: - accn = line.accn - if accn not in g or (opts.atg_name and not current_chr): - abedline[accn] = line - continue - - gaccns = g[accn] - new = [a for a in gaccns if re.search(new_id_pat, a)] - newgrp = ";".join(sorted(new)) - - if accn in scores: - scores[accn] = sorted(scores[accn], key=lambda x: x[1]) - scores[accn] = sorted(scores[accn], key=lambda x: float(x[3]), reverse=True) - - accns = [] - print(accn, file=sys.stderr) - for elem in scores[accn]: - print("\t" + ", ".join([str(x) for x in elem[1:]]), file=sys.stderr) - if opts.atg_name: - achr, arank = atg_name(elem[1]) - if not achr or achr != current_chr: - continue - - accns.append(elem[1]) - if len(new) > 1: - if newgrp not in scores: - scores[newgrp] = [] - scores[newgrp].append(elem) - else: - accns[0:0] = [accn] - line.accn = ";".join([str(x) for x in accns]) - if len(scores[accn]) > 1: - break - - if len(new) > 1: - splits.add(newgrp) - else: - abedline[line.accn] = line - - return abedline, splits - - -def process_splits(splits, scores, nbedline, abedline): - for newgrp in splits: - new = newgrp.split(";") - print(new, file=sys.stderr) - if newgrp in scores: - best = {} - scores[newgrp] = sorted(scores[newgrp], key=lambda x: (x[0], x[1])) - scores[newgrp] = sorted( - scores[newgrp], key=lambda x: float(x[3]), reverse=True - ) - - for elem in scores[newgrp]: - if elem[1] not in best: - best[elem[1]] = elem[0] - - for n in new: - line = nbedline[n] - if n in scores: - accns = set() - scores[n] = sorted(scores[n], key=lambda x: x[1]) - scores[n] = sorted( - scores[n], key=lambda x: float(x[3]), reverse=True - ) - accns.add(n) - print("\t" + n, file=sys.stderr) - for elem in scores[n]: - if not elem[0] == n: - continue - print( - "\t\t" + ", ".join([str(x) for x in elem[1:]]), - file=sys.stderr, - ) - if elem[1] in best and n == best[elem[1]]: - accns.add(elem[1]) - accns = sorted(accns) - 
line.accn = ";".join([str(x) for x in accns]) - break - abedline[line.accn] = line - else: - for n in new: - abedline[n] = nbedline[n] - - return abedline - - -def get_pairs(cbedfile, pairsfile): - fp = open(pairsfile, "w") - bed = Bed(cbedfile) - for b in bed: - if ";" in b.accn: - genes = b.accn.split(";") - new = [x for x in genes if re.search(new_id_pat, x)] - old = [x for x in genes if not re.search(new_id_pat, x)] - for a, b in product(new, old): - print("\t".join((a, b)), file=fp) - - fp.close() - - -def consolidate(nbedfile, obedfile, cbedfile): - from pybedtools import BedTool - - nbedtool = BedTool(nbedfile) - obedtool = BedTool(obedfile) - - ab = nbedtool.intersect(obedtool, s=True, u=True) - ba = obedtool.intersect(nbedtool, s=True, u=True) - - cmd = "cat {0} {1} | sort -k1,1 -k2,2n".format(ab.fn, ba.fn) - fp = popen(cmd) - ovl = BedTool(fp.readlines()) - - abmerge = ovl.merge(s=True, nms=True, scores="mean").sort() - cmd = "cat {0}".format(abmerge.fn) - fp = popen(cmd, debug=False) - ovl = BedTool(fp.readlines()) - - notovl = nbedtool.intersect(ovl.sort(), s=True, v=True) - - infile = "{0} {1}".format(notovl.fn, ovl.fn) - tmpfile = "/tmp/reformat.{0}.bed".format(os.getpid()) - cmd = "sort -k1,1 -k2,2n" - sh(cmd, infile=infile, outfile=tmpfile) - - fp = open(cbedfile, "w") - bed = Bed(tmpfile) - for b in bed: - if ";" in b.accn: - accns = set() - for accn in b.accn.split(";"): - accns.add(accn) - b.accn = ";".join(accns) - print(b, file=fp) - fp.close() - cleanup(tmpfile) - - sort([cbedfile, "-i"]) - - -def rename(args): - """ - %prog rename genes.bed [gaps.bed] - - Rename genes for annotation release. - - For genes on chromosomes (e.g. the 12th gene on C1): - Bo1g00120 - - For genes on scaffolds (e.g. the 12th gene on unplaced Scaffold00285): - Bo00285s120 - - The genes identifiers will increment by 10. So assuming no gap, these are - the consecutive genes: - Bo1g00120, Bo1g00130, Bo1g00140... - Bo00285s120, Bo00285s130, Bo00285s140... 
- - When we encounter gaps, we would like the increment to be larger. For example, - Bo1g00120, , Bo1g01120... - - Gaps bed file is optional. - """ - import string - - p = OptionParser(rename.__doc__) - p.add_argument( - "-a", - dest="gene_increment", - default=10, - type=int, - help="Increment for continuous genes", - ) - p.add_argument( - "-b", - dest="gap_increment", - default=1000, - type=int, - help="Increment for gaps", - ) - p.add_argument( - "--pad0", - default=6, - type=int, - help="Pad gene identifiers with 0", - ) - p.add_argument( - "--spad0", - default=4, - type=int, - help="Pad gene identifiers on small scaffolds", - ) - p.add_argument("--prefix", default="Bo", help="Genome prefix") - p.add_argument( - "--jgi", - default=False, - action="store_true", - help="Create JGI style identifier PREFIX.NN[G|TE]NNNNN.1", - ) - opts, args = p.parse_args(args) - - if len(args) not in (1, 2): - sys.exit(not p.print_help()) - - genebed = args[0] - gapbed = args[1] if len(args) == 2 else None - prefix = opts.prefix - gene_increment = opts.gene_increment - gap_increment = opts.gap_increment - - genes = Bed(genebed) - if gapbed: - fp = open(gapbed) - for row in fp: - genes.append(BedLine(row)) - - genes.sort(key=genes.key) - idsfile = prefix + ".ids" - newbedfile = prefix + ".bed" - gap_increment -= gene_increment - assert gap_increment >= 0 - - if opts.jgi: - prefix += "." 
- fw = open(idsfile, "w") - for chr, lines in groupby(genes, key=lambda x: x.seqid): - lines = list(lines) - pad0 = opts.pad0 if len(lines) > 1000 else opts.spad0 - isChr = chr[0].upper() == "C" - digits = "".join(x for x in chr if x in string.digits) - gs = "g" if isChr else "s" - pp = prefix + digits + gs - idx = 0 - if isChr: - idx += gap_increment - - for r in lines: - isGap = r.strand not in ("+", "-") - if isGap: - idx += gap_increment - continue - else: - idx += gene_increment - accn = pp + "{0:0{1}d}".format(idx, pad0) - oldaccn = r.accn - print("\t".join((oldaccn, accn)), file=fw) - r.accn = accn - - genes.print_to_file(newbedfile) - logger.debug("Converted IDs written to `{0}`.".format(idsfile)) - logger.debug("Converted bed written to `{0}`.".format(newbedfile)) - - -def parse_prefix(identifier): - """ - Parse identifier such as a|c|le|d|li|re|or|AT4G00480.1 and return - tuple of prefix string (separated at '|') and suffix (AGI identifier) - """ - pf, id = (), identifier - if "|" in identifier: - pf, id = tuple(identifier.split("|")[:-1]), identifier.split("|")[-1] - - return pf, id - - -def reindex(args): - """ - %prog reindex gffile pep.fasta ref.pep.fasta - - Reindex the splice isoforms (mRNA) in input GFF file, preferably - generated after PASA annotation update - - In the input GFF file, there can be several types of mRNA within a locus: - * CDS matches reference, UTR extended, inherits reference mRNA ID - * CDS (slightly) different from reference, inherits reference mRNA ID - * Novel isoform added by PASA, have IDs like "LOCUS.1.1", "LOCUS.1.2" - * Multiple mRNA collapsed due to shared structure, have IDs like "LOCUS.1-LOCUS.1.1" - - In the case of multiple mRNA which have inherited the same reference mRNA ID, - break ties by comparing the new protein with the reference protein using - EMBOSS `needle` to decide which mRNA retains ID and which is assigned a new ID. - - All mRNA identifiers should follow the AGI naming conventions. 
- - When reindexing the isoform identifiers, order mRNA based on: - * decreasing transcript length - * decreasing support from multiple input datasets used to run pasa.consolidate() - """ - from jcvi.formats.gff import make_index - from jcvi.formats.fasta import Fasta - from jcvi.apps.emboss import needle - from tempfile import mkstemp - - p = OptionParser(reindex.__doc__) - p.add_argument( - "--scores", type=str, help="read from existing EMBOSS `needle` scores file" - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - ( - gffile, - pep, - refpep, - ) = args - gffdb = make_index(gffile) - reffasta = Fasta(refpep) - - if not opts.scores: - fh, pairsfile = mkstemp(prefix="pairs", suffix=".txt", dir=".") - fw = must_open(pairsfile, "w") - - conflict, novel = AutoVivification(), {} - for gene in gffdb.features_of_type("gene", order_by=("seqid", "start")): - geneid = atg_name(gene.id, retval="locus") - novel[geneid] = [] - updated_mrna, hybrid_mrna = [], [] - for mrna in gffdb.children( - gene, featuretype="mRNA", order_by=("seqid", "start") - ): - if re.match(atg_name_pat, mrna.id) is not None and "_" not in mrna.id: - pf, mrnaid = parse_prefix(mrna.id) - mlen = gffdb.children_bp(mrna, child_featuretype="exon") - if "-" in mrna.id: - hybrid_mrna.append((mrna.id, mrna.start, mlen, len(pf))) - else: - updated_mrna.append((mrna.id, mrna.start, mlen, len(pf))) - - for mrna in sorted(updated_mrna, key=lambda k: (k[1], -k[2], -k[3])): - pf, mrnaid = parse_prefix(mrna[0]) - mstart, mlen = mrna[1], mrna[2] - - iso = atg_name(mrnaid, retval="iso") - newiso = "{0}{1}".format(iso, re.sub(atg_name_pat, "", mrnaid)) - if iso == newiso: - if iso not in conflict[geneid]: - conflict[geneid][iso] = [] - conflict[geneid][iso].append( - (mrna[0], iso, newiso, mstart, mlen, len(pf)) - ) - else: - novel[geneid].append((mrna[0], None, newiso, mstart, mlen, len(pf))) - - for mrna in sorted(hybrid_mrna, key=lambda k: (k[1], -k[2], 
-k[3])): - pf, mrnaid = parse_prefix(mrna[0]) - mstart, mlen = mrna[1], mrna[2] - - _iso, _newiso = [], [] - for id in sorted(mrnaid.split("-")): - a = atg_name(id, retval="iso") - b = "{0}{1}".format(a, re.sub(atg_name_pat, "", id)) - _iso.append(a) - _newiso.append(b) - - _novel = None - newiso = "-".join(str(x) for x in set(_newiso)) - for iso, niso in zip(_iso, _newiso): - if iso == niso: - if iso not in conflict[geneid]: - conflict[geneid][iso] = [ - (mrna[0], iso, newiso, mstart, mlen, len(pf)) - ] - _novel = None - break - - _novel = True - - if _novel is not None: - novel[geneid].append((mrna[0], None, newiso, mstart, mlen, len(pf))) - - if not opts.scores: - for isoform in sorted(conflict[geneid]): - mrnaid = "{0}.{1}".format(geneid, isoform) - if mrnaid in reffasta.keys(): - for mrna in conflict[geneid][isoform]: - print("\t".join(str(x) for x in (mrnaid, mrna[0])), file=fw) - - if not opts.scores: - fw.close() - needle([pairsfile, refpep, pep]) - cleanup(pairsfile) - scoresfile = "{0}.scores".format(pairsfile.rsplit(".")[0]) - else: - scoresfile = opts.scores - - scores = read_scores(scoresfile, sort=True, trimsuffix=False) - - primary = {} - for geneid in conflict: - primary[geneid] = [] - for iso in sorted(conflict[geneid]): - conflict[geneid][iso].sort(key=lambda k: (k[3], -k[4], -k[5])) - _iso = "{0}.{1}".format(geneid, iso) - if _iso not in scores: - novel[geneid].extend(conflict[geneid][iso]) - continue - top_score = scores[_iso][0][1] - result = next( - (i for i, v in enumerate(conflict[geneid][iso]) if v[0] == top_score), - None, - ) - if result is not None: - primary[geneid].append(conflict[geneid][iso][result]) - del conflict[geneid][iso][result] - if geneid not in novel: - novel[geneid] = [] - novel[geneid].extend(conflict[geneid][iso]) - novel[geneid].sort(key=lambda k: (k[3], -k[4], -k[5])) - - fw = must_open(opts.outfile, "w") - for gene in gffdb.features_of_type("gene", order_by=("seqid", "start")): - geneid = gene.id - print(gene, 
file=fw) - seen = [] - if geneid in primary: - all_mrna = primary[geneid] - all_mrna.extend(novel[geneid]) - for iso, mrna in enumerate(all_mrna): - _mrna = gffdb[mrna[0]] - _iso = mrna[1] - if mrna not in novel[geneid]: - seen.append(int(mrna[1])) - else: - mseen = 0 if len(seen) == 0 else max(seen) - _iso = (mseen + iso + 1) - len(seen) - - _mrnaid = "{0}.{1}".format(geneid, _iso) - _mrna["ID"], _mrna["_old_ID"] = [_mrnaid], [_mrna.id] - - print(_mrna, file=fw) - for c in gffdb.children(_mrna, order_by="start"): - c["Parent"] = [_mrnaid] - print(c, file=fw) - else: - for feat in gffdb.children(gene, order_by=("seqid", "start")): - print(feat, file=fw) - - fw.close() - - -def publocus(args): - """ - %prog publocus idsfile > idsfiles.publocus - - Given a list of model identifiers, convert each into a GenBank approved - pub_locus. - - Example output: - Medtr1g007020.1 MTR_1g007020 - Medtr1g007030.1 MTR_1g007030 - Medtr1g007060.1 MTR_1g007060A - Medtr1g007060.2 MTR_1g007060B - """ - p = OptionParser(publocus.__doc__) - p.add_argument("--locus_tag", default="MTR_", help="GenBank locus tag") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - locus_tag = opts.locus_tag - - index = AutoVivification() - (idsfile,) = args - fp = must_open(idsfile) - for row in fp: - locus, chrom, sep, rank, iso = atg_name(row, retval="locus,chr,sep,rank,iso") - if None in (locus, chrom, sep, rank, iso): - logger.warning("{0} is not a valid gene model identifier".format(row)) - continue - if locus not in index.keys(): - pub_locus = gene_name(chrom, rank, prefix=locus_tag, sep=sep) - index[locus]["pub_locus"] = pub_locus - index[locus]["isos"] = set() - - index[locus]["isos"].add(int(iso)) - - for locus in index: - pub_locus = index[locus]["pub_locus"] - index[locus]["isos"] = sorted(index[locus]["isos"]) - if len(index[locus]["isos"]) > 1: - new = [chr(n + 64) for n in index[locus]["isos"] if n < 27] - for i, ni in zip(index[locus]["isos"], new): - 
print( - "\t".join( - x - for x in ( - "{0}.{1}".format(locus, i), - "{0}{1}".format(pub_locus, ni), - ) - ) - ) - else: - print( - "\t".join( - x - for x in ( - "{0}.{1}".format(locus, index[locus]["isos"][0]), - pub_locus, - ) - ) - ) - - -def augustus(args): - """ - %prog augustus augustus.gff3 > reformatted.gff3 - - AUGUSTUS does generate a gff3 (--gff3=on) but need some refinement. - """ - from jcvi.formats.gff import Gff - - p = OptionParser(augustus.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (ingff3,) = args - gff = Gff(ingff3) - fw = must_open(opts.outfile, "w") - seen = defaultdict(int) - for g in gff: - if g.type not in ("gene", "transcript", "CDS"): - continue - - if g.type == "transcript": - g.type = "mRNA" - - prefix = g.seqid + "_" - pid = prefix + g.id - newid = "{0}-{1}".format(pid, seen[pid]) if pid in seen else pid - seen[pid] += 1 - g.attributes["ID"] = [newid] - g.attributes["Parent"] = [(prefix + x) for x in g.attributes["Parent"]] - g.update_attributes() - print(g, file=fw) - fw.close() - - -def tRNAscan(args): - """ - %prog tRNAscan all.trna > all.trna.gff3 - - Convert tRNAscan-SE output into gff3 format. 
- - Sequence tRNA Bounds tRNA Anti Intron Bounds Cove - Name tRNA # Begin End Type Codon Begin End Score - -------- ------ ---- ------ ---- ----- ----- ---- ------ - 23231 1 335355 335440 Tyr GTA 335392 335404 69.21 - 23231 2 1076190 1076270 Leu AAG 0 0 66.33 - - Conversion based on PERL one-liner in: - - """ - from jcvi.formats.gff import sort - - p = OptionParser(tRNAscan.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (trnaout,) = args - gffout = trnaout + ".gff3" - fp = open(trnaout) - fw = open(gffout, "w") - - next(fp) - next(fp) - row = next(fp) - assert row.startswith("--------") - - for row in fp: - atoms = [x.strip() for x in row.split("\t")] - contig, trnanum, start, end, aa, codon, intron_start, intron_end, score = atoms - - start, end = int(start), int(end) - orientation = "+" - if start > end: - start, end = end, start - orientation = "-" - - source = "tRNAscan" - type = "tRNA" - if codon == "???": - codon = "XXX" - - comment = "ID={0}.tRNA.{1};Name=tRNA-{2} (anticodon: {3})".format( - contig, trnanum, aa, codon - ) - - print( - "\t".join( - str(x) - for x in ( - contig, - source, - type, - start, - end, - score, - orientation, - ".", - comment, - ) - ), - file=fw, - ) - - fw.close() - sort([gffout, "-i"]) - - -if __name__ == "__main__": - main() diff --git a/jcvi/annotation/stats.py b/jcvi/annotation/stats.py deleted file mode 100644 index ee60436d..00000000 --- a/jcvi/annotation/stats.py +++ /dev/null @@ -1,386 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Collect gene statistics based on gff file: -Exon length, Intron length, Gene length, Exon count -""" -import os.path as op -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger, mkdir, need_update -from ..formats.base import DictFile, must_open -from ..formats.fasta import Fasta -from ..formats.gff import make_index -from ..utils.cbook import SummaryStats, human_size, percentage -from ..utils.range 
import range_interleave -from ..utils.table import tabulate - - -metrics = ("Exon_Length", "Intron_Length", "Gene_Length", "Exon_Count") - - -class GeneStats(object): - def __init__(self, feat, conf_class, transcript_sizes, exons): - self.fid = feat.id - self.conf_class = conf_class - self.num_exons = len(exons) - self.num_transcripts = len(transcript_sizes) - self.locus_size = feat.stop - feat.start + 1 - self.cum_transcript_size = sum(transcript_sizes) - self.cum_exon_size = sum((stop - start + 1) for (c, start, stop) in exons) - - def __str__(self): - return "\t".join( - str(x) - for x in ( - self.fid, - self.conf_class, - self.num_exons, - self.num_transcripts, - self.locus_size, - self.cum_transcript_size, - self.cum_exon_size, - ) - ) - - -def main(): - - actions = ( - ("stats", "collect gene statistics based on gff file"), - ("statstable", "print gene statistics table based on output of stats"), - ("histogram", "plot gene statistics based on output of stats"), - # summary tables of various styles - ("genestats", "print detailed gene statistics"), - ("summary", "print detailed gene/exon/intron statistics"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def gc(seqs): - gc = total = 0 - for s in seqs: - s = s.upper() - gc += s.count("G") + s.count("C") - total += sum(s.count(x) for x in "ACGT") - return percentage(gc, total, precision=0, mode=-1) - - -def summary(args): - """ - %prog summary gffile fastafile - - Print summary stats, including: - - Gene/Exon/Intron - - Number - - Average size (bp) - - Median size (bp) - - Total length (Mb) - - % of genome - - % GC - """ - p = OptionParser(summary.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - gff_file, ref = args - s = Fasta(ref) - g = make_index(gff_file) - geneseqs, exonseqs, intronseqs = [], [], [] # Calc % GC - for f in g.features_of_type("gene"): - fid = f.id - fseq = s.sequence({"chr": f.chrom, "start": f.start, "stop": f.stop}) - 
geneseqs.append(fseq) - exons = set( - (c.chrom, c.start, c.stop) - for c in g.children(fid, 2) - if c.featuretype == "exon" - ) - exons = list(exons) - for chrom, start, stop in exons: - fseq = s.sequence({"chr": chrom, "start": start, "stop": stop}) - exonseqs.append(fseq) - introns = range_interleave(exons) - for chrom, start, stop in introns: - fseq = s.sequence({"chr": chrom, "start": start, "stop": stop}) - intronseqs.append(fseq) - - r = {} # Report - for t, tseqs in zip(("Gene", "Exon", "Intron"), (geneseqs, exonseqs, intronseqs)): - tsizes = [len(x) for x in tseqs] - tsummary = SummaryStats(tsizes, dtype=int) - r[t, "Number"] = tsummary.size - r[t, "Average size (bp)"] = tsummary.mean - r[t, "Median size (bp)"] = tsummary.median - r[t, "Total length (Mb)"] = human_size(tsummary.sum, precision=0, target="Mb") - r[t, "% of genome"] = percentage( - tsummary.sum, s.totalsize, precision=0, mode=-1 - ) - r[t, "% GC"] = gc(tseqs) - - print(tabulate(r), file=sys.stderr) - - -def genestats(args): - """ - %prog genestats gffile - - Print summary stats, including: - - Number of genes - - Number of single-exon genes - - Number of multi-exon genes - - Number of distinct exons - - Number of genes with alternative transcript variants - - Number of predicted transcripts - - Mean number of distinct exons per gene - - Mean number of transcripts per gene - - Mean gene locus size (first to last exon) - - Mean transcript size (UTR, CDS) - - Mean exon size - - Stats modeled after barley genome paper Table 1. 
- A physical, genetic and functional sequence assembly of the barley genome - """ - p = OptionParser(genestats.__doc__) - p.add_argument( - "--groupby", default="conf_class", help="Print separate stats groupby" - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gff_file,) = args - gb = opts.groupby - g = make_index(gff_file) - - tf = gff_file + ".transcript.sizes" - if need_update(gff_file, tf): - fw = open(tf, "w") - for feat in g.features_of_type("mRNA"): - fid = feat.id - conf_class = feat.attributes.get(gb, "all") - tsize = sum( - (c.stop - c.start + 1) - for c in g.children(fid, 1) - if c.featuretype == "exon" - ) - print("\t".join((fid, str(tsize), conf_class)), file=fw) - fw.close() - - tsizes = DictFile(tf, cast=int) - conf_classes = DictFile(tf, valuepos=2) - logger.debug("A total of {0} transcripts populated.".format(len(tsizes))) - - genes = [] - for feat in g.features_of_type("gene"): - fid = feat.id - transcripts = [c.id for c in g.children(fid, 1) if c.featuretype == "mRNA"] - if len(transcripts) == 0: - continue - transcript_sizes = [tsizes[x] for x in transcripts] - exons = set( - (c.chrom, c.start, c.stop) - for c in g.children(fid, 2) - if c.featuretype == "exon" - ) - conf_class = conf_classes[transcripts[0]] - gs = GeneStats(feat, conf_class, transcript_sizes, exons) - genes.append(gs) - - r = {} # Report - distinct_groups = set(conf_classes.values()) - for g in distinct_groups: - num_genes = num_single_exon_genes = num_multi_exon_genes = 0 - num_genes_with_alts = num_transcripts = num_exons = max_transcripts = 0 - cum_locus_size = cum_transcript_size = cum_exon_size = 0 - for gs in genes: - if gs.conf_class != g: - continue - num_genes += 1 - if gs.num_exons == 1: - num_single_exon_genes += 1 - else: - num_multi_exon_genes += 1 - num_exons += gs.num_exons - if gs.num_transcripts > 1: - num_genes_with_alts += 1 - if gs.num_transcripts > max_transcripts: - max_transcripts = 
gs.num_transcripts - num_transcripts += gs.num_transcripts - cum_locus_size += gs.locus_size - cum_transcript_size += gs.cum_transcript_size - cum_exon_size += gs.cum_exon_size - - mean_num_exons = num_exons * 1.0 / num_genes - mean_num_transcripts = num_transcripts * 1.0 / num_genes - mean_locus_size = cum_locus_size * 1.0 / num_genes - mean_transcript_size = cum_transcript_size * 1.0 / num_transcripts - mean_exon_size = cum_exon_size * 1.0 / num_exons if num_exons != 0 else 0 - - r[("Number of genes", g)] = num_genes - r[("Number of single-exon genes", g)] = percentage( - num_single_exon_genes, num_genes, mode=1 - ) - r[("Number of multi-exon genes", g)] = percentage( - num_multi_exon_genes, num_genes, mode=1 - ) - r[("Number of distinct exons", g)] = num_exons - r[("Number of genes with alternative transcript variants", g)] = percentage( - num_genes_with_alts, num_genes, mode=1 - ) - r[("Number of predicted transcripts", g)] = num_transcripts - r[("Mean number of distinct exons per gene", g)] = mean_num_exons - r[("Mean number of transcripts per gene", g)] = mean_num_transcripts - r[("Max number of transcripts per gene", g)] = max_transcripts - r[("Mean gene locus size (first to last exon)", g)] = mean_locus_size - r[("Mean transcript size (UTR, CDS)", g)] = mean_transcript_size - r[("Mean exon size", g)] = mean_exon_size - - fw = must_open(opts.outfile, "w") - print(tabulate(r), file=fw) - fw.close() - - -def statstable(args): - """ - %prog statstable *.gff - - Print gene statistics table. 
- """ - p = OptionParser(statstable.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - gff_files = args - for metric in metrics: - logger.debug("Parsing files in `{0}`..".format(metric)) - - table = {} - for x in gff_files: - pf = op.basename(x).split(".")[0] - numberfile = op.join(metric, pf + ".txt") - ar = [int(x.strip()) for x in open(numberfile)] - sum = SummaryStats(ar).todict().items() - keys, vals = zip(*sum) - keys = [(pf, x) for x in keys] - table.update(dict(zip(keys, vals))) - - print(tabulate(table), file=sys.stderr) - - -def histogram(args): - """ - %prog histogram *.gff - - Plot gene statistics based on output of stats. For each gff file, look to - see if the metrics folder (i.e. Exon_Length) contains the data and plot - them. - """ - from jcvi.graphics.histogram import histogram_multiple - - p = OptionParser(histogram.__doc__) - p.add_argument( - "--bins", - dest="bins", - default=40, - type=int, - help="number of bins to plot in the histogram", - ) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - gff_files = args - # metrics = ("Exon_Length", "Intron_Length", "Gene_Length", "Exon_Count") - colors = ("red", "green", "blue", "black") - vmaxes = (1000, 1000, 4000, 20) - xlabels = ("bp", "bp", "bp", "number") - for metric, color, vmax, xlabel in zip(metrics, colors, vmaxes, xlabels): - logger.debug("Parsing files in `{0}`..".format(metric)) - numberfiles = [ - op.join(metric, op.basename(x).split(".")[0] + ".txt") for x in gff_files - ] - - histogram_multiple( - numberfiles, - 0, - vmax, - xlabel, - metric, - bins=opts.bins, - facet=True, - fill=color, - prefix=metric + ".", - ) - - -def stats(args): - """ - %prog stats infile.gff - - Collect gene statistics based on gff file. There are some terminology issues - here and so normally we call "gene" are actually mRNA, and sometimes "exon" - are actually CDS, but they are configurable. 
- - Thee numbers are written to text file in four separate folders, - corresponding to the four metrics: - - Exon length, Intron length, Gene length, Exon count - - With data written to disk then you can run %prog histogram - """ - p = OptionParser(stats.__doc__) - p.add_argument("--gene", default="mRNA", help="The gene type") - p.add_argument("--exon", default="CDS", help="The exon type") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gff_file,) = args - g = make_index(gff_file) - exon_lengths = [] - intron_lengths = [] - gene_lengths = [] - exon_counts = [] - for feat in g.features_of_type(opts.gene): - exons = [] - for c in g.children(feat.id, 1): - if c.featuretype != opts.exon: - continue - exons.append((c.chrom, c.start, c.stop)) - introns = range_interleave(exons) - feat_exon_lengths = [(stop - start + 1) for (chrom, start, stop) in exons] - feat_intron_lengths = [(stop - start + 1) for (chrom, start, stop) in introns] - exon_lengths += feat_exon_lengths - intron_lengths += feat_intron_lengths - gene_lengths.append(sum(feat_exon_lengths)) - exon_counts.append(len(feat_exon_lengths)) - - a = SummaryStats(exon_lengths) - b = SummaryStats(intron_lengths) - c = SummaryStats(gene_lengths) - d = SummaryStats(exon_counts) - for x, title in zip((a, b, c, d), metrics): - x.title = title - print(x, file=sys.stderr) - - prefix = gff_file.split(".")[0] - for x in (a, b, c, d): - dirname = x.title - mkdir(dirname) - txtfile = op.join(dirname, prefix + ".txt") - x.tofile(txtfile) - - -if __name__ == "__main__": - main() diff --git a/jcvi/annotation/train.py b/jcvi/annotation/train.py deleted file mode 100644 index 40c35b3d..00000000 --- a/jcvi/annotation/train.py +++ /dev/null @@ -1,227 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Train ab initio gene predictors. 
-""" -import os -import os.path as op -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger, mkdir, need_update, sh - - -def main(): - - actions = ( - ("pasa", "extract pasa training models"), - ("snap", "train snap model"), - ("augustus", "train augustus model"), - ("genemark", "train genemark model"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def pasa(args): - """ - %prog ${pasadb}.assemblies.fasta ${pasadb}.pasa_assemblies.gff3 - - Wraps `pasa_asmbls_to_training_set.dbi`. - """ - from jcvi.formats.base import SetFile - from jcvi.formats.gff import Gff - - p = OptionParser(pasa.__doc__) - p.set_home("pasa") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, gffile = args - transcodergff = fastafile + ".transdecoder.gff3" - transcodergenomegff = fastafile + ".transdecoder.genome.gff3" - if need_update((fastafile, gffile), (transcodergff, transcodergenomegff)): - cmd = "{0}/scripts/pasa_asmbls_to_training_set.dbi".format(opts.pasa_home) - cmd += " --pasa_transcripts_fasta {0} --pasa_transcripts_gff3 {1}".format( - fastafile, gffile - ) - sh(cmd) - - completeids = fastafile.rsplit(".", 1)[0] + ".complete.ids" - if need_update(transcodergff, completeids): - cmd = "grep complete {0} | cut -f1 | sort -u".format(transcodergff) - sh(cmd, outfile=completeids) - - complete = SetFile(completeids) - seen = set() - completegff = transcodergenomegff.rsplit(".", 1)[0] + ".complete.gff3" - fw = open(completegff, "w") - gff = Gff(transcodergenomegff) - for g in gff: - a = g.attributes - if "Parent" in a: - id = a["Parent"][0] - else: - id = a["ID"][0] - asmbl_id = id.split("|")[0] - if asmbl_id not in complete: - continue - print(g, file=fw) - if g.type == "gene": - seen.add(id) - - fw.close() - logger.debug( - "A total of {0} complete models extracted to `{1}`.".format( - len(seen), completegff - ) - ) - - -def genemark(args): - """ - %prog genemark species fastafile - - Train 
GENEMARK model given fastafile. GENEMARK self-trains so no trainig - model gff file is needed. - """ - p = OptionParser(genemark.__doc__) - p.add_argument("--junctions", help="Path to `junctions.bed` from Tophat2") - p.set_home("gmes") - p.set_cpus(cpus=32) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - species, fastafile = args - junctions = opts.junctions - mhome = opts.gmes_home - - license = op.expanduser("~/.gm_key") - assert op.exists(license), "License key ({0}) not found!".format(license) - cmd = "{0}/gmes_petap.pl --sequence {1}".format(mhome, fastafile) - cmd += " --cores {0}".format(opts.cpus) - if junctions: - intronsgff = "introns.gff" - if need_update(junctions, intronsgff): - jcmd = "{0}/bet_to_gff.pl".format(mhome) - jcmd += " --bed {0} --gff {1} --label Tophat2".format(junctions, intronsgff) - sh(jcmd) - cmd += " --ET {0} --et_score 10".format(intronsgff) - else: - cmd += " --ES" - sh(cmd) - - logger.debug("GENEMARK matrix written to `output/gmhmm.mod") - - -def snap(args): - """ - %prog snap species gffile fastafile - - Train SNAP model given gffile and fastafile. 
Whole procedure taken from: - - """ - p = OptionParser(snap.__doc__) - p.set_home("maker") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - species, gffile, fastafile = args - gffile = os.path.abspath(gffile) - fastafile = os.path.abspath(fastafile) - mhome = opts.maker_home - snapdir = "snap" - mkdir(snapdir) - - cwd = os.getcwd() - os.chdir(snapdir) - - newgffile = "training.gff3" - logger.debug("Construct GFF file combined with sequence ...") - sh("cat {0} > {1}".format(gffile, newgffile)) - sh('echo "##FASTA" >> {0}'.format(newgffile)) - sh("cat {0} >> {1}".format(fastafile, newgffile)) - - logger.debug("Make models ...") - sh("{0}/src/bin/maker2zff training.gff3".format(mhome)) - sh("{0}/exe/snap/fathom -categorize 1000 genome.ann genome.dna".format(mhome)) - sh("{0}/exe/snap/fathom -export 1000 -plus uni.ann uni.dna".format(mhome)) - sh("{0}/exe/snap/forge export.ann export.dna".format(mhome)) - sh("{0}/exe/snap/hmm-assembler.pl {1} . > {1}.hmm".format(mhome, species)) - - os.chdir(cwd) - logger.debug("SNAP matrix written to `{0}/{1}.hmm`".format(snapdir, species)) - - -def augustus(args): - """ - %prog augustus species gffile fastafile - - Train AUGUSTUS model given gffile and fastafile. 
Whole procedure taken from: - - """ - p = OptionParser(augustus.__doc__) - p.add_argument( - "--autotrain", - default=False, - action="store_true", - help="Run autoAugTrain.pl to iteratively train AUGUSTUS", - ) - p.set_home("augustus") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - species, gffile, fastafile = args - gffile = os.path.abspath(gffile) - fastafile = os.path.abspath(fastafile) - mhome = opts.augustus_home - augdir = "augustus" - - cwd = os.getcwd() - mkdir(augdir) - os.chdir(augdir) - target = "{0}/config/species/{1}".format(mhome, species) - - if op.exists(target): - logger.debug("Removing existing target `{0}`".format(target)) - sh("rm -rf {0}".format(target)) - - config_path = "{0}/config".format(mhome) - sh( - "{0}/scripts/new_species.pl --species={1} --AUGUSTUS_CONFIG_PATH={2}".format( - mhome, species, config_path - ) - ) - sh( - "{0}/scripts/gff2gbSmallDNA.pl {1} {2} 1000 raw.gb".format( - mhome, gffile, fastafile - ) - ) - sh("{0}/bin/etraining --species={1} raw.gb 2> train.err".format(mhome, species)) - sh(r"cat train.err | perl -pe 's/.*in sequence (\S+): .*/$1/' > badgenes.lst") - sh("{0}/scripts/filterGenes.pl badgenes.lst raw.gb > training.gb".format(mhome)) - sh("grep -c LOCUS raw.gb training.gb") - - # autoAugTrain failed to execute, disable for now - if opts.autotrain: - sh("rm -rf {0}".format(target)) - sh( - "{0}/scripts/autoAugTrain.pl --trainingset=training.gb --species={1}".format( - mhome, species - ) - ) - - os.chdir(cwd) - sh("cp -r {0} augustus/".format(target)) - - -if __name__ == "__main__": - main() diff --git a/jcvi/annotation/trinity.py b/jcvi/annotation/trinity.py deleted file mode 100644 index 2f7e31f1..00000000 --- a/jcvi/annotation/trinity.py +++ /dev/null @@ -1,173 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Trinity assembly of RNAseq reads. Contains de novo (DN) method and genome-guided -(GG) method. 
- -DN: http://trinityrnaseq.sourceforge.net/ -GG: http://trinityrnaseq.sourceforge.net/genome_guided_trinity.html -""" - -import os.path as op -import os -import sys - -from ..apps.base import ActionDispatcher, OptionParser, iglob, mkdir -from ..formats.base import FileMerger, write_file - - -def main(): - - actions = ( - ("prepare", "prepare shell script to run trinity-dn/gg on a folder of reads"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def prepare(args): - """ - %prog prepare [--options] folder [--bam rnaseq.coordSorted.bam] - - Run Trinity on a folder of reads. When paired-end (--paired) mode is on, - filenames will be scanned based on whether they contain the patterns - ("_1_" and "_2_") or (".1." and ".2.") or ("_1." and "_2."). - - By default, prepare script for DN-Trinity. - - If coord-sorted BAM is provided, prepare script for GG-Trinity, using BAM - as starting point. - - Newer versions of trinity can take multiple fastq files as input. - If "--merge" is specified, the fastq files are merged together before assembling - """ - p = OptionParser(prepare.__doc__) - p.add_argument( - "--paired", - default=False, - action="store_true", - help="Paired-end mode", - ) - p.add_argument( - "--merge", - default=False, - action="store_true", - help="Merge individual input fastq's into left/right/single file(s)", - ) - p.set_trinity_opts() - p.set_fastq_names() - p.set_grid() - opts, args = p.parse_args(args) - - if len(args) not in (1, 2): - sys.exit(not p.print_help()) - - (inparam,) = args[:1] - - paired = opts.paired - merge = opts.merge - trinity_home = opts.trinity_home - hpc_grid_runner_home = opts.hpcgridrunner_home - - method = "DN" - bam = opts.bam - if bam and op.exists(bam): - bam = op.abspath(bam) - method = "GG" - - pf = inparam.split(".")[0] - tfolder = "{0}_{1}".format(pf, method) - - cwd = os.getcwd() - mkdir(tfolder) - os.chdir(tfolder) - - cmds = [] - - # set TRINITY_HOME env variable when preparing shell script - env_cmd = 
'export TRINITY_HOME="{0}"'.format(trinity_home) - cmds.append(env_cmd) - - if method == "DN": - assert op.exists("../" + inparam) - - flist = iglob("../" + inparam, opts.names) - if paired: - f1 = [ - x for x in flist if "_1_" in x or ".1." in x or "_1." in x or "_R1" in x - ] - f2 = [ - x for x in flist if "_2_" in x or ".2." in x or "_2." in x or "_R2" in x - ] - assert len(f1) == len(f2) - if merge: - r1, r2 = "left.fastq", "right.fastq" - reads = ((f1, r1), (f2, r2)) - else: - if merge: - r = "single.fastq" - reads = ((flist, r),) - - if merge: - for fl, r in reads: - fm = FileMerger(fl, r) - fm.merge(checkexists=True) - - cmd = op.join(trinity_home, "Trinity") - cmd += " --seqType fq --max_memory {0} --CPU {1}".format(opts.max_memory, opts.cpus) - cmd += " --min_contig_length {0}".format(opts.min_contig_length) - - if opts.bflyGCThreads: - cmd += " --bflyGCThreads {0}".format(opts.bflyGCThreads) - - if method == "GG": - cmd += " --genome_guided_bam {0}".format(bam) - cmd += " --genome_guided_max_intron {0}".format(opts.max_intron) - else: - if paired: - if merge: - cmd += " --left {0} --right {1}".format(reads[0][-1], reads[1][-1]) - else: - cmd += " --left {0}".format(",".join(f1)) - cmd += " --right {0}".format(",".join(f2)) - else: - if merge: - cmd += " --single {0}".format(reads[0][-1]) - else: - for f in flist: - cmd += " --single {0}".format(f) - - if opts.grid and opts.grid_conf_file: - hpc_grid_runner = op.join(hpc_grid_runner_home, "hpc_cmds_GridRunner.pl") - hpc_grid_conf_file = op.join( - hpc_grid_runner_home, "hpc_conf", opts.grid_conf_file - ) - assert op.exists( - hpc_grid_conf_file - ), "HpcGridRunner conf file does not exist: {0}".format(hpc_grid_conf_file) - - cmd += ' --grid_exec "{0} --grid_conf {1} -c"'.format( - hpc_grid_runner, hpc_grid_conf_file - ) - - if opts.extra: - cmd += " {0}".format(opts.extra) - - cmds.append(cmd) - - if opts.cleanup: - cleanup_cmd = ( - 'rm -rf !("Trinity.fasta"|"Trinity.gene_trans_map"|"Trinity.timing")' - 
if method == "DN" - else 'rm -rf !("Trinity-GG.fasta"|"Trinity-GG.gene_trans_map"|"Trinity.timing")' - ) - cmds.append(cleanup_cmd) - - runfile = "run.sh" - write_file(runfile, "\n".join(cmds)) - os.chdir(cwd) - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/__init__.py b/jcvi/apps/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/jcvi/apps/__main__.py b/jcvi/apps/__main__.py deleted file mode 100644 index 5eb8aad6..00000000 --- a/jcvi/apps/__main__.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Miscellany of wrapper scripts for command-line bioinformatics tools, public data downloaders and other generic routines. -""" - -from .base import dmain - - -if __name__ == "__main__": - dmain(__file__) diff --git a/jcvi/apps/align.py b/jcvi/apps/align.py deleted file mode 100644 index bb22b90b..00000000 --- a/jcvi/apps/align.py +++ /dev/null @@ -1,713 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Perform DNA-DNA alignment using BLAST, NUCMER and BLAT. Keep the interface the -same and does parallelization both in core and on grid. 
-""" -import os.path as op -import sys -import shutil - -from subprocess import CalledProcessError, STDOUT - -from ..utils.cbook import depends - -from .base import ( - ActionDispatcher, - OptionParser, - cleanup, - get_abs_path, - logger, - mkdir, - sh, - which, -) -from .grid import MakeManager - - -@depends -def run_formatdb(infile=None, outfile=None, dbtype="nucl"): - cmd = "makeblastdb" - cmd += " -dbtype {0} -in {1}".format(dbtype, infile) - sh(cmd) - - -@depends -def run_diamond_makedb(infile=None, outfile=None): - cmd = "diamond makedb" - cmd += " --in {0} --db {1} -p 5".format(infile, infile) - sh(cmd) - - -@depends -def run_blat( - infile=None, - outfile=None, - db="UniVec_Core", - pctid=95, - hitlen=50, - cpus=16, - overwrite=True, -): - cmd = "pblat -threads={0}".format(cpus) if which("pblat") else "blat" - cmd += " {0} {1} -out=blast8 {2}".format(db, infile, outfile) - sh(cmd) - - blatfile = outfile - filtered_blatfile = outfile + ".P{0}L{1}".format(pctid, hitlen) - run_blast_filter( - infile=blatfile, outfile=filtered_blatfile, pctid=pctid, hitlen=hitlen - ) - if overwrite: - shutil.move(filtered_blatfile, blatfile) - - -@depends -def run_vecscreen(infile=None, outfile=None, db="UniVec_Core", pctid=None, hitlen=None): - """ - BLASTN parameters reference: - http://www.ncbi.nlm.nih.gov/VecScreen/VecScreen_docs.html - """ - db = get_abs_path(db) - nin = db + ".nin" - run_formatdb(infile=db, outfile=nin) - - cmd = "blastn" - cmd += " -task blastn" - cmd += " -query {0} -db {1} -out {2}".format(infile, db, outfile) - cmd += " -penalty -5 -gapopen 4 -gapextend 4 -dust yes -soft_masking true" - cmd += " -searchsp 1750000000000 -evalue 0.01 -outfmt 6 -num_threads 8" - sh(cmd) - - -@depends -def run_megablast( - infile=None, - outfile=None, - db=None, - wordsize=None, - pctid=98, - hitlen=100, - best=None, - evalue=0.01, - task="megablast", - cpus=16, -): - assert db, "Need to specify database fasta file." 
- - db = get_abs_path(db) - nin = db + ".nin" - nin00 = db + ".00.nin" - nin = nin00 if op.exists(nin00) else (db + ".nin") - run_formatdb(infile=db, outfile=nin) - - cmd = "blastn" - cmd += " -query {0} -db {1} -out {2}".format(infile, db, outfile) - cmd += " -evalue {0} -outfmt 6 -num_threads {1}".format(evalue, cpus) - cmd += " -task {0}".format(task) - if wordsize: - cmd += " -word_size {0}".format(wordsize) - if pctid: - cmd += " -perc_identity {0}".format(pctid) - if best: - cmd += " -max_target_seqs {0}".format(best) - sh(cmd) - - if pctid and hitlen: - blastfile = outfile - filtered_blastfile = outfile + ".P{0}L{1}".format(pctid, hitlen) - run_blast_filter( - infile=blastfile, outfile=filtered_blastfile, pctid=pctid, hitlen=hitlen - ) - shutil.move(filtered_blastfile, blastfile) - - -def run_blast_filter(infile=None, outfile=None, pctid=95, hitlen=50): - from jcvi.formats.blast import filter - - logger.debug("Filter BLAST result (pctid={0}, hitlen={1})".format(pctid, hitlen)) - pctidopt = "--pctid={0}".format(pctid) - hitlenopt = "--hitlen={0}".format(hitlen) - filter([infile, pctidopt, hitlenopt]) - - -def main(): - actions = ( - ("blast", "run blastn using query against reference"), - ("blat", "run blat using query against reference"), - ("blasr", "run blasr on a set of pacbio reads"), - ("nucmer", "run nucmer using query against reference"), - ("last", "run last using query against reference"), - ("lastgenome", "run whole genome LAST"), - ("lastgenomeuniq", "run whole genome LAST and screen for 1-to-1 matches"), - ("minimap", "run minimap2 aligner"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def minimap(args): - """ - %prog minimap ref.fasta query.fasta - - Wrap minimap2 aligner using query against sequences. When query and ref - is the same, we are in "self-scan" mode (e.g. useful for finding internal - duplications resulted from mis-assemblies). 
- """ - from jcvi.formats.fasta import Fasta - - p = OptionParser(minimap.__doc__) - p.add_argument( - "--chunks", - type=int, - default=2000000, - help="Split ref.fasta into chunks of size in self-scan mode", - ) - p.set_outdir(outdir="outdir") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - ref, query = args - chunks = opts.chunks - outdir = opts.outdir - if ref != query: - raise NotImplementedError - - # "self-scan" mode - # build faidx (otherwise, parallel make may complain) - sh("samtools faidx {}".format(ref)) - f = Fasta(ref) - mkdir(outdir) - mm = MakeManager() - for name, size in f.itersizes(): - start = 0 - for end in range(chunks, size, chunks): - fafile = op.join(outdir, "{}_{}_{}.fa".format(name, start + 1, end)) - cmd = "samtools faidx {} {}:{}-{} -o {}".format( - ref, name, start + 1, end, fafile - ) - mm.add(ref, fafile, cmd) - - paffile = fafile.rsplit(".", 1)[0] + ".paf" - cmd = "minimap2 -P {} {} > {}".format(fafile, fafile, paffile) - mm.add(fafile, paffile, cmd) - - epsfile = fafile.rsplit(".", 1)[0] + ".eps" - cmd = "minidot {} > {}".format(paffile, epsfile) - mm.add(paffile, epsfile, cmd) - start += chunks - - mm.write() - - -def nucmer(args): - """ - %prog nucmer ref.fasta query.fasta - - Run NUCMER using query against reference. 
Parallel implementation derived - from: - """ - from itertools import product - - from jcvi.formats.base import split - - p = OptionParser(nucmer.__doc__) - p.add_argument( - "--chunks", type=int, help="Split both query and subject into chunks" - ) - p.set_params(prog="nucmer", params="-l 100 -c 500") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - ref, query = args - cpus = opts.cpus - nrefs = nqueries = opts.chunks or int(cpus**0.5) - refdir = ref.split(".")[0] + "-outdir" - querydir = query.split(".")[0] + "-outdir" - reflist = split([ref, refdir, str(nrefs)]).names - querylist = split([query, querydir, str(nqueries)]).names - - mm = MakeManager() - for i, (r, q) in enumerate(product(reflist, querylist)): - pf = "{0:04d}".format(i) - cmd = "nucmer -maxmatch" - cmd += " {0}".format(opts.extra) - cmd += " {0} {1} -p {2}".format(r, q, pf) - deltafile = pf + ".delta" - mm.add((r, q), deltafile, cmd) - print(cmd) - - mm.write() - - -def blasr(args): - """ - %prog blasr ref.fasta fofn - - Run blasr on a set of PacBio reads. This is based on a divide-and-conquer - strategy described below. 
- """ - from more_itertools import grouper - - p = OptionParser(blasr.__doc__) - p.set_cpus(cpus=8) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - reffasta, fofn = args - flist = sorted([x.strip() for x in open(fofn)]) - h5list = [] - mm = MakeManager() - for i, fl in enumerate(grouper(flist, 3)): - chunkname = "chunk{0:03d}".format(i) - fn = chunkname + ".fofn" - h5 = chunkname + ".cmp.h5" - fw = open(fn, "w") - print("\n".join(fl), file=fw) - fw.close() - - cmd = "pbalign {0} {1} {2}".format(fn, reffasta, h5) - cmd += " --nproc {0} --forQuiver --tmpDir .".format(opts.cpus) - mm.add((fn, reffasta), h5, cmd) - h5list.append(h5) - - # Merge h5, sort and repack - allh5 = "all.cmp.h5" - tmph5 = "tmp.cmp.h5" - cmd_merge = "cmph5tools.py merge --outFile {0}".format(allh5) - cmd_merge += " " + " ".join(h5list) - cmd_sort = "cmph5tools.py sort --deep {0} --tmpDir .".format(allh5) - cmd_repack = "h5repack -f GZIP=1 {0} {1}".format(allh5, tmph5) - cmd_repack += " && mv {0} {1}".format(tmph5, allh5) - mm.add(h5list, allh5, [cmd_merge, cmd_sort, cmd_repack]) - - # Quiver - pf = reffasta.rsplit(".", 1)[0] - variantsgff = pf + ".variants.gff" - consensusfasta = pf + ".consensus.fasta" - cmd_faidx = "samtools faidx {0}".format(reffasta) - cmd = "quiver -j 32 {0}".format(allh5) - cmd += " -r {0} -o {1} -o {2}".format(reffasta, variantsgff, consensusfasta) - mm.add(allh5, consensusfasta, [cmd_faidx, cmd]) - - mm.write() - - -def get_outfile(reffasta, queryfasta, suffix="blast", outdir=None): - q = op.basename(queryfasta).split(".")[0] - r = op.basename(reffasta).split(".")[0] - outfile = ".".join((q, r, suffix)) - if outdir: - outfile = op.join(outdir, outfile) - - return outfile - - -def blat(args): - """ - %prog blat ref.fasta query.fasta - - Calls blat and filters BLAST hits. 
- """ - p = OptionParser(blat.__doc__) - p.set_align(pctid=95, hitlen=30) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - reffasta, queryfasta = args - blastfile = get_outfile(reffasta, queryfasta, suffix="blat") - - run_blat( - infile=queryfasta, - outfile=blastfile, - db=reffasta, - pctid=opts.pctid, - hitlen=opts.hitlen, - cpus=opts.cpus, - overwrite=False, - ) - - return blastfile - - -def blast(args): - """ - %prog blast ref.fasta query.fasta - - Calls blast and then filter the BLAST hits. Default is megablast. - """ - task_choices = ("blastn", "blastn-short", "dc-megablast", "megablast", "vecscreen") - p = OptionParser(blast.__doc__) - p.set_align(pctid=0, evalue=0.01) - p.add_argument("--wordsize", type=int, help="Word size") - p.add_argument("--best", default=1, type=int, help="Only look for best N hits") - p.add_argument( - "--task", default="megablast", choices=task_choices, help="Task of the blastn" - ) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - reffasta, queryfasta = args - blastfile = get_outfile(reffasta, queryfasta) - - run_megablast( - infile=queryfasta, - outfile=blastfile, - db=reffasta, - wordsize=opts.wordsize, - pctid=opts.pctid, - evalue=opts.evalue, - hitlen=None, - best=opts.best, - task=opts.task, - cpus=opts.cpus, - ) - - return blastfile - - -def lastgenome(args): - """ - %prog genome_A.fasta genome_B.fasta - - Run LAST by calling LASTDB, LASTAL. 
The script runs the following steps: - $ lastdb -P0 -uNEAR -R01 Chr10A-NEAR Chr10A.fa - $ lastal -E0.05 -C2 Chr10A-NEAR Chr10A.fa -fTAB > Chr10A.Chr10A.tab - $ last-dotplot Chr10A.Chr10A.tab - """ - p = OptionParser(lastgenome.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - gA, gB = args - mm = MakeManager() - bb = lambda x: op.basename(x).rsplit(".", 1)[0] - gA_pf, gB_pf = bb(gA), bb(gB) - - # Build LASTDB - dbname = "-".join((gA_pf, "NEAR")) - dbfile = dbname + ".suf" - build_db_cmd = "lastdb -P0 -uNEAR -R01 {} {}".format(dbfile, gA) - mm.add(gA, dbfile, build_db_cmd) - - # Run LASTAL - tabfile = "{}.{}.tab".format(gA_pf, gB_pf) - lastal_cmd = "lastal -E0.05 -C2 {} {}".format(dbname, gB) - lastal_cmd += " -fTAB > {}".format(tabfile) - mm.add([dbfile, gB], tabfile, lastal_cmd) - - mm.write() - - -def lastgenomeuniq(args): - """ - %prog genome_A.fasta genome_B.fasta - - Run LAST by calling LASTDB, LASTAL and LAST-SPLIT. The recipe is based on - tutorial here: - - - - The script runs the following steps: - $ lastdb -P0 -uNEAR -R01 Chr10A-NEAR Chr10A.fa - $ lastal -E0.05 -C2 Chr10A-NEAR Chr10B.fa | last-split -m1 | maf-swap | last-split -m1 -fMAF > Chr10A.Chr10B.1-1.maf - $ maf-convert -n blasttab Chr10A.Chr10B.1-1.maf > Chr10A.Chr10B.1-1.blast - - Works with LAST v959. 
- """ - p = OptionParser(lastgenome.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - gA, gB = args - mm = MakeManager() - bb = lambda x: op.basename(x).rsplit(".", 1)[0] - gA_pf, gB_pf = bb(gA), bb(gB) - - # Build LASTDB - dbname = "-".join((gA_pf, "NEAR")) - dbfile = dbname + ".suf" - build_db_cmd = "lastdb -P0 -uNEAR -R01 {} {}".format(dbfile, gA) - mm.add(gA, dbfile, build_db_cmd) - - # Run LASTAL - maffile = "{}.{}.1-1.maf".format(gA_pf, gB_pf) - lastal_cmd = "lastal -E0.05 -C2 {} {}".format(dbname, gB) - lastal_cmd += " | last-split -m1" - lastal_cmd += " | maf-swap" - lastal_cmd += " | last-split -m1 -fMAF > {}".format(maffile) - mm.add([dbfile, gB], maffile, lastal_cmd) - - # Convert to BLAST format - blastfile = maffile.replace(".maf", ".blast") - convert_cmd = "maf-convert -n blasttab {} > {}".format(maffile, blastfile) - mm.add(maffile, blastfile, convert_cmd) - - mm.write() - - -@depends -def run_lastdb( - infile=None, outfile=None, mask=False, lastdb_bin="lastdb", dbtype="nucl" -): - outfilebase = outfile.rsplit(".", 1)[0] - db = "-p " if dbtype == "prot" else "" - mask = "-c " if mask else "" - cmd = "{0} {1}{2}{3} {4}".format(lastdb_bin, db, mask, outfilebase, infile) - sh(cmd) - - -def last(args, dbtype=None): - """ - %prog database.fasta query.fasta - - Run LAST by calling LASTDB and LASTAL. LAST program available: - - - Works with LAST-719. 
- """ - p = OptionParser(last.__doc__) - p.add_argument( - "--dbtype", - default="nucl", - choices=("nucl", "prot"), - help="Molecule type of subject database", - ) - p.add_argument("--path", help="Specify LAST path") - p.add_argument( - "--mask", default=False, action="store_true", help="Invoke -c in lastdb" - ) - p.add_argument( - "--format", - default="BlastTab", - choices=("TAB", "MAF", "BlastTab", "BlastTab+"), - help="Output format", - ) - p.add_argument( - "--minlen", - default=0, - type=int, - help="Filter alignments by how many bases match", - ) - p.add_argument("--minid", default=0, type=int, help="Minimum sequence identity") - p.set_cpus() - p.set_outdir() - p.set_params() - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - subject, query = args - path = opts.path - cpus = opts.cpus - if not dbtype: - dbtype = opts.dbtype - getpath = lambda x: op.join(path, x) if path else x - lastdb_bin = getpath("lastdb") - lastal_bin = getpath("lastal") - for bin in (lastdb_bin, lastal_bin): - if not which(bin): - logger.fatal("`%s` not found on PATH. 
Have you installed LAST?", bin) - sys.exit(1) - - subjectdb = subject.rsplit(".", 1)[0] - run_lastdb( - infile=subject, - outfile=subjectdb + ".prj", - mask=opts.mask, - lastdb_bin=lastdb_bin, - dbtype=dbtype, - ) - - u = 2 if opts.mask else 0 - cmd = "{0} -u {1} -i3G".format(lastal_bin, u) - cmd += " -f {0}".format(opts.format) - - minlen = opts.minlen - minid = opts.minid - extra = opts.extra - assert minid != 100, "Perfect match not yet supported" - mm = minid / (100 - minid) - - if minlen: - extra += " -e{0}".format(minlen) - if minid: - extra += " -r1 -q{0} -a{0} -b{0}".format(mm) - if extra: - cmd += " " + extra.strip() - - lastfile = get_outfile(subject, query, suffix="last", outdir=opts.outdir) - # Make several attempts to run LASTAL - try: - sh( - cmd + f" -P {cpus} {subjectdb} {query}", - outfile=lastfile, - check=True, - redirect_error=STDOUT, - ) - except CalledProcessError as e: # multi-threading disabled - message = "lastal failed with message:" - message += "\n{0}".format(e.output.decode()) - logger.error(message) - try: - logger.debug("Failed to run `lastal` with multi-threading. Trying again.") - sh( - cmd + f" -P 1 {subjectdb} {query}", - outfile=lastfile, - check=True, - redirect_error=STDOUT, - ) - except CalledProcessError as e: - message = "lastal failed with message:" - message += "\n{0}".format(e.output.decode()) - logger.error(message) - logger.fatal("Failed to run `lastal`. Aborted.") - cleanup(lastfile) - sys.exit(1) - return lastfile - - -def blast_main(args, dbtype=None): - """ - %prog database.fasta query.fasta - - Run blastp/blastn by calling BLAST+ blastp/blastn depends on dbtype. 
- """ - p = OptionParser(blast_main.__doc__) - p.add_argument( - "--dbtype", - default="nucl", - choices=("nucl", "prot"), - help="Molecule type of subject database", - ) - p.add_argument("--path", help="Specify BLAST path for blastn or blastp") - - p.set_cpus() - p.set_outdir() - p.set_params() - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - subject, query = args - path = opts.path - cpus = opts.cpus - if not dbtype: - dbtype = opts.dbtype - - getpath = lambda x: op.join(path, x) if path else x - cmd = "blastn" if dbtype == "nucl" else "blastp" - lastdb_bin = getpath("makeblastdb") - lastal_bin = getpath(cmd) - for bin in (lastdb_bin, lastal_bin): - if not which(bin): - logger.fatal("`%s` not found on PATH. Have you installed BLAST?", bin) - sys.exit(1) - - db_suffix = ".nin" if dbtype == "nucl" else ".pin" - - run_formatdb(infile=subject, outfile=subject + db_suffix, dbtype=dbtype) - - blastfile = get_outfile(subject, query, suffix="last", outdir=opts.outdir) - # Make several attempts to run LASTAL - try: - sh( - cmd - + f" -num_threads {cpus} -query {query} -db {subject} -out {blastfile}" - + " -outfmt 6 -max_target_seqs 1000 -evalue 1e-5", - check=False, - redirect_error=STDOUT, - ) - except CalledProcessError as e: # multi-threading disabled - message = f"{cmd} failed with message:" - message += "\n{0}".format(e.output.decode()) - logger.error(message) - logger.fatal("Failed to run `blast`. Aborted.") - cleanup(blastfile) - sys.exit(1) - return blastfile - - -def diamond_blastp_main(args, dbtype="prot"): - """ - %prog database.fasta query.fasta - - Run diamond blastp for protein alignment. 
- """ - p = OptionParser(diamond_blastp_main.__doc__) - - p.add_argument("--path", help="Specify diamond path for diamond blastp") - - p.set_cpus() - p.set_outdir() - p.set_params() - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - subject, query = args - path = opts.path - cpus = opts.cpus - if not dbtype: - dbtype = opts.dbtype - - getpath = lambda x: op.join(path, x) if path else x - cmd = "diamond blastp" - diamond_bin = getpath("diamond") - for bin in (diamond_bin,): - if not which(bin): - logger.fatal("`%s` not found on PATH. Have you installed Diamond?", bin) - sys.exit(1) - - run_diamond_makedb( - infile=subject, - outfile=subject + ".dmnd", - ) - - blastfile = get_outfile(subject, query, suffix="last", outdir=opts.outdir) - # Make several attempts to run LASTAL - try: - sh( - cmd - + f" --threads {cpus} --query {query} --db {subject} --out {blastfile}" - + " --ultra-sensitive --max-target-seqs 1000 --evalue 1e-5 --outfmt 6", - check=False, - redirect_error=STDOUT, - ) - except CalledProcessError as e: # multi-threading disabled - message = f"{cmd} failed with message:" - message += "\n{0}".format(e.output.decode()) - logger.error(message) - logger.fatal("Failed to run `diamond blastp`. 
Aborted.") - cleanup(blastfile) - sys.exit(1) - return blastfile - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/base.py b/jcvi/apps/base.py deleted file mode 100644 index ba9c761f..00000000 --- a/jcvi/apps/base.py +++ /dev/null @@ -1,2277 +0,0 @@ -""" -Basic support for running library as script -""" - -import errno -import fnmatch -import logging -import os -import os.path as op -import platform -import shutil -import signal -import sys -import time - -from argparse import ArgumentParser, SUPPRESS -from collections.abc import Iterable -from configparser import ( - ConfigParser, - RawConfigParser, - NoOptionError, - NoSectionError, - ParsingError, -) -from http.client import HTTPSConnection -from socket import gethostname -from subprocess import CalledProcessError, PIPE, call, check_output -from time import ctime -from typing import Any, Collection, List, Optional, Tuple, Union -from urllib.parse import urlencode - -from natsort import natsorted -from rich.console import Console -from rich.logging import RichHandler - -from .. import __copyright__, __version__ as version - - -os.environ["LC_ALL"] = "C" -# http://newbebweb.blogspot.com/2012/02/python-head-ioerror-errno-32-broken.html -signal.signal(signal.SIGPIPE, signal.SIG_DFL) -JCVIHELP = f"JCVI utility libraries {version} [{__copyright__}]\n" -TextCollection = Union[str, List[str], Tuple[str, ...]] - - -def get_logger(name: str, level: int = logging.DEBUG): - """ - Return a logger with a default ColoredFormatter. 
- """ - log = logging.getLogger(name) - if log.hasHandlers(): - log.handlers.clear() - log.addHandler(RichHandler(console=Console(stderr=True))) - log.propagate = False - log.setLevel(level) - return log - - -logger = get_logger("jcvi") - - -class ActionDispatcher(object): - """ - This class will be invoked - a) when the base package is run via __main__, listing all MODULESs - a) when a directory is run via __main__, listing all SCRIPTs - b) when a script is run directly, listing all ACTIONs - - This is controlled through the meta variable, which is automatically - determined in get_meta(). - """ - - def __init__(self, actions): - self.actions = actions - if not actions: - actions = [(None, None)] - self.valid_actions, self.action_helps = zip(*actions) - - def get_meta(self): - args = splitall(sys.argv[0])[-3:] - args[-1] = args[-1].replace(".py", "") - if args[-2] == "jcvi": - meta = "MODULE" - elif args[-1] == "__main__": - meta = "SCRIPT" - else: - meta = "ACTION" - return meta, args - - def print_help(self): - meta, args = self.get_meta() - if meta == "MODULE": - del args[0] - args[-1] = meta - elif meta == "SCRIPT": - args[-1] = meta - else: - args[-1] += " " + meta - - help = "Usage:\n python -m {0}\n\n\n".format(".".join(args)) - help += "Available {0}s:\n".format(meta) - max_action_len = max(len(action) for action, ah in self.actions) - for action, action_help in sorted(self.actions): - action = action.rjust(max_action_len + 4) - help += ( - " | ".join((action, action_help[0].upper() + action_help[1:])) + "\n" - ) - help += "\n" + JCVIHELP - - sys.stderr.write(help) - sys.exit(1) - - def dispatch(self, globals): - from difflib import get_close_matches - - meta = "ACTION" # function is only invoked for listing ACTIONs - if len(sys.argv) == 1: - self.print_help() - - action = sys.argv[1] - - if not action in self.valid_actions: - print("[error] {0} not a valid {1}\n".format(action, meta), file=sys.stderr) - alt = get_close_matches(action, self.valid_actions) 
- print( - "Did you mean one of these?\n\t{0}\n".format(", ".join(alt)), - file=sys.stderr, - ) - self.print_help() - - globals[action](sys.argv[2:]) - - -class OptionParser(ArgumentParser): - """ - This class is a wrapper around argparse.ArgumentParser, with some added - features. - """ - - def __init__(self, doc: Optional[str]): - usage = doc.replace("%prog", "%(prog)s") if doc else None - super().__init__(usage=usage, epilog=JCVIHELP) - - def parse_args(self, args=None): - """ - Parse the command line arguments. - """ - dests = set() - ol = [] - for g in [self] + self._action_groups: - ol += g._actions - for o in ol: - if o.dest in dests: - continue - self.add_help_from_choices(o) - dests.add(o.dest) - - return self.parse_known_args(args) - - def add_help_from_choices(self, o): - if o.help == SUPPRESS: - return - - default_tag = "%(default)s" - assert o.help, "Option {0} do not have help string".format(o) - help_pf = o.help[:1].upper() + o.help[1:] - if "[" in help_pf: - help_pf = help_pf.rsplit("[", 1)[0] - help_pf = help_pf.strip() - - if o.type == "choice": - if o.default is None: - default_tag = "guess" - ctext = "|".join(natsorted(str(x) for x in o.choices)) - if len(ctext) > 100: - ctext = ctext[:100] + " ... 
" - choice_text = "must be one of {0}".format(ctext) - o.help = "{0}, {1} [default: {2}]".format(help_pf, choice_text, default_tag) - else: - o.help = help_pf - if o.default is None: - default_tag = "disabled" - if not set(o.option_strings) & set(("--help", "--version")): - o.help += " [default: {0}]".format(default_tag) - - def set_grid(self): - """ - Add --grid options for command line programs - """ - self.add_argument( - "--grid", - dest="grid", - default=False, - action="store_true", - help="Run on the grid", - ) - - def set_grid_opts(self, array: bool = False): - group = self.add_argument_group("Grid parameters") - group.add_argument( - "-l", - dest="queue", - help="Name of the queue", - ) - group.add_argument( - "-t", - dest="threaded", - default=None, - type=int, - help="Append '-pe threaded N'", - ) - if array: - group.add_argument( - "-c", - dest="concurrency", - type=int, - help="Append task concurrency limit '-tc N'", - ) - group.add_argument( - "-d", - dest="outdir", - default=".", - help="Specify directory to store grid output/error files", - ) - group.add_argument( - "-N", dest="name", default=None, help="Specify descriptive name for the job" - ) - group.add_argument( - "-H", dest="hold_jid", default=None, help="Define the job dependency list" - ) - - def set_table(self, sep=",", align=False): - group = self.add_argument_group("Table formatting") - group.add_argument("--sep", default=sep, help="Separator") - if align: - group.add_argument( - "--noalign", - dest="align", - default=True, - action="store_false", - help="Cell alignment", - ) - else: - group.add_argument( - "--align", default=False, action="store_true", help="Cell alignment" - ) - - def set_downloader(self, downloader=None): - """ - Add --downloader options for given command line program. 
- """ - from jcvi.utils.ez_setup import ALL_DOWNLOADERS - - downloader_choices = [x[0] for x in ALL_DOWNLOADERS] - self.add_argument( - "--downloader", - default=downloader, - choices=downloader_choices, - help="Use the specified downloader to retrieve resources", - ) - - def set_params(self, prog=None, params=""): - """ - Add --params options for given command line programs - """ - dest_prog = "to {0}".format(prog) if prog else "" - self.add_argument( - "--params", - dest="extra", - default=params, - help="Extra parameters to pass {0}".format(dest_prog) - + " (these WILL NOT be validated)", - ) - - def set_outfile(self, outfile: Optional[str] = "stdout"): - """ - Add --outfile options to print out to filename. - """ - self.add_argument("-o", "--outfile", default=outfile, help="Outfile name") - - def set_outdir(self, outdir: Optional[str] = "."): - self.add_argument("--outdir", default=outdir, help="Specify output directory") - - def set_email(self): - """ - Add --email option to specify an email address - """ - self.add_argument( - "--email", - default=get_email_address(), - help="Specify an email address", - ) - - def set_tmpdir(self, tmpdir=None): - """ - Add --temporary_directory option to specify unix `sort` tmpdir - """ - self.add_argument( - "-T", "--tmpdir", default=tmpdir, help="Use temp directory instead of $TMP" - ) - - def set_cpus(self, cpus=0): - """ - Add --cpus options to specify how many threads to use. 
- """ - from multiprocessing import cpu_count - - max_cpus = cpu_count() - if not 0 < cpus < max_cpus: - cpus = max_cpus - self.add_argument( - "--cpus", - default=cpus, - type=int, - help="Number of CPUs to use, 0=unlimited", - ) - - def set_db_opts(self, dbname="mta4", credentials=True): - """ - Add db connection specific attributes - """ - from jcvi.utils.db import valid_dbconn, get_profile - - self.add_argument( - "--db", - default=dbname, - dest="dbname", - help="Specify name of database to query", - ) - self.add_argument( - "--connector", - default="Sybase", - dest="dbconn", - choices=valid_dbconn.keys(), - help="Specify database connector", - ) - hostname, username, password = get_profile() - if credentials: - self.add_argument("--hostname", default=hostname, help="Specify hostname") - self.add_argument( - "--username", default=username, help="Username to connect to database" - ) - self.add_argument( - "--password", default=password, help="Password to connect to database" - ) - self.add_argument("--port", type=int, help="Specify port number") - - def set_aws_opts(self, store="hli-mv-data-science/htang"): - from jcvi.utils.aws import s3ify - - store = s3ify(store) - group = self.add_argument_group("AWS and Docker options") - # https://github.com/hlids/infrastructure/wiki/Docker-calling-convention - group.add_argument("--sample_id", help="Sample ID") - group.add_argument("--workflow_execution_id", help="Workflow execution ID") - group.add_argument("--input_bam_path", help="Input BAM location (s3 ok)") - group.add_argument("--output_path", default=store, help="Output s3 path") - group.add_argument("--workdir", default=os.getcwd(), help="Specify work dir") - group.add_argument( - "--nocleanup", - default=False, - action="store_true", - help="Don't clean up after done", - ) - - def set_stripnames(self, default=True): - if default: - self.add_argument( - "--no_strip_names", - dest="strip_names", - action="store_false", - default=True, - help="do not strip 
alternative splicing " - "(e.g. At5g06540.1 -> At5g06540)", - ) - else: - self.add_argument( - "--strip_names", - action="store_true", - default=False, - help="strip alternative splicing (e.g. At5g06540.1 -> At5g06540)", - ) - - def set_fixchrnames(self, orgn="medicago"): - self.add_argument( - "--fixchrname", - default=orgn, - dest="fix_chr_name", - help="Fix quirky chromosome names", - ) - - def set_SO_opts(self): - verifySO_choices = ("verify", "resolve:prefix", "resolve:suffix") - self.add_argument( - "--verifySO", - choices=verifySO_choices, - help="Verify validity of GFF3 feature type against the SO; " - + "`resolve` will try to converge towards a valid SO " - + "term by removing elements from the feature type " - + "string by splitting at underscores. Example: " - + "`mRNA_TE_gene` resolves to `mRNA` using 'resolve:prefix'", - ) - - def set_beds(self): - self.add_argument("--qbed", help="Path to qbed") - self.add_argument("--sbed", help="Path to sbed") - - def set_histogram(self, vmin=0, vmax=None, bins=20, xlabel="value", title=None): - self.add_argument( - "--vmin", default=vmin, type=int, help="Minimum value, inclusive" - ) - self.add_argument( - "--vmax", default=vmax, type=int, help="Maximum value, inclusive" - ) - self.add_argument( - "--bins", - default=bins, - type=int, - help="Number of bins to plot in the histogram", - ) - self.add_argument("--xlabel", default=xlabel, help="Label on the X-axis") - self.add_argument("--title", default=title, help="Title of the plot") - - def set_sam_options(self, extra=True, bowtie=False): - self.add_argument( - "--sam", - dest="bam", - default=True, - action="store_false", - help="Write to SAM file instead of BAM", - ) - self.add_argument( - "--uniq", - default=False, - action="store_true", - help="Keep only uniquely mapped", - ) - if bowtie: - self.add_argument( - "--mapped", default=False, action="store_true", help="Keep mapped reads" - ) - self.add_argument( - "--unmapped", default=False, action="store_true", 
help="Keep unmapped reads" - ) - if extra: - self.set_cpus() - self.set_params() - - def set_mingap(self, default=100): - self.add_argument( - "--mingap", default=default, type=int, help="Minimum size of gaps" - ) - - def set_align( - self, - pctid=None, - hitlen=None, - pctcov=None, - evalue=None, - compreh_pctid=None, - compreh_pctcov=None, - intron=None, - bpsplice=None, - ): - if pctid is not None: - self.add_argument( - "--pctid", default=pctid, type=float, help="Sequence percent identity" - ) - if hitlen is not None: - self.add_argument( - "--hitlen", default=hitlen, type=int, help="Minimum overlap length" - ) - if pctcov is not None: - self.add_argument( - "--pctcov", - default=pctcov, - type=int, - help="Percentage coverage cutoff", - ) - if evalue is not None: - self.add_argument( - "--evalue", default=evalue, type=float, help="E-value cutoff" - ) - if compreh_pctid is not None: - self.add_argument( - "--compreh_pctid", - default=compreh_pctid, - type=int, - help="Sequence percent identity cutoff used to " - + "build PASA comprehensive transcriptome", - ) - if compreh_pctcov is not None: - self.add_argument( - "--compreh_pctcov", - default=compreh_pctcov, - type=int, - help="Percent coverage cutoff used to " - + "build PASA comprehensive transcriptome", - ) - if intron is not None: - self.add_argument( - "--intron", - default=intron, - type=int, - help="Maximum intron length used for mapping", - ) - if bpsplice is not None: - self.add_argument( - "--bpsplice", - default=bpsplice, - type=int, - help="Number of bp of perfect splice boundary", - ) - - def set_image_options( - self, - args=None, - figsize="6x6", - dpi=300, - format="pdf", - font="Helvetica", - style="darkgrid", - cmap="jet", - seed: Optional[int] = None, - ): - """ - Add image format options for given command line programs. 
- """ - from jcvi.graphics.base import ( - GRAPHIC_FORMATS, - ImageOptions, - is_tex_available, - setup_theme, - ) - - allowed_fonts = ( - "Helvetica", - "Liberation Sans", - "Palatino", - "Schoolbook", - "Arial", - ) - allowed_styles = ("darkgrid", "whitegrid", "dark", "white", "ticks") - allowed_diverge = ( - "BrBG", - "PiYG", - "PRGn", - "PuOr", - "RdBu", - "RdGy", - "RdYlBu", - "RdYlGn", - "Spectral", - ) - - group = self.add_argument_group("Image options") - group.add_argument( - "--figsize", default=figsize, help="Figure size `width`x`height` in inches" - ) - group.add_argument( - "--dpi", - default=dpi, - type=int, - help="Physical dot density (dots per inch)", - ) - group.add_argument( - "--format", - default=format, - choices=GRAPHIC_FORMATS, - help="Generate image of format", - ) - group.add_argument( - "--font", default=font, choices=allowed_fonts, help="Font name" - ) - group.add_argument( - "--style", default=style, choices=allowed_styles, help="Axes background" - ) - group.add_argument( - "--diverge", - default="PiYG", - choices=allowed_diverge, - help="Contrasting color scheme", - ) - group.add_argument("--cmap", default=cmap, help="Use this color map") - group.add_argument( - "--notex", default=False, action="store_true", help="Do not use tex" - ) - # https://github.com/tanghaibao/jcvi/issues/515#issuecomment-1327305211 - if ( - "--seed" not in self._option_string_actions - and "--seed" not in group._option_string_actions - ): - group.add_argument( - "--seed", - default=seed, - type=int, - help="Random seed when assigning colors (supported only for some plots)", - ) - - if args is None: - args = sys.argv[1:] - - opts, args = self.parse_args(args) - - assert opts.dpi > 0 - assert "x" in opts.figsize - - iopts = ImageOptions(opts) - - if opts.notex: - logger.info("--notex=%s. latex use is disabled.", opts.notex) - elif not is_tex_available(): - if not bool(which("latex")): - logger.info("`latex` not found. 
latex use is disabled.") - if not bool(which("lp")): - logger.info("`lp` not found. latex use is disabled.") - - setup_theme(style=opts.style, font=opts.font, usetex=iopts.usetex) - - return opts, args, iopts - - def set_dotplot_opts(self, theme: int = 2): - """ - Used in compara.catalog and graphics.dotplot - """ - from jcvi.graphics.base import set1 - - group = self.add_argument_group("Dot plot parameters") - group.add_argument( - "--skipempty", - default=False, - action="store_true", - help="Skip seqids that do not have matches", - ) - group.add_argument( - "--nochpf", - default=False, - action="store_true", - help="Do not change the contig name", - ) - group.add_argument( - "--nostdpf", - default=False, - action="store_true", - help="Do not standardize contig names", - ) - group.add_argument( - "--genomenames", - type=str, - default=None, - help="genome names for labeling axes in the form of qname_sname, " - 'eg. "*Vitis vinifera*_*Oryza sativa*"', - ) - group.add_argument( - "--theme", - choices=[str(x) for x in range(len(set1))], - default=str(theme), - help="Color index within the palette for contig grid boundaries. 
Palette contains: {}".format( - "|".join(set1) - ), - ) - return group - - def set_depth(self, depth=50): - self.add_argument("--depth", default=depth, type=float, help="Desired depth") - - def set_rclip(self, rclip=0): - self.add_argument( - "--rclip", - default=rclip, - type=int, - help="Pair ID is derived from rstrip N chars", - ) - - def set_chr(self, chr=",".join([str(x) for x in range(1, 23)] + ["X", "Y", "MT"])): - self.add_argument("--chr", default=chr, help="Chromosomes to process") - - def set_ref(self, ref="/mnt/ref"): - self.add_argument("--ref", default=ref, help="Reference folder") - - def set_cutoff(self, cutoff=0): - self.add_argument( - "--cutoff", - default=cutoff, - type=int, - help="Distance to call valid links between mates", - ) - - def set_mateorientation(self, mateorientation=None): - self.add_argument( - "--mateorientation", - default=mateorientation, - choices=("++", "--", "+-", "-+"), - help="Use only certain mate orientations", - ) - - def set_mates(self, rclip=0, cutoff=0, mateorientation=None): - self.set_rclip(rclip=rclip) - self.set_cutoff(cutoff=cutoff) - self.set_mateorientation(mateorientation=mateorientation) - - def set_bedpe(self): - self.add_argument( - "--norc", - dest="rc", - default=True, - action="store_false", - help="Do not reverse complement, expect innie reads", - ) - self.add_argument( - "--minlen", default=2000, type=int, help="Minimum insert size" - ) - self.add_argument( - "--maxlen", default=8000, type=int, help="Maximum insert size" - ) - self.add_argument( - "--dup", - default=10, - type=int, - help="Filter duplicates with coordinates within this distance", - ) - - def set_fastq_names(self): - self.add_argument( - "--names", - default="*.fq,*.fastq,*.fq.gz,*.fastq.gz", - help="File names to search, use comma to separate multiple", - ) - - def set_pairs(self): - """ - %prog pairs - - Report how many paired ends mapped, avg distance between paired ends, etc. 
- Paired reads must have the same prefix, use --rclip to remove trailing - part, e.g. /1, /2, or .f, .r, default behavior is to truncate until last - char. - """ - self.usage = self.set_pairs.__doc__ - - self.add_argument( - "--pairsfile", default=None, help="Write valid pairs to pairsfile" - ) - self.add_argument( - "--nrows", default=200000, type=int, help="Only use the first n lines" - ) - self.set_mates() - self.add_argument( - "--pdf", - default=False, - action="store_true", - help="Print PDF instead ASCII histogram", - ) - self.add_argument( - "--bins", default=20, type=int, help="Number of bins in the histogram" - ) - self.add_argument( - "--distmode", - default="ss", - choices=("ss", "ee"), - help="Distance mode between paired reads, ss is outer distance, " - "ee is inner distance", - ) - - def set_sep(self, sep="\t", help="Separator in the tabfile", multiple=False): - if multiple: - help += ", multiple values allowed" - self.add_argument("--sep", default=sep, help=help) - - def set_firstN(self, firstN=100000): - self.add_argument( - "--firstN", default=firstN, type=int, help="Use only the first N reads" - ) - - def set_tag(self, tag=False, specify_tag=False): - if not specify_tag: - self.add_argument( - "--tag", - default=tag, - action="store_true", - help="Add tag (/1, /2) to the read name", - ) - else: - tag_choices = ["/1", "/2"] - self.add_argument( - "--tag", - default=None, - choices=tag_choices, - help="Specify tag to be added to read name", - ) - - def set_phred(self, phred=None): - phdchoices = ("33", "64") - self.add_argument( - "--phred", - default=phred, - choices=phdchoices, - help="Phred score offset {0} [default: guess]".format(phdchoices), - ) - - def set_size(self, size=0): - self.add_argument( - "--size", - default=size, - type=int, - help="Insert mean size, stdev assumed to be 20% around mean", - ) - - def set_trinity_opts(self): - self.set_home("trinity") - self.set_home("hpcgridrunner") - self.set_cpus() - 
self.set_params(prog="Trinity") - topts = self.add_argument_group("General Trinity options") - topts.add_argument( - "--max_memory", - default="128G", - type=str, - help="Jellyfish memory allocation", - ) - topts.add_argument( - "--min_contig_length", - default=90, - type=int, - help="Minimum assembled contig length to report", - ) - topts.add_argument( - "--bflyGCThreads", - default=None, - type=int, - help="Threads for garbage collection", - ) - topts.add_argument( - "--grid_conf_file", - default="JCVI_SGE.0689.conf", - type=str, - help="HpcGridRunner config file for supported compute farms", - ) - topts.add_argument( - "--cleanup", - default=False, - action="store_true", - help="Force clean-up of unwanted files after Trinity run is complete", - ) - ggopts = self.add_argument_group("Genome-guided Trinity options") - ggopts.add_argument( - "--bam", - default=None, - type=str, - help="provide coord-sorted bam file as starting point", - ) - ggopts.add_argument( - "--max_intron", - default=15000, - type=int, - help="maximum allowed intron length", - ) - - def set_pasa_opts(self, action="assemble"): - self.set_home("pasa") - if action == "assemble": - self.set_home("tgi") - self.add_argument( - "--clean", - default=False, - action="store_true", - help="Clean transcripts using tgi seqclean", - ) - self.set_align(pctid=95, pctcov=90, intron=15000, bpsplice=3) - self.add_argument( - "--aligners", - default="blat,gmap", - help="Specify splice aligners to use for mapping", - ) - self.add_argument( - "--fl_accs", - default=None, - type=str, - help="File containing list of FL-cDNA accessions", - ) - self.set_cpus() - self.add_argument( - "--compreh", - default=False, - action="store_true", - help="Run comprehensive transcriptome assembly", - ) - self.set_align(compreh_pctid=95, compreh_pctcov=30) - self.add_argument( - "--prefix", - default="compreh_init_build", - type=str, - help="Prefix for compreh_trans output file names", - ) - elif action == "compare": - 
self.add_argument( - "--annots_gff3", - default=None, - type=str, - help="Reference annotation to load and compare against", - ) - genetic_code = [ - "universal", - "Euplotes", - "Tetrahymena", - "Candida", - "Acetabularia", - ] - self.add_argument( - "--genetic_code", - default="universal", - choices=genetic_code, - help="Choose translation table", - ) - self.add_argument( - "--pctovl", - default=50, - type=int, - help="Minimum pct overlap between gene and FL assembly", - ) - self.add_argument( - "--pct_coding", - default=50, - type=int, - help="Minimum pct of cDNA sequence to be protein coding", - ) - self.add_argument( - "--orf_size", - default=0, - type=int, - help="Minimum size of ORF encoded protein", - ) - self.add_argument( - "--utr_exons", default=2, type=int, help="Maximum number of UTR exons" - ) - self.add_argument( - "--pctlen_FL", - default=70, - type=int, - help="Minimum protein length for comparisons involving " - + "FL assemblies", - ) - self.add_argument( - "--pctlen_nonFL", - default=70, - type=int, - help="Minimum protein length for comparisons involving " - + "non-FL assemblies", - ) - self.add_argument( - "--pctid_prot", - default=70, - type=int, - help="Minimum pctid allowed for protein pairwise comparison", - ) - self.add_argument( - "--pct_aln", - default=70, - type=int, - help="Minimum pct of shorter protein length aligning to " - + "update protein or isoform", - ) - self.add_argument( - "--pctovl_gene", - default=80, - type=int, - help="Minimum pct overlap among genome span of the ORF of " - + "each overlapping gene to allow merging", - ) - self.add_argument( - "--stompovl", - default="", - action="store_true", - help="Ignore alignment results, only consider genome span of ORF", - ) - self.add_argument( - "--trust_FL", - default="", - action="store_true", - help="Trust FL-status of cDNA", - ) - - def set_annot_reformat_opts(self): - self.add_argument( - "--pad0", default=6, type=int, help="Pad gene identifiers with 0" - ) - 
self.add_argument("--prefix", default="Medtr", help="Genome prefix") - self.add_argument( - "--uc", - default=False, - action="store_true", - help="Toggle gene identifier upper case", - ) - - def set_home(self, prog, default=None): - tag = f"--{prog}_home" - if default is None: # Last attempt at guessing the path - try: - default = op.dirname(which(prog)) - except: - default = None - else: - default = op.expanduser(default) - help = f"Home directory for {prog.upper()}" - self.add_argument(tag, default=default, help=help) - - def set_aligner(self, aligner="bowtie"): - valid_aligners = ("bowtie", "bwa") - self.add_argument( - "--aligner", default=aligner, choices=valid_aligners, help="Use aligner" - ) - - def set_verbose(self, help="Print detailed reports"): - self.add_argument("--verbose", default=False, action="store_true", help=help) - - -def ConfigSectionMap(Config, section): - """ - Read a specific section from a ConfigParser() object and return - a dict of all key-value pairs in that section - """ - cfg = {} - options = Config.options(section) - for option in options: - try: - cfg[option] = Config.get(section, option) - if cfg[option] == -1: - logger.debug("Skip: %s", option) - except: - logger.error("Exception on %s", option) - cfg[option] = None - return cfg - - -def get_abs_path(link_name): - source = link_name - if op.islink(source): - source = os.readlink(source) - else: - source = op.basename(source) - - link_dir = op.dirname(link_name) - source = op.normpath(op.join(link_dir, source)) - source = op.abspath(source) - if source == link_name: - return source - else: - return get_abs_path(source) - - -datadir = get_abs_path(op.join(op.dirname(__file__), "../utils/data")) - - -def datafile(x: str, datadir: str = datadir): - """ - Return the full path to the data file in the data directory. 
- """ - return op.join(datadir, x) - - -def splitall(path): - allparts = [] - while True: - path, p1 = op.split(path) - if not p1: - break - allparts.append(p1) - allparts = allparts[::-1] - return allparts - - -def get_module_docstring(filepath): - """Get module-level docstring of Python module at filepath, e.g. 'path/to/file.py'.""" - co = compile(open(filepath).read(), filepath, "exec") - if co.co_consts and isinstance(co.co_consts[0], str): - docstring = co.co_consts[0] - else: - docstring = None - return docstring - - -def dmain(mainfile, type="action"): - cwd = op.dirname(mainfile) - pyscripts = ( - [x for x in glob(op.join(cwd, "*", "__main__.py"))] - if type == "module" - else glob(op.join(cwd, "*.py")) - ) - actions = [] - for ps in sorted(pyscripts): - action = ( - op.basename(op.dirname(ps)) - if type == "module" - else op.basename(ps).replace(".py", "") - ) - if action[0] == "_": # hidden namespace - continue - pd = get_module_docstring(ps) - action_help = ( - [ - x.rstrip(":.,\n") - for x in pd.splitlines(True) - if len(x.strip()) > 10 and x[0] != "%" - ][0] - if pd - else "no docstring found" - ) - actions.append((action, action_help)) - - a = ActionDispatcher(actions) - a.print_help() - - -def backup(filename): - bakname = filename + ".bak" - if op.exists(filename): - logger.debug("Backup `%s` to `%s`", filename, bakname) - sh("mv {0} {1}".format(filename, bakname)) - return bakname - - -def getusername(): - from getpass import getuser - - return getuser() - - -def getdomainname(): - from socket import getfqdn - - return ".".join(str(x) for x in getfqdn().split(".")[1:]) - - -def sh( - cmd, - grid=False, - infile=None, - outfile=None, - errfile=None, - append=False, - background=False, - threaded=None, - log=True, - grid_opts=None, - silent=False, - shell="/bin/bash", - check=False, - redirect_error=None, -): - """ - simple wrapper for system calls - """ - if not cmd: - return 1 - if silent: - outfile = errfile = "/dev/null" - if grid: - from 
jcvi.apps.grid import GridProcess - - pr = GridProcess( - cmd, - infile=infile, - outfile=outfile, - errfile=errfile, - threaded=threaded, - grid_opts=grid_opts, - ) - pr.start() - return pr.jobid - else: - if infile: - cat = "cat" - if infile.endswith(".gz"): - cat = "zcat" - cmd = "{0} {1} |".format(cat, infile) + cmd - if outfile and outfile not in ("-", "stdout"): - if outfile.endswith(".gz"): - cmd += " | gzip" - tag = ">" - if append: - tag = ">>" - cmd += " {0}{1}".format(tag, outfile) - if errfile: - if errfile == outfile: - errfile = "&1" - cmd += " 2>{0}".format(errfile) - if background: - cmd += " &" - - if log: - logger.debug(cmd) - - call_func = check_output if check else call - return call_func(cmd, shell=True, executable=shell, stderr=redirect_error) - - -def Popen(cmd, stdin=None, stdout=PIPE, debug=False, shell="/bin/bash"): - """ - Capture the cmd stdout output to a file handle. - """ - from subprocess import Popen as P - - if debug: - logger.debug(cmd) - # See: - proc = P(cmd, bufsize=1, stdin=stdin, stdout=stdout, shell=True, executable=shell) - return proc - - -def get_system_processor() -> Tuple[str, str]: - """ - Get the system and processor information. - """ - return platform.system(), platform.processor() - - -def is_macOS_arm() -> bool: - """ - Check if the system is macOS on ARM. - """ - system, processor = get_system_processor() - return system == "Darwin" and "arm" in processor - - -def setup_magick_home(): - """ - Set MAGICK_HOME for ImageMagick. - """ - if "MAGICK_HOME" not in os.environ: - if is_macOS_arm(): - magick_home = "/opt/homebrew/opt/imagemagick" - if op.isdir(magick_home): - os.environ["MAGICK_HOME"] = magick_home - else: - logger.warning("MAGICK_HOME not set") - - -def popen(cmd, debug=True, shell="/bin/bash"): - return Popen(cmd, debug=debug, shell=shell).stdout - - -def is_exe(fpath): - return op.isfile(fpath) and os.access(fpath, os.X_OK) - - -def which(program): - """ - Emulates the unix which command. 
- - >>> which("cat") - "/bin/cat" - >>> which("nosuchprogram") - """ - fpath, _ = op.split(program) - if fpath: - if is_exe(program): - return program - else: - for path in os.environ["PATH"].split(os.pathsep): - exe_file = op.join(path, program) - if is_exe(exe_file): - return exe_file - - return None - - -def glob(pathname, pattern=None): - """ - Wraps around glob.glob(), but return a sorted list. - """ - import glob as gl - - if pattern: - pathname = op.join(pathname, pattern) - return natsorted(gl.glob(pathname)) - - -def iglob(pathname, patterns): - """ - Allow multiple file formats. This is also recursive. For example: - - >>> iglob("apps", "*.py,*.pyc") - """ - matches = [] - patterns = patterns.split(",") if "," in patterns else listify(patterns) - for root, dirnames, filenames in os.walk(pathname): - matching = [] - for pattern in patterns: - matching.extend(fnmatch.filter(filenames, pattern)) - for filename in matching: - matches.append(op.join(root, filename)) - return natsorted(matches) - - -def symlink(target, link_name): - try: - os.symlink(target, link_name) - except OSError as e: - if e.errno == errno.EEXIST: - os.remove(link_name) - os.symlink(target, link_name) - - -def mkdir(dirname, overwrite=False): - """ - Wraps around os.mkdir(), but checks for existence first. - """ - if op.isdir(dirname): - if overwrite: - cleanup(dirname) - os.mkdir(dirname) - logger.debug("Overwrite folder `%s`", dirname) - else: - return False # Nothing is changed - else: - try: - os.mkdir(dirname) - except: - os.makedirs(dirname) - logger.debug("`%s` not found. 
Creating new.", dirname) - - return True - - -def is_newer_file(a, b): - """ - Check if the file a is newer than file b - """ - if not (op.exists(a) and op.exists(b)): - return False - am = os.stat(a).st_mtime - bm = os.stat(b).st_mtime - return am > bm - - -def parse_multi_values(param): - values = None - if param: - if op.isfile(param): - values = list(set(x.strip() for x in open(param))) - else: - values = list(set(param.split(","))) - return values - - -def listify(a: TextCollection) -> TextCollection: - """ - Convert something to a list if it is not already a list. - """ - return a if isinstance(a, (list, tuple)) else [a] # type: ignore - - -def last_updated(a: str) -> float: - """ - Check the time since file was last updated. - """ - return time.time() - op.getmtime(a) - - -def need_update(a: TextCollection, b: TextCollection, warn: bool = False) -> bool: - """ - Check if file a is newer than file b and decide whether or not to update - file b. Can generalize to two lists. - - Args: - a: file or list of files - b: file or list of files - warn: whether or not to print warning message - - Returns: - True if file a is newer than file b - """ - a = listify(a) - b = listify(b) - - should_update = ( - any((not op.exists(x)) for x in b) - or all((os.stat(x).st_size == 0 for x in b)) - or any(is_newer_file(x, y) for x in a for y in b) - ) - if (not should_update) and warn: - logger.debug("File `%s` found. Computation skipped.", ", ".join(b)) - return should_update - - -def flatten(input_list: Iterable) -> list: - """ - Flatten a list of lists and stop at the first non-list element. - """ - ans = [] - for i in input_list: - if isinstance(i, Iterable) and not isinstance(i, str): - for subc in flatten(i): - ans.append(subc) - else: - ans.append(i) - return ans - - -def cleanup(*args: Union[str, Iterable]) -> None: - """ - Remove a bunch of files in args; ignore if not found. 
- """ - for path in flatten(args): - if op.exists(path): - if op.isdir(path): - shutil.rmtree(path) - else: - os.remove(path) - - -def get_today(): - """ - Returns the date in 2010-07-14 format - """ - from datetime import date - - return str(date.today()) - - -def ls_ftp(dir): - """List the contents of a remote FTP server path. - - Args: - dir (URL): URL of a remote FTP server path. - - Returns: - [str]: List of remote paths available, analogous to `ls`. - """ - from urllib.parse import urlparse - from ftpretty import ftpretty - - o = urlparse(dir) - - ftp = ftpretty(o.netloc, "anonymous", "anonymous@") - return [op.basename(x) for x in ftp.list(o.path)] - - -def download( - url, filename=None, debug=True, cookies=None, handle_gzip=False, downloader=None -): - """Download URL to local - - Args: - url (str): Link to the file on the internet. - filename (str, optional): Local file name. Defaults to None. - debug (bool, optional): Print debug messages. Defaults to True. - cookies (str, optional): cookies file. Defaults to None. - handle_gzip (bool, optional): Postprocess .gz files, either compress or - uncompress. Defaults to False. - downloader (str, optional): Use a given downloader. One of wget|curl|powershell|insecure. - Defaults to None. - - Returns: - str: Local file name. 
- """ - from urllib.parse import urlsplit - - _, _, path, _, _ = urlsplit(url) - basepath = op.basename(path) - if basepath: - url_gzipped = basepath.endswith(".gz") - filename_gzipped = filename and filename.endswith(".gz") - need_gunzip = url_gzipped and (not filename_gzipped) - need_gzip = (not url_gzipped) and filename_gzipped - if handle_gzip and ( - need_gunzip or need_gzip - ): # One more compress/decompress step after download - target = basepath - else: # Just download - target = filename or basepath - else: - need_gunzip, need_gzip = False, False - target = filename or "index.html" - - success = False - final_filename = filename or target - if op.exists(final_filename): - if debug: - logger.info("File `%s` exists. Download skipped.", final_filename) - success = True - else: - from jcvi.utils.ez_setup import get_best_downloader - - downloader = get_best_downloader(downloader=downloader) - if downloader: - try: - downloader(url, target, cookies=cookies) - success = True - except (CalledProcessError, KeyboardInterrupt) as e: - print(e, file=sys.stderr) - else: - print("Cannot find a suitable downloader", file=sys.stderr) - - if success and handle_gzip: - if need_gunzip: - sh("gzip -dc {}".format(target), outfile=filename) - cleanup(target) - elif need_gzip: - sh("gzip -c {}".format(target), outfile=filename) - cleanup(target) - - if not success: - cleanup(target) - - return final_filename - - -def getfilesize(filename, ratio=None): - rawsize = op.getsize(filename) - if not filename.endswith(".gz"): - return rawsize - - import struct - - fo = open(filename, "rb") - fo.seek(-4, 2) - r = fo.read() - fo.close() - size = struct.unpack(" 2**32: - logger.warning("Gzip file estimated uncompressed size: %d", size) - - return size - - -def main(): - actions = ( - ("expand", "move files in subfolders into the current folder"), - ("less", "enhance the unix `less` command"), - ("mdownload", "multiple download a list of files"), - ("mergecsv", "merge a set of tsv files"), 
- ("notify", "send an email/push notification"), - ("timestamp", "record timestamps for all files in the current folder"), - ("touch", "recover timestamps for files in the current folder"), - ("waitpid", "wait for a PID to finish and then perform desired action"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def mdownload(args): - """ - %prog mdownload links.txt - - Multiple download a list of files. Use formats.html.links() to extract the - links file. - """ - from jcvi.apps.grid import Jobs - - p = OptionParser(mdownload.__doc__) - _, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (linksfile,) = args - links = [(x.strip(),) for x in open(linksfile)] - j = Jobs(download, links) - j.run() - - -def expand(args): - """ - %prog expand */* - - Move files in subfolders into the current folder. Use --symlink to create a - link instead. - """ - p = OptionParser(expand.__doc__) - p.add_argument( - "--symlink", default=False, action="store_true", help="Create symbolic link" - ) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - seen = set() - for a in args: - oa = a.replace("/", "_") - if oa in seen: - logger.debug("Name collision `%s`, ignored", oa) - continue - - cmd = "cp -s" if opts.symlink else "mv" - cmd += " {0} {1}".format(a, oa) - sh(cmd) - seen.add(oa) - - -def fname(): - return sys._getframe().f_back.f_code.co_name - - -def get_times(filename): - st = os.stat(filename) - atime = st.st_atime - mtime = st.st_mtime - return atime, mtime - - -def timestamp(args): - """ - %prog timestamp path > timestamp.info - - Record the timestamps for all files in the current folder. - filename atime mtime - - This file can be used later to recover previous timestamps through touch(). 
- """ - p = OptionParser(timestamp.__doc__) - _, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (path,) = args - for root, _, files in os.walk(path): - for f in files: - filename = op.join(root, f) - atime, mtime = get_times(filename) - print(filename, atime, mtime) - - -def touch(args): - """ - %prog touch timestamp.info - - Recover timestamps for files in the current folder. - CAUTION: you must execute this in the same directory as timestamp(). - """ - p = OptionParser(touch.__doc__) - _, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (info,) = args - fp = open(info) - for row in fp: - path, atime, mtime = row.split() - atime = float(atime) - mtime = float(mtime) - current_atime, current_mtime = get_times(path) - - # Check if the time has changed, with resolution up to 1 sec - if int(atime) == int(current_atime) and int(mtime) == int(current_mtime): - continue - - times = [ctime(x) for x in (current_atime, current_mtime, atime, mtime)] - msg = "{0} : ".format(path) - msg += "({0}, {1}) => ({2}, {3})".format(*times) - print(msg, file=sys.stderr) - os.utime(path, (atime, mtime)) - - -def snapshot(fp, p, fsize, counts=None): - pos = int(p * fsize) - print("==>> File `{0}`: {1} ({2}%)".format(fp.name, pos, int(p * 100))) - fp.seek(pos) - next(fp) - for i, row in enumerate(fp): - if counts and i > counts: - break - try: - sys.stdout.write(row) - except IOError: - break - - -def less(args): - """ - %prog less filename position | less - - Enhance the unix `less` command by seeking to a file location first. This is - useful to browse big files. Position is relative 0.00 - 1.00, or bytenumber. 
- - $ %prog less myfile 0.1 # Go to 10% of the current file and streaming - $ %prog less myfile 0.1,0.2 # Stream at several positions - $ %prog less myfile 100 # Go to certain byte number and streaming - $ %prog less myfile 100,200 # Stream at several positions - $ %prog less myfile all # Generate a snapshot every 10% (10%, 20%, ..) - """ - from jcvi.formats.base import must_open - - p = OptionParser(less.__doc__) - _, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - filename, pos = args - fsize = getfilesize(filename) - - if pos == "all": - pos = [x / 10.0 for x in range(0, 10)] - else: - pos = [float(x) for x in pos.split(",")] - - if pos[0] > 1: - pos = [x / fsize for x in pos] - - if len(pos) > 1: - counts = 20 - else: - counts = None - - fp = must_open(filename) - for p in pos: - snapshot(fp, p, fsize, counts=counts) - - -# notification specific variables -valid_notif_methods = ["email"] -available_push_api = {"push": ["pushover", "nma", "pushbullet"]} - - -def pushover( - message, token, user, title="JCVI: Job Monitor", priority=0, timestamp=None -): - """ - pushover.net python API - - - """ - assert -1 <= priority <= 2, "Priority should be an int() between -1 and 2" - - if timestamp is None: - from time import time - - timestamp = int(time()) - - retry, expire = (300, 3600) if priority == 2 else (None, None) - - conn = HTTPSConnection("api.pushover.net:443") - conn.request( - "POST", - "/1/messages.json", - urlencode( - { - "token": token, - "user": user, - "message": message, - "title": title, - "priority": priority, - "timestamp": timestamp, - "retry": retry, - "expire": expire, - } - ), - {"Content-type": "application/x-www-form-urlencoded"}, - ) - conn.getresponse() - - -def nma(description, apikey, event="JCVI: Job Monitor", priority=0): - """ - notifymyandroid.com API - - - """ - assert -2 <= priority <= 2, "Priority should be an int() between -2 and 2" - - conn = HTTPSConnection("www.notifymyandroid.com") - 
conn.request( - "POST", - "/publicapi/notify", - urlencode( - { - "apikey": apikey, - "application": "python notify", - "event": event, - "description": description, - "priority": priority, - } - ), - {"Content-type": "application/x-www-form-urlencoded"}, - ) - conn.getresponse() - - -def pushbullet(body, apikey, device, title="JCVI: Job Monitor"): - """ - pushbullet.com API - - - """ - import base64 - - headers = {} - auth = base64.encodestring("{0}:".format(apikey).encode("utf-8")).strip() - headers["Authorization"] = "Basic {0}".format(auth) - headers["Content-type"] = "application/x-www-form-urlencoded" - - conn = HTTPSConnection("api.pushbullet.com".format(apikey)) - conn.request( - "POST", - "/api/pushes", - urlencode({"iden": device, "type": "note", "title": title, "body": body}), - headers, - ) - conn.getresponse() - - -def pushnotify(subject, message, api="pushover", priority=0, timestamp=None): - """ - Send push notifications using pre-existing APIs - - Requires a config `pushnotify.ini` file in the user home area containing - the necessary api tokens and user keys. - - Default API: "pushover" - - Config file format: - ------------------- - [pushover] - token: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx - user: yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy - - [nma] - apikey: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz - - [pushbullet] - apikey: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb - iden: dddddddddddddddddddddddddddddddddddd - """ - assert ( - type(priority) is int and -1 <= priority <= 2 - ), "Priority should be and int() between -1 and 2" - - cfgfile = op.join(op.expanduser("~"), "pushnotify.ini") - Config = ConfigParser() - if op.exists(cfgfile): - Config.read(cfgfile) - else: - sys.exit( - "Push notification config file `{0}`".format(cfgfile) + " does not exist!" 
- ) - - if api == "pushover": - cfg = ConfigSectionMap(Config, api) - token, key = cfg["token"], cfg["user"] - pushover( - message, token, key, title=subject, priority=priority, timestamp=timestamp - ) - elif api == "nma": - cfg = ConfigSectionMap(Config, api) - apikey = cfg["apikey"] - nma(message, apikey, event=subject, priority=priority) - elif api == "pushbullet": - cfg = ConfigSectionMap(Config, api) - apikey, iden = cfg["apikey"], cfg["iden"] - pushbullet(message, apikey, iden, title=subject, type="note") - - -def send_email(fromaddr, toaddr, subject, message): - """ - Send an email message - """ - from smtplib import SMTP - from email.mime.text import MIMEText - - SERVER = "localhost" - _message = MIMEText(message) - _message["Subject"] = subject - _message["From"] = fromaddr - _message["To"] = ", ".join(toaddr) - - server = SMTP(SERVER) - server.sendmail(fromaddr, toaddr, _message.as_string()) - server.quit() - - -def get_email_address(whoami="user"): - """Auto-generate the FROM and TO email address""" - if whoami == "user": - username = getusername() - domain = getdomainname() - - myemail = "{0}@{1}".format(username, domain) - return myemail - else: - fromaddr = "notifier-donotreply@{0}".format(getdomainname()) - return fromaddr - - -def is_valid_email(email): - """ - RFC822 Email Address Regex - -------------------------- - - Originally written by Cal Henderson - c.f. http://iamcal.com/publish/articles/php/parsing_email/ - - Translated to Python by Tim Fletcher, with changes suggested by Dan Kubb. 
- - Licensed under a Creative Commons Attribution-ShareAlike 2.5 License - http://creativecommons.org/licenses/by-sa/2.5/ - """ - import re - - qtext = "[^\\x0d\\x22\\x5c\\x80-\\xff]" - dtext = "[^\\x0d\\x5b-\\x5d\\x80-\\xff]" - atom = "[^\\x00-\\x20\\x22\\x28\\x29\\x2c\\x2e\\x3a-\\x3c\\x3e\\x40\\x5b-\\x5d\\x7f-\\xff]+" - quoted_pair = "\\x5c[\\x00-\\x7f]" - domain_literal = "\\x5b(?:%s|%s)*\\x5d" % (dtext, quoted_pair) - quoted_string = "\\x22(?:%s|%s)*\\x22" % (qtext, quoted_pair) - domain_ref = atom - sub_domain = "(?:%s|%s)" % (domain_ref, domain_literal) - word = "(?:%s|%s)" % (atom, quoted_string) - domain = "%s(?:\\x2e%s)*" % (sub_domain, sub_domain) - local_part = "%s(?:\\x2e%s)*" % (word, word) - addr_spec = "%s\\x40%s" % (local_part, domain) - - email_address = re.compile(r"\A%s\Z" % addr_spec) - if email_address.match(email): - return True - return False - - -def notify(args): - """ - %prog notify "Message to be sent" - - Send a message via email/push notification. - - Email notify: Recipient email address is constructed by joining the login `username` - and `dnsdomainname` of the server - - Push notify: Uses available API - """ - valid_notif_methods.extend(available_push_api.keys()) - - fromaddr = get_email_address(whoami="notifier") - - p = OptionParser(notify.__doc__) - p.add_argument( - "--method", - default="email", - choices=valid_notif_methods, - help="Specify the mode of notification", - ) - p.add_argument( - "--subject", - default="JCVI: job monitor", - help="Specify the subject of the notification message", - ) - p.set_email() - - g1 = p.add_argument_group("Optional `push` parameters") - g1.add_argument( - "--api", - default="pushover", - choices=flatten(available_push_api.values()), - help="Specify API used to send the push notification", - ) - g1.add_argument( - "--priority", default=0, type=int, help="Message priority (-1 <= p <= 2)" - ) - g1.add_argument( - "--timestamp", - default=None, - type=int, - dest="timestamp", - help="Message 
timestamp in unix format", - ) - - opts, args = p.parse_args(args) - - if len(args) == 0: - logger.error("Please provide a brief message to be sent") - sys.exit(not p.print_help()) - - subject = opts.subject - message = " ".join(args).strip() - - if opts.method == "email": - toaddr = opts.email.split(",") # TO address should be in a list - for addr in toaddr: - if not is_valid_email(addr): - logger.debug("Email address `%s` is not valid!", addr) - sys.exit() - send_email(fromaddr, toaddr, subject, message) - else: - pushnotify( - subject, - message, - api=opts.api, - priority=opts.priority, - timestamp=opts.timestamp, - ) - - -def pid_exists(pid): - """Check whether pid exists in the current process table.""" - if pid < 0: - return False - - try: - os.kill(pid, 0) - except OSError as e: - return e.errno == errno.EPERM - else: - return True - - -class TimeoutExpired(Exception): - pass - - -def _waitpid(pid, interval=None, timeout=None): - """ - Wait for process with pid 'pid' to terminate and return its - exit status code as an integer. - - If pid is not a children of os.getpid() (current process) just - waits until the process disappears and return None. - - If pid does not exist at all return None immediately. - - Raise TimeoutExpired on timeout expired (if specified). 
- - Source: http://code.activestate.com/recipes/578022-wait-for-pid-and-check-for-pid-existance-posix - """ - - def check_timeout(delay): - if timeout is not None: - if time.time() >= stop_at: - raise TimeoutExpired - time.sleep(delay) - return min(delay * 2, interval) - - if timeout is not None: - waitcall = lambda: os.waitpid(pid, os.WNOHANG) - stop_at = time.time() + timeout - else: - waitcall = lambda: os.waitpid(pid, 0) - - delay = 0.0001 - while 1: - try: - retpid, status = waitcall() - except OSError as err: - if err.errno == errno.EINTR: - delay = check_timeout(delay) - continue - elif err.errno == errno.ECHILD: - # This has two meanings: - # - pid is not a child of os.getpid() in which case - # we keep polling until it's gone - # - pid never existed in the first place - # In both cases we'll eventually return None as we - # can't determine its exit status code. - while 1: - if pid_exists(pid): - delay = check_timeout(delay) - else: - return - else: - raise - else: - if retpid == 0: - # WNOHANG was used, pid is still running - delay = check_timeout(delay) - continue - - # process exited due to a signal; return the integer of - # that signal - if os.WIFSIGNALED(status): - return os.WTERMSIG(status) - # process exited using exit(2) system call; return the - # integer exit(2) system call has been called with - elif os.WIFEXITED(status): - return os.WEXITSTATUS(status) - else: - # should never happen - raise RuntimeError("unknown process exit status") - - -def waitpid(args): - """ - %prog waitpid PID ::: "./command_to_run param1 param2 ...." 
- - Given a PID, this script will wait for the PID to finish running and - then perform a desired action (notify user and/or execute a new command) - - Specify "--notify=METHOD` to send the user a notification after waiting for PID - Specify `--grid` option to send the new process to the grid after waiting for PID - """ - import shlex - - valid_notif_methods.extend(flatten(available_push_api.values())) - - p = OptionParser(waitpid.__doc__) - p.add_argument( - "--notify", - default="email", - choices=valid_notif_methods, - help="Specify type of notification to be sent after waiting", - ) - p.add_argument( - "--interval", - default=120, - type=int, - help="Specify PID polling interval in seconds", - ) - p.add_argument("--message", help="Specify notification message") - p.set_email() - p.set_grid() - opts, args = p.parse_args(args) - - if len(args) == 0: - sys.exit(not p.print_help()) - - sep = ":::" - cmd = None - if sep in args: - sepidx = args.index(sep) - cmd = " ".join(args[sepidx + 1 :]).strip() - args = args[:sepidx] - - pid = int(" ".join(args).strip()) - - status = pid_exists(pid) - if status: - if opts.message: - msg = opts.message - else: - get_origcmd = "ps -p {0} -o cmd h".format(pid) - msg = check_output(shlex.split(get_origcmd)).strip() - _waitpid(pid, interval=opts.interval) - else: - logger.debug("Process with PID %d does not exist", pid) - sys.exit() - - if opts.notify: - notifycmd = ["[{0}] `{1}`".format(gethostname(), msg)] - if opts.notify != "email": - notifycmd.append("--method={0}".format("push")) - notifycmd.append("--api={0}".format(opts.notify)) - else: - notifycmd.append("--email={0}".format(opts.email)) - notify(notifycmd) - - if cmd is not None: - bg = False if opts.grid else True - sh(cmd, grid=opts.grid, background=bg) - - -def get_config(path): - config = RawConfigParser() - try: - config.read(path) - except ParsingError: - e = sys.exc_info()[1] - logger.error( - "There was a problem reading or parsing your credentials file: %s", - 
e.args[0], - ) - return config - - -def getpath( - cmd: str, - name: Optional[str] = None, - url: Optional[str] = None, - cfg: str = "~/.jcvirc", - warn: str = "exit", -) -> Optional[str]: - """ - Get install locations of common binaries - First, check ~/.jcvirc file to get the full path - If not present, ask on the console and store - """ - p = which(cmd) # if in PATH, just returns it - if p: - return p - - PATH = "Path" - config = RawConfigParser() - cfg = op.expanduser(cfg) - changed = False - if op.exists(cfg): - config.read(cfg) - - assert name is not None, "Need a program name" - - try: - fullpath = config.get(PATH, name) - except NoSectionError: - config.add_section(PATH) - - try: - fullpath = config.get(PATH, name) - except NoOptionError: - msg = f"=== Configure path for {name} ===\n" - if url: - msg += f"URL: {url}\n" - msg += f"[Directory that contains `{cmd}`]: " - fullpath = input(msg).strip() - - path = op.join(op.expanduser(fullpath), cmd) - if is_exe(path): - config.set(PATH, name, fullpath) - changed = True - else: - err_msg = f"Cannot execute binary `{path}`. Please verify and rerun." - if warn == "exit": - logger.fatal(err_msg) - else: - logger.warning(err_msg) - return None - - if changed: - configfile = open(cfg, "w") - config.write(configfile) - configfile.close() - logger.debug("Configuration written to `%s`", cfg) - - return path - - -def inspect(object): - """A better dir() showing attributes and values""" - for k in dir(object): - try: - details = getattr(object, k) - except Exception as e: - details = e - - try: - details = str(details) - except Exception as e: - details = e - - print("{}: {}".format(k, details), file=sys.stderr) - - -def sample_N(a: Collection, N: int, seed: Optional[int] = None) -> List: - """ - When size of N is > size of a, random.sample() will emit an error: - ValueError: sample larger than population - - This method handles such restrictions by repeatedly sampling when that - happens. 
Guaranteed to cover all items if N is > size of a. - - Examples: - >>> sample_N([1, 2, 3], 2, seed=666) - [2, 3] - >>> sample_N([1, 2, 3], 3, seed=666) - [2, 3, 1] - >>> sample_N([1, 2, 3], 4, seed=666) - [2, 3, 1, 2] - """ - import random - - random.seed(seed) - - ret = [] - while N > len(a): - ret += random.sample(a, len(a)) - N -= len(a) - - return ret + random.sample(a, N) - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/biomart.py b/jcvi/apps/biomart.py deleted file mode 100644 index d5f9dcac..00000000 --- a/jcvi/apps/biomart.py +++ /dev/null @@ -1,426 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Builds the queries for Globus and BioMart servie, usefu for extraction of -phytozome data sets. Certain portion of the codes are ported from R package -`biomaRt` (thanks). -""" -import sys -import urllib - -from urllib.parse import urljoin -from xml.etree.ElementTree import ElementTree, Element, SubElement, tostring - -from .base import ActionDispatcher, OptionParser, download, logger - - -class GlobusXMLParser(ElementTree): - def __init__(self, xml_file): - """Parse an Globus directory listing XML file - - Args: - xml_file (str): Path to the XML file - """ - with open(xml_file) as fp: - self.parse(fp) - - def get_genomes(self): - """ - Only folders containing `assembly` and `annotation` are of interest. - """ - root = PhytozomePath(next(self.iter(tag="organismDownloads"))) - genomes = {} - for child in root.values(): - if child.has_genome_release: - genomes[child.name] = child - - # early_release - early_release = root.get("early_release") - if early_release: - for child in early_release.values(): - if child.has_genome_release: - genomes[child.name] = child - - return genomes - - -class PhytozomePath(dict): - TAGS_OF_INTEREST = ("organismDownloads", "folder", "file") - - def __init__(self, element): - """Deserialize XML => dict-like structure to ease navigation - between folders. Keys are folder or file names. 
- - Args: - element (ElementTree): XML parse tree - """ - tag = element.tag - assert tag in self.TAGS_OF_INTEREST - self.url = None - if tag == "file": - self.name = element.attrib["filename"] - self.url = element.attrib["url"] - else: - self.name = element.attrib["name"] - self.tag = tag - for child in list(element): - if child.tag not in self.TAGS_OF_INTEREST: - continue - child = PhytozomePath(child) - self[child.name] = child - - @property - def has_genome_release(self): - """Only the folders that contain both `assembly` and `annotation` are of interest here.""" - return "assembly" in self and "annotation" in self - - def download(self, name, base_url, cookies, downloader=None): - """Download the file if it has an URL. Otherwise, this will recursively search the children. - - See also: - - - Args: - name (str, optional): Name of the file. Defaults to None. - base_url (str): Link to the file on the internet. - cookies (str, optional): cookies file. Defaults to None. - downloader (str, optional): Use a given downloader. One of wget|curl|powershell|insecure. - Defaults to None. 
- """ - if self.name == name and base_url and self.url: - url = urljoin(base_url, self.url) - download( - url, filename=name, debug=True, cookies=cookies, downloader=downloader - ) - else: - for child_name, child in self.items(): - if child_name == name: - child.download(name, base_url, cookies, downloader=downloader) - return name - - def __repr__(self): - return "{}: [{}]".format(self.name, ", ".join(repr(v) for v in self)) - - -class MartXMLParser(ElementTree): - def __init__(self, xml_data): - self.parse(xml_data) - - def parse_marts(self): - for t in self.getiterator("MartURLLocation"): - if t.attrib["visible"] == "1": - yield Mart(**t.attrib) - - def parse_configuration(self): - # the attributes - for t in self.getiterator("AttributeDescription"): - yield Attribute(**t.attrib) - - # the filters - for t in self.getiterator("FilterDescription"): - f = Filter(**t.attrib) - options = [Option(**x.attrib) for x in t.getiterator("Option")] - f.add_arguments(options) - yield f - - -class Mart(dict): - def __init__( - self, - host="www.biomart.org", - path="/biomart/martservice", - port="80", - name="ensembl", - virtual_schema="default", - **attrib - ): - - self.__dict__ = attrib.copy() - self.__dict__.update( - x for x in locals().items() if x[0] not in ("self", "attrib") - ) - - self.registry = {} - self.url = "http://{0}:{1}{2}".format(self.host, self.port, path) - self.display_name = self.__dict__.get("displayName", "") - self.virtual_schema = self.__dict__.get( - "serverVirtualSchema", self.virtual_schema - ) - - def __str__(self): - return "\t".join((self.name, self.display_name, self.virtual_schema)) - - def get_registry(self, archive=False): - type = "registry_archive" if archive else "registry" - params = urllib.urlencode(dict(type=type)) - xml_data = urllib.urlopen(self.url, params) - - parser = MartXMLParser(xml_data) - for t in parser.parse_marts(): - self.registry[t.name] = t - - def list_registry(self): - if len(self.registry) == 0: - self.get_registry() 
- for m in sorted(self.registry.values()): - print(m) - - def get_datasets(self): - params = urllib.urlencode(dict(type="datasets", mart=self.name)) - web_data = urllib.urlopen(self.url, params) - - for row in web_data: - atoms = row.strip().split("\t") - if atoms[0] == "TableSet": - name, description, last_updated = atoms[1], atoms[2], atoms[-1] - self[name] = Dataset(name, description, last_updated, self) - - def list_datasets(self): - if len(self) == 0: - self.get_datasets() - for m in sorted(self.values(), key=str): - print(m) - - -class Dataset(object): - """ - Connect to a specified dataset in the database - """ - - def __init__(self, name, description, last_updated, mart): - self.name = name - self.description = description - self.last_updated = last_updated - self.mart = mart - - self.attributes = {} - self.filters = {} - - def __str__(self): - return "\t".join((self.name, self.description, self.last_updated)) - - def get_configuration(self): - params = urllib.urlencode(dict(type="configuration", dataset=self.name)) - xml_data = urllib.urlopen(self.mart.url, params) - - parser = MartXMLParser(xml_data) - for t in parser.parse_configuration(): - if isinstance(t, Attribute): - self.attributes[t.internalName] = t - elif isinstance(t, Filter): - self.filters[t.internalName] = t - - def list_attributes(self): - if len(self.attributes) == 0: - self.get_configuration() - for m in sorted(self.attributes.values()): - print(m) - - def list_filters(self): - if len(self.filters) == 0: - self.get_configuration() - for m in sorted(self.filters.values()): - print(m) - - def query(self, filters={}, attributes=()): - q = MartQuery(dataset=self) - q.add_filters(**filters) - q.add_attributes(attributes) - return q.execute() - - -class MartQuery(object): - def __init__( - self, dataset=None, formatter="TSV", header="0", unique_rows="0", count="0" - ): - self.dataset = dataset - self.url = dataset.mart.url - self.virtual_schema = dataset.mart.virtual_schema - self.formatter = 
formatter - self.header = header - self.unique_rows = unique_rows - self.count = count - self.name = dataset.name - self.attributes = [] - self.filters = {} - - def add_filters(self, **filters): - for key, val in filters.items(): - self.filters[key] = str(val) - - def add_attributes(self, attributes): - for key in attributes: - self.attributes.append(key) - - def set_header(self, flag): - self.header = str(flag) - - def set_formatter(self, format="TSV"): - self.formatter = format - - def build_query(self): - query_t = Element( - "Query", - dict( - virtualSchemaName=self.virtual_schema, - formatter=self.formatter, - header=self.header, - uniqueRows=self.unique_rows, - count=self.count, - datasetConfigVersion="0.6", - ), - ) - dataset_t = SubElement( - query_t, "Dataset", dict(name=self.name, interface="default") - ) - for key, val in self.filters.items(): - SubElement(dataset_t, "Filter", dict(name=key, value=val)) - for attribute in self.attributes: - SubElement(dataset_t, "Attribute", dict(name=attribute)) - - return tostring(query_t) - - def execute(self, debug=False): - xml_data = self.build_query() - if debug: - print(xml_data, file=sys.stderr) - data = urllib.urlencode(dict(query=xml_data)) - return urllib.urlopen(self.url, data) - - -class MartArgument(object): - def __init__(self, **attrib): - self.__dict__ = attrib.copy() - - def __str__(self): - return self.__class__.__name__ + str(self.__dict__) - - -class Attribute(MartArgument): - """ - Attributes define the values that we are retrieving. - - For example, the gene start, stop, or chromosomes it belongs to - """ - - pass - - -class Filter(MartArgument): - """ - Filters define a restriction on the query. - - For example, you can restrict output to all genes located on chr. 
1 - then use the filter chromosome_name with value `1` - """ - - def add_arguments(self, options): - self.options = dict((x.displayName, x) for x in options) - - -class Option(MartArgument): - pass - - -class Sequence(object): - def __init__(self, seq): - self.seq = seq - - def export_fasta(self): - pass - - -def test_biomart(): - bm = Mart() - bm.list_registry() - bm.list_datasets() - return bm - - -def get_ensembl_dataset(): - bm = Mart() - ensembl = bm.registry["ensembl"] - ensembl.get_datasets() - dataset = ensembl["mmusculus_gene_ensembl"] - return dataset - - -def get_phytozome_dataset(): - # Either of the following method is okay - # bm = Mart() - # phytozome = bm.registry["phytozome_mart"] - - phytozome = Mart( - host="www.phytozome.net", - port="80", - name="phytozome_mart", - virtual_schema="zome_mart", - ) - - phytozome.get_datasets() - dataset = phytozome["phytozome"] - return dataset - - -def get_bed_from_phytozome(genelist): - """ - >>> data = get_bed_from_phytozome(["AT5G54690", "AT1G01010"]) - >>> print data.read() #doctest: +NORMALIZE_WHITESPACE - Chr1 3631 5899 AT1G01010 - Chr5 22219224 22221840 AT5G54690 - - """ - genelist = ",".join(genelist) - dataset = get_phytozome_dataset() - filters = dict(gene_name_filter=genelist) - attributes = "chr_name1,gene_chrom_start,gene_chrom_end,gene_name1".split(",") - - data = dataset.query(filters=filters, attributes=attributes) - return data - - -def main(): - - actions = (("bed", "get gene bed from phytozome"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def bed(args): - """ - %prog bed genes.ids - - Get gene bed from phytozome. `genes.ids` contains the list of gene you want - to pull from Phytozome. Write output to .bed file. 
- """ - p = OptionParser(bed.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (idsfile,) = args - ids = set(x.strip() for x in open(idsfile)) - data = get_bed_from_phytozome(list(ids)) - - pf = idsfile.rsplit(".", 1)[0] - bedfile = pf + ".bed" - fw = open(bedfile, "w") - for i, row in enumerate(data): - row = row.strip() - if row == "": - continue - - print(row, file=fw) - - logger.debug("A total of %d records written to `%s`.", i + 1, bedfile) - - -if __name__ == "__main__": - - import doctest - - doctest.testmod() - - main() diff --git a/jcvi/apps/blastplus.py b/jcvi/apps/blastplus.py deleted file mode 100755 index 071cd6f2..00000000 --- a/jcvi/apps/blastplus.py +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -import os.path as op -import sys - -from multiprocessing import Lock - -from ..formats.base import must_open, split - -from .align import run_formatdb -from .base import OptionParser, Popen, logger -from .grid import Jobs - - -def blastplus(out_fh, cmd, query, lock): - cmd += " -query {0}".format(query) - proc = Popen(cmd) - - logger.debug("job <%d> started: %s", proc.pid, cmd) - for row in proc.stdout: - if row[0] == "#": - continue - lock.acquire() - out_fh.write(row) - out_fh.flush() - lock.release() - logger.debug("job <%d> finished", proc.pid) - - -def main(): - """ - %prog database.fa query.fa [options] - - Wrapper for NCBI BLAST+. - """ - p = OptionParser(main.__doc__) - - p.add_argument( - "--format", - default=" '6 qseqid sseqid pident length " - "mismatch gapopen qstart qend sstart send evalue bitscore' ", - help='0-11, learn more with "blastp -help"', - ) - p.add_argument( - "--path", - dest="blast_path", - default=None, - help="specify BLAST+ path including the program name", - ) - p.add_argument( - "--prog", - dest="blast_program", - default="blastp", - help="specify BLAST+ program to use. 
See complete list here: " - "http://www.ncbi.nlm.nih.gov/books/NBK52640/#chapter1.Installation", - ) - p.set_align(evalue=0.01) - p.add_argument( - "--best", - default=1, - type=int, - help="Only look for best N hits", - ) - p.set_cpus() - p.add_argument( - "--nprocs", - default=1, - type=int, - help="number of BLAST processes to run in parallel. " - + "split query.fa into `nprocs` chunks, " - + "each chunk uses -num_threads=`cpus`", - ) - p.set_params() - p.set_outfile() - opts, args = p.parse_args() - - if len(args) != 2 or opts.blast_program is None: - sys.exit(not p.print_help()) - - bfasta_fn, afasta_fn = args - for fn in (afasta_fn, bfasta_fn): - assert op.exists(fn) - - afasta_fn = op.abspath(afasta_fn) - bfasta_fn = op.abspath(bfasta_fn) - out_fh = must_open(opts.outfile, "w") - - extra = opts.extra - blast_path = opts.blast_path - blast_program = opts.blast_program - - blast_bin = blast_path or blast_program - if op.basename(blast_bin) != blast_program: - blast_bin = op.join(blast_bin, blast_program) - - nprocs, cpus = opts.nprocs, opts.cpus - if nprocs > 1: - logger.debug("Dispatch job to %d processes", nprocs) - outdir = "outdir" - fs = split([afasta_fn, outdir, str(nprocs)]) - queries = fs.names - else: - queries = [afasta_fn] - - dbtype = "prot" if op.basename(blast_bin) in ("blastp", "blastx") else "nucl" - - db = bfasta_fn - if dbtype == "prot": - nin = db + ".pin" - else: - nin00 = db + ".00.nin" - nin = nin00 if op.exists(nin00) else (db + ".nin") - - run_formatdb(infile=db, outfile=nin, dbtype=dbtype) - - lock = Lock() - - blastplus_template = "{0} -db {1} -outfmt {2}" - blast_cmd = blastplus_template.format(blast_bin, bfasta_fn, opts.format) - blast_cmd += " -evalue {0} -max_target_seqs {1}".format(opts.evalue, opts.best) - blast_cmd += " -num_threads {0}".format(cpus) - if extra: - blast_cmd += " " + extra.strip() - - args = [(out_fh, blast_cmd, query, lock) for query in queries] - g = Jobs(target=blastplus, args=args) - g.run() - - -if __name__ 
== "__main__": - main() diff --git a/jcvi/apps/bowtie.py b/jcvi/apps/bowtie.py deleted file mode 100644 index bf626b0f..00000000 --- a/jcvi/apps/bowtie.py +++ /dev/null @@ -1,213 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Run bowtie2 command and skips the manual run of naming intermediate output -files. Bowtie2 help: - - -""" -import sys - -from ..formats.base import BaseFile -from ..formats.sam import get_prefix, get_samfile, output_bam -from ..utils.cbook import percentage - -from .base import ( - ActionDispatcher, - OptionParser, - logger, - need_update, - sh, - get_abs_path, -) - - -first_tag = lambda fp: next(fp).split()[0] - - -class BowtieLogFile(BaseFile): - """ - Simple file that contains mapping rate: - - 100000 reads; of these: - 100000 (100.00%) were unpaired; of these: - 88453 (88.45%) aligned 0 times - 9772 (9.77%) aligned exactly 1 time - 1775 (1.77%) aligned >1 times - 11.55% overall alignment rate - """ - - def __init__(self, filename): - - super().__init__(filename) - fp = open(filename) - self.total = int(first_tag(fp)) - self.unpaired = int(first_tag(fp)) - self.unmapped = int(first_tag(fp)) - self.unique = int(first_tag(fp)) - self.multiple = int(first_tag(fp)) - self.mapped = self.unique + self.multiple - self.rate = float(first_tag(fp).rstrip("%")) - fp.close() - - def __str__(self): - return "Total mapped: {0}".format(percentage(self.mapped, self.total)) - - __repr__ = __str__ - - -def main(): - - actions = ( - ("index", "wraps bowtie2-build"), - ("align", "wraps bowtie2"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def check_index(dbfile): - dbfile = get_abs_path(dbfile) - safile = dbfile + ".1.bt2" - if need_update(dbfile, safile): - cmd = "bowtie2-build {0} {0}".format(dbfile) - sh(cmd) - else: - logger.error("`{0}` exists. `bowtie2-build` already run.".format(safile)) - - return dbfile - - -def index(args): - """ - %prog index database.fasta - - Wrapper for `bowtie2-build`. Same interface. 
- """ - p = OptionParser(index.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (dbfile,) = args - check_index(dbfile) - - -def align(args): - """ - %prog align database.fasta read1.fq [read2.fq] - - Wrapper for `bowtie2` single-end or paired-end, depending on the number of args. - """ - from jcvi.formats.fastq import guessoffset - - p = OptionParser(align.__doc__) - p.set_firstN(firstN=0) - p.add_argument( - "--full", - default=False, - action="store_true", - help="Enforce end-to-end alignment [default: local]", - ) - p.add_argument( - "--reorder", - default=False, - action="store_true", - help="Keep the input read order", - ) - p.add_argument( - "--null", - default=False, - action="store_true", - help="Do not write to SAM/BAM output", - ) - p.add_argument( - "--fasta", default=False, action="store_true", help="Query reads are FASTA" - ) - p.set_cutoff(cutoff=800) - p.set_mateorientation(mateorientation="+-") - p.set_sam_options(bowtie=True) - - opts, args = p.parse_args(args) - extra = opts.extra - mo = opts.mateorientation - if mo == "+-": - extra += "" - elif mo == "-+": - extra += "--rf" - else: - extra += "--ff" - - PE = True - if len(args) == 2: - logger.debug("Single-end alignment") - PE = False - elif len(args) == 3: - logger.debug("Paired-end alignment") - else: - sys.exit(not p.print_help()) - - firstN = opts.firstN - mapped = opts.mapped - unmapped = opts.unmapped - fasta = opts.fasta - gl = "--end-to-end" if opts.full else "--local" - - dbfile, readfile = args[0:2] - dbfile = check_index(dbfile) - prefix = get_prefix(readfile, dbfile) - samfile, mapped, unmapped = get_samfile( - readfile, dbfile, bowtie=True, mapped=mapped, unmapped=unmapped, bam=opts.bam - ) - logfile = prefix + ".log" - if not fasta: - offset = guessoffset([readfile]) - - if not need_update(dbfile, samfile): - logger.error("`{0}` exists. 
`bowtie2` already run.".format(samfile)) - return samfile, logfile - - cmd = "bowtie2 -x {0}".format(dbfile) - if PE: - r1, r2 = args[1:3] - cmd += " -1 {0} -2 {1}".format(r1, r2) - cmd += " --maxins {0}".format(opts.cutoff) - mtag, utag = "--al-conc", "--un-conc" - else: - cmd += " -U {0}".format(readfile) - mtag, utag = "--al", "--un" - - if mapped: - cmd += " {0} {1}".format(mtag, mapped) - if unmapped: - cmd += " {0} {1}".format(utag, unmapped) - - if firstN: - cmd += " --upto {0}".format(firstN) - cmd += " -p {0}".format(opts.cpus) - if fasta: - cmd += " -f" - else: - cmd += " --phred{0}".format(offset) - cmd += " {0}".format(gl) - if opts.reorder: - cmd += " --reorder" - - cmd += " {0}".format(extra) - # Finally the log - cmd += " 2> {0}".format(logfile) - - if opts.null: - samfile = "/dev/null" - - cmd = output_bam(cmd, samfile) - sh(cmd) - print(open(logfile).read(), file=sys.stderr) - - return samfile, logfile - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/bwa.py b/jcvi/apps/bwa.py deleted file mode 100644 index 152db8bd..00000000 --- a/jcvi/apps/bwa.py +++ /dev/null @@ -1,301 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Run bwa command and skips the manual run of naming intermediate output files -The whole pipeline is following bwa documentation at - -""" - -import os.path as op -import sys - -from ..apps.grid import MakeManager -from ..assembly.automaton import iter_project -from ..formats.sam import get_samfile, mapped, output_bam - -from .base import ( - ActionDispatcher, - OptionParser, - cleanup, - get_abs_path, - logger, - mkdir, - need_update, - sh, -) - - -def main(): - - actions = ( - ("index", "wraps bwa index"), - ("align", "wraps bwa aln|mem|bwasw"), - ("batch", "run bwa in batch mode"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def batch(args): - """ - %proj batch database.fasta project_dir output_dir - - Run bwa in batch mode. 
- """ - p = OptionParser(batch.__doc__) - set_align_options(p) - p.set_sam_options() - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - ref_fasta, proj_dir, outdir = args - outdir = outdir.rstrip("/") - s3dir = None - if outdir.startswith("s3://"): - s3dir = outdir - outdir = op.basename(outdir) - mkdir(outdir) - - mm = MakeManager() - for p, pf in iter_project(proj_dir): - targs = [ref_fasta] + p - cmd1, bamfile = mem(targs, opts) - if cmd1: - cmd1 = output_bam(cmd1, bamfile) - nbamfile = op.join(outdir, bamfile) - cmd2 = "mv {} {}".format(bamfile, nbamfile) - cmds = [cmd1, cmd2] - - if s3dir: - cmd = "aws s3 cp {} {} --sse".format(nbamfile, op.join(s3dir, bamfile)) - cmds.append(cmd) - - mm.add(p, nbamfile, cmds) - - mm.write() - - -def check_index(dbfile): - dbfile = get_abs_path(dbfile) - safile = dbfile + ".sa" - if not op.exists(safile): - cmd = "bwa index {0}".format(dbfile) - sh(cmd) - else: - logger.error("`{0}` exists. `bwa index` already run.".format(safile)) - - return dbfile - - -def check_aln(dbfile, readfile, cpus=32): - from jcvi.formats.fastq import guessoffset - - saifile = readfile.rsplit(".", 1)[0] + ".sai" - if need_update((dbfile, readfile), saifile): - offset = guessoffset([readfile]) - cmd = "bwa aln " + " ".join((dbfile, readfile)) - cmd += " -t {0}".format(cpus) - if offset == 64: - cmd += " -I" - sh(cmd, outfile=saifile) - else: - logger.error("`{0}` exists. `bwa aln` already run.".format(saifile)) - - return saifile - - -def index(args): - """ - %prog index database.fasta - - Wrapper for `bwa index`. Same interface. 
- """ - p = OptionParser(index.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (dbfile,) = args - check_index(dbfile) - - -def set_align_options(p): - """Used in align() and batch()""" - p.add_argument("--bwa", default="bwa", help="Run bwa at this path") - p.add_argument("--rg", help="Read group") - p.add_argument( - "--readtype", - choices=("pacbio", "pbread", "ont2d", "intractg"), - help="Read type in bwa-mem", - ) - p.set_cutoff(cutoff=800) - - -def align(args): - """ - %prog align database.fasta read1.fq [read2.fq] - - Wrapper for three modes of BWA - mem (default), aln, bwasw (long reads). - """ - valid_modes = ("bwasw", "aln", "mem") - p = OptionParser(align.__doc__) - p.add_argument("--mode", default="mem", choices=valid_modes, help="BWA mode") - set_align_options(p) - p.set_sam_options() - - opts, args = p.parse_args(args) - mode = opts.mode - nargs = len(args) - - if nargs not in (2, 3): - sys.exit(not p.print_help()) - - tag = "bwa-{0}: ".format(mode) - c = mem - if nargs == 2: - tag += "Single-end alignment" - if mode == "bwasw": - c = bwasw - elif mode == "aln": - c = samse - else: - assert mode != "bwasw", "Cannot use --bwasw with paired-end mode" - tag += "Paired-end alignment" - if mode == "aln": - c = sampe - - logger.debug(tag) - cmd, samfile = c(args, opts) - if cmd: - cmd = output_bam(cmd, samfile) - - bam = opts.bam - unmapped = opts.unmapped - - sh(cmd) - if unmapped: - mopts = [samfile, "--unmapped"] - if not bam: - mopts += ["--sam"] - mapped(mopts) - cleanup(samfile) - - return samfile, None - - -def samse(args, opts): - """ - %prog samse database.fasta short_read.fastq - - Wrapper for `bwa samse`. Output will be short_read.sam. 
- """ - dbfile, readfile = args - dbfile = check_index(dbfile) - saifile = check_aln(dbfile, readfile, cpus=opts.cpus) - - samfile, _, unmapped = get_samfile( - readfile, dbfile, bam=opts.bam, unmapped=opts.unmapped - ) - if not need_update((dbfile, saifile), samfile): - logger.error("`{0}` exists. `bwa samse` already run.".format(samfile)) - return "", samfile - - cmd = "bwa samse {0} {1} {2}".format(dbfile, saifile, readfile) - cmd += " " + opts.extra - if opts.uniq: - cmd += " -n 1" - - return cmd, samfile - - -def sampe(args, opts): - """ - %prog sampe database.fasta read1.fq read2.fq - - Wrapper for `bwa sampe`. Output will be read1.sam. - """ - dbfile, read1file, read2file = args - dbfile = check_index(dbfile) - sai1file = check_aln(dbfile, read1file, cpus=opts.cpus) - sai2file = check_aln(dbfile, read2file, cpus=opts.cpus) - - samfile, _, unmapped = get_samfile( - read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped - ) - if not need_update((dbfile, sai1file, sai2file), samfile): - logger.error("`{0}` exists. `bwa samse` already run.".format(samfile)) - return "", samfile - - cmd = "bwa sampe " + " ".join((dbfile, sai1file, sai2file, read1file, read2file)) - cmd += " " + opts.extra - if opts.cutoff: - cmd += " -a {0}".format(opts.cutoff) - if opts.uniq: - cmd += " -n 1" - - return cmd, samfile - - -def mem(args, opts): - """ - %prog mem database.fasta read1.fq [read2.fq] - - Wrapper for `bwa mem`. Output will be read1.sam. - """ - dbfile, read1file = args[:2] - readtype = opts.readtype - pl = readtype or "illumina" - - pf = op.basename(read1file).split(".")[0] - rg = opts.rg or r"@RG\tID:{0}\tSM:sm\tLB:lb\tPL:{1}".format(pf, pl) - dbfile = check_index(dbfile) - args[0] = dbfile - samfile, _, unmapped = get_samfile( - read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped - ) - if not need_update(read1file, samfile): - logger.error("`{0}` exists. 
`bwa mem` already run.".format(samfile)) - return "", samfile - - cmd = "{} mem".format(opts.bwa) - """ - -M Mark shorter split hits as secondary (for Picard compatibility). - """ - cmd += " -M -t {0}".format(opts.cpus) - cmd += ' -R "{0}"'.format(rg) - if readtype: - cmd += " -x {0}".format(readtype) - cmd += " " + opts.extra - cmd += " ".join(args) - - return cmd, samfile - - -def bwasw(args, opts): - """ - %prog bwasw database.fasta long_read.fastq - - Wrapper for `bwa bwasw`. Output will be long_read.sam. - """ - dbfile, readfile = args - dbfile = check_index(dbfile) - - samfile, _, unmapped = get_samfile( - readfile, dbfile, bam=opts.bam, unmapped=opts.unmapped - ) - if not need_update(dbfile, samfile): - logger.error("`{0}` exists. `bwa bwasw` already run.".format(samfile)) - return "", samfile - - cmd = "bwa bwasw " + " ".join(args) - cmd += " -t {0}".format(opts.cpus) - cmd += " " + opts.extra - return cmd, samfile - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/cdhit.py b/jcvi/apps/cdhit.py deleted file mode 100644 index 0a20e228..00000000 --- a/jcvi/apps/cdhit.py +++ /dev/null @@ -1,260 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Using CD-HIT to remove duplicate reads. -""" -import os.path as op -import sys - -from collections import defaultdict - -from ..formats.base import LineFile, read_block, must_open -from ..formats.fastq import fasta -from ..utils.cbook import percentage - -from .base import ActionDispatcher, OptionParser, logger, need_update, sh - - -class ClstrLine(object): - """ - Lines like these: - 0 12067nt, >LAP012517... at -/99.85% - 1 15532nt, >MOL158919... * - 2 15515nt, >SES069071... 
at +/99.85% - """ - - def __init__(self, row): - a, b = row.split(">", 1) - a = a.split("nt")[0] - sid, size = a.split() - self.size = int(size) - self.name = b.split("...")[0] - self.rep = row.rstrip()[-1] == "*" - - -class ClstrFile(LineFile): - def __init__(self, filename): - super().__init__(filename) - assert filename.endswith(".clstr") - - fp = open(filename) - for clstr, members in read_block(fp, ">"): - self.append([ClstrLine(x) for x in members]) - - def iter_sizes(self): - for members in self: - yield len(members) - - def iter_reps(self): - for i, members in enumerate(self): - for b in members: - if b.rep: - yield i, b.name - - def iter_reps_prefix(self, prefix=3): - for i, members in enumerate(self): - d = defaultdict(list) - for b in members: - pp = b.name[:prefix] - d[pp].append(b) - - for pp, members_with_same_pp in sorted(d.items()): - yield i, max(members_with_same_pp, key=lambda x: x.size).name - - -def main(): - - actions = ( - ("ids", "get the representative ids from clstr file"), - ("deduplicate", "use `cd-hit-est` to remove duplicate reads"), - ("filter", "filter consensus sequence with min cluster size"), - ("summary", "parse cdhit.clstr file to get distribution of cluster sizes"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def filter(args): - """ - %prog filter *.consensus.fasta - - Filter consensus sequence with min cluster size. 
- """ - from jcvi.formats.fasta import Fasta, SeqIO - - p = OptionParser(filter.__doc__) - p.add_argument("--minsize", default=2, type=int, help="Minimum cluster size") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - fastafiles = args - minsize = opts.minsize - totalreads = totalassembled = 0 - fw = must_open(opts.outfile, "w") - for i, fastafile in enumerate(fastafiles): - f = Fasta(fastafile, lazy=True) - pf = "s{0:03d}".format(i) - nreads = nsingletons = nclusters = 0 - for desc, rec in f.iterdescriptions_ordered(): - nclusters += 1 - if desc.startswith("singleton"): - nsingletons += 1 - nreads += 1 - continue - # consensus_for_cluster_0 with 63 sequences - name, w, size, seqs = desc.split() - assert w == "with" - size = int(size) - nreads += size - if size < minsize: - continue - rec.description = rec.description.split(None, 1)[-1] - rec.id = pf + "_" + rec.id - SeqIO.write(rec, fw, "fasta") - logger.debug("Scanned {0} clusters with {1} reads ..".format(nclusters, nreads)) - cclusters, creads = nclusters - nsingletons, nreads - nsingletons - logger.debug( - "Saved {0} clusters (min={1}) with {2} reads (avg:{3}) [{4}]".format( - cclusters, minsize, creads, creads / cclusters, pf - ) - ) - totalreads += nreads - totalassembled += nreads - nsingletons - logger.debug("Total assembled: {0}".format(percentage(totalassembled, totalreads))) - - -def ids(args): - """ - %prog ids cdhit.clstr - - Get the representative ids from clstr file. 
- """ - p = OptionParser(ids.__doc__) - p.add_argument("--prefix", type=int, help="Find rep id for prefix of len") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (clstrfile,) = args - cf = ClstrFile(clstrfile) - prefix = opts.prefix - if prefix: - reads = list(cf.iter_reps_prefix(prefix=prefix)) - else: - reads = list(cf.iter_reps()) - - nreads = len(reads) - idsfile = clstrfile.replace(".clstr", ".ids") - fw = open(idsfile, "w") - for i, name in reads: - print("\t".join(str(x) for x in (i, name)), file=fw) - - logger.debug( - "A total of {0} unique reads written to `{1}`.".format(nreads, idsfile) - ) - fw.close() - - return idsfile - - -def summary(args): - """ - %prog summary cdhit.clstr - - Parse cdhit.clstr file to get distribution of cluster sizes. - """ - from jcvi.graphics.histogram import loghistogram - - p = OptionParser(summary.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (clstrfile,) = args - cf = ClstrFile(clstrfile) - data = list(cf.iter_sizes()) - loghistogram(data, summary=True) - - -def deduplicate(args): - """ - %prog deduplicate fastafile - - Wraps `cd-hit-est` to remove duplicate sequences. 
- """ - p = OptionParser(deduplicate.__doc__) - p.set_align(pctid=96, pctcov=0) - p.add_argument( - "--fast", - default=False, - action="store_true", - help="Place sequence in the first cluster", - ) - p.add_argument( - "--consensus", - default=False, - action="store_true", - help="Compute consensus sequences", - ) - p.add_argument( - "--reads", - default=False, - action="store_true", - help="Use `cd-hit-454` to deduplicate", - ) - p.add_argument( - "--samestrand", - default=False, - action="store_true", - help="Enforce same strand alignment", - ) - p.set_home("cdhit") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - identity = opts.pctid / 100.0 - fastafile, qualfile = fasta([fastafile, "--seqtk"]) - - ocmd = "cd-hit-454" if opts.reads else "cd-hit-est" - cmd = op.join(opts.cdhit_home, ocmd) - cmd += " -c {0}".format(identity) - if ocmd == "cd-hit-est": - cmd += " -d 0" # include complete defline - if opts.samestrand: - cmd += " -r 0" - if not opts.fast: - cmd += " -g 1" - if opts.pctcov != 0: - cmd += " -aL {0} -aS {0}".format(opts.pctcov / 100.0) - - dd = fastafile + ".P{0}.cdhit".format(opts.pctid) - clstr = dd + ".clstr" - - cmd += " -M 0 -T {0} -i {1} -o {2}".format(opts.cpus, fastafile, dd) - if need_update(fastafile, (dd, clstr)): - sh(cmd) - - if opts.consensus: - cons = dd + ".consensus" - cmd = op.join(opts.cdhit_home, "cdhit-cluster-consensus") - cmd += " clustfile={0} fastafile={1} output={2} maxlen=1".format( - clstr, fastafile, cons - ) - if need_update((clstr, fastafile), cons): - sh(cmd) - - return dd - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/emboss.py b/jcvi/apps/emboss.py deleted file mode 100644 index 01764208..00000000 --- a/jcvi/apps/emboss.py +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Run EMBOSS programs. 
-""" -import sys -import multiprocessing as mp - -from ..formats.base import must_open -from .base import ActionDispatcher, OptionParser, cleanup - - -class NeedleHeader(object): - def __init__(self, filename): - fp = must_open(filename) - for row in fp: - if row[0] != "#": - continue - # Identity: 89/89 (100.0%) - if row.startswith("# Identity"): - self.identity = row.split(":")[-1].strip() - if row.startswith("# Score"): - self.score = row.split(":")[-1].strip() - - -def main(): - - actions = (("needle", "take protein pairs and needle them"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def _needle(fa, fb, needlefile, a, b, results): - """ - Run single needle job - """ - from Bio.Emboss.Applications import NeedleCommandline - - needle_cline = NeedleCommandline( - asequence=fa, bsequence=fb, gapopen=10, gapextend=0.5, outfile=needlefile - ) - _, _ = needle_cline() - nh = NeedleHeader(needlefile) - cleanup(fa, fb, needlefile) - r = ["\t".join((a, b, nh.identity, nh.score))] - - results.extend(r) - - -def needle(args): - """ - %prog needle nw.pairs a.pep.fasta b.pep.fasta - - Take protein pairs and needle them - Automatically writes output file `nw.scores` - """ - from jcvi.formats.fasta import Fasta, SeqIO - - p = OptionParser(needle.__doc__) - - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - manager = mp.Manager() - results = manager.list() - needle_pool = mp.Pool(processes=mp.cpu_count()) - - pairsfile, apep, bpep = args - afasta, bfasta = Fasta(apep), Fasta(bpep) - fp = must_open(pairsfile) - for i, row in enumerate(fp): - a, b = row.split() - a, b = afasta[a], bfasta[b] - fa, fb = must_open("{0}_{1}_a.fasta".format(pairsfile, i), "w"), must_open( - "{0}_{1}_b.fasta".format(pairsfile, i), "w" - ) - SeqIO.write([a], fa, "fasta") - SeqIO.write([b], fb, "fasta") - fa.close() - fb.close() - - needlefile = "{0}_{1}_ab.needle".format(pairsfile, i) - needle_pool.apply_async( - _needle, (fa.name, fb.name, 
needlefile, a.id, b.id, results) - ) - - needle_pool.close() - needle_pool.join() - - fp.close() - - scoresfile = "{0}.scores".format(pairsfile.rsplit(".")[0]) - fw = must_open(scoresfile, "w") - for result in results: - print(result, file=fw) - fw.close() - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/fetch.py b/jcvi/apps/fetch.py deleted file mode 100644 index 3ce134ca..00000000 --- a/jcvi/apps/fetch.py +++ /dev/null @@ -1,729 +0,0 @@ -""" -Wrapper for fetching data from various online repositories \ -(Entrez, Ensembl, Phytozome, and SRA) -""" - -import os.path as op -import re -import sys -import time - -from os.path import join as urljoin -from urllib.error import HTTPError, URLError - -from Bio import Entrez, SeqIO -from more_itertools import grouper - -from ..formats.base import must_open -from ..formats.fasta import print_first_difference -from ..formats.fastq import fromsra -from ..utils.cbook import tile -from ..utils.console import printf - -from .base import ( - ActionDispatcher, - OptionParser, - logger, - cleanup, - download, - get_email_address, - last_updated, - ls_ftp, - mkdir, - sh, - which, -) - - -myEmail = get_email_address() -Entrez.email = myEmail -PHYTOZOME_COOKIES = ".phytozome_cookies" - - -def batch_taxonomy(list_of_taxids): - """ - Convert list of taxids to Latin names - """ - for taxid in list_of_taxids: - handle = Entrez.efetch(db="Taxonomy", id=taxid, retmode="xml") - records = Entrez.read(handle) - yield records[0]["ScientificName"] - - -def batch_taxids(list_of_names): - """ - Opposite of batch_taxonomy(): - - Convert list of Latin names to taxids - """ - for name in list_of_names: - handle = Entrez.esearch(db="Taxonomy", term=name, retmode="xml") - records = Entrez.read(handle) - yield records["IdList"][0] - - -def batch_entrez( - list_of_terms, db="nuccore", retmax=1, rettype="fasta", batchsize=1, email=myEmail -): - """ - Retrieve multiple rather than a single record - """ - - for term in list_of_terms: - - 
logger.debug("Search term %s", term) - success = False - ids = None - if not term: - continue - - while not success: - try: - search_handle = Entrez.esearch(db=db, retmax=retmax, term=term) - rec = Entrez.read(search_handle) - success = True - ids = rec["IdList"] - except (HTTPError, URLError, RuntimeError, KeyError) as e: - logger.error(e) - logger.debug("wait 5 seconds to reconnect...") - time.sleep(5) - - if not ids: - logger.error("term {0} not found".format(term)) - continue - - assert ids - nids = len(ids) - if nids > 1: - logger.debug("A total of {0} results found.".format(nids)) - - if batchsize != 1: - logger.debug("Use a batch size of {0}.".format(batchsize)) - - ids = list(grouper(ids, batchsize)) - - for id in ids: - id = [x for x in id if x] - size = len(id) - id = ",".join(id) - - success = False - while not success: - try: - fetch_handle = Entrez.efetch( - db=db, id=id, rettype=rettype, email=email - ) - success = True - except (HTTPError, URLError, RuntimeError) as e: - logger.error(e) - logger.debug("wait 5 seconds to reconnect...") - time.sleep(5) - - yield id, size, term, fetch_handle - - -def main(): - - actions = ( - ("entrez", "fetch records from entrez using a list of GenBank accessions"), - ("bisect", "determine the version of the accession by querying entrez"), - ( - "phytozome9", - "retrieve genomes and annotations from phytozome version 9.0 (legacy)", - ), - ("phytozome", "retrieve genomes and annotations from phytozome"), - ("ensembl", "retrieve genomes and annotations from ensembl"), - ("sra", "retrieve files from SRA via the sra-instant FTP"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def ensembl(args): - """ - %prog ensembl species - - Retrieve genomes and annotations from ensembl FTP. Available species - listed below. Use comma to give a list of species to download. 
For example: - - $ %prog ensembl danio_rerio,gasterosteus_aculeatus - """ - p = OptionParser(ensembl.__doc__) - p.add_argument("--version", default="75", help="Ensembl version") - opts, args = p.parse_args(args) - - version = opts.version - url = "ftp://ftp.ensembl.org/pub/release-{0}/".format(version) - fasta_url = url + "fasta/" - - valid_species = [x for x in ls_ftp(fasta_url) if "." not in x] - doc = "\n".join((ensembl.__doc__, tile(valid_species))) - p.usage = doc - - if len(args) != 1: - sys.exit(not p.print_help()) - - (species,) = args - species = species.split(",") - for s in species: - download_species_ensembl(s, valid_species, url) - - -def download_species_ensembl(species, valid_species, url): - assert species in valid_species, "{0} is not in the species list".format(species) - - # We want to download assembly and annotation for given species - ann_url = urljoin(url, "gtf/{0}".format(species)) - cds_url = urljoin(url, "fasta/{0}/cds".format(species)) - - for u in (ann_url, cds_url): - valid_files = [x for x in ls_ftp(u) if x.endswith(".gz")] - for f in valid_files: - f = urljoin(u, f) - download(f) - - -def get_cookies(cookies=PHYTOZOME_COOKIES): - from jcvi.utils.console import console - - # Check if cookies is still good - if op.exists(cookies) and last_updated(cookies) < 3600: - return cookies - - if console.is_terminal: - username = console.input("[bold green]Phytozome Login: ") - pw = console.input("[bold green]Phytozome Password: ", password=True) - else: - username, pw = None, None - curlcmd = which("curl") - if curlcmd is None: - logger.error("curl command not installed. Aborting.") - return None - cmd = "{} https://signon.jgi.doe.gov/signon/create".format(curlcmd) - cmd += " --data-urlencode 'login={0}' --data-urlencode 'password={1}' -b {2} -c {2}".format( - username, pw, cookies - ) - sh(cmd, outfile="/dev/null", errfile="/dev/null", log=False) - if not op.exists(cookies): - logger.error("Cookies file `{}` not created. 
Aborting.".format(cookies)) - return None - - return cookies - - -def phytozome(args): - """ - %prog phytozome species - - Retrieve genomes and annotations from phytozome using Globus API. Available - species listed below. Use comma to give a list of species to download. For - example: - - $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum - - The downloader will prompt you to enter Phytozome user name and password - during downloading. Please register for a login at: - https://phytozome.jgi.doe.gov/pz/portal.html. - """ - from jcvi.apps.biomart import GlobusXMLParser - - p = OptionParser(phytozome.__doc__) - p.add_argument( - "--version", - default="12", - choices=("9", "10", "11", "12", "12_unrestricted", "13"), - help="Phytozome version", - ) - p.add_argument( - "--assembly", - default=False, - action="store_true", - help="Download assembly", - ) - p.add_argument( - "--format", - default=False, - action="store_true", - help="Format to CDS and BED for synteny inference", - ) - p.set_downloader() - opts, args = p.parse_args(args) - - downloader = opts.downloader - directory_listing = ".phytozome_directory_V{}.xml".format(opts.version) - # Get directory listing - base_url = "http://genome.jgi.doe.gov" - dlist = "{}/ext-api/downloads/get-directory?organism=PhytozomeV{}".format( - base_url, opts.version - ) - - # Make sure we have a valid cookies - cookies = get_cookies() - if cookies is None: - logger.error("Error fetching cookies ... cleaning up") - cleanup(directory_listing) - sys.exit(1) - - # Proceed to use the cookies and download the species list - try: - download( - dlist, - filename=directory_listing, - cookies=cookies, - downloader=downloader, - ) - g = GlobusXMLParser(directory_listing) - except Exception as _: - logger.error("Error downloading directory listing ... 
cleaning up") - cleanup(directory_listing, cookies) - sys.exit(1) - - genomes = g.get_genomes() - valid_species = genomes.keys() - species_tile = tile(valid_species) - p.usage = "\n".join((phytozome.__doc__, species_tile)) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (species,) = args - if species == "all": - species = ",".join(valid_species) - - species = species.split(",") - for s in species: - res = download_species_phytozome( - genomes, - s, - valid_species, - base_url, - cookies, - assembly=opts.assembly, - downloader=downloader, - ) - if not res: - logger.error("No files downloaded") - gff, fa = res.get("gff"), res.get("cds") - if opts.format: - format_bed_and_cds(s, gff, fa) - - -def download_species_phytozome( - genomes, species, valid_species, base_url, cookies, assembly=False, downloader=None -): - """Download assembly FASTA and annotation GFF. - - Args: - genomes (dict): Dictionary parsed from Globus XML. - species (str): Target species to download. - valid_species (List[str]): Allowed set of species - base_url (str): URL. - cookies (str): cookies file path. - assembly (bool, optional): Do we download assembly FASTA (can be big). - Defaults to False. - downloader (str, optional): Use a given downloader. One of wget|curl|powershell|insecure. - Defaults to None. 
- """ - assert species in valid_species, "{} is not in the species list".format(species) - res = {} - genome = genomes.get(species) - if not genome: - return res - - genome_assembly = genome.get("assembly") - if assembly and genome_assembly: - asm_name = next(x for x in genome_assembly if x.endswith(".fa.gz")) - if asm_name: - res["asm"] = genome_assembly.download( - asm_name, base_url, cookies, downloader=downloader - ) - - genome_annotation = genome.get("annotation") - if genome_annotation: - gff_name = next(x for x in genome_annotation if x.endswith(".gene.gff3.gz")) - if gff_name: - res["gff"] = genome_annotation.download( - gff_name, base_url, cookies, downloader=downloader - ) - cds_name = next(x for x in genome_annotation if x.endswith(".cds.fa.gz")) - if cds_name: - res["cds"] = genome_annotation.download( - cds_name, base_url, cookies, downloader=downloader - ) - - return res - - -def phytozome9(args): - """ - %prog phytozome9 species - - Retrieve genomes and annotations from phytozome FTP. Available species - listed below. Use comma to give a list of species to download. For example: - - $ %prog phytozome9 Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum - """ - p = OptionParser(phytozome9.__doc__) - p.add_argument( - "--assembly", - default=False, - action="store_true", - help="Download assembly", - ) - p.add_argument( - "--format", - default=False, - action="store_true", - help="Format to CDS and BED for synteny inference", - ) - opts, args = p.parse_args(args) - - version = "9.0" - url = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(version) - valid_species = [x for x in ls_ftp(url) if "." 
not in x] - - doc = "\n".join((phytozome9.__doc__, tile(valid_species))) - p.usage = doc - - if len(args) != 1: - sys.exit(not p.print_help()) - - (species,) = args - if species == "all": - species = ",".join(valid_species) - - species = species.split(",") - - for s in species: - res = download_species_phytozome9(s, valid_species, url, assembly=opts.assembly) - if not res: - logger.error("No files downloaded") - gff, cdsfa = res.get("gff"), res.get("cds") - if opts.format: - format_bed_and_cds(s, gff, cdsfa) - - -def format_bed_and_cds(species, gff, cdsfa): - """Run gff.format() and fasta.format() to generate BED and CDS files. - This prepares the input files for the MCscan synteny workflow. - - https://github.com/tanghaibao/jcvi/wiki/MCscan-(Python-version) - - Args: - species (str): Name of the species - gff (str): Path to the GFF file - cdsfa (str): Path to the FASTA file - """ - from jcvi.formats.gff import bed as gff_bed - from jcvi.formats.fasta import format as fasta_format - - # We have to watch out when the gene names and mRNA names mismatch, in which - # case we just extract the mRNA names - use_IDs = set() - use_mRNAs = { - "Cclementina", - "Creinhardtii", - "Csinensis", - "Fvesca", - "Lusitatissimum", - "Mesculenta", - "Mguttatus", - "Ppersica", - "Pvirgatum", - "Rcommunis", - "Sitalica", - "Tcacao", - "Thalophila", - "Vcarteri", - "Vvinifera", - "Zmays", - } - key = "ID" if species in use_IDs else "Name" - ttype = "mRNA" if species in use_mRNAs else "gene" - bedfile = species + ".bed" - cdsfile = species + ".cds" - gff_bed([gff, "--type={}".format(ttype), "--key={}".format(key), "-o", bedfile]) - fasta_format([cdsfa, cdsfile, r"--sep=|"]) - - -def download_species_phytozome9(species, valid_species, base_url, assembly=False): - assert species in valid_species, "{} is not in the species list".format(species) - - # We want to download assembly and annotation for given species - surl = urljoin(base_url, species) - contents = [x for x in ls_ftp(surl) if 
x.endswith("_readme.txt")] - magic = contents[0].split("_")[1] # Get the magic number - logger.debug("Found magic number for {0}: {1}".format(species, magic)) - - pf = "{0}_{1}".format(species, magic) - asm_url = urljoin(surl, "assembly/{0}.fa.gz".format(pf)) - ann_url = urljoin(surl, "annotation/{0}_gene.gff3.gz".format(pf)) - cds_url = urljoin(surl, "annotation/{0}_cds.fa.gz".format(pf)) - res = {} - if assembly: - res["asm"] = download(asm_url) - res["gff"] = download(ann_url) - res["cds"] = download(cds_url) - return res - - -def get_first_rec(fastafile): - """ - Returns the first record in the fastafile - """ - f = list(SeqIO.parse(fastafile, "fasta")) - - if len(f) > 1: - logger.debug( - "{0} records found in {1}, using the first one".format(len(f), fastafile) - ) - - return f[0] - - -def bisect(args): - """ - %prog bisect acc accession.fasta - - determine the version of the accession by querying entrez, based on a fasta file. - This proceeds by a sequential search from xxxx.1 to the latest record. - """ - p = OptionParser(bisect.__doc__) - p.set_email() - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - acc, fastafile = args - arec = get_first_rec(fastafile) - - valid = None - for i in range(1, 100): - term = "%s.%d" % (acc, i) - try: - query = list(batch_entrez([term], email=opts.email)) - except AssertionError as e: - logger.debug(f"no records found for {term}. terminating. {e}") - return - - id, term, handle = query[0] - brec = next(SeqIO.parse(handle, "fasta")) - - match = print_first_difference( - arec, brec, ignore_case=True, ignore_N=True, rc=True - ) - if match: - valid = term - break - - if valid: - printf() - printf("[green]{} matches the sequence in `{}`".format(valid, fastafile)) - - -def entrez(args): - """ - %prog entrez - - `filename` contains a list of terms to search. Or just one term. If the - results are small in size, e.g. "--format=acc", use "--batchsize=100" to speed - the download. 
- """ - p = OptionParser(entrez.__doc__) - - allowed_databases = { - "fasta": ["genome", "nuccore", "nucgss", "protein", "nucest"], - "asn.1": ["genome", "nuccore", "nucgss", "protein", "gene"], - "xml": ["genome", "nuccore", "nucgss", "nucest", "gene"], - "gb": ["genome", "nuccore", "nucgss"], - "est": ["nucest"], - "gss": ["nucgss"], - "acc": ["nuccore"], - } - - valid_formats = tuple(allowed_databases.keys()) - valid_databases = ("genome", "nuccore", "nucest", "nucgss", "protein", "gene") - - p.add_argument( - "--noversion", - dest="noversion", - default=False, - action="store_true", - help="Remove trailing accession versions", - ) - p.add_argument( - "--format", - default="fasta", - choices=valid_formats, - help="download format", - ) - p.add_argument( - "--database", - default="nuccore", - choices=valid_databases, - help="search database", - ) - p.add_argument( - "--retmax", - default=1000000, - type=int, - help="how many results to return", - ) - p.add_argument( - "--skipcheck", - default=False, - action="store_true", - help="turn off prompt to check file existence", - ) - p.add_argument( - "--batchsize", - default=500, - type=int, - help="download the results in batch for speed-up", - ) - p.set_outdir(outdir=None) - p.add_argument("--outprefix", default="out", help="output file name prefix") - p.set_email() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - (filename,) = args - if op.exists(filename): - pf = filename.rsplit(".", 1)[0] - list_of_terms = [row.strip() for row in open(filename)] - if opts.noversion: - list_of_terms = [x.rsplit(".", 1)[0] for x in list_of_terms] - else: - pf = filename - # the filename is the search term - list_of_terms = [filename.strip()] - - fmt = opts.format - database = opts.database - batchsize = opts.batchsize - - assert ( - database in allowed_databases[fmt] - ), "For output format '{0}', allowed databases are: {1}".format( - fmt, allowed_databases[fmt] - ) - assert batchsize >= 1, 
"batchsize must >= 1" - - if " " in pf: - pf = opts.outprefix - - outfile = "{0}.{1}".format(pf, fmt) - - outdir = opts.outdir - if outdir: - mkdir(outdir) - - # If noprompt, will not check file existence - if not outdir: - fw = must_open(outfile, "w", checkexists=True, skipcheck=opts.skipcheck) - if fw is None: - return - - seen = set() - totalsize = 0 - for id, size, term, handle in batch_entrez( - list_of_terms, - retmax=opts.retmax, - rettype=fmt, - db=database, - batchsize=batchsize, - email=opts.email, - ): - if outdir: - outfile = urljoin(outdir, "{0}.{1}".format(term, fmt)) - fw = must_open(outfile, "w", checkexists=True, skipcheck=opts.skipcheck) - if fw is None: - continue - - rec = handle.read() - if id in seen: - logger.error("Duplicate key ({0}) found".format(rec)) - continue - - totalsize += size - print(rec, file=fw) - print(file=fw) - - seen.add(id) - - if seen: - printf( - "A total of {0} {1} records downloaded.".format(totalsize, fmt.upper()), - ) - - return outfile - - -def sra(args): - """ - %prog sra [term|term.ids] - - Given an SRA run ID, fetch the corresponding .sra file from the sra-instant FTP. - The term can also be a file containing list of SRR ids, one per line. - - Once downloaded, the SRA file is processed through `fastq-dump` to produce - FASTQ formatted sequence files, which are gzipped by default. 
- """ - p = OptionParser(sra.__doc__) - - p.add_argument( - "--nogzip", - dest="nogzip", - default=False, - action="store_true", - help="Do not gzip the FASTQ generated by fastq-dump", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (term,) = args - if op.isfile(term): - terms = [x.strip() for x in open(term)] - else: - terms = [term] - - for term in terms: - srafile = download_srr_term(term) - pf = srafile.split(".")[0] - mkdir(pf) - _opts = [srafile, "--paired", "--outdir={0}".format(pf)] - if not opts.nogzip: - _opts.append("--compress=gzip") - fromsra(_opts) - - -def download_srr_term(term): - sra_base_url = "ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/" - sra_run_id_re = re.compile(r"^([DES]RR)(\d{3})(\d{3,4})$") - - m = re.search(sra_run_id_re, term) - if m is None: - logger.error( - "Incorrect SRA identifier format " - + "[should be like SRR126150, SRR1001901. " - + "len(identifier) should be between 9-10 characters]" - ) - sys.exit() - - prefix, subprefix = m.group(1), "{0}{1}".format(m.group(1), m.group(2)) - download_url = urljoin( - sra_base_url, prefix, subprefix, term, "{0}.sra".format(term) - ) - - logger.debug("Downloading file: {0}".format(download_url)) - return download(download_url) - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/gbsubmit.py b/jcvi/apps/gbsubmit.py deleted file mode 100644 index ca629dec..00000000 --- a/jcvi/apps/gbsubmit.py +++ /dev/null @@ -1,676 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Prepare the data for Genbank submission -""" -import os.path as op -import sys -import string - -from collections import defaultdict -from functools import lru_cache - -from Bio import SeqIO - -from ..formats.base import DictFile -from ..utils.orderedcollections import parse_qs - -from .base import ActionDispatcher, OptionParser, glob, logger, mkdir, sh - - -""" -GSS submission template files - - -""" - -# Modify the following if a 
different submission -# TODO: make this generic and exist outside source code -Title = """Comparative Genomics of Sisymbrium irio""" - -Authors = """Town,C.D., Tang,H., Paterson,A.H. and Pires,J.C.""" - -Libname = "Sisymbrium irio BAC library SIL" -Contact = "Chris D. Town" - -PublicationTemplate = """TYPE: Pub -MEDUID: -TITLE: -{Title} -AUTHORS: -{Authors} -JOURNAL: -VOLUME: -ISSUE: -PAGES: -YEAR: 2011 -STATUS: 1 -||""" - -LibraryTemplate = """TYPE: Lib -NAME: {Libname} -ORGANISM: Sisymbrium irio -STRAIN: Gomez-Campo 1146-67 -SEX: -STAGE: -TISSUE: -CELL_TYPE: -VECTOR: pCC1BAC -RE_1: HindIII -DESCR: -Constructed by Amplicon Express; -Transformed into Invitrogen DH10b phage resistant E. coli. -||""" - -ContactTemplate = """TYPE: Cont -NAME: {Contact} -FAX: 301-795-7070 -TEL: 301-795-7523 -EMAIL: cdtown@jcvi.org -LAB: Plant Genomics -INST: J. Craig Venter Institute -ADDR: 9704 Medical Center Dr., Rockville, MD 20850, USA -||""" - -Directions = {"forward": "TR", "reverse": "TV"} - -Primers = { - "TR": "M13 Universal For 18bp Primer (TGTAAAACGACGGCCAGT)", - "TV": "T7 Rev 20bp Primer (TAATACGACTCACTATAGGG)", -} - -GSSTemplate = """TYPE: GSS -STATUS: New -CONT_NAME: {Contact} -GSS#: {gssID} -CLONE: {cloneID} -SOURCE: JCVI -OTHER_GSS: {othergss} -CITATION: -{Title} -INSERT: 120000 -PLATE: {plate} -ROW: {row} -COLUMN: {column} -SEQ_PRIMER: {primer} -DNA_TYPE: Genomic -CLASS: BAC ends -LIBRARY: {Libname} -PUBLIC: -PUT_ID: -COMMENT: -SEQUENCE: -{seq} -||""" - -Nrows, Ncols = 16, 24 -vars = globals() - - -def main(): - - actions = ( - ("fcs", "process the results from Genbank contaminant screen"), - ("gss", "prepare package for genbank gss submission"), - ("htg", "prepare sqn to update existing genbank htg records"), - ("htgnew", "prepare sqn to submit new genbank htg records"), - ("asn", "get the name tags from a bunch of asn.1 files"), - ("t384", "print out a table converting between 96 well to 384 well"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def 
fcs(args): - """ - %prog fcs fcsfile - - Process the results from Genbank contaminant screen. An example of the file - looks like: - - contig name, length, span(s), apparent source - contig0746 11760 1..141 vector - contig0751 14226 13476..14226 vector - contig0800 124133 30512..30559 primer/adapter - """ - p = OptionParser(fcs.__doc__) - p.add_argument( - "--cutoff", - default=200, - help="Skip small components less than", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fcsfile,) = args - cutoff = opts.cutoff - fp = open(fcsfile) - for row in fp: - if row[0] == "#": - continue - - sep = "\t" if "\t" in row else None - atoms = row.rstrip().split(sep, 3) - contig, length = atoms[:2] - length = int(length) - label = atoms[-1] - label = label.replace(" ", "_") - - if len(atoms) == 3: - ranges = "{0}..{1}".format(1, length) - else: - assert len(atoms) == 4 - ranges = atoms[2] - - for ab in ranges.split(","): - a, b = ab.split("..") - a, b = int(a), int(b) - assert a <= b - ahang = a - 1 - bhang = length - b - if ahang < cutoff: - a = 1 - if bhang < cutoff: - b = length - print("\t".join(str(x) for x in (contig, a - 1, b, label))) - - -def asn(args): - """ - %prog asn asnfiles - - Mainly to get this block, and extract `str` field: - - general { - db "TIGR" , - tag - str "mtg2_12952" } , - genbank { - accession "AC148996" , - """ - from jcvi.formats.base import must_open - - p = OptionParser(asn.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - fw = must_open(opts.outfile, "w") - for asnfile in args: - fp = open(asnfile) - ingeneralblock = False - ingenbankblock = False - gb, name = None, None - for row in fp: - if row.strip() == "": - continue - - tag = row.split()[0] - - if tag == "general": - ingeneralblock = True - if ingeneralblock and tag == "str": - if name is None: # Only allow first assignment - name = row.split('"')[1] - ingeneralblock = False - 
- if tag == "genbank": - ingenbankblock = True - if ingenbankblock and tag == "accession": - if gb is None: - gb = row.split('"')[1] - ingenbankblock = False - - assert gb and name - print("{0}\t{1}".format(gb, name), file=fw) - - -def verify_sqn(sqndir, accession): - valfile = "{0}/{1}.val".format(sqndir, accession) - contents = open(valfile).read().strip() - assert not contents, "Validation error:\n{0}".format(contents) - - cmd = "gb2fasta {0}/{1}.gbf".format(sqndir, accession) - outfile = "{0}/{1}.fasta".format(sqndir, accession) - sh(cmd, outfile=outfile) - - -def htgnew(args): - """ - %prog htgnew fastafile phasefile template.sbt - - Prepare sqnfiles for submitting new Genbank HTG records. - - `fastafile` contains the sequences. - `phasefile` contains the phase information, it is a two column file: - - mth2-45h12 3 - - `template.sbt` is the Genbank submission template. - - This function is simpler than htg, since the record names have not be - assigned yet (so less bookkeeping). - """ - from jcvi.formats.fasta import sequin - - p = OptionParser(htgnew.__doc__) - p.add_argument("--comment", default="", help="Comments for this submission") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - fastafile, phasefile, sbtfile = args - comment = opts.comment - - fastadir = "fasta" - sqndir = "sqn" - mkdir(fastadir) - mkdir(sqndir) - - cmd = "faSplit byname {0} {1}/".format(fastafile, fastadir) - sh(cmd, outfile="/dev/null", errfile="/dev/null") - - acmd = "tbl2asn -a z -p fasta -r {sqndir}" - acmd += " -i {splitfile} -t {sbtfile} -C tigr" - acmd += ' -j "[tech=htgs {phase}] [organism=Medicago truncatula] [strain=A17]"' - acmd += " -o {sqndir}/{accession_nv}.sqn -V Vbr" - acmd += ' -y "{comment}" -W T -T T' - - nupdated = 0 - for row in open(phasefile): - name, phase = row.split()[:2] - fafile = op.join(fastadir, name + ".fa") - cloneopt = "--clone={0}".format(name) - splitfile, gaps = sequin([fafile, cloneopt]) - splitfile = 
op.basename(splitfile) - accession = accession_nv = name - - phase = int(phase) - assert phase in (1, 2, 3) - - cmd = acmd.format( - accession_nv=accession_nv, - sqndir=sqndir, - sbtfile=sbtfile, - splitfile=splitfile, - phase=phase, - comment=comment, - ) - sh(cmd) - - verify_sqn(sqndir, accession) - nupdated += 1 - - print("A total of {0} records updated.".format(nupdated), file=sys.stderr) - - -def htg(args): - """ - %prog htg fastafile template.sbt - - Prepare sqnfiles for Genbank HTG submission to update existing records. - - `fastafile` contains the records to update, multiple records are allowed - (with each one generating separate sqn file in the sqn/ folder). The record - defline has the accession ID. For example, - >AC148290.3 - - Internally, this generates two additional files (phasefile and namesfile) - and download records from Genbank. Below is implementation details: - - `phasefile` contains, for each accession, phase information. For example: - AC148290.3 3 HTG 2 mth2-45h12 - - which means this is a Phase-3 BAC. Record with only a single contig will be - labeled as Phase-3 regardless of the info in the `phasefile`. Template file - is the Genbank sbt template. See jcvi.formats.sbt for generation of such - files. - - Another problem is that Genbank requires the name of the sequence to stay - the same when updating and will kick back with a table of name conflicts. - For example: - - We are unable to process the updates for these entries - for the following reason: - - Seqname has changed - - Accession Old seq_name New seq_name - --------- ------------ ------------ - AC239792 mtg2_29457 AC239792.1 - - To prepare a submission, this script downloads genbank and asn.1 format, - and generate the phase file and the names file (use formats.agp.phase() and - apps.gbsubmit.asn(), respectively). These get automatically run. - - However, use --phases if the genbank files contain outdated information. - For example, the clone name changes or phase upgrades. 
In this case, run - formats.agp.phase() manually, modify the phasefile and use --phases to override. - """ - from jcvi.formats.fasta import sequin, ids - from jcvi.formats.agp import phase - from jcvi.apps.fetch import entrez - - p = OptionParser(htg.__doc__) - p.add_argument( - "--phases", - default=None, - help="Use another phasefile to override", - ) - p.add_argument("--comment", default="", help="Comments for this update") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, sbtfile = args - pf = fastafile.rsplit(".", 1)[0] - - idsfile = pf + ".ids" - phasefile = pf + ".phases" - namesfile = pf + ".names" - - ids([fastafile, "--outfile={0}".format(idsfile)]) - - asndir = "asn.1" - mkdir(asndir) - entrez([idsfile, "--format=asn.1", "--outdir={0}".format(asndir)]) - asn(glob("{0}/*".format(asndir)) + ["--outfile={0}".format(namesfile)]) - - if opts.phases is None: - gbdir = "gb" - mkdir(gbdir) - entrez([idsfile, "--format=gb", "--outdir={0}".format(gbdir)]) - phase(glob("{0}/*".format(gbdir)) + ["--outfile={0}".format(phasefile)]) - else: - phasefile = opts.phases - - assert op.exists(namesfile) and op.exists(phasefile) - - newphasefile = phasefile + ".new" - newphasefw = open(newphasefile, "w") - comment = opts.comment - - fastadir = "fasta" - sqndir = "sqn" - mkdir(fastadir) - mkdir(sqndir) - - from jcvi.graphics.histogram import stem_leaf_plot - - names = DictFile(namesfile) - assert len(set(names.keys())) == len(set(names.values())) - - phases = DictFile(phasefile) - ph = [int(x) for x in phases.values()] - # vmin 1, vmax 4, bins 3 - stem_leaf_plot(ph, 1, 4, 3, title="Counts of phases before updates") - logger.debug("Information loaded for {0} records.".format(len(phases))) - assert len(names) == len(phases) - - newph = [] - - cmd = "faSplit byname {0} {1}/".format(fastafile, fastadir) - sh(cmd, outfile="/dev/null", errfile="/dev/null") - - acmd = "tbl2asn -a z -p fasta -r {sqndir}" - acmd += " -i {splitfile} 
-t {sbtfile} -C tigr" - acmd += ' -j "{qualifiers}"' - acmd += " -A {accession_nv} -o {sqndir}/{accession_nv}.sqn -V Vbr" - acmd += ' -y "{comment}" -W T -T T' - - qq = "[tech=htgs {phase}] [organism=Medicago truncatula] [strain=A17]" - - nupdated = 0 - for row in open(phasefile): - atoms = row.rstrip().split("\t") - # see formats.agp.phase() for column contents - accession, phase, clone = atoms[0], atoms[1], atoms[-1] - fafile = op.join(fastadir, accession + ".fa") - accession_nv = accession.split(".", 1)[0] - - newid = names[accession_nv] - newidopt = "--newid={0}".format(newid) - cloneopt = "--clone={0}".format(clone) - splitfile, gaps = sequin([fafile, newidopt, cloneopt]) - splitfile = op.basename(splitfile) - phase = int(phase) - assert phase in (1, 2, 3) - - oldphase = phase - if gaps == 0 and phase != 3: - phase = 3 - - if gaps != 0 and phase == 3: - phase = 2 - - print("{0}\t{1}\t{2}".format(accession_nv, oldphase, phase), file=newphasefw) - newph.append(phase) - - qualifiers = qq.format(phase=phase) - if ";" in clone: - qualifiers += " [keyword=HTGS_POOLED_MULTICLONE]" - - cmd = acmd.format( - accession=accession, - accession_nv=accession_nv, - sqndir=sqndir, - sbtfile=sbtfile, - splitfile=splitfile, - qualifiers=qualifiers, - comment=comment, - ) - sh(cmd) - - verify_sqn(sqndir, accession) - nupdated += 1 - - stem_leaf_plot(newph, 1, 4, 3, title="Counts of phases after updates") - print("A total of {0} records updated.".format(nupdated), file=sys.stderr) - - -@lru_cache(maxsize=None) -def get_rows_cols(nrows=Nrows, ncols=Ncols): - rows, cols = string.ascii_uppercase[:nrows], range(1, ncols + 1) - return rows, cols - - -@lru_cache(maxsize=None) -def get_plate(nrows=Nrows, ncols=Ncols): - - rows, cols = get_rows_cols(nrows, ncols) - plate = [[""] * ncols for _ in range(nrows)] - n = 0 - # 384 to (96+quadrant) - for i in range(0, nrows, 2): - for j in range(0, ncols, 2): - n += 1 - prefix = "{0:02d}".format(n) - plate[i][j] = prefix + "A" - plate[i][j + 1] 
= prefix + "B" - plate[i + 1][j] = prefix + "C" - plate[i + 1][j + 1] = prefix + "D" - - # (96+quadrant) to 384 - splate = {} - for i in range(nrows): - for j in range(ncols): - c = plate[i][j] - splate[c] = "{0}{1}".format(rows[i], j + 1) - - return plate, splate - - -def convert_96_to_384(c96, quad, ncols=Ncols): - """ - Convert the 96-well number and quad number to 384-well number - - >>> convert_96_to_384("B02", 1) - 'C3' - >>> convert_96_to_384("H09", 4) - 'P18' - """ - rows, cols = get_rows_cols() - plate, splate = get_plate() - - n96 = rows.index(c96[0]) * ncols // 2 + int(c96[1:]) - q = "{0:02d}{1}".format(n96, "ABCD"[quad - 1]) - return splate[q] - - -def t384(args): - """ - %prog t384 - - Print out a table converting between 96 well to 384 well - """ - p = OptionParser(t384.__doc__) - p.parse_args(args) - - plate, splate = get_plate() - - fw = sys.stdout - for i in plate: - for j, p in enumerate(i): - if j != 0: - fw.write("|") - fw.write(p) - fw.write("\n") - - -def parse_description(s): - """ - Returns a dictionary based on the FASTA header, assuming JCVI data - """ - s = "".join(s.split()[1:]).replace("/", ";") - a = parse_qs(s) - return a - - -def gss(args): - """ - %prog gss fastafile plateMapping - - Generate sequence files and metadata templates suited for gss submission. 
- The FASTA file is assumed to be exported from the JCVI data delivery folder - which looks like: - - >1127963806024 /library_name=SIL1T054-B-01-120KB /clear_start=0 - /clear_end=839 /primer_id=1049000104196 /trace_id=1064147620169 - /trace_file_id=1127963805941 /clone_insert_id=1061064364776 - /direction=reverse /sequencer_run_id=1064147620155 - /sequencer_plate_barcode=B906423 /sequencer_plate_well_coordinates=C3 - /sequencer_plate_96well_quadrant=1 /sequencer_plate_96well_coordinates=B02 - /template_plate_barcode=CC0251602AB /growth_plate_barcode=BB0273005AB - AGCTTTAGTTTCAAGGATACCTTCATTGTCATTCCCGGTTATGATGATATCATCAAGATAAACAAGAATG - ACAATGATACCTGTTTGGTTCTGAAGTGTAAAGAGGGTATGTTCAGCTTCAGATCTTCTAAACCCTTTGT - CTAGTAAGCTGGCACTTAGCTTCCTATACCAAACCCTTTGTGATTGCTTCAGTCCATAAATTGCCTTTTT - - Plate mapping file maps the JTC `sequencer_plate_barcode` to external IDs. - For example: - B906423 SIL-001 - """ - p = OptionParser(gss.__doc__) - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(p.print_help()) - - fastafile, mappingfile = args - seen = defaultdict(int) - clone = defaultdict(set) - - plateMapping = DictFile(mappingfile) - - fw = open("MetaData.txt", "w") - print(PublicationTemplate.format(**vars), file=fw) - print(LibraryTemplate.format(**vars), file=fw) - print(ContactTemplate.format(**vars), file=fw) - logger.debug("Meta data written to `{0}`".format(fw.name)) - - fw = open("GSS.txt", "w") - fw_log = open("GSS.log", "w") - for rec in SeqIO.parse(fastafile, "fasta"): - # First pass just check well number matchings and populate sequences in - # the same clone - description = rec.description - a = parse_description(description) - direction = a["direction"][0] - sequencer_plate_barcode = a["sequencer_plate_barcode"][0] - sequencer_plate_well_coordinates = a["sequencer_plate_well_coordinates"][0] - sequencer_plate_96well_quadrant = a["sequencer_plate_96well_quadrant"][0] - sequencer_plate_96well_coordinates = a["sequencer_plate_96well_coordinates"][0] - - 
# Check the 96-well ID is correctly converted to 384-well ID - w96 = sequencer_plate_96well_coordinates - w96quad = int(sequencer_plate_96well_quadrant) - w384 = sequencer_plate_well_coordinates - assert convert_96_to_384(w96, w96quad) == w384 - - plate = sequencer_plate_barcode - assert plate in plateMapping, "{0} not found in `{1}` !".format( - plate, mappingfile - ) - - plate = plateMapping[plate] - d = Directions[direction] - - cloneID = "{0}{1}".format(plate, w384) - gssID = "{0}{1}".format(cloneID, d) - seen[gssID] += 1 - - if seen[gssID] > 1: - gssID = "{0}{1}".format(gssID, seen[gssID]) - - seen[gssID] += 1 - clone[cloneID].add(gssID) - - seen = defaultdict(int) - for rec in SeqIO.parse(fastafile, "fasta"): - # need to populate gssID, mateID, cloneID, seq, plate, row, column - description = rec.description - a = parse_description(description) - direction = a["direction"][0] - sequencer_plate_barcode = a["sequencer_plate_barcode"][0] - sequencer_plate_well_coordinates = a["sequencer_plate_well_coordinates"][0] - w384 = sequencer_plate_well_coordinates - - plate = sequencer_plate_barcode - plate = plateMapping[plate] - d = Directions[direction] - - cloneID = "{0}{1}".format(plate, w384) - gssID = "{0}{1}".format(cloneID, d) - seen[gssID] += 1 - - if seen[gssID] > 1: - logger.error("duplicate key {0} found".format(gssID)) - gssID = "{0}{1}".format(gssID, seen[gssID]) - - othergss = clone[cloneID] - {gssID} - othergss = ", ".join(sorted(othergss)) - vars.update(locals()) - - print(GSSTemplate.format(**vars), file=fw) - - # Write conversion logs to log file - print("{0}\t{1}".format(gssID, description), file=fw_log) - print("=" * 60, file=fw_log) - - logger.debug("A total of {0} seqs written to `{1}`".format(len(seen), fw.name)) - fw.close() - fw_log.close() - - -if __name__ == "__main__": - import doctest - - doctest.testmod() - main() diff --git a/jcvi/apps/gmap.py b/jcvi/apps/gmap.py deleted file mode 100644 index 000e800f..00000000 --- a/jcvi/apps/gmap.py 
+++ /dev/null @@ -1,253 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Run GMAP/GSNAP commands. GMAP/GSNAP manual: - - -""" - -import os.path as op -import sys - -from ..formats.sam import get_prefix - -from .base import ( - ActionDispatcher, - OptionParser, - backup, - logger, - need_update, - sh, -) - - -def main(): - - actions = ( - ("index", "wraps gmap_build"), - ("align", "wraps gsnap"), - ("gmap", "wraps gmap"), - ("bam", "convert GSNAP output to BAM"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def bam(args): - """ - %prog snp input.gsnap ref.fasta - - Convert GSNAP output to BAM. - """ - from jcvi.formats.sizes import Sizes - from jcvi.formats.sam import index - - p = OptionParser(bam.__doc__) - p.set_home("eddyyeh") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - gsnapfile, fastafile = args - EYHOME = opts.eddyyeh_home - pf = gsnapfile.rsplit(".", 1)[0] - uniqsam = pf + ".unique.sam" - samstats = uniqsam + ".stats" - sizesfile = Sizes(fastafile).filename - if need_update((gsnapfile, sizesfile), samstats): - cmd = op.join(EYHOME, "gsnap2gff3.pl") - cmd += " --format sam -i {0} -o {1}".format(gsnapfile, uniqsam) - cmd += " -u -l {0} -p {1}".format(sizesfile, opts.cpus) - sh(cmd) - - index([uniqsam]) - - return uniqsam - - -def check_index(dbfile, supercat=False, go=True): - if supercat: - updated = False - pf = dbfile.rsplit(".", 1)[0] - supercatfile = pf + ".supercat" - coordsfile = supercatfile + ".coords" - if go and need_update(dbfile, supercatfile): - cmd = "tGBS-Generate_Pseudo_Genome.pl" - cmd += " -f {0} -o {1}".format(dbfile, supercatfile) - sh(cmd) - # Rename .coords file since gmap_build will overwrite it - coordsbak = backup(coordsfile) - updated = True - dbfile = supercatfile + ".fasta" - - # dbfile = get_abs_path(dbfile) - dbdir, filename = op.split(dbfile) - if not dbdir: - dbdir = "." 
- dbname = filename.rsplit(".", 1)[0] - safile = op.join(dbdir, "{0}/{0}.genomecomp".format(dbname)) - if dbname == filename: - dbname = filename + ".db" - - if not go: - return dbdir, dbname - - if need_update(dbfile, safile): - cmd = "gmap_build -D {0} -d {1} {2}".format(dbdir, dbname, filename) - sh(cmd) - else: - logger.error("`{0}` exists. `gmap_build` already run.".format(safile)) - - if go and supercat and updated: - sh("mv {0} {1}".format(coordsbak, coordsfile)) - - return dbdir, dbname - - -def index(args): - """ - %prog index database.fasta - ` - Wrapper for `gmap_build`. Same interface. - """ - p = OptionParser(index.__doc__) - p.add_argument( - "--supercat", - default=False, - action="store_true", - help="Concatenate reference to speed up alignment", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (dbfile,) = args - check_index(dbfile, supercat=opts.supercat) - - -def gmap(args): - """ - %prog gmap database.fasta fastafile - - Wrapper for `gmap`. - """ - p = OptionParser(gmap.__doc__) - p.add_argument( - "--cross", default=False, action="store_true", help="Cross-species alignment" - ) - p.add_argument( - "--npaths", - default=0, - type=int, - help="Maximum number of paths to show." - " If set to 0, prints two paths if chimera" - " detected, else one.", - ) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - dbfile, fastafile = args - assert op.exists(dbfile) and op.exists(fastafile) - prefix = get_prefix(fastafile, dbfile) - logfile = prefix + ".log" - gmapfile = prefix + ".gmap.gff3" - - if not need_update((dbfile, fastafile), gmapfile): - logger.error("`{0}` exists. 
`gmap` already run.".format(gmapfile)) - else: - dbdir, dbname = check_index(dbfile) - cmd = "gmap -D {0} -d {1}".format(dbdir, dbname) - cmd += " -f 2 --intronlength=100000" # Output format 2 - cmd += " -t {0}".format(opts.cpus) - cmd += " --npaths {0}".format(opts.npaths) - if opts.cross: - cmd += " --cross-species" - cmd += " " + fastafile - - sh(cmd, outfile=gmapfile, errfile=logfile) - - return gmapfile, logfile - - -def align(args): - """ - %prog align database.fasta read1.fq read2.fq - - Wrapper for `gsnap` single-end or paired-end, depending on the number of - args. - """ - from jcvi.formats.fastq import guessoffset - - p = OptionParser(align.__doc__) - p.add_argument( - "--rnaseq", - default=False, - action="store_true", - help="Input is RNA-seq reads, turn splicing on", - ) - p.add_argument( - "--native", - default=False, - action="store_true", - help="Convert GSNAP output to NATIVE format", - ) - p.set_home("eddyyeh") - p.set_outdir() - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) == 2: - logger.debug("Single-end alignment") - elif len(args) == 3: - logger.debug("Paired-end alignment") - else: - sys.exit(not p.print_help()) - - dbfile, readfile = args[:2] - outdir = opts.outdir - assert op.exists(dbfile) and op.exists(readfile) - prefix = get_prefix(readfile, dbfile) - logfile = op.join(outdir, prefix + ".log") - gsnapfile = op.join(outdir, prefix + ".gsnap") - nativefile = gsnapfile.rsplit(".", 1)[0] + ".unique.native" - - if not need_update((dbfile, readfile), gsnapfile): - logger.error("`{0}` exists. 
`gsnap` already run.".format(gsnapfile)) - else: - dbdir, dbname = check_index(dbfile) - cmd = "gsnap -D {0} -d {1}".format(dbdir, dbname) - cmd += " -B 5 -m 0.1 -i 2 -n 3" # memory, mismatch, indel penalty, nhits - if opts.rnaseq: - cmd += " -N 1" - cmd += " -t {0}".format(opts.cpus) - cmd += " --gmap-mode none --nofails" - if readfile.endswith(".gz"): - cmd += " --gunzip" - try: - offset = "sanger" if guessoffset([readfile]) == 33 else "illumina" - cmd += " --quality-protocol {0}".format(offset) - except AssertionError: - pass - cmd += " " + " ".join(args[1:]) - sh(cmd, outfile=gsnapfile, errfile=logfile) - - if opts.native: - EYHOME = opts.eddyyeh_home - if need_update(gsnapfile, nativefile): - cmd = op.join(EYHOME, "convert2native.pl") - cmd += " --gsnap {0} -o {1}".format(gsnapfile, nativefile) - cmd += " -proc {0}".format(opts.cpus) - sh(cmd) - - return gsnapfile, logfile - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/grid.py b/jcvi/apps/grid.py deleted file mode 100644 index 5a3a3b4f..00000000 --- a/jcvi/apps/grid.py +++ /dev/null @@ -1,664 +0,0 @@ -""" -Codes to submit multiple jobs to JCVI grid engine -""" - -import os.path as op -import sys -import re -import platform - -from multiprocessing import ( - Pool, - Process, - Value, - cpu_count, - get_context, - set_start_method, -) -from multiprocessing.queues import Queue - -from ..formats.base import write_file, must_open - -from .base import ( - ActionDispatcher, - OptionParser, - backup, - listify, - logger, - mkdir, - popen, - sh, -) - - -class SharedCounter(object): - """A synchronized shared counter. - - The locking done by multiprocessing.Value ensures that only a single - process or thread may read or write the in-memory ctypes object. However, - in order to do n += 1, Python performs a read followed by a write, so a - second process may read the old value before the new one is written by the - first process. 
The solution is to use a multiprocessing.Lock to guarantee - the atomicity of the modifications to Value. - - This class comes almost entirely from Eli Bendersky's blog: - http://eli.thegreenplace.net/2012/01/04/shared-counter-with-pythons-multiprocessing/ - """ - - def __init__(self, n=0): - self.count = Value("i", n) - - def increment(self, n=1): - """Increment the counter by n (default = 1)""" - with self.count.get_lock(): - self.count.value += n - - @property - def value(self): - """Return the value of the counter""" - return self.count.value - - -class Queue(Queue): - """A portable implementation of multiprocessing.Queue. - - Because of multithreading / multiprocessing semantics, Queue.qsize() may - raise the NotImplementedError exception on Unix platforms like Mac OS X - where sem_getvalue() is not implemented. This subclass addresses this - problem by using a synchronized shared counter (initialized to zero) and - increasing / decreasing its value every time the put() and get() methods - are called, respectively. This not only prevents NotImplementedError from - being raised, but also allows us to implement a reliable version of both - qsize() and empty(). - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, ctx=get_context()) - self.size = SharedCounter(0) - - def put(self, *args, **kwargs): - self.size.increment(1) - super().put(*args, **kwargs) - - def get(self, *args, **kwargs): - self.size.increment(-1) - return super().get(*args, **kwargs) - - def qsize(self): - """Reliable implementation of multiprocessing.Queue.qsize()""" - return self.size.value - - def empty(self): - """Reliable implementation of multiprocessing.Queue.empty()""" - return not self.qsize() - - -class Parallel(object): - """ - Run a number of commands in parallel. 
- """ - - def __init__(self, cmds, cpus=cpu_count()): - self.cmds = cmds - self.cpus = min(len(cmds), cpus) - - def run(self): - p = Pool(processes=self.cpus) - p.map(sh, self.cmds) - - -class Dependency(object): - """ - Used by MakeManager. - """ - - def __init__(self, source, target, cmds, id, remove=False): - self.id = id - self.source = listify(source) - self.target = listify(target) - self.cmds = listify(cmds) - if remove: - rm_cmd = "rm -f {0}".format(" ".join(self.target)) - self.cmds = [rm_cmd] + self.cmds - - def __str__(self): - source = " ".join(self.source) - target = " ".join(self.target) - # When there are multiple targets, use .INTERMEDIATE - # - if len(self.target) > 1: - intermediate = "{0}.intermediate".format(self.id) - s = "{0} : {1}\n".format(target, intermediate) - s += ".INTERMEDIATE: {0}\n".format(intermediate) - s += "{0} : {1}\n".format(intermediate, source) - else: - s = "{0} : {1}\n".format(target, source) - - for c in self.cmds: - c = c.replace("$", "$$") # Command escaping - s += "\t" + c + "\n" - return s - - -class MakeManager(list): - """ - Write and execute makefile. 
- """ - - def __init__(self, filename="makefile"): - self.makefile = filename - self.targets = set() - self.ndeps = 0 - - def add(self, source, target, cmds, remove=False): - self.ndeps += 1 - d = Dependency(source, target, cmds, self.ndeps, remove=remove) - self.append(d) - self.targets |= set(listify(target)) - - def write(self): - assert self.targets, "No targets specified" - filename = self.makefile - if op.exists(filename): - backup(filename) - fw = open(filename, "w") - print("all : {0}\n".format(" ".join(sorted(self.targets))), file=fw) - for d in self: - print(d, file=fw) - print("clean :\n\trm -rf {0}\n".format(" ".join(self.targets)), file=fw) - fw.close() - logger.debug("Makefile written to `{0}`.".format(self.makefile)) - - def run(self, cpus=1): - if not op.exists(self.makefile): - self.write() - cmd = "make -j {0} -f {1}".format(cpus, self.makefile) - sh(cmd) - - def clean(self): - cmd = "make clean -f {}".format(self.makefile) - sh(cmd) - - -class Jobs(list): - """ - Runs multiple funcion calls on the SAME computer, using multiprocessing. - """ - - def __init__(self, target, args): - - for x in args: - x = listify(x) - self.append(Process(target=target, args=x)) - - def start(self): - for pi in self: - pi.start() - - def join(self): - for pi in self: - pi.join() - - def run(self): - self.start() - self.join() - - -class Poison: - pass - - -class WriteJobs(object): - """ - Runs multiple function calls, but write to the same file. - - Producer-consumer model. 
- """ - - def __init__(self, target, args, filename, cpus=cpu_count()): - # macOS starts process with fork by default: https://zhuanlan.zhihu.com/p/144771768 - if platform.system() == "Darwin": - set_start_method("fork") - - workerq = Queue() - writerq = Queue() - - for a in args: - workerq.put(a) - - cpus = min(cpus, len(args)) - for i in range(cpus): - workerq.put(Poison()) - - self.worker = Jobs(work, args=[(workerq, writerq, target)] * cpus) - self.writer = Process(target=write, args=(workerq, writerq, filename, cpus)) - - def run(self): - self.worker.start() - self.writer.start() - self.worker.join() - self.writer.join() - - -def work(queue_in, queue_out, target): - while True: - a = queue_in.get() - if isinstance(a, Poison): - break - res = target(a) - queue_out.put(res) - queue_out.put(Poison()) - - -def write(queue_in, queue_out, filename, cpus): - from rich.progress import Progress - - fw = must_open(filename, "w") - isize = queue_in.qsize() - logger.debug("A total of {0} items to compute.".format(isize)) - isize = isize or 1 - poisons = 0 - with Progress() as progress: - task = progress.add_task("[green]Processing ...", total=isize) - while True: - res = queue_out.get() - qsize = queue_in.qsize() - progress.update(task, completed=isize - qsize) - if isinstance(res, Poison): - poisons += 1 - if poisons == cpus: # wait all workers finish - break - elif res: - print(res, file=fw) - fw.flush() - fw.close() - - -class GridOpts(dict): - def __init__(self, opts): - export = ( - "pcode", - "queue", - "threaded", - "concurrency", - "outdir", - "name", - "hold_jid", - ) - for e in export: - if e in opts.__dict__: - self[e] = getattr(opts, e) - - -class GridProcess(object): - - pat1 = re.compile(r"Your job (?P[0-9]*) ") - pat2 = re.compile(r"Your job-array (?P\S*) ") - - def __init__( - self, - cmd, - jobid="", - pcode="99999", - queue="default", - threaded=None, - infile=None, - outfile=None, - errfile=None, - arr=None, - concurrency=None, - outdir=".", - 
name=None, - hold_jid=None, - extra_opts=None, - grid_opts=None, - ): - - self.cmd = cmd - self.jobid = jobid - self.queue = queue - self.threaded = threaded - self.infile = infile - self.outfile = outfile or "" - self.errfile = errfile or "" - self.arr = arr - self.concurrency = concurrency - self.outdir = outdir - self.name = name - self.pcode = pcode - self.hold_jid = hold_jid - self.pat = self.pat2 if arr else self.pat1 - self.extra = extra_opts if extra_opts else None - if grid_opts: - self.__dict__.update(GridOpts(grid_opts)) - - def __str__(self): - return "\t".join((x for x in (self.jobid, self.cmd, self.outfile) if x)) - - def build(self): - # Shell commands - if "|" in self.cmd or "&&" in self.cmd or "||" in self.cmd: - quote = '"' if "'" in self.cmd else "'" - self.cmd = "sh -c {1}{0}{1}".format(self.cmd, quote) - - # qsub command (the project code is specific to jcvi) - qsub = "qsub -P {0} -cwd".format(self.pcode) - if self.queue != "default": - qsub += " -l {0}".format(self.queue) - if self.threaded: - qsub += " -pe threaded {0}".format(self.threaded) - if self.arr: - assert 1 <= self.arr < 100000 - qsub += " -t 1-{0}".format(self.arr) - if self.concurrency: - qsub += " -tc {0}".format(self.concurrency) - if self.name: - qsub += ' -N "{0}"'.format(self.name) - if self.hold_jid: - param = "-hold_jid_ad" if self.arr else "-hold_jid" - qsub += " {0} {1}".format(param, self.hold_jid) - if self.extra: - qsub += " {0}".format(self.extra) - - # I/O - infile = self.infile - outfile = self.outfile - errfile = self.errfile - outdir = self.outdir - mkdir(outdir) - redirect_same = outfile and (outfile == errfile) - - if infile: - qsub += " -i {0}".format(infile) - if outfile: - self.outfile = op.join(outdir, outfile) - qsub += " -o {0}".format(self.outfile) - if errfile: - if redirect_same: - qsub += " -j y" - else: - self.errfile = op.join(outdir, errfile) - qsub += " -e {0}".format(self.errfile) - - cmd = " ".join((qsub, self.cmd)) - return cmd - - def 
start(self): - cmd = self.build() - # run the command and get the job-ID (important) - output = popen(cmd, debug=False).read() - - if output.strip() != "": - self.jobid = re.search(self.pat, output).group("id") - else: - self.jobid = "-1" - - msg = "[{0}] {1}".format(self.jobid, self.cmd) - if self.infile: - msg += " < {0} ".format(self.infile) - if self.outfile: - backup(self.outfile) - msg += " > {0} ".format(self.outfile) - if self.errfile: - backup(self.errfile) - msg += " 2> {0} ".format(self.errfile) - - logger.debug(msg) - - -class Grid(list): - def __init__(self, cmds, outfiles=[]): - - assert cmds, "Commands empty!" - if not outfiles: - outfiles = [None] * len(cmds) - - for cmd, outfile in zip(cmds, outfiles): - self.append(GridProcess(cmd, outfile=outfile)) - - def run(self): - for pi in self: - pi.start() - - -PBS_STANZA = """ -#PBS -q {0} -#PBS -J 1-{1} -#PBS -l select=1:ncpus={2}:mem=23gb -#PBS -l pvmem=23gb -#PBS -l walltime=100:00:00 -""" - -arraysh = """ -CMD=`awk "NR==$SGE_TASK_ID" {0}` -$CMD""" - -arraysh_ua = ( - PBS_STANZA - + """ -cd $PBS_O_WORKDIR -CMD=`awk "NR==$PBS_ARRAY_INDEX" {3}` -$CMD""" -) - - -def get_grid_engine(): - cmd = "qsub --version" - ret = popen(cmd, debug=False).read().decode("utf-8").upper() - return "PBS" if "PBS" in ret else "SGE" - - -def main(): - - actions = ( - ("run", "run a normal command on grid"), - ("array", "run an array job"), - ("kill", "wrapper around the `qdel` command"), - ) - - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def array(args): - """ - %prog array commands.list - - Parallelize a set of commands on grid using array jobs. 
- """ - p = OptionParser(array.__doc__) - p.set_grid_opts(array=True) - p.set_params(prog="grid") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (cmds,) = args - fp = open(cmds) - N = sum(1 for _ in fp) - fp.close() - - pf = cmds.rsplit(".", 1)[0] - runfile = pf + ".sh" - assert runfile != cmds, "Commands list file should not have a `.sh` extension" - - engine = get_grid_engine() - threaded = opts.threaded or 1 - contents = ( - arraysh.format(cmds) - if engine == "SGE" - else arraysh_ua.format(opts.queue, N, threaded, cmds) - ) - write_file(runfile, contents) - - if engine == "PBS": - return - - outfile = "{0}.{1}.out".format(pf, r"\$TASK_ID") - errfile = "{0}.{1}.err".format(pf, r"\$TASK_ID") - p = GridProcess( - "sh {0}".format(runfile), - outfile=outfile, - errfile=errfile, - arr=N, - extra_opts=opts.extra, - grid_opts=opts, - ) - p.start() - - -def run(args): - """ - %prog run command ::: file1 file2 - - Parallelize a set of commands on grid. The syntax is modeled after GNU - parallel - - {} - input line - {.} - input line without extension - {_} - input line first part - {/} - basename of input line - {/.} - basename of input line without extension - {/_} - basename of input line first part - {#} - sequence number of job to run - ::: - Use arguments from the command line as input source instead of stdin - (standard input). 
- - If file name is `t/example.tar.gz`, then, - {} is "t/example.tar.gz", {.} is "t/example.tar", {_} is "t/example" - {/} is "example.tar.gz", {/.} is "example.tar", {/_} is "example" - - A few examples: - ls -1 *.fastq | %prog run process {} {.}.pdf # use stdin - %prog run process {} {.}.pdf ::: *fastq # use ::: - %prog run "zcat {} > {.}" ::: *.gz # quote redirection - %prog run < commands.list # run a list of commands - """ - p = OptionParser(run.__doc__) - p.set_grid_opts() - p.set_params(prog="grid") - opts, args = p.parse_args(args) - - if len(args) == 0: - sys.exit(not p.print_help()) - - sep = ":::" - if sep in args: - sepidx = args.index(sep) - filenames = args[sepidx + 1 :] - args = args[:sepidx] - if not filenames: - filenames = [""] - else: - filenames = sys.stdin if not sys.stdin.isatty() else [""] - - cmd = " ".join(args) - - cmds = [] if filenames else [(cmd, None)] - for i, filename in enumerate(filenames): - filename = filename.strip() - noextname = filename.rsplit(".", 1)[0] - prefix, basename = op.split(filename) - basenoextname = basename.rsplit(".", 1)[0] - basefirstname = basename.split(".")[0] - firstname = op.join(prefix, basefirstname) - ncmd = cmd - - if "{" in ncmd: - ncmd = ncmd.replace("{}", filename) - else: - ncmd += " " + filename - - ncmd = ncmd.replace("{.}", noextname) - ncmd = ncmd.replace("{_}", firstname) - ncmd = ncmd.replace("{/}", basename) - ncmd = ncmd.replace("{/.}", basenoextname) - ncmd = ncmd.replace("{/_}", basefirstname) - ncmd = ncmd.replace("{#}", str(i)) - - outfile = None - if ">" in ncmd: - ncmd, outfile = ncmd.split(">", 1) - ncmd, outfile = ncmd.strip(), outfile.strip() - - ncmd = ncmd.strip() - cmds.append((ncmd, outfile)) - - for ncmd, outfile in cmds: - p = GridProcess(ncmd, outfile=outfile, extra_opts=opts.extra, grid_opts=opts) - p.start() - - -def guess_method(tag): - from jcvi.formats.base import is_number - - jobids = tag.split(",") - for jobid in jobids: - if not is_number(jobid): - return "pattern" 
- return "jobid" - - -def kill(args): - """ - %prog kill [options] JOBNAMEPAT/JOBIDs - - Kill jobs based on JOBNAME pattern matching (case-sensitive) - or list of JOBIDs (comma separated) - - Examples: - %prog kill "pyth*" # Use regex - %prog kill 160253,160245,160252 # Use list of job ids - %prog kill all # Everything - """ - import shlex - from jcvi.apps.base import sh, getusername - from subprocess import check_output, CalledProcessError - import xml.etree.ElementTree as ET - - valid_methods = ("pattern", "jobid") - p = OptionParser(kill.__doc__) - p.add_argument( - "--method", - choices=valid_methods, - help="Identify jobs based on [default: guess]", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - username = getusername() - (tag,) = args - tag = tag.strip() - - if tag == "all": - sh("qdel -u {0}".format(username)) - return - - valid_jobids = set() - method = opts.method or guess_method(tag) - if method == "jobid": - jobids = tag.split(",") - valid_jobids |= set(jobids) - elif method == "pattern": - qsxmlcmd = 'qstat -u "{}" -j "{}" -nenv -njd -xml'.format(username, tag) - try: - qsxml = check_output(shlex.split(qsxmlcmd)).strip() - except CalledProcessError as e: - qsxml = None - logger.debug(f'No jobs matching the pattern "{tag}": {e}') - - if qsxml is not None: - for job in ET.fromstring(qsxml).findall("djob_info"): - for elem in job.findall("element"): - jobid = elem.find("JB_job_number").text - valid_jobids.add(jobid) - - if valid_jobids: - sh("qdel {0}".format(",".join(valid_jobids))) - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/lastz.py b/jcvi/apps/lastz.py deleted file mode 100755 index 3b6ec2c5..00000000 --- a/jcvi/apps/lastz.py +++ /dev/null @@ -1,272 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -import os.path as op -import sys - -from math import exp -from multiprocessing import Lock, Pool - -from ..formats.base import must_open - -from .grid import Jobs -from .base 
import OptionParser, Popen, logger, mkdir - - -# LASTZ options -Darkspace = "nameparse=darkspace" -Unmask = "unmask" -Multiple = "multiple" -Subsample = "subsample={0}/{1}" -Lastz_template = "{0} --ambiguous=iupac {1}[{2}] {3}[{4}]" - -blast_fields = ( - "query,subject,pctid,hitlen,nmismatch,ngaps," - "qstart,qstop,sstart,sstop,evalue,score" -) - -lastz_fields = ( - "name2,name1,identity,nmismatch,ngap," - "start2+,end2+,strand2,start1,end1,strand1,score" -) - -# For assembly-assembly comparison, Bob Harris recommended: -similarOptions = ( - " --seed=match12 --notransition --step=20 --exact=50 " - "--identity=99 --matchcount=1000" -) - -# conversion between blastz and ncbi is taken from Kent src -# src/lib/blastOut.c -# this is not rigorous definition of e-value (assumes human genome) !! -blastz_score_to_ncbi_bits = lambda bz_score: bz_score * 0.0205 - - -def blastz_score_to_ncbi_expectation(bz_score): - bits = blastz_score_to_ncbi_bits(bz_score) - log_prob = -bits * 0.693147181 - # this number looks like.. human genome? 
- return 3.0e9 * exp(log_prob) - - -def lastz_to_blast(row): - """ - Convert the lastz tabular to the blast tabular, see headers above - Obsolete after LASTZ version 1.02.40 - """ - atoms = row.strip().split("\t") - ( - name1, - name2, - coverage, - identity, - nmismatch, - ngap, - start1, - end1, - strand1, - start2, - end2, - strand2, - score, - ) = atoms - identity = identity.replace("%", "") - hitlen = coverage.split("/")[1] - score = float(score) - same_strand = strand1 == strand2 - if not same_strand: - start2, end2 = end2, start2 - - evalue = blastz_score_to_ncbi_expectation(score) - score = blastz_score_to_ncbi_bits(score) - evalue, score = "%.2g" % evalue, "%.1f" % score - return "\t".join( - ( - name1, - name2, - identity, - hitlen, - nmismatch, - ngap, - start1, - end1, - start2, - end2, - evalue, - score, - ) - ) - - -def add_mask(ref_tags, qry_tags, mask=False): - if not mask: - ref_tags.append(Unmask) - qry_tags.append(Unmask) - - ref_tags = ",".join(ref_tags) - qry_tags = ",".join(qry_tags) - - return ref_tags, qry_tags - - -def lastz_2bit(t): - """ - Used for formats other than BLAST, i.e. lav, maf, etc. which requires the - database file to contain a single FASTA record. 
- """ - bfasta_fn, afasta_fn, outfile, lastz_bin, extra, mask, format = t - - ref_tags = [Darkspace] - qry_tags = [Darkspace] - ref_tags, qry_tags = add_mask(ref_tags, qry_tags, mask=mask) - - lastz_cmd = Lastz_template.format( - lastz_bin, bfasta_fn, ref_tags, afasta_fn, qry_tags - ) - if extra: - lastz_cmd += " " + extra.strip() - - lastz_cmd += " --format={0}".format(format) - proc = Popen(lastz_cmd) - out_fh = open(outfile, "w") - - logger.debug("job <%d> started: %s" % (proc.pid, lastz_cmd)) - for row in proc.stdout: - out_fh.write(row) - out_fh.flush() - logger.debug("job <%d> finished" % proc.pid) - - -def lastz(k, n, bfasta_fn, afasta_fn, out_fh, lock, lastz_bin, extra, mask=False): - - ref_tags = [Multiple, Darkspace] - qry_tags = [Darkspace] - if n != 1: - qry_tags.append(Subsample.format(k, n)) - - ref_tags, qry_tags = add_mask(ref_tags, qry_tags, mask=mask) - - lastz_cmd = Lastz_template.format( - lastz_bin, bfasta_fn, ref_tags, afasta_fn, qry_tags - ) - if extra: - lastz_cmd += " " + extra.strip() - - lastz_cmd += " --format=general-:%s" % lastz_fields - # The above conversion is no longer necessary after LASTZ v1.02.40 - # (of which I contributed a patch) - # lastz_cmd += " --format=BLASTN-" - - proc = Popen(lastz_cmd) - - logger.debug("job <%d> started: %s" % (proc.pid, lastz_cmd)) - for row in proc.stdout: - row = lastz_to_blast(row) - lock.acquire() - print(row, file=out_fh) - out_fh.flush() - lock.release() - logger.debug("job <%d> finished" % proc.pid) - - -def main(): - """ - %prog database.fa query.fa [options] - - Run LASTZ similar to the BLAST interface, and generates -m8 tabular format - """ - p = OptionParser(main.__doc__) - - supported_formats = tuple( - x.strip() - for x in "lav, lav+text, axt, axt+, maf, maf+, maf-, sam, softsam, " - "sam-, softsam-, cigar, BLASTN, BLASTN-, differences, rdotplot, text".split(",") - ) - - p.add_argument( - "--format", - default="BLASTN-", - choices=supported_formats, - help="Ooutput format", - ) - 
p.add_argument("--path", dest="lastz_path", default=None, help="specify LASTZ path") - p.add_argument( - "--mask", - dest="mask", - default=False, - action="store_true", - help="treat lower-case letters as mask info", - ) - p.add_argument( - "--similar", - default=False, - action="store_true", - help="Use options tuned for close comparison", - ) - p.set_cpus(cpus=32) - p.set_params() - p.set_outfile() - opts, args = p.parse_args() - - if len(args) != 2: - sys.exit(p.print_help()) - - bfasta_fn, afasta_fn = args - for fn in (afasta_fn, bfasta_fn): - assert op.exists(fn) - - afasta_fn = op.abspath(afasta_fn) - bfasta_fn = op.abspath(bfasta_fn) - out_fh = must_open(opts.outfile, "w") - - extra = opts.extra - if opts.similar: - extra += similarOptions - - lastz_bin = opts.lastz_path or "lastz" - assert lastz_bin.endswith("lastz"), "You need to include lastz in your path" - - mask = opts.mask - cpus = opts.cpus - logger.debug("Dispatch job to %d cpus" % cpus) - format = opts.format - blastline = format == "BLASTN-" - - # The axt, maf, etc. format can only be run on splitted database (i.e. one - # FASTA record per file). The splitted files are then parallelized for the - # computation, as opposed to splitting queries through "subsample". 
- outdir = "outdir" - if not blastline: - from jcvi.formats.fasta import Fasta - from jcvi.formats.chain import faToTwoBit - - mkdir(outdir) - - bfasta_2bit = faToTwoBit(bfasta_fn) - bids = list(Fasta(bfasta_fn, lazy=True).iterkeys_ordered()) - - apf = op.basename(afasta_fn).split(".")[0] - args = [] - # bfasta_fn, afasta_fn, outfile, lastz_bin, extra, mask, format - for id in bids: - bfasta = "/".join((bfasta_2bit, id)) - outfile = op.join(outdir, "{0}.{1}.{2}".format(apf, id, format)) - args.append((bfasta, afasta_fn, outfile, lastz_bin, extra, mask, format)) - - p = Pool(cpus) - p.map(lastz_2bit, args) - - return - - lock = Lock() - - args = [ - (k + 1, cpus, bfasta_fn, afasta_fn, out_fh, lock, lastz_bin, extra, mask) - for k in range(cpus) - ] - g = Jobs(target=lastz, args=args) - g.run() - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/mask.py b/jcvi/apps/mask.py deleted file mode 100755 index 448f07af..00000000 --- a/jcvi/apps/mask.py +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Mask low complexity regions in the genome. 
-""" -import os.path as op -import sys - -from ..formats.fasta import Fasta -from ..utils.cbook import depends, percentage - -from .base import ActionDispatcher, OptionParser, sh - - -@depends -def wm_mk_counts(infile=None, outfile=None): - cmd = "windowmasker -in {0} -mk_counts".format(infile) - cmd += " -out {0}".format(outfile) - sh(cmd) - - -@depends -def wm_mk_masks(infile=None, outfile=None, genomefile=None): - cmd = "windowmasker -in {0} -ustat {1}".format(genomefile, infile) - cmd += " -outfmt fasta -dust T -out {0}".format(outfile) - sh(cmd) - - -def hardmask(fastafile): - cmd = "maskOutFa {0} hard {0}".format(fastafile) - sh(cmd) - - -def main(): - - actions = ( - ("mask", "use windowmasker to mask low-complexity bases"), - ("summary", "report the number of bases and sequences masked"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def summary(args): - """ - %prog summary fastafile - - Report the number of bases and sequences masked. - """ - p = OptionParser(summary.__doc__) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - f = Fasta(fastafile, index=False) - - halfmaskedseqs = set() - allmasked = 0 - allbases = 0 - cutoff = 50 - for key, seq in f.iteritems(): - masked = 0 - for base in seq: - if base not in "AGCT": - masked += 1 - seqlen = len(seq) - if masked * 100.0 / seqlen > cutoff: - halfmaskedseqs.add(key) - allmasked += masked - allbases += seqlen - - seqnum = len(f) - maskedseqnum = len(halfmaskedseqs) - - print( - "Total masked bases: {0}".format(percentage(allmasked, allbases)), - file=sys.stderr, - ) - print( - "Total masked sequences (contain > {0}% masked): {1}".format( - cutoff, percentage(maskedseqnum, seqnum) - ), - file=sys.stderr, - ) - - -def mask(args): - """ - %prog mask fastafile - - This script pipelines the windowmasker in NCBI BLAST+. 
Masked fasta file - will have an appended suffix of .mask with all the low-complexity bases masked - (default to lower case, set --hard for hardmasking). - """ - p = OptionParser(mask.__doc__) - p.add_argument( - "--hard", - dest="hard", - default=False, - action="store_true", - help="Hard mask the low-complexity bases", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (genomefile,) = args - - # entire pipeline - countsfile = genomefile + ".counts" - wm_mk_counts(infile=genomefile, outfile=countsfile) - - maskedfastafile = "%s.masked%s" % op.splitext(genomefile) - wm_mk_masks(infile=countsfile, outfile=maskedfastafile, genomefile=genomefile) - - if opts.hard: - hardmask(maskedfastafile) - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/phylo.py b/jcvi/apps/phylo.py deleted file mode 100644 index e93485e7..00000000 --- a/jcvi/apps/phylo.py +++ /dev/null @@ -1,1204 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Construct and visualize phylogenetic trees from: -1. MCSCAN output -2. CDS sequences in FASTA format - -Options are provided for each step: -1. sequence alignment: - ClustalW2 or MUSCLE (wrapped on Biopython) -2. alignment editting: - GBlocks (optional) -3. build trees: - NJ: PHYLIP - ML: RAxML or PHYML - -Optional steps: -- reroot tree -- alternative topology test (SH test) -- TreeFix - -The external software needs be installed first. 
-""" -import sys -import os -import os.path as op -import re -import warnings - -from math import ceil -from itertools import chain -from functools import partial - -import numpy as np -from ete3 import Tree -from Bio import SeqIO, AlignIO -from Bio.Data import CodonTable -from Bio.Emboss.Applications import ( - FSeqBootCommandline, - FDNADistCommandline, - FNeighborCommandline, - FConsenseCommandline, -) -from Bio.Phylo.Applications import PhymlCommandline, RaxmlCommandline - -from ..compara.ks import ( - AbstractCommandline, - find_first_isoform, - run_mrtrans, - clustal_align_protein, - muscle_align_protein, -) -from ..formats.base import must_open, DictFile, LineFile -from ..formats.fasta import Fasta -from ..utils.orderedcollections import OrderedDict -from ..graphics.base import plt, savefig - -from .base import ActionDispatcher, OptionParser, getpath, logger, mkdir, sh - - -GBLOCKS_BIN = partial(getpath, name="GBLOCKS", warn="warn") -PHYML_BIN = partial(getpath, name="PHYML", warn="warn") -RAXML_BIN = partial(getpath, name="RAXML", warn="warn") -FPHYLIP_BIN = partial(getpath, name="FPHYLIP", warn="warn") -TREEFIX_BIN = partial(getpath, name="TREEFIX", warn="warn") - - -class GblocksCommandline(AbstractCommandline): - """Little commandline for Gblocks - (http://molevol.cmima.csic.es/castresana/Gblocks.html). - - Accepts alignment in FASTA or NBRF/PIR format. 
- """ - - def __init__( - self, aln_file, aln_type="c", command=GBLOCKS_BIN("Gblocks"), **kwargs - ): - - self.aln_file = aln_file - self.aln_type = aln_type - self.command = command - - params = {"b4": 5, "b5": "h", "p": "n"} - params.update(kwargs) - self.parameters = ["-{0}={1}".format(k, v) for k, v in params.items()] - - def __str__(self): - return ( - self.command - + " %s -t=%s " % (self.aln_file, self.aln_type) - + " ".join(self.parameters) - ) - - -class FfitchCommandline(AbstractCommandline): - """Little commandline for ffitch in EMBOSS - (http://www.molgen.mpg.de/~beck/embassy/phylipnew/ffitch.html). - - Infer branch lengths of tree. - """ - - def __init__( - self, - datafile, - outtreefile, - command=FPHYLIP_BIN("ffitch"), - intreefile=None, - **kwargs - ): - - self.datafile = datafile - self.outtreefile = outtreefile - self.outfile = datafile.rsplit(".", 1)[0] + ".ffitch" - self.command = command - self.intreefile = intreefile if intreefile else '""' - - self.parameters = ["-{0} {1}".format(k, v) for k, v in kwargs.items()] - - def __str__(self): - return ( - self.command - + " -datafile %s -intreefile %s -outfile %s -outtreefile %s " - % ( - self.datafile, - self.intreefile, - self.outfile, - self.outtreefile, - ) - + " ".join(self.parameters) - ) - - -class TreeFixCommandline(AbstractCommandline): - """Little commandline for TreeFix - (http://compbio.mit.edu/treefix/). 
- """ - - def __init__( - self, - input, - stree_file, - smap_file, - a_ext, - command=TREEFIX_BIN("treefix"), - r=False, - **kwargs - ): - - self.input = input - self.s = stree_file - self.S = smap_file - self.A = a_ext - self.command = command - - params = {"V": 1, "l": input.rsplit(".", 1)[0] + ".treefix.log"} - params.update(kwargs) - self.parameters = ["-{0} {1}".format(k, v) for k, v in params.items()] - if r: - self.parameters.append("-r") - - def __str__(self): - return ( - self.command - + " -s %s -S %s -A %s " % (self.s, self.S, self.A) - + " ".join(self.parameters) - + " %s" % self.input - ) - - -def run_treefix( - input, - stree_file, - smap_file, - a_ext=".fasta", - o_ext=".dnd", - n_ext=".treefix.dnd", - **kwargs -): - """ - get the ML tree closest to the species tree - """ - cl = TreeFixCommandline( - input=input, - stree_file=stree_file, - smap_file=smap_file, - a_ext=a_ext, - o=o_ext, - n=n_ext, - **kwargs - ) - outtreefile = input.rsplit(o_ext, 1)[0] + n_ext - print("TreeFix:", cl, file=sys.stderr) - r, e = cl.run() - - if e: - print("***TreeFix could not run", file=sys.stderr) - return None - else: - logger.debug("new tree written to {0}".format(outtreefile)) - return outtreefile - - -def run_gblocks(align_fasta_file, **kwargs): - """ - remove poorly aligned positions and divergent regions with Gblocks - """ - cl = GblocksCommandline(aln_file=align_fasta_file, **kwargs) - r, e = cl.run() - - print("Gblocks:", cl, file=sys.stderr) - - if e: - print("***Gblocks could not run", file=sys.stderr) - return None - else: - print(r, file=sys.stderr) - alignp = re.sub( - r".*Gblocks alignment:.*\(([0-9]{1,3}) %\).*", r"\1", r, flags=re.DOTALL - ) - alignp = int(alignp) - if alignp <= 10: - print( - "** WARNING ** Only %s %% positions retained by Gblocks. " - "Results aborted. 
Using original alignment instead.\n" % alignp, - file=sys.stderr, - ) - return None - else: - return align_fasta_file + "-gb" - - -def run_ffitch(distfile, outtreefile, intreefile=None, **kwargs): - """ - Infer tree branch lengths using ffitch in EMBOSS PHYLIP - """ - cl = FfitchCommandline( - datafile=distfile, outtreefile=outtreefile, intreefile=intreefile, **kwargs - ) - r, e = cl.run() - - if e: - print("***ffitch could not run", file=sys.stderr) - return None - else: - print("ffitch:", cl, file=sys.stderr) - return outtreefile - - -def smart_reroot(treefile, outgroupfile, outfile, format=0): - """ - simple function to reroot Newick format tree using ete2 - - Tree reading format options see here: - http://packages.python.org/ete2/tutorial/tutorial_trees.html#reading-newick-trees - """ - tree = Tree(treefile, format=format) - leaves = [t.name for t in tree.get_leaves()][::-1] - outgroup = [] - for o in must_open(outgroupfile): - o = o.strip() - for leaf in leaves: - if leaf[: len(o)] == o: - outgroup.append(leaf) - if outgroup: - break - - if not outgroup: - print( - "Outgroup not found. Tree {0} cannot be rerooted.".format(treefile), - file=sys.stderr, - ) - return treefile - - try: - tree.set_outgroup(tree.get_common_ancestor(*outgroup)) - except ValueError: - assert type(outgroup) == list - outgroup = outgroup[0] - tree.set_outgroup(outgroup) - tree.write(outfile=outfile, format=format) - - logger.debug("Rerooted tree printed to {0}".format(outfile)) - return outfile - - -def build_nj_phylip(alignment, outfile, outgroup, work_dir="."): - """ - build neighbor joining tree of DNA seqs with PHYLIP in EMBOSS - - PHYLIP manual - http://evolution.genetics.washington.edu/phylip/doc/ - """ - - phy_file = op.join(work_dir, "work", "aln.phy") - try: - AlignIO.write(alignment, open(phy_file, "w"), "phylip") - except ValueError: - print( - "Repeated seq name, possibly due to truncation. 
NJ tree not built.", - file=sys.stderr, - ) - return None - - seqboot_out = phy_file.rsplit(".", 1)[0] + ".fseqboot" - seqboot_cl = FSeqBootCommandline( - FPHYLIP_BIN("fseqboot"), - sequence=phy_file, - outfile=seqboot_out, - seqtype="d", - reps=100, - seed=12345, - ) - stdout, stderr = seqboot_cl() - logger.debug("Resampling alignment: %s" % seqboot_cl) - - dnadist_out = phy_file.rsplit(".", 1)[0] + ".fdnadist" - dnadist_cl = FDNADistCommandline( - FPHYLIP_BIN("fdnadist"), sequence=seqboot_out, outfile=dnadist_out, method="f" - ) - stdout, stderr = dnadist_cl() - logger.debug("Calculating distance for bootstrapped alignments: %s" % dnadist_cl) - - neighbor_out = phy_file.rsplit(".", 1)[0] + ".njtree" - e = phy_file.rsplit(".", 1)[0] + ".fneighbor" - neighbor_cl = FNeighborCommandline( - FPHYLIP_BIN("fneighbor"), - datafile=dnadist_out, - outfile=e, - outtreefile=neighbor_out, - ) - stdout, stderr = neighbor_cl() - logger.debug("Building Neighbor Joining tree: %s" % neighbor_cl) - - consense_out = phy_file.rsplit(".", 1)[0] + ".consensustree.nodesupport" - e = phy_file.rsplit(".", 1)[0] + ".fconsense" - consense_cl = FConsenseCommandline( - FPHYLIP_BIN("fconsense"), - intreefile=neighbor_out, - outfile=e, - outtreefile=consense_out, - ) - stdout, stderr = consense_cl() - logger.debug("Building consensus tree: %s" % consense_cl) - - # distance without bootstrapping - dnadist_out0 = phy_file.rsplit(".", 1)[0] + ".fdnadist0" - dnadist_cl0 = FDNADistCommandline( - FPHYLIP_BIN("fdnadist"), sequence=phy_file, outfile=dnadist_out0, method="f" - ) - stdout, stderr = dnadist_cl0() - logger.debug("Calculating distance for original alignment: %s" % dnadist_cl0) - - # infer branch length on consensus tree - consensustree1 = phy_file.rsplit(".", 1)[0] + ".consensustree.branchlength" - run_ffitch( - distfile=dnadist_out0, outtreefile=consensustree1, intreefile=consense_out - ) - - # write final tree - ct_s = Tree(consense_out) - - if outgroup: - t1 = consensustree1 + ".rooted" - 
t2 = smart_reroot(consensustree1, outgroup, t1) - if t2 == t1: - outfile = outfile.replace(".unrooted", "") - ct_b = Tree(t2) - else: - ct_b = Tree(consensustree1) - - nodesupport = {} - for node in ct_s.traverse("postorder"): - node_children = tuple(sorted([f.name for f in node])) - if len(node_children) > 1: - nodesupport[node_children] = node.dist / 100.0 - - for k, v in nodesupport.items(): - ct_b.get_common_ancestor(*k).support = v - print(ct_b) - ct_b.write(format=0, outfile=outfile) - - try: - s = op.getsize(outfile) - except OSError: - s = 0 - if s: - logger.debug("NJ tree printed to %s" % outfile) - return outfile, phy_file - else: - logger.debug("Something was wrong. NJ tree was not built.") - return None - - -def build_ml_phyml(alignment, outfile, work_dir=".", **kwargs): - """ - build maximum likelihood tree of DNA seqs with PhyML - """ - phy_file = op.join(work_dir, "work", "aln.phy") - AlignIO.write(alignment, open(phy_file, "w"), "phylip-relaxed") - - phyml_cl = PhymlCommandline(cmd=PHYML_BIN("phyml"), input=phy_file, **kwargs) - logger.debug("Building ML tree using PhyML: %s" % phyml_cl) - stdout, stderr = phyml_cl() - - tree_file = phy_file + "_phyml_tree.txt" - if not op.exists(tree_file): - print("***PhyML failed.", file=sys.stderr) - return None - sh("cp {0} {1}".format(tree_file, outfile), log=False) - - logger.debug("ML tree printed to %s" % outfile) - - return outfile, phy_file - - -def build_ml_raxml(alignment, outfile, work_dir=".", **kwargs): - """ - build maximum likelihood tree of DNA seqs with RAxML - """ - work_dir = op.join(work_dir, "work") - mkdir(work_dir) - phy_file = op.join(work_dir, "aln.phy") - AlignIO.write(alignment, open(phy_file, "w"), "phylip-relaxed") - - raxml_work = op.abspath(op.join(op.dirname(phy_file), "raxml_work")) - mkdir(raxml_work) - raxml_cl = RaxmlCommandline( - cmd=RAXML_BIN("raxmlHPC"), - sequences=phy_file, - algorithm="a", - model="GTRGAMMA", - parsimony_seed=12345, - rapid_bootstrap_seed=12345, - 
num_replicates=100, - name="aln", - working_dir=raxml_work, - **kwargs - ) - - logger.debug("Building ML tree using RAxML: %s" % raxml_cl) - stdout, stderr = raxml_cl() - - tree_file = "{0}/RAxML_bipartitions.aln".format(raxml_work) - if not op.exists(tree_file): - print("***RAxML failed.", file=sys.stderr) - sh("rm -rf %s" % raxml_work, log=False) - return None - sh("cp {0} {1}".format(tree_file, outfile), log=False) - - logger.debug("ML tree printed to %s" % outfile) - sh("rm -rf %s" % raxml_work) - - return outfile, phy_file - - -def SH_raxml(reftree, querytree, phy_file, shout="SH_out.txt"): - """ - SH test using RAxML - - querytree can be a single tree or a bunch of trees (eg. from bootstrapping) - """ - assert op.isfile(reftree) - shout = must_open(shout, "a") - - raxml_work = op.abspath(op.join(op.dirname(phy_file), "raxml_work")) - mkdir(raxml_work) - raxml_cl = RaxmlCommandline( - cmd=RAXML_BIN("raxmlHPC"), - sequences=phy_file, - algorithm="h", - model="GTRGAMMA", - name="SH", - starting_tree=reftree, - bipartition_filename=querytree, - working_dir=raxml_work, - ) - - logger.debug("Running SH test in RAxML: %s" % raxml_cl) - o, stderr = raxml_cl() - # hard coded - try: - pval = re.search("(Significantly.*:.*)", o).group(0) - except: - print("SH test failed.", file=sys.stderr) - else: - pval = pval.strip().replace("\t", " ").replace("%", "\%") - print("{0}\t{1}".format(op.basename(querytree), pval), file=shout) - logger.debug("SH p-value appended to %s" % shout.name) - - shout.close() - return shout.name - - -CODON_TRANSLATION = CodonTable.standard_dna_table.forward_table -FOURFOLD = { - "CTT": "L", - "ACA": "T", - "ACG": "T", - "CCT": "P", - "CTG": "L", - "CTA": "L", - "ACT": "T", - "CCG": "P", - "CCA": "P", - "CCC": "P", - "GGT": "G", - "CGA": "R", - "CGC": "R", - "CGG": "R", - "GGG": "G", - "GGA": "G", - "GGC": "G", - "CGT": "R", - "GTA": "V", - "GTC": "V", - "GTG": "V", - "GTT": "V", - "CTC": "L", - "TCT": "S", - "TCG": "S", - "TCC": "S", - "ACC": "T", 
- "TCA": "S", - "GCA": "A", - "GCC": "A", - "GCG": "A", - "GCT": "A", -} - - -def subalignment(alnfle, subtype, alntype="fasta"): - """ - Subset synonymous or fourfold degenerate sites from an alignment - - input should be a codon alignment - """ - aln = AlignIO.read(alnfle, alntype) - alnlen = aln.get_alignment_length() - nseq = len(aln) - subaln = None - subalnfile = alnfle.rsplit(".", 1)[0] + "_{0}.{1}".format(subtype, alntype) - - if subtype == "synonymous": - for j in range(0, alnlen, 3): - aa = None - for i in range(nseq): - codon = str(aln[i, j : j + 3].seq) - if codon not in CODON_TRANSLATION: - break - if aa and CODON_TRANSLATION[codon] != aa: - break - else: - aa = CODON_TRANSLATION[codon] - else: - if subaln is None: - subaln = aln[:, j : j + 3] - else: - subaln += aln[:, j : j + 3] - - if subtype == "fourfold": - for j in range(0, alnlen, 3): - for i in range(nseq): - codon = str(aln[i, j : j + 3].seq) - if codon not in FOURFOLD: - break - else: - if subaln is None: - subaln = aln[:, j : j + 3] - else: - subaln += aln[:, j : j + 3] - - if subaln: - AlignIO.write(subaln, subalnfile, alntype) - return subalnfile - else: - print("No sites {0} selected.".format(subtype), file=sys.stderr) - return None - - -def merge_rows_local( - filename, ignore=".", colsep="\t", local=10, fieldcheck=True, fsep="," -): - """ - merge overlapping rows within given row count distance - """ - fw = must_open(filename + ".merged", "w") - rows = open(filename).readlines() - rows = [row.strip().split(colsep) for row in rows] - l = len(rows[0]) - - for rowi, row in enumerate(rows): - n = len(rows) - i = rowi + 1 - while i <= min(rowi + local, n - 1): - merge = 1 - row2 = rows[i] - for j in range(l): - a = row[j] - b = row2[j] - if fieldcheck: - a = set(a.split(fsep)) - a = fsep.join(sorted(list(a))) - b = set(b.split(fsep)) - b = fsep.join(sorted(list(b))) - - if all([a != ignore, b != ignore, a not in b, b not in a]): - merge = 0 - i += 1 - break - - if merge: - for x in range(l): 
- if row[x] == ignore: - rows[rowi][x] = row2[x] - elif row[x] in row2[x]: - rows[rowi][x] = row2[x] - else: - rows[rowi][x] = row[x] - row = rows[rowi] - rows.remove(row2) - - print(colsep.join(row), file=fw) - fw.close() - - return fw.name - - -def add_tandems(mcscanfile, tandemfile): - """ - add tandem genes to anchor genes in mcscan file - """ - tandems = [f.strip().split(",") for f in open(tandemfile)] - fw = must_open(mcscanfile + ".withtandems", "w") - fp = must_open(mcscanfile) - seen = set() - for i, row in enumerate(fp): - if row[0] == "#": - continue - anchorslist = row.strip().split("\t") - anchors = set([a.split(",")[0] for a in anchorslist]) - anchors.remove(".") - if anchors & seen == anchors: - continue - - newanchors = [] - for a in anchorslist: - if a == ".": - newanchors.append(a) - continue - for t in tandems: - if a in t: - newanchors.append(",".join(t)) - seen.update(t) - break - else: - newanchors.append(a) - seen.add(a) - print("\t".join(newanchors), file=fw) - - fw.close() - newmcscanfile = merge_rows_local(fw.name) - - logger.debug( - "Tandems added to `{0}`. Results in `{1}`".format(mcscanfile, newmcscanfile) - ) - fp.seek(0) - logger.debug( - "{0} rows merged to {1} rows".format( - len(fp.readlines()), len(open(newmcscanfile).readlines()) - ) - ) - sh("rm %s" % fw.name) - - return newmcscanfile - - -def main(): - - actions = ( - ("prepare", "prepare cds sequences from .mcscan"), - ("build", "build NJ and ML trees from cds"), - ("draw", "draw Newick formatted trees"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def prepare(args): - """ - %prog prepare mcscanfile cdsfile [options] - - Pick sequences from cdsfile to form fasta files, according to multiple - alignment in the mcscanfile. - The fasta sequences can then be used to construct phylogenetic tree. - - Use --addtandem=tandemfile to collapse tandems of anchors into single row. 
- The tandemfile must be provided with *ALL* genomes involved, otherwise - result will be incomplete and redundant. - """ - from jcvi.graphics.base import discrete_rainbow - - p = OptionParser(prepare.__doc__) - p.add_argument("--addtandem", help="path to tandemfile") - p.add_argument( - "--writecolors", - default=False, - action="store_true", - help="generate a gene_name to color mapping file which will be taken " - "by jcvi.apps.phylo.draw", - ) - p.set_outdir(outdir="sequences") - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - mcscanfile, cdsfile = args - - if opts.addtandem: - tandemfile = opts.addtandem - mcscanfile_with_tandems = add_tandems(mcscanfile, tandemfile) - mcscanfile = mcscanfile_with_tandems - - seqdir = opts.outdir - mkdir(seqdir) - f = Fasta(cdsfile) - fp = must_open(mcscanfile) - if opts.writecolors: - fc = must_open("leafcolors.txt", "w") - - n = 0 - for i, row in enumerate(fp): - row = row.strip().split("\t") - if i == 0: - l = len(row) - if l <= 20: - colors = discrete_rainbow(l, shuffle=False)[1] - else: - colors = discrete_rainbow(l, usepreset=False, shuffle=False)[1] - warnings.warn( - "*** WARNING ***\n" - "Too many columns. Colors may not be all distinctive." - ) - - assert len(row) == l, "All rows should have same number of fields." 
- - anchors = set() - for j, atom in enumerate(row): - color = "%s,%s,%s" % colors[j] - if atom == ".": - continue - elif "," in atom: - atom = atom.split(",") - for a in atom: - fc.write("{0}\t{1}\n".format(a, color)) - anchors.add(a) - else: - fc.write("{0}\t{1}\n".format(atom, color)) - anchors.add(atom) - - if len(anchors) <= 3: - print( - "Not enough seqs to build trees for {0}".format(anchors), - file=sys.stderr, - ) - continue - - pivot = row[0] - fw = must_open("%s/%s.cds" % (seqdir, pivot), "w") - for a in anchors: - if a not in f: - print(a) - a = find_first_isoform(a, f) - assert a, a - arec = f[a] - SeqIO.write(arec, fw, "fasta") - fw.close() - n += 1 - - if opts.writecolors: - fc.close() - logger.debug("leaf colors written to `{0}`".format(fc.name)) - - logger.debug("cds of {0} syntelog groups written to {1}/".format(n, seqdir)) - - return seqdir - - -def build(args): - """ - %prog build [prot.fasta] cds.fasta [options] --outdir=outdir - - This function wraps on the following steps: - 1. msa using ClustalW2 or MUSCLE(default) - 2. (optional) alignment editing using Gblocks - 3. build NJ tree using PHYLIP in EMBOSS package - seq names should be unique by first 10 chars (restriction of PHYLIP) - 4. build ML tree using RAxML(default) or PHYML, use keywords raxml or phyml, - *WARNING* maybe slow with large dataset - - If an outgroup file is provided, the result tree will be rooted on the - outgroup according to order in the file, i.e. the name in row1 will be - tried first. If not found, row2 will be used, etc. - Tail truncated names can be provided so long as it is unique among the seqs. - If not uniq, the first occurrence will be used. For example, if you have - two moss sequences in your input, then the tree will be rooted on the - first moss sequence encountered by the program, unless they are monophylic, - in which case the root will be their common ancestor. - - --stree and --smap are required if --treefix is set. 
- - Trees can be edited again using an editor such as Dendroscope. This - is the recommended way to get highly customized trees. - - Newick format trees will be deposited into outdir (. by default). - """ - from jcvi.formats.fasta import translate - - p = OptionParser(build.__doc__) - p.add_argument( - "--longest", - action="store_true", - help="Get longest ORF, only works if no pep file, e.g. ESTs", - ) - p.add_argument( - "--nogblocks", - action="store_true", - help="don't use Gblocks to edit alignment", - ) - p.add_argument( - "--synonymous", - action="store_true", - help="extract synonymous sites of the alignment", - ) - p.add_argument( - "--fourfold", - action="store_true", - help="extract fourfold degenerate sites of the alignment", - ) - p.add_argument( - "--msa", - default="muscle", - choices=("clustalw", "muscle"), - help="software used to align the proteins", - ) - p.add_argument( - "--noneighbor", - action="store_true", - help="don't build NJ tree", - ) - p.add_argument( - "--ml", - default=None, - choices=("raxml", "phyml"), - help="software used to build ML tree", - ) - p.add_argument("--outgroup", help="path to file containing outgroup orders") - p.add_argument("--SH", help="path to reference Newick tree") - p.add_argument("--shout", default="SH_out.txt", help="SH output file name") - p.add_argument( - "--treefix", - action="store_true", - help="use TreeFix to rearrange ML tree", - ) - p.add_argument("--stree", help="path to species Newick tree") - p.add_argument( - "--smap", - help="path to smap file: gene_name_patternspecies_name", - ) - p.set_outdir() - - opts, args = p.parse_args(args) - gblocks = not opts.nogblocks - synonymous = opts.synonymous - fourfold = opts.fourfold - neighbor = not opts.noneighbor - outgroup = opts.outgroup - outdir = opts.outdir - - if len(args) == 1: - protein_file, dna_file = None, args[0] - elif len(args) == 2: - protein_file, dna_file = args - else: - print("Incorrect arguments", file=sys.stderr) - sys.exit(not 
p.print_help()) - - if opts.treefix: - stree = opts.stree - smap = opts.smap - assert stree and smap, "TreeFix requires stree and smap files." - opts.ml = "raxml" - - treedir = op.join(outdir, "tree") - mkdir(treedir) - - if not protein_file: - protein_file = dna_file + ".pep" - translate_args = [dna_file, "--outfile=" + protein_file] - if opts.longest: - translate_args += ["--longest"] - dna_file, protein_file = translate(translate_args) - - work_dir = op.join(outdir, "alignment") - mkdir(work_dir) - p_recs = list(SeqIO.parse(open(protein_file), "fasta")) - if opts.msa == "clustalw": - align_fasta = clustal_align_protein(p_recs, work_dir) - elif opts.msa == "muscle": - align_fasta = muscle_align_protein(p_recs, work_dir) - - n_recs = list(SeqIO.parse(open(dna_file), "fasta")) - mrtrans_fasta = run_mrtrans(align_fasta, n_recs, work_dir, outfmt="fasta") - - if not mrtrans_fasta: - logger.debug("pal2nal aborted. Cannot reliably build tree for %s", dna_file) - return - - codon_aln_fasta = mrtrans_fasta - if gblocks: - gb_fasta = run_gblocks(mrtrans_fasta) - codon_aln_fasta = gb_fasta if gb_fasta else codon_aln_fasta - - else: - if synonymous: - codon_aln_fasta = subalignment(mrtrans_fasta, "synonymous") - - if fourfold: - codon_aln_fasta = subalignment(mrtrans_fasta, "fourfold") - - if not neighbor and not opts.ml: - return codon_aln_fasta - - alignment = AlignIO.read(codon_aln_fasta, "fasta") - if len(alignment) <= 3: - raise ValueError("Too few seqs to build tree.") - - mkdir(op.join(treedir, "work")) - if neighbor: - out_file = op.join( - treedir, op.basename(dna_file).rsplit(".", 1)[0] + ".NJ.unrooted.dnd" - ) - try: - outfile, phy_file = build_nj_phylip( - alignment, outfile=out_file, outgroup=outgroup, work_dir=treedir - ) - except: - print("NJ tree cannot be built for {0}".format(dna_file)) - - if opts.SH: - reftree = opts.SH - querytree = outfile - SH_raxml(reftree, querytree, phy_file, shout=opts.shout) - - if opts.ml: - out_file = op.join( - treedir, 
op.basename(dna_file).rsplit(".", 1)[0] + ".ML.unrooted.dnd" - ) - - if opts.ml == "phyml": - try: - outfile, phy_file = build_ml_phyml( - alignment, outfile=out_file, work_dir=treedir - ) - except: - print("ML tree cannot be built for {0}".format(dna_file)) - - elif opts.ml == "raxml": - try: - outfile, phy_file = build_ml_raxml( - alignment, outfile=out_file, work_dir=treedir - ) - except: - print("ML tree cannot be built for {0}".format(dna_file)) - - if outgroup: - new_out_file = out_file.replace(".unrooted", "") - t = smart_reroot( - treefile=out_file, outgroupfile=outgroup, outfile=new_out_file - ) - if t == new_out_file: - sh("rm %s" % out_file) - outfile = new_out_file - - if opts.SH: - reftree = opts.SH - querytree = outfile - SH_raxml(reftree, querytree, phy_file, shout=opts.shout) - - if opts.treefix: - treefix_dir = op.join(treedir, "treefix") - assert mkdir(treefix_dir, overwrite=True) - - sh("cp {0} {1}/".format(outfile, treefix_dir)) - input = op.join(treefix_dir, op.basename(outfile)) - aln_file = input.rsplit(".", 1)[0] + ".fasta" - SeqIO.write(alignment, aln_file, "fasta") - - outfile = run_treefix( - input=input, - stree_file=stree, - smap_file=smap, - a_ext=".fasta", - o_ext=".dnd", - n_ext=".treefix.dnd", - ) - - return outfile - - -def _draw_trees( - trees, nrow=1, ncol=1, rmargin=0.3, iopts=None, outdir=".", shfile=None, **kwargs -): - """ - Draw one or multiple trees on one plot. 
- """ - from jcvi.graphics.tree import draw_tree - - if shfile: - SHs = DictFile(shfile, delimiter="\t") - - ntrees = len(trees) - n = nrow * ncol - for x in range(int(ceil(float(ntrees) / n))): - fig = plt.figure(1, (iopts.w, iopts.h)) if iopts else plt.figure(1, (5, 5)) - root = fig.add_axes([0, 0, 1, 1]) - - xiv = 1.0 / ncol - yiv = 1.0 / nrow - xstart = list(np.arange(0, 1, xiv)) * nrow - ystart = list(chain(*zip(*[list(np.arange(0, 1, yiv))[::-1]] * ncol))) - for i in range(n * x, n * (x + 1)): - if i == ntrees: - break - ax = fig.add_axes([xstart[i % n], ystart[i % n], xiv, yiv]) - f = trees.keys()[i] - tree = trees[f] - try: - SH = SHs[f] - except: - SH = None - draw_tree( - ax, - tree, - rmargin=rmargin, - reroot=False, - supportcolor="r", - SH=SH, - **kwargs - ) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - format = iopts.format if iopts else "pdf" - dpi = iopts.dpi if iopts else 300 - if n == 1: - image_name = f.rsplit(".", 1)[0] + "." + format - else: - image_name = "trees{0}.{1}".format(x, format) - image_name = op.join(outdir, image_name) - savefig(image_name, dpi=dpi, iopts=iopts) - plt.clf() - - -def draw(args): - """ - %prog draw --input newicktrees [options] - - Draw phylogenetic trees into single or combined plots. - Input trees should be one of the following: - 1. single Newick format tree file - 2. a dir containing *ONLY* the tree files to be drawn - - Newick format: - http://evolution.genetics.washington.edu/phylip/newicktree.html - - This function wraps on jcvi.graphics.tree - This function is better used for trees generated by jcvi.apps.phylo (rooted - if possible). For drawing general Newick trees from external sources invoke - jcvi.graphics.tree directly, which also gives more drawing options. 
- """ - trunc_name_options = ["headn", "oheadn", "tailn", "otailn"] - p = OptionParser(draw.__doc__) - p.add_argument( - "--input", - help="path to single input tree file or a dir " - "containing ONLY the input tree files", - ) - p.add_argument( - "--combine", - type=str, - default="1x1", - help="combine multiple trees into one plot in nrowxncol", - ) - p.add_argument( - "--trunc_name", - default=None, - help="Options are: {0}. " - "truncate first n chars, retains only first n chars, " - "truncate last n chars, retain only last chars. " - "n=1~99.".format(trunc_name_options), - ) - p.add_argument( - "--SH", - default=None, - help="path to a file containing SH test p-values in format:" - "tree_file_namep-values " - "This file can be generated with jcvi.apps.phylo build", - ) - p.add_argument( - "--scutoff", - default=50, - type=int, - help="cutoff for displaying node support, 0-100", - ) - p.add_argument( - "--barcode", - default=None, - help="path to seq/taxon name barcode mapping file: " - "barcodenew_name " - "This option is downstream of `--trunc_name`", - ) - p.add_argument( - "--leafcolorfile", - default=None, - help="path to a mapping file containing font colors " - "for the OTUs: leafnamecolor", - ) - p.set_outdir() - opts, args, iopts = p.set_image_options(figsize="8x6") - input = opts.input - outdir = opts.outdir - combine = opts.combine.split("x") - trunc_name = opts.trunc_name - SH = opts.SH - - mkdir(outdir) - if not input: - sys.exit(not p.print_help()) - elif op.isfile(input): - trees_file = input - treenames = [op.basename(input)] - elif op.isdir(input): - trees_file = op.join(outdir, "alltrees.dnd") - treenames = [] - for f in sorted(os.listdir(input)): - sh("cat {0}/{1} >> {2}".format(input, f, trees_file), log=False) - treenames.append(f) - else: - sys.exit(not p.print_help()) - - trees = OrderedDict() - tree = "" - i = 0 - for row in LineFile(trees_file, comment="#", load=True).lines: - if i == len(treenames): - break - if not len(row): - 
continue - - if ";" in row: - # sanity check - if row.index(";") != len(row) - 1: - ts = row.split(";") - for ii in range(len(ts) - 1): - ts[ii] += ";" - else: - ts = [row] - for t in ts: - if ";" in t: - tree += t - if tree: - trees[treenames[i]] = tree - tree = "" - i += 1 - else: - tree += t - else: - tree += row - - logger.debug("A total of {0} trees imported.".format(len(trees))) - sh("rm {0}".format(op.join(outdir, "alltrees.dnd"))) - - _draw_trees( - trees, - nrow=int(combine[0]), - ncol=int(combine[1]), - rmargin=0.3, - iopts=iopts, - outdir=outdir, - shfile=SH, - trunc_name=trunc_name, - scutoff=opts.scutoff, - barcodefile=opts.barcode, - leafcolorfile=opts.leafcolorfile, - ) - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/r.py b/jcvi/apps/r.py deleted file mode 100644 index c4b60f59..00000000 --- a/jcvi/apps/r.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -uses R for statistics and graphics -""" - -import sys - -from string import Template - -from ..formats.base import must_open - -from .base import ActionDispatcher, OptionParser, cleanup, sh - - -class RTemplate(object): - """ - Creates a R script and runs it - """ - - def __init__(self, template, parameters): - - self.template = Template(template) - self.parameters = parameters - - def run(self, clean=True): - """ - Create a temporary file and run it - """ - template = self.template - parameters = self.parameters - # write to a temporary R script - fw = must_open("tmp", "w") - path = fw.name - - fw.write(template.safe_substitute(**parameters)) - fw.close() - - sh("Rscript %s" % path) - if clean: - cleanup(path) - # I have no idea why using ggsave, there is one extra image - # generated, but here I remove it - rplotspdf = "Rplots.pdf" - cleanup(rplotspdf) - - -def main(): - - actions = (("rdotplot", "dot plot based on lastz rdotplot output"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def rdotplot(args): - """ - %prog rdotplotfile - - Dot plot to visualize relationship between 
two sequences, by plotting - .rdotplot file (often generated by LASTZ) - """ - p = OptionParser(rdotplot.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - dotplot_template = """ - dots <- read.table('$rdotplotfile', header=T) - png('$pngfile') - plot(dots, type='l') - dev.off() - """ - - (rdotplotfile,) = args - assert rdotplotfile.endswith(".rdotplot") - pngfile = rdotplotfile.replace(".rdotplot", ".png") - - rtemplate = RTemplate(dotplot_template, locals()) - rtemplate.run() - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/restriction.py b/jcvi/apps/restriction.py deleted file mode 100644 index 4ecb4bf0..00000000 --- a/jcvi/apps/restriction.py +++ /dev/null @@ -1,168 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Procedure to cut genome using restriction enzymes. -""" -import sys - -from Bio.Restriction.Restriction import AllEnzymes, Analysis - -from ..formats.base import must_open -from ..formats.fasta import Fasta, SeqRecord, SeqIO - -from .base import ActionDispatcher, OptionParser, logger - - -def main(): - - actions = ( - ("fragment", "extract upstream and downstream seq of particular RE"), - ("digest", "digest FASTA file to map restriction site positions"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def digest(args): - """ - %prog digest fastafile NspI,BfuCI - - Digest fasta sequences to map restriction site positions. 
- """ - p = OptionParser(digest.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, enzymes = args - enzymes = enzymes.split(",") - enzymes = [x for x in AllEnzymes if str(x) in enzymes] - f = Fasta(fastafile, lazy=True) - fw = must_open(opts.outfile, "w") - - header = ["Contig", "Length"] + [str(x) for x in enzymes] - print("\t".join(header), file=fw) - for name, rec in f.iteritems_ordered(): - row = [name, len(rec)] - for e in enzymes: - pos = e.search(rec.seq) - pos = "na" if not pos else "|".join(str(x) for x in pos) - row.append(pos) - print("\t".join(str(x) for x in row), file=fw) - - -def extract_full(rec, sites, flank, fw): - """ - Full extraction of seq flanking the sites. - """ - for s in sites: - newid = "{0}:{1}".format(rec.name, s) - left = max(s - flank, 0) - right = min(s + flank, len(rec)) - frag = rec.seq[left:right].strip("Nn") - newrec = SeqRecord(frag, id=newid, description="") - SeqIO.write([newrec], fw, "fasta") - - -def extract_ends(rec, sites, flank, fw, maxfragsize=800): - """ - Extraction of ends of fragments above certain size. 
- """ - nsites = len(sites) - size = len(rec) - for i, s in enumerate(sites): - newid = "{0}:{1}".format(rec.name, s) - recs = [] - - if i == 0 or s - sites[i - 1] <= maxfragsize: - newidL = newid + "L" - left = max(s - flank, 0) - right = s - frag = rec.seq[left:right].strip("Nn") - recL = SeqRecord(frag, id=newidL, description="") - if i == 0 and s > maxfragsize: # Contig L-end - pass - else: - recs.append(recL) - - if i == nsites - 1 or sites[i + 1] - s <= maxfragsize: - newidR = newid + "R" - left = s - right = min(s + flank, size) - frag = rec.seq[left:right].strip("Nn") - recR = SeqRecord(frag, id=newidR, description="") - if i == nsites - 1 and size - s > maxfragsize: # Contig R-end - pass - else: - recs.append(recR) - - SeqIO.write(recs, fw, "fasta") - - -def fragment(args): - """ - %prog fragment fastafile enzyme - - Cut the fastafile using the specified enzyme, and grab upstream and - downstream nucleotide sequence along with the cut site. In this case, the - sequences extracted are: - - |- PstI - ============|=========== - (-------) - - Sometimes we need to limit the size of the restriction fragments, for - example the GBS protocol does not allow fragments larger than 800bp. - - |-PstI |- PstI |- PstI - ~~~====|=============|==========~~~~~~~===|============ - (---) (---) - - In this case, the second fragment is longer than 800bp, therefore the two - ends are NOT extracted, as in the first fragment. 
- """ - p = OptionParser(fragment.__doc__) - p.add_argument( - "--flank", - default=150, - type=int, - help="Extract flanking bases of the cut sites", - ) - p.add_argument( - "--full", - default=False, - action="store_true", - help="The full extraction mode", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, enzyme = args - flank = opts.flank - assert flank > 0 - extract = extract_full if opts.full else extract_ends - tag = "full" if opts.full else "ends" - - assert enzyme in set(str(x) for x in AllEnzymes) - fragfastafile = fastafile.split(".")[0] + ".{0}.flank{1}.{2}.fasta".format( - enzyme, flank, tag - ) - enzyme = [x for x in AllEnzymes if str(x) == enzyme][0] - - f = Fasta(fastafile, lazy=True) - fw = open(fragfastafile, "w") - for name, rec in f.iteritems_ordered(): - a = Analysis([enzyme], rec.seq) - sites = a.full()[enzyme] - extract(rec, sites, flank, fw) - - logger.debug("Fragments written to `%s`.", fragfastafile) - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/softlink.py b/jcvi/apps/softlink.py deleted file mode 100644 index ad055c0a..00000000 --- a/jcvi/apps/softlink.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Procedure to touch and copy softlinks -""" -import os -import os.path as op -import sys - -from .base import ActionDispatcher, OptionParser, get_abs_path, logger - - -def main(): - - actions = ( - ("touch", "touch all the symlinks"), - ("cp", "cp all the symlinks to current folder"), - ("clean", "removes all the symlinks in current folder"), - ("size", "print the file sizes for the files pointed by symlinks"), - ("link", "link source to target based on a tabular file"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def lnsf(source, target, log=False): - # re-link the symlinks (similar to `ln -sf`) - if op.lexists(target): - os.unlink(target) - os.symlink(source, target) - if log: - logger.debug("{0} => 
{1}".format(source, target)) - - -def link(args): - """ - %prog link metafile - - Link source to target based on a tabular file. - """ - from jcvi.apps.base import mkdir - - p = OptionParser(link.__doc__) - p.add_argument("--dir", help="Place links in a subdirectory") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (meta,) = args - d = opts.dir - if d: - mkdir(d) - - fp = open(meta) - cwd = op.dirname(get_abs_path(meta)) - for row in fp: - source, target = row.split() - source = op.join(cwd, source) - if d: - target = op.join(d, target) - lnsf(source, target, log=True) - - -def touch(args): - """ - find . -type l | %prog touch - - Linux commands `touch` wouldn't modify mtime for links, this script can. - Use find to pipe in all the symlinks. - """ - p = OptionParser(touch.__doc__) - opts, args = p.parse_args(args) - fp = sys.stdin - - for link_name in fp: - link_name = link_name.strip() - if not op.islink(link_name): - continue - if not op.exists(link_name): - continue - - source = get_abs_path(link_name) - lnsf(source, link_name) - - -def clean(args): - """ - %prog clean - - Removes all symlinks from current folder - """ - p = OptionParser(clean.__doc__) - opts, args = p.parse_args(args) - - for link_name in os.listdir(os.getcwd()): - if not op.islink(link_name): - continue - logger.debug("remove symlink `{0}`".format(link_name)) - os.unlink(link_name) - - -def cp(args): - """ - find folder -type l | %prog cp - - Copy all the softlinks to the current folder, using absolute paths - """ - p = OptionParser(cp.__doc__) - fp = sys.stdin - - for link_name in fp: - link_name = link_name.strip() - if not op.exists(link_name): - continue - - source = get_abs_path(link_name) - link_name = op.basename(link_name) - if not op.exists(link_name): - os.symlink(source, link_name) - logger.debug(" => ".join((source, link_name))) - - -def size(args): - """ - find folder -type l | %prog size - - Get the size for all the paths that are 
pointed by the links - """ - from jcvi.utils.cbook import human_size - - p = OptionParser(size.__doc__) - fp = sys.stdin - - results = [] - for link_name in fp: - link_name = link_name.strip() - if not op.islink(link_name): - continue - - source = get_abs_path(link_name) - - link_name = op.basename(link_name) - filesize = op.getsize(source) - results.append((filesize, link_name)) - - # sort by descending file size - for filesize, link_name in sorted(results, reverse=True): - filesize = human_size(filesize, a_kilobyte_is_1024_bytes=True) - print("%10s\t%s" % (filesize, link_name), file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/uclust.py b/jcvi/apps/uclust.py deleted file mode 100644 index 1da53f33..00000000 --- a/jcvi/apps/uclust.py +++ /dev/null @@ -1,1106 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Using VCLUST to derep, cluster, and make consensus from duplicate reads. -The VCLUST implementation borrows ideas and code from PyRAD. PyRAD link: - - -""" -import os.path as op -import sys - -from collections import defaultdict -from copy import deepcopy -from functools import partial -from itertools import groupby -from subprocess import Popen, PIPE, STDOUT -from tempfile import mkdtemp - -import numpy as np -import scipy -import scipy.stats -import scipy.optimize - -from more_itertools import grouper - -from ..formats.base import BaseFile, FileMerger, must_open, split -from ..formats.fasta import parse_fasta -from ..formats.fastq import fasta -from ..utils.orderedcollections import DefaultOrderedDict -from ..utils.table import write_csv - -from .base import ( - OptionParser, - ActionDispatcher, - cleanup, - datadir, - iglob, - listify, - logger, - mkdir, - need_update, - sh, -) - - -SEP = "//" -CONSTAG = ">CONSENS0" -BASES = "ACTGN_-" # CAUTION: DO NOT CHANGE THIS LINE -REAL = BASES[:4] -GAPS = BASES[-2:] -NBASES = len(BASES) -ACHEADER = """ -TAXON CHR POS REF_NT REF_ALLELE ALT_ALLELE REF_COUNT -ALT_COUNT 
OTHER_COUNT TOTAL_READS A G C T -READ_INS READ_DEL TOTAL_READS -""".split() -ACHEADER_NO_TAXON = ACHEADER[1:] - - -alleles = lambda x: (",".join(x).replace("-", "*") if x else "N") -getsize = lambda name: ( - 0 if ";" not in name else int(name.split(";")[1].replace("size=", "")) -) - - -class ClustFile(BaseFile): - def __init__(self, filename): - super().__init__(filename) - - def __iter__(self): - nstacks = 0 - fp = must_open(self.filename) - for tag, contents in groupby(fp, lambda row: row[0] == "/"): - if tag: - continue - data = Clust() - for name, seq in grouper(contents, 2): - name, seq = name.strip(), seq.strip() - nrep = getsize(name) - data.append((name, seq, nrep)) - yield data - nstacks += 1 - if nstacks % 10000 == 0: - logger.debug("{0} stacks parsed".format(nstacks)) - - -class Clust(list): - def __init__(self): - super().__init__(self) - - def __str__(self): - s = [] - for d in self: - s.append("\n".join(d[:2])) - return "\n".join(s) + "\n" + SEP - - -class ClustStore(BaseFile): - def __init__(self, consensfile): - super().__init__(consensfile) - binfile = consensfile + ".bin" - idxfile = consensfile + ".idx" - self.bin = np.fromfile(binfile, dtype=np.uint16) - assert self.bin.size % NBASES == 0 - - self.bin = self.bin.reshape((self.bin.size / NBASES, NBASES)) - self.index = {} - fp = open(idxfile) - for row in fp: - name, start, end = row.split() - start, end = int(start), int(end) - self.index[name.strip(">")] = (start, end) - - def __getitem__(self, name): - start, end = self.index[name] - return self.bin[start:end, :] - - -class AlleleCount(object): - """ - Each record represents a line in the .allele_count file - - Fields are: - # CHR POS REF_NT REF_ALLELE ALT_ALLELE REF_COUNT - # ALT_COUNT OTHER_COUNT TOTAL_READS A G C T - # READ_INS READ_DEL TOTAL_READS - """ - - def __init__(self, taxon, chr, pos, ref_allele, alt_allele, profile): - self.taxon = taxon - self.chr = chr - self.pos = pos - self.ref_nt = listify(ref_allele) - self.ref_allele = 
listify(ref_allele) - self.alt_allele = listify(alt_allele) - self.update(profile) - - def tostring(self, taxon=False): - ref_allele = alleles(self.ref_allele) - ar = [ - self.chr, - self.pos, - ref_allele, - ref_allele, - alleles(self.alt_allele), - self.ref_count, - self.alt_count, - self.other_count, - self.total_count, - self.A, - self.G, - self.C, - self.T, - self.read_ins, - self.read_del, - self.total_count, - ] - if taxon: - ar = [self.taxon] + ar - return "\t".join(str(x) for x in ar) - - def update(self, profile): - self.ref_count = sum(profile[BASES.index(x)] for x in self.ref_allele) - self.alt_count = sum(profile[BASES.index(x)] for x in self.alt_allele) - self.A, self.C, self.T, self.G, N, tgaps, gaps = profile - self.total_count = sum(profile) - tgaps - others = set(BASES) - set(self.ref_allele) - set(self.alt_allele) - self.other_count = sum(profile[BASES.index(x)] for x in others) - tgaps - self.read_ins = self.total_count if "-" in self.ref_allele else 0 - self.read_del = gaps - - def clear(self): - self.update([0] * NBASES) - - -class ClustStores(dict): - """ - ClustStores provides random access to any consensus read - """ - - def __init__(self, consensfiles): - super().__init__(self) - for cs in consensfiles: - name = op.basename(cs).split(".")[0] - self[name] = ClustStore(cs) - - -def main(): - - actions = ( - ("align", "align clustfile to clustSfile"), - ("estimateHE", "estimate heterozygosity and error rate for stacks"), - ("cluster", "cluster within samples"), - ("consensus", "call consensus bases within samples"), - ("mcluster", "cluster across samples"), - ("mconsensus", "call consensus bases across samples"), - ("stats", "generate table summarizing .stats files"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def stats(args): - """ - %prog stats folder - - Generate table summarizing .stats files. 
- """ - p = OptionParser(stats.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (folder,) = args - statsfiles = iglob(folder, "*.stats") - after_equal = lambda x: x.split("=")[-1] - header = "Library Assembled_reads Contigs".split() - contents = [] - # label=M0096 total=7443 cnts=948 mean=7.851 std=35.96 - for statsfile in statsfiles: - fp = open(statsfile) - for row in fp: - if row.startswith("label="): - break - label, total, cnts = row.split()[:3] - label = after_equal(label) - reads = int(after_equal(total)) - contigs = int(after_equal(cnts)) - contents.append((label, reads, contigs)) - - all_labels, all_reads, all_contigs = zip(*contents) - contents.append(("SUM", sum(all_reads), sum(all_contigs))) - contents.append( - ("AVERAGE (per sample)", int(np.mean(all_reads)), int(np.mean(all_contigs))) - ) - contents.append( - ("MEDIAN (per sample)", int(np.median(all_reads)), int(np.median(all_contigs))) - ) - write_csv(header, contents, filename=opts.outfile) - - -def add_consensus_options(p): - p.add_argument("--prefix", default="mcluster", help="Output prefix") - p.add_argument("--minlength", default=30, type=int, help="Min contig length") - p.add_argument("--mindepth", default=3, type=int, help="Min depth for each stack") - p.add_argument("--minsamp", default=3, type=int, help="Min number of samples") - - -def find_pctid(consensusfiles): - pctid = min( - [int(op.basename(x).split(".")[-2].replace("P", "")) for x in consensusfiles] - ) - logger.debug("Set pctid={0}".format(pctid)) - return pctid - - -def mcluster(args): - """ - %prog mcluster *.consensus - - Cluster across samples using consensus sequences. 
- """ - p = OptionParser(mcluster.__doc__) - add_consensus_options(p) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - consensusfiles = args - minlength = opts.minlength - cpus = opts.cpus - pf = opts.prefix - pctid = find_pctid(consensusfiles) - - pf += ".P{0}".format(pctid) - consensusfile = pf + ".consensus.fasta" - if need_update(consensusfiles, consensusfile): - fw_cons = must_open(consensusfile, "w") - totalseqs = 0 - for cf in consensusfiles: - nseqs = 0 - s = op.basename(cf).split(".")[0] - for name, seq in parse_fasta(cf): - name = ".".join((s, name)) - print(">{0}\n{1}".format(name, seq), file=fw_cons) - nseqs += 1 - logger.debug("Read `{0}`: {1} seqs".format(cf, nseqs)) - totalseqs += nseqs - logger.debug("Total: {0} seqs".format(totalseqs)) - fw_cons.close() - - userfile = pf + ".u" - notmatchedfile = pf + ".notmatched" - if need_update(consensusfile, userfile): - cluster_smallmem( - consensusfile, userfile, notmatchedfile, minlength, pctid, cpus - ) - - clustfile = pf + ".clust" - if need_update((consensusfile, userfile, notmatchedfile), clustfile): - makeclust(consensusfile, userfile, notmatchedfile, clustfile) - - clustSfile = pf + ".clustS" - if need_update(clustfile, clustSfile): - parallel_musclewrap(clustfile, cpus, minsamp=opts.minsamp) - - -def makeloci(clustSfile, store, prefix, minsamp=3, pctid=95): - C = ClustFile(clustSfile) - pf = clustSfile.rsplit(".", 1)[0] - locifile = pf + ".loci" - finalfastafile = pf + ".final.fasta" - fw = open(locifile, "w") - fw_finalfasta = open(finalfastafile, "w") - locid = 0 - AC = [] - diffratio = 1 - pctid / 100.0 - for data in C: - names, seqs, nreps = zip(*data) - # Strip off cut site - seqs = [x.upper() for x in seqs] - fname = "{0}_{1}".format(prefix, locid) - ntaxa = sum(1 for s, nrep in zip(seqs, nreps) if nrep) - - # Record variable sites - cons_name, cons_seq, cons_nrep = get_seed(data) - ncols = len(cons_seq) - snpsite = [" "] * ncols - 
seed_ungapped_pos = [] - ref_alleles = [] - alt_alleles = [] - ungapped_i = 0 - for i in range(ncols): - ref_allele = cons_seq[i] - ref_alleles.append(ref_allele) - seed_ungapped_pos.append(ungapped_i) - if ref_allele in GAPS: # Skip if reference is a deletion - alt_alleles.append([]) - continue - else: - ungapped_i += 1 - - site = [s[i] for s, nrep in zip(seqs, nreps) if nrep] # Column slice in MSA - reals = [x for x in site if x in REAL] - - realcounts = sorted([(reals.count(x), x) for x in REAL], reverse=True) - nreals = sum(x[0] for x in realcounts) - refcount = realcounts[0][0] - altcount = realcounts[1][0] - # Select SNP column - if ( - altcount >= minsamp - and nreals >= ntaxa / 2 - and (refcount + altcount) >= nreals * 0.9 - ): - snpsite[i] = "*" - if snpsite.count("*") > ncols * diffratio: - snpsite = [" "] * ncols - nonzeros = [x for c, x in realcounts if (c and x != ref_allele)] - alt_alleles.append(nonzeros[:1]) # Keep only two alleles - - assert len(seed_ungapped_pos) == ncols - assert len(ref_alleles) == ncols - assert len(alt_alleles) == ncols - cons_seq = cons_seq.strip("_N").replace("-", "") - - for name, seq in zip(names, seqs): - name = name.strip(">") - if "." 
not in name: # CONSENS0 - continue - taxon, readname = name.split(".", 1) - profile = store[taxon][readname] - assert len(seq) == ncols - - ungapped_i = 0 - gap_p = [0, 0, 0, 0, 0, 0, sum(profile[0])] - for pos, ref_allele, alt_allele, r, ispoly in zip( - seed_ungapped_pos, ref_alleles, alt_alleles, seq, snpsite - ): - if r in GAPS: # insertion in ref, deletion in read - p = gap_p - else: - p = profile[ungapped_i] - ungapped_i += 1 - - if ispoly != "*": - continue - - assert cons_seq[pos] == ref_allele # Sanity check - ac = AlleleCount( - taxon, - fname, - pos + 1, # 1-based coordinate - ref_allele, - alt_allele, - p, - ) - AC.append(ac) - - longname = max(len(x) for x in names) - longname = max(len(fname) + 3, longname) + 1 - print("// {0}".format(fname).ljust(longname) + "".join(snpsite) + "|", file=fw) - for name, seq, nrep in data: - print(name.ljust(longname) + seq, file=fw) - - print( - ">{0} with {1} sequences\n{2}".format(fname, sum(nreps), cons_seq), - file=fw_finalfasta, - ) - locid += 1 - - logger.debug("Stacks written to `{0}`".format(locifile)) - logger.debug( - "Final consensus sequences written to `{0}` (n={1})".format( - finalfastafile, locid - ) - ) - fw.close() - fw_finalfasta.close() - - return AC - - -def mconsensus(args): - """ - %prog mconsensus *.consensus - - Call consensus along the stacks from cross-sample clustering. 
- """ - p = OptionParser(mconsensus.__doc__) - p.add_argument( - "--allele_counts", - default="allele_counts", - help="Directory to generate allele counts", - ) - add_consensus_options(p) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - consensusfiles = args - prefix = opts.prefix - acdir = opts.allele_counts - store = ClustStores(consensusfiles) - pctid = find_pctid(consensusfiles) - pf = prefix + ".P{0}".format(pctid) - - clustSfile = pf + ".clustS" - AC = makeloci(clustSfile, store, prefix, minsamp=opts.minsamp, pctid=pctid) - - mkdir(acdir) - acfile = pf + ".allele_counts" - fw = open(acfile, "w") - seen = DefaultOrderedDict(list) # chr, pos => taxa - print("# " + "\t".join(ACHEADER), file=fw) - # Sort allele counts into separate files - for ac in AC: - chrpos = ac.chr, ac.pos - seen[chrpos].append(ac) - print(ac.tostring(taxon=True), file=fw) - fw.close() - - logger.debug("Populate all taxa and instantiate empty vector if missing") - all_taxa = set([op.basename(x).split(".")[0] for x in consensusfiles]) - taxon_to_ac = defaultdict(list) - for chrpos, aclist in seen.items(): - included_taxa = set([x.taxon for x in aclist]) - missing_taxa = all_taxa - included_taxa - template = deepcopy(aclist[0]) - template.clear() - for ac in aclist: - taxon_to_ac[ac.taxon].append(ac) - for tx in missing_taxa: - taxon_to_ac[tx].append(template) - - logger.debug("Write allele counts for all taxa") - for tx, aclist in sorted(taxon_to_ac.items()): - tx_acfile = op.join(acdir, tx + ".allele_counts") - fw = open(tx_acfile, "w") - print("# " + "\t".join(ACHEADER_NO_TAXON), file=fw) - for ac in aclist: - print(ac.tostring(), file=fw) - fw.close() - logger.debug("Written {0} sites in `{1}`".format(len(aclist), tx_acfile)) - - -def get_seed(data): - if len(data) == 1: - return data[0] - - for name, seq, nrep in data[::-1]: - if name == CONSTAG: - break - return name, seq, nrep - - -def compute_consensus(fname, cons_seq, RAD, S, totalsize, 
mindepth=3, verbose=False): - # Strip N's from either end and gaps - gaps = set() - fixed = set() - assert len(cons_seq) == len(RAD) - - # Correct consensus by converting to top voting bases - shortcon = "" - for i, (base, site) in enumerate(zip(cons_seq, RAD)): - good = site[:4] + [site[-1]] - # Handles terminal regions delete columns if consensus is a terminal gap, - # or bases plus 'internal' gaps not covering half of the total abundance - if base == "_" or sum(good) < max(mindepth, totalsize / 2): - gaps.add(i) - continue - # Check count for original base for possible ties - n0 = site[BASES.index(base)] - n1 = max(good) # Base with highest count - if n1 > n0: - base = BASES[site.index(n1)] - fixed.add(i) - if base in GAPS: - gaps.add(i) - continue - shortcon += base - - shortRAD = [j for (i, j) in enumerate(RAD) if i not in gaps] - assert len(shortcon) == len(shortRAD) - - if verbose: - print(fname) - print("\n".join(["{0} {1}".format(*x) for x in S])) - display = "" - basecounts = [""] * NBASES - for i, (b, p) in enumerate(zip(cons_seq, RAD)): - display += ("+" if i in fixed else b) if i not in gaps else " " - for j, k in enumerate(p): - basecounts[j] += (str(k) if k < 10 else "#") if k else "." - print("=" * len(cons_seq)) - print(cons_seq) - print(display) - print("=" * len(cons_seq)) - for j, k in enumerate(basecounts): - if BASES[j] == "N": - continue - print("".join(k)) - print("=" * len(cons_seq)) - - return shortcon, shortRAD - - -def consensus(args): - """ - %prog consensus clustSfile - - Call consensus along the stacks. Tabulate bases at each site, tests for - errors according to error rate, calls consensus. 
- """ - p = OptionParser(consensus.__doc__) - p.add_argument( - "--ploidy", default=2, type=int, help="Number of haplotypes per locus" - ) - add_consensus_options(p) - p.set_verbose() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (clustSfile,) = args - pf = clustSfile.rsplit(".", 1)[0] - mindepth = opts.mindepth - minlength = opts.minlength - verbose = opts.verbose - - C = ClustFile(clustSfile) - output = [] - bins = [] - indices = [] - start = end = 0 # Index into base count array - for data in C: - names, seqs, nreps = zip(*data) - total_nreps = sum(nreps) - # Depth filter - if total_nreps < mindepth: - continue - - first_name, first_seq, first_nrep = data[0] - fname = first_name.split(";")[0] + ";size={0};".format(total_nreps) - cons_name, cons_seq, cons_nrep = get_seed(data) - if len(data) > 1 and cons_name != CONSTAG: - logger.debug("Tag {0} not found in cluster {1}".format(CONSTAG, cons_name)) - - # List for sequence data - S = [(seq, nrep) for name, seq, nrep in data if nrep] - # Pileups for base counting - RAD = stack(S) - - if len(data) == 1: # No computation needed - output.append((fname, seq)) - bins.extend(RAD) - start = end - end += len(seq) - indices.append((fname, start, end)) - continue - - shortcon, shortRAD = compute_consensus( - fname, cons_seq, RAD, S, total_nreps, mindepth=mindepth, verbose=verbose - ) - if len(shortcon) < minlength: - shortcon, shortRAD = compute_consensus( - fname, - first_seq, - RAD, - S, - total_nreps, - mindepth=mindepth, - verbose=verbose, - ) - - if len(shortcon) < minlength: # Stop trying - continue - - output.append((fname, shortcon)) - bins.extend(shortRAD) - - start = end - end += len(shortcon) - indices.append((fname, start, end)) - - consensfile = pf + ".consensus" - consens = open(consensfile, "w") - for k, v in output: - print("\n".join((k, v)), file=consens) - consens.close() - logger.debug("Consensus sequences written to `{0}`".format(consensfile)) - - binfile = 
consensfile + ".bin" - bins = np.array(bins, dtype=np.uint32) - ulimit = 65535 - bins[bins > ulimit] = ulimit - bins = np.array(bins, dtype=np.uint16) # Compact size - bins.tofile(binfile) - logger.debug("Allele counts written to `{0}`".format(binfile)) - - idxfile = consensfile + ".idx" - fw = open(idxfile, "w") - for fname, start, end in indices: - print("\t".join(str(x) for x in (fname, start, end)), file=fw) - fw.close() - logger.debug("Serializing indices to `{0}`".format(idxfile)) - - return consensfile, binfile, idxfile - - -def stack(S): - """ - From list of bases at a site D, make counts of bases - """ - S, nreps = zip(*S) - S = np.array([list(x) for x in S]) - rows, cols = S.shape - counts = [] - for c in range(cols): - freq = [0] * NBASES - for b, nrep in zip(S[:, c], nreps): - freq[BASES.index(b)] += nrep - counts.append(freq) - return counts - - -def get_left_right(seq): - """ - Find position of the first and last base - """ - cseq = seq.strip(GAPS) - leftjust = seq.index(cseq[0]) - rightjust = seq.rindex(cseq[-1]) - - return leftjust, rightjust - - -def cons(f, mindepth): - """ - Makes a list of lists of reads at each site - """ - C = ClustFile(f) - for data in C: - names, seqs, nreps = zip(*data) - total_nreps = sum(nreps) - # Depth filter - if total_nreps < mindepth: - continue - - S = [] - for name, seq, nrep in data: - # Append sequence * number of dereps - S.append([seq, nrep]) - - # Make list for each site in sequences - res = stack(S) - yield [x[:4] for x in res if sum(x[:4]) >= mindepth] - - -def makeP(N): - # Make list of freq. 
for BASES - sump = float(sum([sum(i) for i in N])) - if sump: - p1 = sum([i[0] for i in N]) / sump - p2 = sum([i[1] for i in N]) / sump - p3 = sum([i[2] for i in N]) / sump - p4 = sum([i[3] for i in N]) / sump - else: - p1 = p2 = p3 = p4 = 0.0 - return [p1, p2, p3, p4] - - -def makeC(N): - """ - Makes a dictionary with counts of base counts [x,x,x,x]:x, - speeds up Likelihood calculation - """ - C = defaultdict(int) - for d in N: - C[tuple(d)] += 1 - - return [i for i in C.items() if (0, 0, 0, 0) not in i] - - -def L1(E, P, N): - # Probability of homozygous - h = [] - s = sum(N) - for i, l in enumerate(N): - p = P[i] - b = scipy.stats.binom.pmf(s - l, s, E) - h.append(p * b) - return sum(h) - - -def L2(E, P, N): - # Probability of heterozygous - h = [] - s = sum(N) - for l, i in enumerate(N): - for j, k in enumerate(N): - if j > l: - one = 2.0 * P[l] * P[j] - two = scipy.stats.binom.pmf(s - i - k, s, (2.0 * E) / 3.0) - three = scipy.stats.binom.pmf(i, k + i, 0.5) - four = 1.0 - (sum([q**2.0 for q in P])) - h.append(one * two * (three / four)) - return sum(h) - - -def totlik(E, P, H, N): - # Total probability - lik = ((1 - H) * L1(E, P, N)) + (H * L2(E, P, N)) - return lik - - -def LL(x0, P, C): - # Log likelihood score given values [H, E] - H, E = x0 - L = [] - if H <= 0.0 or E <= 0.0: - r = np.exp(100) - else: - for i in C: - ll = totlik(E, P, H, i[0]) - if ll > 0: - L.append(i[1] * np.log(ll)) - r = -sum(L) - return r - - -def estimateHE(args): - """ - %prog estimateHE clustSfile - - Estimate heterozygosity (H) and error rate (E). Idea borrowed heavily from - the PyRad paper. 
- """ - p = OptionParser(estimateHE.__doc__) - add_consensus_options(p) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (clustSfile,) = args - HEfile = clustSfile.rsplit(".", 1)[0] + ".HE" - if not need_update(clustSfile, HEfile, warn=True): - return HEfile - - D = [] - for d in cons(clustSfile, opts.mindepth): - D.extend(d) - - logger.debug("Computing base frequencies ...") - P = makeP(D) - C = makeC(D) - logger.debug("Solving log-likelihood function ...") - x0 = [0.01, 0.001] # initital values - H, E = scipy.optimize.fmin(LL, x0, args=(P, C)) - - fw = must_open(HEfile, "w") - print(H, E, file=fw) - fw.close() - - return HEfile - - -def alignfast(names, seqs): - """ - Performs MUSCLE alignments on cluster and returns output as string - """ - matfile = op.join(datadir, "blosum80.mat") - cmd = "poa -read_fasta - -pir stdout {0} -tolower -silent -hb -fuse_all".format( - matfile - ) - p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True) - s = "" - for i, j in zip(names, seqs): - s += "\n".join((i, j)) + "\n" - return p.communicate(s)[0] - - -def replace_terminal(seq): - leftjust, rightjust = get_left_right(seq) - seq = ( - "_" * leftjust - + seq[leftjust : rightjust + 1] - + "_" * (len(seq) - rightjust - 1) - ) - return seq - - -def sortalign(stringnames): - G = stringnames.split("\n>") - aligned = [ - ( - ">" + i.split("\n")[0].strip(">"), - replace_terminal("".join(i.split("\n")[1:]).upper()), - ) - for i in G - ] - return aligned - - -def parallel_musclewrap(clustfile, cpus, minsamp=0): - musclewrap_minsamp = partial(musclewrap, minsamp=minsamp) - if cpus == 1: - return musclewrap_minsamp(clustfile) - - from jcvi.apps.grid import Jobs - - outdir = mkdtemp(dir=".") - fs = split([clustfile, outdir, str(cpus), "--format=clust"]) - g = Jobs(musclewrap_minsamp, fs.names) - g.run() - - clustnames = [x.replace(".clust", ".clustS") for x in fs.names] - clustSfile = clustfile.replace(".clust", 
".clustS") - FileMerger(clustnames, outfile=clustSfile).merge() - cleanup(outdir) - - -def filter_samples(names, seqs, sep="."): - """ - When there are uncollapsed contigs within the same sample, only retain the - first seq, or the seq that is most abundant (with cluster_size). - """ - seen = set() - filtered_names, filtered_seqs = [], [] - for name, seq in zip(names, seqs): - samp = name.split(sep, 1)[0] - if samp in seen: - continue - seen.add(samp) - filtered_names.append(name) - filtered_seqs.append(seq) - - nfiltered, nnames = len(filtered_names), len(names) - assert nfiltered == len(seen) - - return filtered_names, filtered_seqs, seen - - -def musclewrap(clustfile, minsamp=0): - cnts = 0 - C = ClustFile(clustfile) - clustSfile = clustfile.replace(".clust", ".clustS") - fw = open(clustSfile, "w") - for data in C: - STACK = Clust() - names = [] - seqs = [] - names, seqs, nreps = zip(*data) - if minsamp: # Filter based on samples, applicable in mcluster() - names, seqs, samples = filter_samples(names, seqs) - if len(samples) < minsamp: - continue - else: - names, seqs = names[:256], seqs[:256] # Reduce high coverage data - - if len(names) == 1: - STACK.append((names[0], seqs[0])) - else: - stringnames = alignfast(names, seqs) - aligned = sortalign(stringnames) - # Reorder keys by derep number - D1 = [(getsize(name), name, seq) for name, seq in aligned] - D1.sort(key=lambda x: (-x[0], x[1])) - for size, name, seq in D1: - STACK.append((name, seq)) - - if STACK: - print(STACK, file=fw) - cnts += 1 - - fw.close() - - -def makestats(clustSfile, statsfile, mindepth): - C = ClustFile(clustSfile) - depth = [] - for data in C: - d = 0 - for name, seq, nrep in data: - d += nrep - depth.append(d) - namecheck = op.basename(clustSfile).split(".")[0] - if depth: - me = round(np.mean(depth), 3) - std = round(np.std(depth), 3) - else: - me = std = 0.0 - out = dict(label=namecheck, total=sum(depth), cnts=len(depth), mean=me, std=std) - header = "label total cnts mean 
std".split() - - bins = [0, 5, 10, 15, 20, 25, 30, 35, 40, 50, 100, 250, 500, 99999] - ohist, edges = np.histogram(depth, bins) - hist = [float(i) / sum(ohist) for i in ohist] - hist = [int(round(i * 30)) for i in hist] - - logger.debug("Sample {0} finished, {1} loci".format(clustSfile, len(depth))) - - fw = open(statsfile, "w") - print("# Params: mindepth={0}".format(mindepth), file=fw) - print(" ".join("{0}={1}".format(k, out[k]) for k in header), file=fw) - print("\nbins\tdepth_histogram\tcnts", file=fw) - print(" :\t0------------50-------------100%", file=fw) - - for i, j, k in zip(edges, hist, ohist): - firststar = " " - if k > 0: - firststar = "*" - print(i, "\t", firststar + "*" * j + " " * (34 - j), k, file=fw) - fw.close() - - -def makeclust(derepfile, userfile, notmatchedfile, clustfile, mindepth=3): - D = dict(parse_fasta(derepfile)) - U = defaultdict(list) # Clusters - fp = open(userfile) - for row in fp: - query, target, id, qcov, tcov = row.rstrip().split("\t") - U[target].append((query, getsize(query), float(id) * float(qcov) * float(tcov))) - - fw = open(clustfile, "w") - for key, members in U.items(): - keysize = getsize(key) - members.sort(key=lambda x: (-x[1], -x[2])) - totalsize = keysize + sum(x[1] for x in members) - if totalsize < mindepth: - continue - - # Recruit cluster members - seqs = [(">" + key, D[key])] - for name, size, id in members: - seqs.append((">" + name, D[name])) - - seq = "\n".join("\n".join(x) for x in seqs) - print("\n".join((seq, SEP)), file=fw) - - I = dict(parse_fasta(notmatchedfile)) - singletons = set(I.keys()) - set(U.keys()) - for key in singletons: - if getsize(key) < mindepth: - continue - print("\n".join((">" + key, I[key], SEP)), file=fw) - fw.close() - - -def derep(fastafile, derepfile, minlength, cpus, usearch="vsearch"): - cmd = usearch + " -minseqlength {0}".format(minlength) - cmd += " -derep_fulllength {0}".format(fastafile) - cmd += " -output {0} -sizeout".format(derepfile) - cmd += " -threads 
{0}".format(cpus) - sh(cmd) - - -def cluster_smallmem( - derepfile, - userfile, - notmatchedfile, - minlength, - pctid, - cpus, - cov=0.8, - usearch="vsearch", -): - identity = pctid / 100.0 - cmd = usearch + " -minseqlength {0}".format(minlength) - cmd += " -cluster_size {0}".format(derepfile) - cmd += " -id {0}".format(identity) - cmd += " -mincols {0}".format(minlength) - cmd += " -query_cov {0}".format(cov) - cmd += " -target_cov {0}".format(cov) - cmd += " -userout {0}".format(userfile) - cmd += " -userfields query+target+id+qcov+tcov" - cmd += " -maxaccepts 1 -maxrejects 16" # Decrease maxrejects for speed - cmd += " -usersort -sizein" - cmd += " -notmatched {0}".format(notmatchedfile) - cmd += " -threads {0}".format(cpus) - sh(cmd) - - -def cluster(args): - """ - %prog cluster prefix fastqfiles - - Use `vsearch` to remove duplicate reads. This routine is heavily influenced - by PyRAD: . - """ - p = OptionParser(cluster.__doc__) - add_consensus_options(p) - p.set_align(pctid=95) - p.set_outdir() - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - prefix = args[0] - fastqfiles = args[1:] - cpus = opts.cpus - pctid = opts.pctid - mindepth = opts.mindepth - minlength = opts.minlength - fastafile, qualfile = fasta( - fastqfiles - + [ - "--seqtk", - "--outdir={0}".format(opts.outdir), - "--outfile={0}".format(prefix + ".fasta"), - ] - ) - - prefix = op.join(opts.outdir, prefix) - pf = prefix + ".P{0}".format(pctid) - derepfile = prefix + ".derep" - if need_update(fastafile, derepfile): - derep(fastafile, derepfile, minlength, cpus) - - userfile = pf + ".u" - notmatchedfile = pf + ".notmatched" - if need_update(derepfile, userfile): - cluster_smallmem(derepfile, userfile, notmatchedfile, minlength, pctid, cpus) - - clustfile = pf + ".clust" - if need_update((derepfile, userfile, notmatchedfile), clustfile): - makeclust(derepfile, userfile, notmatchedfile, clustfile, mindepth=mindepth) - - clustSfile = pf + 
".clustS" - if need_update(clustfile, clustSfile): - parallel_musclewrap(clustfile, cpus) - - statsfile = pf + ".stats" - if need_update(clustSfile, statsfile): - makestats(clustSfile, statsfile, mindepth=mindepth) - - -def align(args): - """ - %prog align clustfile - - Align clustfile to clustSfile. Useful for benchmarking aligners. - """ - p = OptionParser(align.__doc__) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (clustfile,) = args - parallel_musclewrap(clustfile, opts.cpus) - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/uniprot.py b/jcvi/apps/uniprot.py deleted file mode 100644 index c1ea5668..00000000 --- a/jcvi/apps/uniprot.py +++ /dev/null @@ -1,216 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Programatically accessing UniprotKB to get data from a list of queries -""" -import os.path as op -import sys -import time - -from urllib.parse import urlencode -from urllib.request import Request, urlopen -from urllib.error import HTTPError, URLError - -from ..formats.base import must_open - -from .base import ActionDispatcher, OptionParser, logger - - -uniprot_url = "http://www.uniprot.org/uniprot/" - -valid_formats = [ - "html", - "tab", - "xls", - "fasta", - "gff", - "txt", - "xml", - "rdf", - "list", - "rss", -] -valid_columns = [ - "citation", - "clusters", - "comments", - "database", - "domains", - "domain", - "ec", - "id", - "entry name", - "existence", - "families", - "features", - "genes", - "go", - "go-id", - "interpro", - "interactor", - "keywords", - "keyword-id", - "last-modified", - "length", - "organism", - "organism-id", - "pathway", - "protein names", - "reviewed", - "score", - "sequence", - "3d", - "subcellular locations", - "taxon", - "tools", - "version", - "virus hosts", -] - -valid_column_formats = ["tab", "xls"] -valid_include_formats = ["fasta", "rdf"] - - -def main(): - - actions = (("fetch", "fetch records from uniprot. 
input is a list of query terms"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def fetch(args): - """ - %prog fetch "query" - OR - %prog fetch queries.txt - - Please provide a UniProt compatible `query` to retrieve data. If `query` contains - spaces, please remember to "quote" it. - - You can also specify a `filename` which contains queries, one per line. - - Follow this syntax - to query any of the documented fields - """ - import re - import csv - - p = OptionParser(fetch.__doc__) - - p.add_argument( - "--format", - default="tab", - choices=valid_formats, - help="download format", - ) - p.add_argument( - "--columns", - default="entry name, protein names, genes,organism", - help="columns to download, if --format is `tab` or `xls`", - ) - p.add_argument( - "--include", - default=False, - action="store_true", - help="Include isoforms when --format is `fasta` or include `description` when --format is `rdf`.", - ) - p.add_argument( - "--limit", - default=10, - type=int, - help="Max number of results to retrieve", - ) - p.add_argument( - "--offset", - default=0, - type=int, - help="Offset of first result, used with --limit", - ) - p.add_argument( - "--skipcheck", - default=False, - action="store_true", - help="Turn off prompt to check file existence", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (query,) = args - url_params = {} - if op.exists(query): - pf = query.rsplit(".", 1)[0] - list_of_queries = [row.strip() for row in open(query)] - else: - # the query is the search term - pf = query.strip().strip('"') - list_of_queries = [pf] - pf = re.sub(r"\s+", "_", pf) - - assert len(list_of_queries) > 0, "Please provide atleast one input query" - - url_params["format"] = opts.format - - if opts.columns and opts.format in valid_column_formats: - reader = csv.reader([opts.columns], skipinitialspace=True) - cols = [col for r in reader for col in r] - for col in cols: - assert ( - col in valid_columns - ), 
"Column '{0}' is not a valid. Allowed options are {1}".format( - col, valid_columns - ) - url_params["columns"] = ",".join(cols) - - if opts.include and opts.format in valid_include_formats: - url_params["include"] = "yes" - - url_params["limit"] = opts.limit - url_params["offset"] = opts.offset - - outfile = "{0}.{1}".format(pf, opts.format) - - # If noprompt, will not check file existence - fw = must_open(outfile, "w", checkexists=True, skipcheck=opts.skipcheck) - if fw is None: - return - - seen = set() - for query in list_of_queries: - if query in seen: - logger.error("Duplicate query ({0}) found".format(query)) - continue - - url_params["query"] = query - - data = urlencode(url_params) - try: - request = Request(uniprot_url, data) - response = urlopen(request) - except (HTTPError, URLError, RuntimeError, KeyError) as e: - logger.error(e) - logger.debug("wait 5 seconds to reconnect...") - time.sleep(5) - - page = response.read() - if not page: - logger.error("query `{0}` yielded no results".format(query)) - continue - - print(page, file=fw) - - seen.add(query) - - if seen: - print( - "A total of {0} out of {1} queries returned results.".format( - len(seen), len(list_of_queries) - ), - file=sys.stderr, - ) - - -if __name__ == "__main__": - main() diff --git a/jcvi/apps/vecscreen.py b/jcvi/apps/vecscreen.py deleted file mode 100644 index a1c97b62..00000000 --- a/jcvi/apps/vecscreen.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Run through NCBI vecscreen on a local machine. 
-""" -import os.path as op -import sys - -from ..formats.base import must_open -from ..formats.blast import BlastLine -from ..formats.fasta import tidy -from ..utils.range import range_merge - -from .align import run_vecscreen, run_megablast -from .base import ActionDispatcher, OptionParser, download, sh - -ECOLI_URL = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/019/425/GCF_000019425.1_ASM1942v1/GCF_000019425.1_ASM1942v1_genomic.fna.gz" -UNIVEC_URL = "ftp://ftp.ncbi.nih.gov/pub/UniVec/UniVec_Core" - - -def main(): - - actions = (("mask", "mask the contaminants"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def is_internet_file(url): - """Return if url starts with http://, https://, or ftp://. - - Args: - url (str): URL of the link - """ - return ( - url.startswith("http://") - or url.startswith("https://") - or url.startswith("ftp://") - ) - - -def mask(args): - """ - %prog mask fastafile - - Mask the contaminants. By default, this will compare against UniVec_Core and - Ecoli.fasta. Merge the contaminant results, and use `maskFastaFromBed`. Can - perform FASTA tidy if requested. 
- """ - p = OptionParser(mask.__doc__) - p.add_argument( - "--db", - default=ECOLI_URL, - help="Contaminant db other than Ecoli K12, will download if file starts with http://, https://, or ftp://", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - db = opts.db - assert op.exists(fastafile) - - outfastafile = fastafile.rsplit(".", 1)[0] + ".masked.fasta" - vecbedfile = blast([fastafile]) - ecolifile = ( - download(db, filename="Ecoli.fasta", handle_gzip=True) - if is_internet_file(db) - else db - ) - assert op.exists(ecolifile) - ecolibedfile = blast([fastafile, "--db={0}".format(ecolifile)]) - - cmd = "cat {0} {1}".format(vecbedfile, ecolibedfile) - cmd += " | sort -k1,1 -k2,2n" - cmd += " | mergeBed -c 4 -o distinct -d 100 -i stdin" - cmd += " | maskFastaFromBed -fi {0} -bed stdin -fo {1}".format( - fastafile, outfastafile - ) - sh(cmd) - - return tidy([outfastafile]) - - -def blast(args): - """ - %prog blast fastafile - - Run BLASTN against database (default is UniVec_Core). Output .bed format - on the vector/contaminant ranges. 
- """ - p = OptionParser(blast.__doc__) - p.add_argument( - "--dist", - default=100, - type=int, - help="Merge adjacent HSPs separated by", - ) - p.add_argument("--db", help="Use a different database rather than UniVec_Core") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - fastaprefix = fastafile.split(".", 1)[0] - - univec = opts.db or download(UNIVEC_URL) - uniprefix = univec.split(".", 1)[0] - - fastablast = fastaprefix + ".{0}.blast".format(uniprefix) - - prog = run_megablast if opts.db else run_vecscreen - prog(infile=fastafile, outfile=fastablast, db=univec, pctid=95, hitlen=50) - - fp = open(fastablast) - ranges = [] - for row in fp: - b = BlastLine(row) - ranges.append((b.query, b.qstart, b.qstop)) - - merged_ranges = range_merge(ranges, dist=opts.dist) - bedfile = fastaprefix + ".{0}.bed".format(uniprefix) - fw = must_open(bedfile, "w") - for seqid, start, end in merged_ranges: - print("\t".join(str(x) for x in (seqid, start - 1, end, uniprefix)), file=fw) - - return bedfile - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/__init__.py b/jcvi/assembly/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/jcvi/assembly/__main__.py b/jcvi/assembly/__main__.py deleted file mode 100644 index e71fb0f9..00000000 --- a/jcvi/assembly/__main__.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Assemblage of genome-assembly related scripts: ALLMAPS algorithm, scaffolding, k-mer analysis, QC, tool wrappers, etc. -""" - -from ..apps.base import dmain - - -if __name__ == "__main__": - dmain(__file__) diff --git a/jcvi/assembly/allmaps.py b/jcvi/assembly/allmaps.py deleted file mode 100644 index 575554ce..00000000 --- a/jcvi/assembly/allmaps.py +++ /dev/null @@ -1,2018 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Scaffold Ordering with Weighted Maps. 
-""" -import os.path as op -import os -import sys - -from collections import Counter, defaultdict -from functools import partial -from itertools import combinations, product -from typing import Optional - -import numpy as np -import networkx as nx - -from cmmodule.utils import read_chain_file -from cmmodule.mapbed import crossmap_bed_file -from more_itertools import pairwise - -from ..algorithms.ec import GA_setup, GA_run -from ..algorithms.formula import reject_outliers, spearmanr -from ..algorithms.lis import ( - longest_monotonic_subseq_length_loose as lms, - longest_monotonic_subsequence_loose as lmseq, -) -from ..algorithms.matrix import determine_signs -from ..apps.base import ( - ActionDispatcher, - OptionParser, - SUPPRESS, - cleanup, - flatten, - get_today, - logger, - mkdir, - need_update, - sh, - version, -) -from ..formats.agp import AGP, order_to_agp, build as agp_build, reindex -from ..formats.base import DictFile, FileMerger, must_open, read_block -from ..formats.bed import Bed, BedLine, natsorted, sort -from ..formats.chain import fromagp -from ..formats.sizes import Sizes -from ..graphics.landscape import draw_gauge -from ..utils.cbook import human_size, percentage -from ..utils.grouper import Grouper -from ..utils.table import tabulate - - -START, END = "START", "END" -distance_choices = ("cM", "rank") -linkage_choices = ("single", "double", "complete", "average", "median") -np.seterr(invalid="ignore") - - -class Scaffold(object): - def __init__(self, seqid, mapc): - self.markers = mapc.extract(seqid) - self.seqid = seqid - self.mapc = mapc - - @property - def mlg_counts(self): - return Counter([x.mlg for x in self.markers]) - - def add_LG_pairs(self, G, mappair): - # Computes co-occurrences of LG pairs - cc = self.mlg_counts.items() - mappair = sorted(mappair) - for (ak, av), (bk, bv) in combinations(cc, 2): - aks, bks = ak.split("-")[0], bk.split("-")[0] - if sorted((aks, bks)) != mappair: - continue - weight = min(av, bv) - G[ak, bk] += weight 
- G[bk, ak] += weight - - -class LinkageGroup(object): - def __init__(self, lg, length, markers, function=(lambda x: x.rank), linkage=min): - self.lg = lg - self.length = length - self.markers = markers - self.function = f = function - self.linkage = linkage - - self.mapname = lg.split("-")[0] - self.series = {} - self.nmarkers = {} - self.oo = {} - self.position = {} - self.guide = {} - for k, v in markers.items(): # keyed by scaffold ids - self.series[k] = xs = [f(x) for x in v] - self.nmarkers[k] = len(v) - physical_to_cm = [(x.pos, f(x)) for x in v] - self.oo[k] = get_rho(physical_to_cm) - self.position[k] = np.median(xs) - self.guide[k] = np.median([x.cm for x in v]) - - path = sorted((v, self.guide[k], k) for k, v in self.position.items()) - vv, gg, path = zip(*path) - self.path = path - self.rho = 0 - - def populate_pairwise_distance(self): - distances = {} - series = self.series - linkage = self.linkage - for a, b in combinations(self.path, 2): - d = linkage_distance(series[a], series[b], linkage=linkage) - distances[a, b] = distances[b, a] = d - - for p in self.path: - adist = linkage_distance([0], series[p], linkage=linkage) - bdist = linkage_distance(series[p], [self.length], linkage=linkage) - if self.rho < 0: - adist, bdist = bdist, adist - distances[START, p] = distances[p, START] = adist - distances[END, p] = distances[p, END] = bdist - - self.distances = distances - - return distances - - -class ScaffoldOO(object): - """ - This contains the routine to construct order and orientation for the - scaffolds per partition. 
- """ - - def __init__( - self, - lgs, - scaffolds, - mapc, - pivot, - weights, - sizes, - function=(lambda x: x.rank), - linkage=min, - fwtour=None, - ngen=500, - npop=100, - cpus=8, - seed=666, - ): - - self.lgs = lgs - self.lengths = mapc.lengths - self.bins = mapc.bins - self.sizes = sizes - self.scaffolds = scaffolds - self.pivot = pivot - self.weights = weights - self.function = function - self.linkage = linkage - - self.prepare_linkage_groups() # populate all data - for mlg in self.lgs: - mapname, lg = mlg.rsplit("-", 1) - if mapname == pivot: - self.object = "chr{0}".format(lg) - break - - tag = "|".join(lgs) - tour = zip(scaffolds, len(scaffolds) * [1]) - print_tour(fwtour, self.object, tag, "INIT", tour, recode=True) - signs = self.assign_orientation() - assert len(signs) == len(scaffolds) - tour = list(zip(scaffolds, signs)) - scaffolds_oo = dict(tour) - print_tour(fwtour, self.object, tag, "FLIP", tour, recode=True) - tour = self.assign_order() - tour = [(x, scaffolds_oo[x]) for x in tour] - print_tour(fwtour, self.object, tag, "TSP", tour, recode=True) - - def callback(tour, gen, i=0): - fitness = tour.fitness if hasattr(tour, "fitness") else None - tour = [scaffolds[x] for x in tour] - tour = [(x, scaffolds_oo[x]) for x in tour] - label = "GA{0}-{1}".format(i, gen) - if fitness: - fitness = "{0}".format(fitness).split(".")[0].replace("(", "") - label += "-" + fitness - print_tour(fwtour, self.object, tag, label, tour, recode=True) - return tour - - i = 0 - best_fitness = None - while True: # Multiple EC rounds due to orientation fixes - logger.debug("Start EC round %d", i) - scaffolds_oo = dict(tour) - scfs, tour, ww = self.prepare_ec(scaffolds, tour, weights) - callbacki = partial(callback, i=i) - toolbox = GA_setup(tour) - toolbox.register("evaluate", colinear_evaluate_multi, scfs=scfs, weights=ww) - tour, fitness = GA_run( - toolbox, ngen=ngen, npop=npop, cpus=cpus, seed=seed, callback=callbacki - ) - tour = callbacki(tour, "FIN") - if best_fitness 
and fitness <= best_fitness: - logger.debug("No fitness improvement: %s. Exit EC.", best_fitness) - break - tour = self.fix_orientation(tour) - best_fitness = fitness - print_tour( - fwtour, self.object, tag, "GA{0}-FIXORI".format(i), tour, recode=True - ) - logger.debug("Current best fitness: %s", best_fitness) - i += 1 - - tour = self.fix_tour(tour) - self.tour = recode_tour(tour) - for fw in (sys.stderr, fwtour): - print_tour(fw, self.object, tag, "FINAL", self.tour) - - def prepare_ec(self, scaffolds, tour, weights): - """ - Prepare Evolutionary Computation. This converts scaffold names into - indices (integer) in the scaffolds array. - """ - scaffolds_ii = dict((s, i) for i, s in enumerate(scaffolds)) - scfs = [] - ww = [] - for mlg in self.linkage_groups: - w = float(weights[mlg.mapname]) - scf = {} - for s, o in tour: - si = scaffolds_ii[s] - scf[si] = self.get_series(mlg.lg, s, orientation=o) - scfs.append(scf) - ww.append(w) - tour = [scaffolds_ii[x] for x, o in tour] - - return scfs, tour, ww - - def weighted_mean(self, a): - a, w = zip(*a) - w = [self.weights[x] for x in w] - return np.average(a, weights=w) - - def get_markers(self, lg, scaffold, orientation=0): - xs = self.bins.get((lg, scaffold), []) - if orientation < 0: - xs = xs[::-1] - return xs - - def get_series(self, lg, scaffold, orientation=0): - xs = self.get_markers(lg, scaffold, orientation=orientation) - return [self.function(x) for x in xs] - - def prepare_linkage_groups(self): - self.linkage_groups = [] - for lg in self.lgs: - length = self.lengths[lg] - markers = {} - for s in self.scaffolds: - xs = self.get_markers(lg, s) - if xs: - markers[s] = xs - if not markers: - continue - LG = LinkageGroup( - lg, length, markers, function=self.function, linkage=self.linkage - ) - self.linkage_groups.append(LG) - - def distances_to_tour(self): - scaffolds = self.scaffolds - distances = self.distances - G = nx.DiGraph() - for (a, b), v in distances.items(): - d = self.weighted_mean(v) - 
G.add_edge(a, b, weight=d) - if a == START or b == END: - continue - G.add_edge(b, a, weight=d) - - logger.debug("Graph size: |V|=%d, |E|=%d", len(G), G.size()) - - L = dict(nx.all_pairs_dijkstra_path_length(G)) - for a, b in combinations(scaffolds, 2): - if G.has_edge(a, b): - continue - if a in L and b in L[a]: - l = L[a][b] - G.add_edge(a, b, weight=l) - G.add_edge(b, a, weight=l) - - edges = [] - for a, b, d in G.edges(data=True): - edges.append((a, b, d["weight"])) - - return scaffolds[:] - - def assign_order(self): - """ - The goal is to assign scaffold orders. To help order the scaffolds, two - dummy node, START and END, mark the ends of the chromosome. We connect - START to each scaffold (directed), and each scaffold to END. - """ - linkage_groups = self.linkage_groups - for mlg in linkage_groups: - mapname = mlg.mapname - if mapname == self.pivot: - pivot_position = mlg.position - - for mlg in linkage_groups: - position = mlg.position - # Flip order if path goes in the opposite direction to the pivot - common = [] - for a, ap in position.items(): - if a not in pivot_position: - continue - pp = pivot_position[a] - common.append((ap, pp)) - - mlg.rho = get_rho(common) - if mlg.rho < 0: - mlg.path = mlg.path[::-1] - - mlg.populate_pairwise_distance() - - # Preparation of TSP - distances = defaultdict(list) - for mlg in linkage_groups: - mapname = mlg.mapname - position = mlg.position - length = mlg.length - path = mlg.path - rho = mlg.rho - dd = mlg.distances - for a, b in combinations(path, 2): - d = dd[a, b] - distances[a, b].append((d, mapname)) - for p in path: - adist, bdist = position[p], length - position[p] - if rho < 0: - adist, bdist = bdist, adist - distances[START, p].append((adist, mapname)) - distances[p, END].append((bdist, mapname)) - - self.distances = distances - tour = self.distances_to_tour() - return tour - - def get_orientation(self, si, sj): - """ - si, sj are two number series. 
To compute whether these two series have - same orientation or not. We combine them in the two orientation - configurations and compute length of the longest monotonic series. - """ - if not si or not sj: - return 0 - # Same orientation configuration - a = lms(si + sj) - b = lms(sj + si) - # Opposite orientation configuration - c = lms(si + sj[::-1]) - d = lms(sj[::-1] + si) - return max(a, b)[0] - max(c, d)[0] - - def assign_orientation(self): - signs = defaultdict(list) - scaffolds = self.scaffolds - for mlg in self.linkage_groups: - mapname = mlg.mapname - series = mlg.series - if mapname == self.pivot: - pivot_oo = mlg.oo - pivot_nmarkers = mlg.nmarkers - - for i, j in combinations(range(len(scaffolds)), 2): - si, sj = scaffolds[i], scaffolds[j] - si, sj = series.get(si, []), series.get(sj, []) - d = self.get_orientation(si, sj) - if not d: - continue - signs[i, j].append((d, mapname)) - - for e, v in signs.items(): - signs[e] = self.weighted_mean(v) - - signs_edges = sorted((a, b, w) for (a, b), w in signs.items()) - signs = determine_signs(scaffolds, signs_edges) - - # Finally flip this according to pivot map, then weight by #_markers - pivot_oo = [pivot_oo.get(x, 0) for x in scaffolds] - nmarkers = [pivot_nmarkers.get(x, 0) for x in scaffolds] - flipr = signs * np.sign(np.array(pivot_oo)) * nmarkers - if sum(flipr) < 0: - signs = -signs - return signs - - def fix_tour(self, tour): - """ - Test each scaffold if dropping does not decrease LMS. 
- """ - scaffolds, oos = zip(*tour) - keep = set() - for mlg in self.linkage_groups: - lg = mlg.lg - for s, o in tour: - i = scaffolds.index(s) - L = [self.get_series(lg, x, xo) for x, xo in tour[:i]] - U = [self.get_series(lg, x, xo) for x, xo in tour[i + 1 :]] - L, U = list(flatten(L)), list(flatten(U)) - M = self.get_series(lg, s, o) - score_with = lms(L + M + U)[0] - score_without = lms(L + U)[0] - assert score_with >= score_without - if score_with > score_without: - keep.add(s) - dropped = len(tour) - len(keep) - logger.debug("Dropped %d minor scaffolds", dropped) - return [(s, o) for (s, o) in tour if s in keep] - - def fix_orientation(self, tour): - """ - Test each scaffold if flipping will increass longest monotonic chain - length. - """ - orientations = dict(tour) # old configuration here - scaffold_oo = defaultdict(list) - scaffolds, oos = zip(*tour) - for mlg in self.linkage_groups: - lg = mlg.lg - mapname = mlg.mapname - for s, o in tour: - i = scaffolds.index(s) - L = [self.get_series(lg, x, xo) for x, xo in tour[:i]] - U = [self.get_series(lg, x, xo) for x, xo in tour[i + 1 :]] - L, U = list(flatten(L)), list(flatten(U)) - M = self.get_series(lg, s) - plus = lms(L + M + U) - minus = lms(L + M[::-1] + U) - d = plus[0] - minus[0] - if not d: - continue - scaffold_oo[s].append((d, mapname)) # reset orientation - - fixed = 0 - for s, v in scaffold_oo.items(): - d = self.weighted_mean(v) - old_d = orientations[s] - new_d = np.sign(d) - if new_d != old_d: - orientations[s] = new_d - fixed += 1 - - tour = [(x, orientations[x]) for x in scaffolds] - logger.debug("Fixed orientations for %d scaffolds.", fixed) - return tour - - -class CSVMapLine(object): - def __init__(self, row, sep=",", mapname=None): - # ScaffoldID,ScaffoldPosition,LinkageGroup,GeneticPosition - args = [x.strip() for x in row.split(sep)] - self.seqid = args[0] - self.pos = int(args[1]) - self.lg = args[2] - self.cm = float(args[3]) - self.mapname = mapname - - @property - def bedline(self): 
- marker = "{0}-{1}:{2:.6f}".format(self.mapname, self.lg, self.cm) - track = "{0}:{1}".format(self.seqid, self.pos) - return "\t".join( - str(x) for x in (self.seqid, self.pos - 1, self.pos, marker, track) - ) - - -class Marker(object): - def __init__(self, b): - self.seqid = b.seqid - self.pos = b.start - self.mlg, cm = b.accn.split(":") - try: - self.mapname, self.lg = b.accn.split("-", 1) - except ValueError: - logger.error("Malformed marker name: %s", b.accn) - sys.exit(1) - self.cm = float(cm) - self.accn = b.accn - self.args = b.args - self.rank = -1 - - def parse_scaffold_info(self): - self.scaffoldaccn = self.args[-1] - self.scaffoldid, scaffoldpos = self.scaffoldaccn.split(":") - self.scaffoldpos = int(scaffoldpos) - - def __str__(self): - return "\t".join( - str(x) for x in (self.seqid, self.pos - 1, self.pos, self.accn, self.rank) - ) - - __repr__ = __str__ - - -class Map(list): - def __init__( - self, - filename, - scaffold_info=False, - compress=1e-6, - remove_outliers=False, - function=(lambda x: x.rank), - ): - super().__init__() - bed = Bed(filename) - for b in bed: - self.append(Marker(b)) - self.report() - self.ranks = self.compute_ranks(compress) - self.lengths = self.compute_lengths(function) - self.bins = self.get_bins(function, remove_outliers) - if scaffold_info: - for b in self: - b.parse_scaffold_info() - - def report(self): - self.nmarkers = len(self) - self.seqids = sorted(set(x.seqid for x in self)) - self.mapnames = sorted(set(x.mapname for x in self)) - self.mlgs = sorted(set(x.mlg for x in self)) - logger.debug( - "Map contains %d markers in %d linkage groups.", - self.nmarkers, - len(self.mlgs), - ) - - def extract(self, seqid): - r = [x for x in self if x.seqid == seqid] - return sorted(r, key=lambda x: x.pos) - - def extract_mlg(self, mlg): - r = [x for x in self if x.mlg == mlg] - return sorted(r, key=lambda x: x.cm) - - def compute_ranks(self, compress): - ranks = {} # Store the length for each linkage group - for mlg in 
self.mlgs: - rank = 0 - mlg_set = self.extract_mlg(mlg) - for i, marker in enumerate(mlg_set): - if i == 0: - marker.rank = rank - continue - if marker.cm - mlg_set[i - 1].cm > compress: - rank += 1 - marker.rank = rank - ranks[mlg] = mlg_set - return ranks - - def compute_lengths(self, function): - lengths = {} - for mlg, v in self.ranks.items(): - lengths[mlg] = max(function(x) for x in v) - return lengths - - def get_bins(self, function, remove_outliers): - s = defaultdict(list) - for m in self: - s[(m.mlg, m.seqid)].append(m) - - if remove_outliers: - original = clean = 0 - for pair, markers in s.items(): - cm = self.remove_outliers(markers, function) - s[pair] = cm - original += len(markers) - clean += len(cm) - logger.debug("Retained %s clean markers.", percentage(clean, original)) - return s - - def remove_outliers(self, markers, function): - data = [function(x) for x in markers] - reject = reject_outliers(data) - clean_markers = [m for m, r in zip(markers, reject) if not r] - return clean_markers - - -class MapSummary(object): - def __init__(self, markers, l50, s, scaffolds=None): - markers = self.unique_markers(markers) - self.num_markers = len(markers) - self.num_lgs = len(set(x.mlg for x in markers)) - scaffolds = scaffolds or set(x.seqid for x in markers) - n50_scaffolds = [x for x in scaffolds if s.mapping[x] >= l50] - self.num_scaffolds = len(scaffolds) - self.num_n50_scaffolds = len(n50_scaffolds) - self.total_bases = sum(s.mapping[x] for x in scaffolds) - self.tally_markers(markers) - - def unique_markers(self, markers): - umarkers = [] - seen = set() - for m in markers: - mt = (m.seqid, m.pos) - if mt in seen: - continue - umarkers.append(m) - seen.add(mt) - return umarkers - - def tally_markers(self, markers): - counter = Counter([x.seqid for x in markers]) - self.scaffold_1m = len([x for x in counter.values() if x == 1]) - self.scaffold_2m = len([x for x in counter.values() if x == 2]) - self.scaffold_3m = len([x for x in counter.values() if x == 
3]) - self.scaffold_4m = len([x for x in counter.values() if x >= 4]) - - def export_table(self, r, mapname, total): - r["Markers (unique)", mapname] = self.num_markers - r["Markers per Mb", mapname] = ( - self.num_markers * 1e6 / self.total_bases if self.total_bases else 0 - ) - r["Scaffolds", mapname] = self.num_scaffolds - r["N50 Scaffolds", mapname] = self.num_n50_scaffolds - r["Total bases", mapname] = percentage(self.total_bases, total, mode=1) - r["Scaffolds with 1 marker", mapname] = self.scaffold_1m - r["Scaffolds with 2 markers", mapname] = self.scaffold_2m - r["Scaffolds with 3 markers", mapname] = self.scaffold_3m - r["Scaffolds with >=4 markers", mapname] = self.scaffold_4m - - -class Weights(DictFile): - def __init__(self, filename, mapnames, cast=int): - super().__init__(filename, cast=cast) - self.maps = [x.split()[0] for x in must_open(filename)] - self.update_maps(mapnames) - pivot_weight, o, pivot = self.get_pivot(mapnames) - ref = self.maps[0] - self.pivot = pivot - self.ref = ref - - logger.debug("Map weights: %s", self.items()) - - def update_maps(self, mapnames, default=1): - keys = list(self.keys()) - for m in keys: - if m not in mapnames: - del self[m] - for m in mapnames: - if m in self: - continue - self[m] = default - logger.debug("Weight for `%s` set to %d.", m, default) - - def get_pivot(self, mapnames): - # Break ties by occurence in file - common_mapnames = set(self.maps) & set(mapnames) - if not common_mapnames: - logger.error("No common names found between %s and %s", self.maps, mapnames) - sys.exit(1) - return max( - (w, -self.maps.index(m), m) for m, w in self.items() if m in common_mapnames - ) - - -class Layout(object): - def __init__(self, mlgsizes): - - self.mlgsizes = mlgsizes - self.partition() - self.calculate_coords() - - def partition(self, N=2): - # Partition LGs into two sides with approximately similar sum of sizes - endtime = [0] * N - parts = [] - for i in range(N): - parts.append([]) - # LPT greedy algorithm, sort 
by LG size decreasing - for mlg, mlgsize in sorted(self.mlgsizes.items(), key=lambda x: -x[-1]): - mt, mi = min((x, i) for (i, x) in enumerate(endtime)) - endtime[mi] += mlgsize - parts[mi].append((mlg, mlgsize)) - self.parts = parts - - def calculate_coords(self, r=0.8, gapsize=0.1): - # Find the larger partition - part_sizes = [] - for p in self.parts: - ps = sum(ms for m, ms in p) - part_sizes.append((ps, len(p) - 1)) - max_part_size, ngaps = max(part_sizes) - gaps = gapsize * ngaps - ratio = (r - gaps) / max_part_size - self.ratio = ratio - - coords = {} - for x, p, (ps, ngaps) in zip((0.25, 0.75), self.parts, part_sizes): - gaps = gapsize * ngaps - ystart = (1 + ratio * ps + gaps) / 2 - for m, ms in p: - mlen = ratio * ms - coords[m] = (x, ystart - mlen, ystart) - ystart -= mlen + gapsize - self.coords = coords - - -class GapEstimator(object): - def __init__(self, mapc, agp, seqid, mlg, function=lambda x: x.cm): - mm = mapc.extract_mlg(mlg) - logger.debug("Extracted %d markers for %s-%s", len(mm), seqid, mlg) - self.mlgsize = max(function(x) for x in mm) - - self.agp = [x for x in agp if x.object == seqid] - self.scaffolds = [x.component_id for x in self.agp if not x.is_gap] - self.pp = [x.object_beg for x in self.agp if x.is_gap] - self.chrsize = max(x.object_end for x in self.agp) - - s = Scaffold(seqid, mapc) - self.scatter_data = [] - self.scaffold_markers = defaultdict(list) - for x in s.markers: - if x.mlg != mlg: - continue - self.scaffold_markers[x.scaffoldid].append(x) - self.scatter_data.append((x.pos, function(x))) - self.scatter_data.sort() - self.get_splines() - - def get_gapsize(self, scaffold): - # Find the gap size right after a query scaffold - i = self.scaffolds.index(scaffold) - return self.gapsizes[i] - - def get_splines(self, floor=25 * 1e-9, ceil=25 * 1e-6): - from scipy.interpolate import UnivariateSpline - - mx, my = zip(*self.scatter_data) - yy, xx = zip(*lmseq(zip(my, mx))) # filter with LMS - spl = UnivariateSpline(xx, yy) - spld = 
spl.derivative() - - def spl_derivative(x): - s = abs(spld(x)) - s[s < floor] = floor - s[s > ceil] = ceil - return s - - self.spl = spl - self.spld = spl_derivative - - def compute_one_gap(self, a, b, gappos, minsize, maxsize, verbose=False): - ma, mb = self.scaffold_markers[a], self.scaffold_markers[b] - all_marker_pairs = [] - for x, y in product(ma, mb): - cm_dist = abs(x.cm - y.cm) - (ratio,) = self.spld([gappos]) - converted_dist = int(round(cm_dist / ratio)) - overhang_x = abs(x.pos - gappos) - overhang_y = abs(y.pos - gappos) - minsize - estimated = converted_dist - overhang_x - overhang_y - if estimated < minsize: - estimated = minsize - if estimated > maxsize: - estimated = maxsize - if verbose: - print("=" * 10) - print(x) - print(y) - print(x.scaffoldaccn, y.scaffoldaccn) - print("Converted dist:", cm_dist, ratio, converted_dist) - print("Overhangs:", overhang_x, overhang_y) - print("Estimated", estimated) - all_marker_pairs.append(estimated) - - gapsize = min(all_marker_pairs) if all_marker_pairs else None - if verbose: - print("*" * 5, a, b, gapsize) - return gapsize - - def compute_all_gaps(self, minsize=100, maxsize=500000, verbose=False): - self.gapsizes = [] - for (a, b), gappos in zip(pairwise(self.scaffolds), self.pp): - gapsize = self.compute_one_gap( - a, b, gappos, minsize, maxsize, verbose=verbose - ) - self.gapsizes.append(gapsize) - - -def colinear_evaluate_multi(tour, scfs, weights): - weighted_score = 0 - for scf, w in zip(scfs, weights): - subtour = [x for x in tour if x in scf] - series = [] - for t in subtour: - series.extend(scf[t]) - score, diff = lms(series) - weighted_score += score * w - return (weighted_score,) - - -def get_rho(xy): - if not xy: - return 0 - x, y = zip(*xy) - rho = spearmanr(x, y) - if np.isnan(rho): - rho = 0 - return rho - - -def linkage_distance(a, b, linkage=min): - return linkage([abs(i - j) for i, j in product(a, b)]) - - -def double_linkage(L): - if len(L) == 1: - return L[0] - L.sort() - a, b = L[:2] - 
return (a + b) / 2.0 - - -def main(): - - actions = ( - ("fake", "make fake scaffolds.fasta"), - ("merge", "merge csv maps and convert to bed format"), - ("mergebed", "merge maps in bed format"), - ("path", "construct golden path given a set of genetic maps"), - ("estimategaps", "estimate sizes of inter-scaffold gaps"), - ("build", "build associated FASTA and CHAIN file"), - ("split", "split suspicious scaffolds"), - ("summary", "report summary stats for maps and final consensus"), - # Visualization - ("plot", "plot matches between goldenpath and maps for single object"), - ("plotall", "plot matches between goldenpath and maps for all objects"), - ("plotratio", "illustrate physical vs map distance ratio"), - ("movie", "visualize history of scaffold OO"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def normalize_lms_axis( - ax, xlim=None, ylim=None, xfactor=1e-6, yfactor=1, xlabel=None, ylabel="Map (cM)" -): - """Normalize the axis limits and labels to beautify axis.""" - if xlim: - ax.set_xlim(0, xlim) - if ylim: - ax.set_ylim(0, ylim) - if xlabel: - xticklabels = [int(round(x * xfactor)) for x in ax.get_xticks()] - ax.set_xticklabels(xticklabels, family="Helvetica") - ax.set_xlabel(xlabel) - else: - ax.set_xticks([]) - if ylabel: - yticklabels = [int(round(x * yfactor)) for x in ax.get_yticks()] - ax.set_yticklabels(yticklabels, family="Helvetica") - ax.set_ylabel(ylabel) - else: - ax.set_yticks([]) - - -def plotratio(args): - """ - %prog plotratio JM-2 chr23 JMMale-23 - - Illustrate physical vs map distance ratio, that were used in the gap estimation algorithm. 
- """ - from ..graphics.base import plt, savefig, normalize_axes, panel_labels, set2 - - p = OptionParser(estimategaps.__doc__) - _, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300) - - if len(args) != 3: - sys.exit(not p.print_help()) - - pf, seqid, mlg = args - bedfile = pf + ".lifted.bed" - agpfile = pf + ".agp" - - function = lambda x: x.cm - cc = Map(bedfile, scaffold_info=True, function=function) - agp = AGP(agpfile) - - g = GapEstimator(cc, agp, seqid, mlg, function=function) - pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize - spl, spld = g.spl, g.spld - g.compute_all_gaps(verbose=False) - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - # Panel A - xstart, ystart = 0.15, 0.55 - w, h = 0.7, 0.4 - t = np.linspace(0, chrsize, 1000) - ax = fig.add_axes([xstart, ystart, w, h]) - mx, my = zip(*g.scatter_data) - rho = spearmanr(mx, my) - - dsg = "g" - ax.vlines(pp, 0, mlgsize, colors="beige") - ax.plot(mx, my, ".", color=set2[3]) - ax.plot(t, spl(t), "-", color=dsg) - ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes) - normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize, ylabel="Genetic distance (cM)") - if rho < 0: - ax.invert_yaxis() - - # Panel B - ystart = 0.1 - ax = fig.add_axes([xstart, ystart, w, h]) - ax.vlines(pp, 0, mlgsize, colors="beige") - ax.plot(t, spld(t), "-", lw=2, color=dsg) - ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5) - normalize_lms_axis( - ax, - xlim=chrsize, - ylim=25 * 1e-6, - xfactor=1e-6, - xlabel="Physical position (Mb) on {}".format(seqid), - yfactor=1000000, - ylabel="Recomb. rate\n(cM / Mb)", - ) - ax.xaxis.grid(False) - - labels = ((0.05, 0.95, "A"), (0.05, 0.5, "B")) - panel_labels(root, labels) - normalize_axes(root) - - pf = "plotratio" - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def fake(args): - """ - %prog fake input.bed - - Make fake `scaffolds.fasta`. 
Use case for this is that sometimes I would - receive just the csv/bed file and I'd like to use path() out of the box. - """ - from math import ceil - from random import choice - - from Bio import SeqIO - from Bio.Seq import Seq - from Bio.SeqRecord import SeqRecord - - p = OptionParser(fake.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (inputbed,) = args - bed = Bed(inputbed) - recs = [] - for seqid, sb in bed.sub_beds(): - maxend = max(x.end for x in sb) - size = int(ceil(maxend / 1000.0) * 1000) - seq = "".join([choice("ACGT") for x in range(size)]) - rec = SeqRecord(Seq(seq), id=seqid, description="") - recs.append(rec) - - fw = must_open(opts.outfile, "w") - SeqIO.write(recs, fw, "fasta") - - -def compute_score(markers, bonus, penalty): - """ - Compute chain score using dynamic programming. If a marker is the same - linkage group as a previous one, we add bonus; otherwise, we penalize the - chain switching. - """ - nmarkers = len(markers) - s = [bonus] * nmarkers # score - f = [-1] * nmarkers # from - for i in range(1, nmarkers): - for j in range(i): - mi, mj = markers[i], markers[j] - t = bonus if mi.mlg == mj.mlg else penalty + bonus - if s[i] < s[j] + t: - s[i] = s[j] + t - f[i] = j - # Recover the highest scoring chain - highest_score = max(s) - si = s.index(highest_score) - onchain = set() - while True: - if si < 0: - break - si = f[si] - onchain.add(si) - return [x for i, x in enumerate(markers) if i in onchain] - - -def split(args): - """ - %prog split input.bed - - Split suspicious scaffolds. Suspicious scaffolds are those that contain - chunks that map to more than one linkage group. The chunk size can be - modified through --chunk option. 
- """ - p = OptionParser(split.__doc__) - p.add_argument( - "--chunk", default=4, type=int, help="Split chunks of at least N markers" - ) - p.add_argument( - "--splitsingle", - default=False, - action="store_true", - help="Split breakpoint range right in the middle", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (inputbed,) = args - bonus = 2 - nchunk = opts.chunk - nbreaks = 0 - penalty = -(nchunk * bonus - 1) - bed = Bed(inputbed) - for seqid, bb in bed.sub_beds(): - markers = [Marker(x) for x in bb] - markers = compute_score(markers, bonus, penalty) - for mi, mj in pairwise(markers): - if mi.mlg == mj.mlg: - continue - assert mi.seqid == mj.seqid - start, end = mi.pos, mj.pos - if start > end: - start, end = end, start - if opts.splitsingle: - start = end = (start + end) / 2 - print("\t".join(str(x) for x in (mi.seqid, start - 1, end))) - nbreaks += 1 - logger.debug("A total of %d breakpoints inferred (--chunk=%d)", nbreaks, nchunk) - - -def movie(args): - """ - %prog movie input.bed scaffolds.fasta chr1 - - Visualize history of scaffold OO. The history is contained within the - tourfile, generated by path(). For each historical scaffold OO, the program - plots a separate PDF file. The plots can be combined to show the progression - as a little animation. The third argument limits the plotting to a - specific pseudomolecule, for example `chr1`. 
- """ - p = OptionParser(movie.__doc__) - p.add_argument( - "--gapsize", - default=100, - type=int, - help="Insert gaps of size between scaffolds", - ) - add_allmaps_plot_options(p) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - inputbed, scaffoldsfasta, seqid = args - gapsize = opts.gapsize - pf = inputbed.rsplit(".", 1)[0] - agpfile = pf + ".chr.agp" - tourfile = pf + ".tour" - - fp = open(tourfile) - sizes = Sizes(scaffoldsfasta).mapping - ffmpeg = "ffmpeg" - mkdir(ffmpeg) - score = None - i = 1 - for header, block in read_block(fp, ">"): - s, tag, label = header[1:].split() - if s != seqid: - continue - tour = block[0].split() - tour = [(x[:-1], x[-1]) for x in tour] - if label.startswith("GA"): - cur_score = label.split("-")[-1] - if cur_score == score: - i += 1 - continue - score = cur_score - - image_name = ".".join((seqid, "{0:04d}".format(i), label, "pdf")) - if need_update(tourfile, image_name): - fwagp = must_open(agpfile, "w") - order_to_agp(seqid, tour, sizes, fwagp, gapsize=gapsize, evidence="map") - fwagp.close() - logger.debug("%s written to `%s`.", header, agpfile) - build([inputbed, scaffoldsfasta, "--cleanup"]) - pdf_name = plot([inputbed, seqid, "--title={0}".format(label)]) - sh("mv {0} {1}".format(pdf_name, image_name)) - if label in ("INIT", "FLIP", "TSP", "FINAL"): - for j in range(5): # Delay for 5 frames - image_delay = image_name.rsplit(".", 1)[0] + ".d{0}.pdf".format(j) - sh("cp {0} {1}/{2}".format(image_name, ffmpeg, image_delay)) - else: - sh("cp {0} {1}/".format(image_name, ffmpeg)) - i += 1 - - make_movie(ffmpeg, pf) - - -def make_movie(workdir, pf, dpi=120, fps=1, format="pdf", engine="ffmpeg"): - """Make the movie using either ffmpeg or gifsicle.""" - os.chdir(workdir) - if format != "png": - cmd = "parallel convert -density {}".format(dpi) - cmd += " {} {.}.png ::: " + "*.{}".format(format) - sh(cmd) - - assert engine in ( - "ffmpeg", - "gifsicle", - ), "Only ffmpeg or gifsicle is 
currently supported" - if engine == "ffmpeg": - cmd = "ffmpeg -framerate {} -pattern_type glob -i '*.png' {}.mp4".format( - fps, pf - ) - elif engine == "gifsicle": - cmd = "convert *.png gif:- |" - cmd += " gifsicle --delay {} --loop --optimize=3".format(100 // fps) - cmd += " --colors=256 --multifile - > {}.gif".format(pf) - - sh(cmd) - - -def estimategaps(args): - """ - %prog estimategaps input.bed - - Estimate sizes of inter-scaffold gaps. The AGP file generated by path() - command has unknown gap sizes with a generic number of Ns (often 100 Ns). - The AGP file `input.chr.agp` will be modified in-place. - """ - p = OptionParser(estimategaps.__doc__) - p.add_argument("--minsize", default=100, type=int, help="Minimum gap size") - p.add_argument("--maxsize", default=500000, type=int, help="Maximum gap size") - p.add_argument( - "--links", - default=10, - type=int, - help="Only use linkage grounds with matchings more than", - ) - p.set_verbose(help="Print details for each gap calculation") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (inputbed,) = args - pf = inputbed.rsplit(".", 1)[0] - agpfile = pf + ".chr.agp" - bedfile = pf + ".lifted.bed" - - cc = Map(bedfile, scaffold_info=True) - agp = AGP(agpfile) - minsize, maxsize = opts.minsize, opts.maxsize - links = opts.links - verbose = opts.verbose - - outagpfile = pf + ".estimategaps.agp" - fw = must_open(outagpfile, "w") - - for ob, components in agp.iter_object(): - components = list(components) - s = Scaffold(ob, cc) - mlg_counts = s.mlg_counts - gaps = [x for x in components if x.is_gap] - gapsizes = [None] * len(gaps) # master - for mlg, count in mlg_counts.items(): - if count < links: - continue - g = GapEstimator(cc, agp, ob, mlg) - g.compute_all_gaps(minsize=minsize, maxsize=maxsize, verbose=verbose) - # Merge evidence from this mlg into master - assert len(g.gapsizes) == len(gaps) - for i, gs in enumerate(gapsizes): - gg = g.gapsizes[i] - if gs is None: - 
gapsizes[i] = gg - elif gg: - gapsizes[i] = min(gs, gg) - - print(gapsizes) - # Modify AGP - i = 0 - for x in components: - if x.is_gap: - x.gap_length = gapsizes[i] or minsize - x.component_type = "U" if x.gap_length == 100 else "N" - i += 1 - print(x, file=fw) - - fw.close() - reindex([outagpfile, "--inplace"]) - - -def filename_to_mapname(filename): - # Infer map name based on file name - mapname = op.basename(filename).rsplit(".", 1)[0] - return mapname.replace("-", "_").replace(":", "_").replace(".", "_") - - -def merge(args): - """ - %prog merge map1 map2 map3 ... - - Convert csv maps to bed format. - - Each input map is csv formatted, for example: - - ScaffoldID,ScaffoldPosition,LinkageGroup,GeneticPosition - scaffold_2707,11508,1,0 - scaffold_2707,11525,1,1.2 - scaffold_759,81336,1,9.7 - """ - p = OptionParser(merge.__doc__) - p.add_argument( - "-w", "--weightsfile", default="weights.txt", help="Write weights to file" - ) - p.set_outfile("out.bed") - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - maps = args - outfile = opts.outfile - fp = must_open(maps) - b = Bed() - mapnames = set() - for row in fp: - mapname = filename_to_mapname(fp.filename()) - mapnames.add(mapname) - try: - m = CSVMapLine(row, mapname=mapname) - if m.cm < 0: - logger.error("Ignore marker with negative genetic distance") - print(row.strip(), file=sys.stderr) - else: - b.append(BedLine(m.bedline)) - except (IndexError, ValueError): # header or mal-formed line - continue - - b.print_to_file(filename=outfile, sorted=True) - logger.debug("A total of %d markers written to `%s`.", len(b), outfile) - - assert len(maps) == len(mapnames), "You have a collision in map names" - write_weightsfile(mapnames, weightsfile=opts.weightsfile) - - -def mergebed(args): - """ - %prog mergebed map1.bed map2.bed map3.bed ... - - Combine bed maps to bed format, adding the map name. 
- """ - p = OptionParser(mergebed.__doc__) - p.add_argument( - "-w", "--weightsfile", default="weights.txt", help="Write weights to file" - ) - p.set_outfile("out.bed") - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - maps = args - outfile = opts.outfile - fp = must_open(maps) - b = Bed() - mapnames = set() - for row in fp: - mapname = filename_to_mapname(fp.filename()) - mapnames.add(mapname) - try: - m = BedLine(row) - m.accn = "{0}-{1}".format(mapname, m.accn) - m.extra = ["{0}:{1}".format(m.seqid, m.start)] - b.append(m) - except (IndexError, ValueError): # header or mal-formed line - continue - - b.print_to_file(filename=outfile, sorted=True) - logger.debug("A total of %d markers written to `%s`.", len(b), outfile) - - assert len(maps) == len(mapnames), "You have a collision in map names" - write_weightsfile(mapnames, weightsfile=opts.weightsfile) - - -def write_weightsfile(mapnames, weightsfile="weights.txt"): - if op.exists(weightsfile): - logger.debug("Weights file `%s` found. 
Will not overwrite.", weightsfile) - return - - fw = open(weightsfile, "w") - for mapname in sorted(mapnames): - weight = 1 - print(mapname, weight, file=fw) - logger.debug("Weights file written to `%s`.", weightsfile) - - -def best_no_ambiguous(d, label): - best, best_value = max(d.items(), key=lambda x: x[1]) - if list(d.values()).count(best_value) > 1: # tie - print("AMBIGUOUS", label, d, file=sys.stderr) - return None, None - return best, best_value - - -def get_function(field): - assert field in distance_choices - return (lambda x: x.cm) if field == "cM" else (lambda x: x.rank) - - -def print_tour(fw, object, tag, label, tour, recode=False): - if recode: - tour = recode_tour(tour) - if fw: - print(">{0} ({1}) {2}".format(object, tag, label), file=fw) - print(" ".join("".join(x) for x in tour), file=fw) - - -def recode_tour(tour): - recode = {0: "?", 1: "+", -1: "-"} - return [(x, recode[o]) for x, o in tour] - - -def path(args): - """ - %prog path input.bed scaffolds.fasta - - Construct golden path given a set of genetic maps. The respective weight for - each map is given in file `weights.txt`. The map with the highest weight is - considered the pivot map. The final output is an AGP file that contains - ordered scaffolds. - - Please note that BED file and FASTA file cannot share the same prefix. 
- """ - oargs = args - p = OptionParser(path.__doc__) - p.add_argument("-b", "--bedfile", help=SUPPRESS) - p.add_argument("-s", "--fastafile", help=SUPPRESS) - p.add_argument( - "-w", "--weightsfile", default="weights.txt", help="Use weights from file" - ) - p.add_argument( - "--compress", - default=1e-6, - type=float, - help="Compress markers with distance <=", - ) - p.add_argument( - "--noremoveoutliers", - default=False, - action="store_true", - help="Don't remove outlier markers", - ) - p.add_argument( - "--distance", - default="rank", - choices=distance_choices, - help="Distance function when building initial consensus", - ) - p.add_argument( - "--linkage", - default="double", - choices=linkage_choices, - help="Linkage function when building initial consensus", - ) - p.add_argument( - "--gapsize", - default=100, - type=int, - help="Insert gaps of size between scaffolds", - ) - p.add_argument("--seqid", help="Only run partition with this seqid") - p.add_argument("--partitions", help="Use predefined partitions of LGs") - p.add_argument( - "--links", default=10, type=int, help="Only plot matchings more than" - ) - p.add_argument( - "--mincount", default=1, type=int, help="Minimum markers on a contig" - ) - p.add_argument( - "--noplot", - default=False, - action="store_true", - help="Do not visualize the alignments", - ) - p.add_argument( - "--renumber", - default=False, - action="store_true", - help="Renumber chromosome based on decreasing sizes", - ) - p.set_cpus(cpus=16) - - q = p.add_argument_group("Genetic algorithm options") - q.add_argument( - "--ngen", default=500, type=int, help="Iterations in GA, higher ~ slower" - ) - q.add_argument( - "--npop", default=100, type=int, help="Population size in GA, higher ~ slower" - ) - q.add_argument("--seed", default=666, type=int, help="Random seed number") - opts, args, iopts = p.set_image_options(args, figsize="10x6") - - if len(args) != 2: - sys.exit(not p.print_help()) - - inputbed, fastafile = args - inputbed = 
opts.bedfile or inputbed - fastafile = opts.fastafile or fastafile - - pf = inputbed.rsplit(".", 1)[0] - if op.basename(fastafile).split(".")[0] == pf: - print( - "ERROR: Filename collision `{}`. We suggest to rename `{}`".format( - pf, inputbed - ), - file=sys.stderr, - ) - sys.exit(1) - - bedfile = pf + ".bed" - weightsfile = opts.weightsfile - partitionsfile = opts.partitions - gapsize = opts.gapsize - mincount = opts.mincount - ngen = opts.ngen - npop = opts.npop - cpus = opts.cpus - seed = opts.seed - if sys.version_info[:2] < (2, 7): - logger.debug( - "Python version: %s. CPUs set to 1.", sys.version.splitlines()[0].strip() - ) - cpus = 1 - - function = get_function(opts.distance) - cc = Map( - bedfile, - function=function, - compress=opts.compress, - remove_outliers=(not opts.noremoveoutliers), - ) - mapnames = cc.mapnames - allseqids = cc.seqids - weights = Weights(weightsfile, mapnames) - pivot = weights.pivot - ref = weights.ref - linkage = opts.linkage - oseqid = opts.seqid - logger.debug("Linkage function: %s-linkage", linkage) - linkage = { - "single": min, - "double": double_linkage, - "complete": max, - "average": np.mean, - "median": np.median, - }[linkage] - - # Partition the linkage groups into consensus clusters - C = Grouper() - # Initialize the partitions - for mlg in cc.mlgs: - C.join(mlg) - - if partitionsfile: - logger.debug("Partition LGs based on `%s`", partitionsfile) - fp = open(partitionsfile) - for row in fp: - C.join(*row.strip().split(",")) - else: - logger.debug("Partition LGs based on %s", ref) - for mapname in mapnames: - if mapname == ref: - continue - # Compute co-occurrence between LG pairs - G = defaultdict(int) - for s in allseqids: - s = Scaffold(s, cc) - s.add_LG_pairs(G, (ref, mapname)) - # Convert edge list to adj list - nodes = defaultdict(list) - for (a, b), w in G.items(): - nodes[a].append((b, w)) - # Find the best ref LG every non-ref LG matches to - for n, neighbors in nodes.items(): - if n.split("-")[0] == ref: - 
continue - neighbors = dict(neighbors) - best_neighbor, best_value = best_no_ambiguous(neighbors, n) - if best_neighbor is None: - continue - C.join(n, best_neighbor) - - partitions = defaultdict(list) - # Partition the scaffolds and assign them to one consensus - for s in allseqids: - s = Scaffold(s, cc) - seqid = s.seqid - counts = {} - for mlg, count in s.mlg_counts.items(): - consensus = C[mlg] - mapname = mlg.split("-")[0] - mw = weights[mapname] - if consensus not in counts: - counts[consensus] = 0 - if count < mincount: - continue - counts[consensus] += count * mw - best_consensus, best_value = best_no_ambiguous(counts, seqid) - if best_consensus is None: - continue - partitions[best_consensus].append(seqid) - - # Perform OO within each partition - agpfile = pf + ".chr.agp" - tourfile = pf + ".tour" - sizes = Sizes(fastafile).mapping - fwagp = must_open(agpfile, "w") - fwtour = must_open(tourfile, "w") - solutions = [] - for lgs, scaffolds in natsorted(partitions.items()): - if oseqid and oseqid not in lgs: - continue - tag = "|".join(lgs) - lgs_maps = set(x.split("-")[0] for x in lgs) - if pivot not in lgs_maps: - logger.debug("Skipping %s ...", tag) - continue - logger.debug("Working on %s ...", tag) - s = ScaffoldOO( - lgs, - scaffolds, - cc, - pivot, - weights, - sizes, - function=function, - linkage=linkage, - fwtour=fwtour, - ngen=ngen, - npop=npop, - cpus=cpus, - seed=seed, - ) - - solutions.append(s) - fwtour.close() - - # Renumber chromosome based on decreasing size - if opts.renumber: - chrsizes = {} - conversion = {} - for s in solutions: - chrsizes[s.object] = ( - sum(sizes[x] for (x, o) in s.tour) + (len(s.tour) - 1) * gapsize - ) - for i, (c, size) in enumerate(sorted(chrsizes.items(), key=lambda x: -x[1])): - newc = "chr{0}".format(i + 1) - logger.debug("%s: %d => %d", c, size, newc) - conversion[c] = newc - for s in solutions: - s.object = conversion[s.object] - - # meta-data about the run parameters - command = "# COMMAND: python -m 
jcvi.assembly.allmaps path {0}".format( - " ".join(oargs) - ) - comment = "Generated by ALLMAPS {} ({})\n{}".format(version, get_today(), command) - AGP.print_header(fwagp, comment=comment) - - for s in natsorted(solutions, key=lambda x: x.object): - order_to_agp(s.object, s.tour, sizes, fwagp, gapsize=gapsize, evidence="map") - fwagp.close() - - logger.debug("AGP file written to `%s`.", agpfile) - logger.debug("Tour file written to `%s`.", tourfile) - - build([inputbed, fastafile]) - - summaryfile = pf + ".summary.txt" - summary([inputbed, fastafile, "--outfile={0}".format(summaryfile)]) - - if not opts.noplot: - plotall( - [ - inputbed, - "-w", - opts.weightsfile, - "--links={0}".format(opts.links), - "--figsize={0}".format(opts.figsize), - ] - ) - - -def write_unplaced_agp(agpfile, scaffolds, unplaced_agp): - agp = AGP(agpfile) - scaffolds_seen = set(x.component_id for x in agp) - sizes = Sizes(scaffolds).mapping - fwagp = must_open(unplaced_agp, "w") - for s in natsorted(sizes.keys()): - if s in scaffolds_seen: - continue - order_to_agp(s, [(s, "?")], sizes, fwagp) - logger.debug("Write unplaced AGP to `%s`", unplaced_agp) - - -def summary(args): - """ - %prog summary input.bed scaffolds.fasta - - Print out summary statistics per map, followed by consensus summary of - scaffold anchoring based on multiple maps. 
- """ - p = OptionParser(summary.__doc__) - p.set_table(sep="|", align=True) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - inputbed, scaffolds = args - pf = inputbed.rsplit(".", 1)[0] - mapbed = pf + ".bed" - chr_agp = pf + ".chr.agp" - sep = opts.sep - align = opts.align - cc = Map(mapbed) - mapnames = cc.mapnames - s = Sizes(scaffolds) - total, l50, n50 = s.summary - r = {} - maps = [] - - fw = must_open(opts.outfile, "w") - print("*** Summary for each individual map ***", file=fw) - for mapname in mapnames: - markers = [x for x in cc if x.mapname == mapname] - ms = MapSummary(markers, l50, s) - r["Linkage Groups", mapname] = ms.num_lgs - ms.export_table(r, mapname, total) - maps.append(ms) - print(tabulate(r, sep=sep, align=align), file=fw) - - r = {} - agp = AGP(chr_agp) - print("*** Summary for consensus map ***", file=fw) - consensus_scaffolds = set(x.component_id for x in agp if not x.is_gap) - oriented_scaffolds = set( - x.component_id for x in agp if (not x.is_gap) and x.orientation != "?" - ) - unplaced_scaffolds = set(s.mapping.keys()) - consensus_scaffolds - - for mapname, sc in ( - ("Anchored", consensus_scaffolds), - ("Oriented", oriented_scaffolds), - ("Unplaced", unplaced_scaffolds), - ): - markers = [x for x in cc if x.seqid in sc] - ms = MapSummary(markers, l50, s, scaffolds=sc) - ms.export_table(r, mapname, total) - print(tabulate(r, sep=sep, align=align), file=fw) - - -def liftover( - chain_file: str, - in_file: str, - out_file: str, - unmapfile: Optional[str], - cstyle: str = "l", -): - """ - Lifts over a bed file from one assembly to another using a chain file. - """ - mapTree, _, _ = read_chain_file(chain_file) - crossmap_bed_file(mapTree, in_file, out_file, unmapfile=unmapfile, cstyle=cstyle) - - -def build(args): - """ - %prog build input.bed scaffolds.fasta - - Build associated genome FASTA file and CHAIN file that can be used to lift - old coordinates to new coordinates. 
The CHAIN file will be used to lift the - original marker positions to new positions in the reconstructed genome. The - new positions of the markers will be reported in *.lifted.bed. - """ - p = OptionParser(build.__doc__) - p.add_argument( - "--cleanup", - default=False, - action="store_true", - help="Clean up bulky FASTA files, useful for plotting", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - inputbed, scaffolds = args - pf = inputbed.rsplit(".", 1)[0] - mapbed = pf + ".bed" - chr_agp = pf + ".chr.agp" - chr_fasta = pf + ".chr.fasta" - if need_update((chr_agp, scaffolds), chr_fasta): - agp_build([chr_agp, scaffolds, chr_fasta]) - - unplaced_agp = pf + ".unplaced.agp" - if need_update((chr_agp, scaffolds), unplaced_agp): - write_unplaced_agp(chr_agp, scaffolds, unplaced_agp) - - unplaced_fasta = pf + ".unplaced.fasta" - if need_update((unplaced_agp, scaffolds), unplaced_fasta): - agp_build([unplaced_agp, scaffolds, unplaced_fasta]) - - combined_agp = pf + ".agp" - if need_update((chr_agp, unplaced_agp), combined_agp): - FileMerger((chr_agp, unplaced_agp), combined_agp).merge() - - combined_fasta = pf + ".fasta" - if need_update((chr_fasta, unplaced_fasta), combined_fasta): - FileMerger((chr_fasta, unplaced_fasta), combined_fasta).merge() - - chainfile = pf + ".chain" - if need_update((combined_agp, scaffolds, combined_fasta), chainfile): - fromagp([combined_agp, scaffolds, combined_fasta]) - - liftedbed = mapbed.rsplit(".", 1)[0] + ".lifted.bed" - if need_update((mapbed, chainfile), liftedbed): - logger.debug( - "Lifting markers from positions in `%s` to new positions in `%s`", - mapbed, - liftedbed, - ) - liftover(chainfile, mapbed, liftedbed, unmapfile="unmapped", cstyle="l") - - if opts.cleanup: - cleanup( - chr_fasta, - unplaced_fasta, - combined_fasta, - chainfile, - unplaced_agp, - combined_fasta + ".sizes", - "unmapped", - ) - - sort([liftedbed, "-i"]) # Sort bed in place - - -def 
add_allmaps_plot_options(p): - p.add_argument( - "-w", "--weightsfile", default="weights.txt", help="Use weights from file" - ) - p.add_argument( - "--distance", - default="cM", - choices=distance_choices, - help="Plot markers based on distance", - ) - p.add_argument( - "--links", default=10, type=int, help="Only plot matchings more than" - ) - p.add_argument( - "--panels", default=False, action="store_true", help="Add panel labels A/B" - ) - - -def plot(args): - """ - %prog plot input.bed seqid - - Plot the matchings between the reconstructed pseudomolecules and the maps. - Two types of visualizations are available in one canvas: - - 1. Parallel axes, and matching markers are shown in connecting lines; - 2. Scatter plot. - """ - from ..graphics.base import ( - plt, - savefig, - normalize_axes, - set2, - panel_labels, - shorten, - ) - from ..graphics.chromosome import Chromosome, GeneticMap, HorizontalChromosome - - p = OptionParser(plot.__doc__) - p.add_argument("--title", help="Title of the plot") - add_allmaps_plot_options(p) - opts, args, iopts = p.set_image_options(args, figsize="10x6") - - if len(args) != 2: - sys.exit(not p.print_help()) - - inputbed, seqid = args - pf = inputbed.rsplit(".", 1)[0] - bedfile = pf + ".lifted.bed" - agpfile = pf + ".agp" - weightsfile = opts.weightsfile - links = opts.links - - function = get_function(opts.distance) - cc = Map(bedfile, function=function) - allseqids = cc.seqids - mapnames = cc.mapnames - weights = Weights(weightsfile, mapnames) - assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids) - - s = Scaffold(seqid, cc) - mlgs = [k for k, v in s.mlg_counts.items() if v >= links] - while not mlgs: - links //= 2 - logger.error("No markers to plot, --links reset to %d", links) - mlgs = [k for k, v in s.mlg_counts.items() if v >= links] - - mlgsizes = {} - for mlg in mlgs: - mm = cc.extract_mlg(mlg) - mlgsize = max(function(x) for x in mm) - mlgsizes[mlg] = mlgsize - - fig = plt.figure(1, (iopts.w, iopts.h)) - 
root = fig.add_axes([0, 0, 1, 1]) - bbox = dict(boxstyle="round", fc="darkslategray", ec="darkslategray") - if opts.title: - root.text(0.5, 0.95, opts.title, color="w", bbox=bbox, size=16) - ax1 = fig.add_axes([0, 0, 0.5, 1]) - ax2 = fig.add_axes([0.5, 0, 0.5, 1]) - - # Find the layout first - ystart, ystop = 0.9, 0.1 - L = Layout(mlgsizes) - coords = L.coords - - tip = 0.02 - marker_pos = {} - # Palette - colors = dict((mapname, set2[i % len(set2)]) for i, mapname in enumerate(mapnames)) - colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs) - - rhos = {} - # Parallel coordinates - for mlg, (x, y1, y2) in coords.items(): - mm = cc.extract_mlg(mlg) - markers = [(m.accn, function(m)) for m in mm] # exhaustive marker list - xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid] - mx, my = zip(*xy) - rho = spearmanr(mx, my) - rhos[mlg] = rho - flip = rho < 0 - - g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip) - extra = -3 * tip if x < 0.5 else 3 * tip - ha = "right" if x < 0.5 else "left" - mapname = mlg.split("-")[0] - tlg = shorten(mlg.replace("_", ".")) # Latex does not like underscore char - label = "{0} (w={1})".format(tlg, weights[mapname]) - ax1.text( - x + extra, - (y1 + y2) / 2, - label, - color=colors[mlg], - ha=ha, - va="center", - rotation=90, - ) - marker_pos.update(g.marker_pos) - - agp = AGP(agpfile) - agp = [x for x in agp if x.object == seqid] - chrsize = max(x.object_end for x in agp) - - # Pseudomolecules in the center - r = ystart - ystop - ratio = r / chrsize - f = lambda x: (ystart - ratio * x) - patchstart = [f(x.object_beg) for x in agp if not x.is_gap] - Chromosome(ax1, 0.5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2) - - label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0)) - ax1.text(0.5, ystart + tip, label, ha="center") - - scatter_data = defaultdict(list) - # Connecting lines - for b in s.markers: - marker_name = b.accn - if marker_name not in marker_pos: - continue - - cx = 0.5 - cy = 
f(b.pos) - mx = coords[b.mlg][0] - my = marker_pos[marker_name] - - extra = -tip if mx < cx else tip - extra *= 1.25 # leave boundaries for aesthetic reasons - cx += extra - mx -= extra - ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg]) - scatter_data[b.mlg].append((b.pos, function(b))) - - # Scatter plot, same data as parallel coordinates - xstart, xstop = sorted((ystart, ystop)) - f = lambda x: (xstart + ratio * x) - pp = [x.object_beg for x in agp if not x.is_gap] - patchstart = [f(x) for x in pp] - HorizontalChromosome( - ax2, xstart, xstop, ystop, height=2 * tip, patch=patchstart, lw=2 - ) - draw_gauge(ax2, xstart, chrsize) - - gap = 0.03 - ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values()) - - tlgs = [] - for mlg, mlgsize in sorted(mlgsizes.items()): - height = ratio * mlgsize - ystart -= height - xx = 0.5 + xstart / 2 - width = r / 2 - color = colors[mlg] - ax = fig.add_axes([xx, ystart, width, height]) - ypos = ystart + height / 2 - ystart -= gap - sd = scatter_data[mlg] - xx, yy = zip(*sd) - ax.vlines(pp, 0, 2 * mlgsize, colors="beige") - ax.plot(xx, yy, ".", color=color) - rho = rhos[mlg] - ax.text( - 0.5, - 1 - 0.4 * gap / height, - r"$\rho$={0:.3f}".format(rho), - ha="center", - va="top", - transform=ax.transAxes, - color="gray", - ) - tlg = shorten(mlg.replace("_", ".")) - tlgs.append((tlg, ypos, color)) - ax.set_xlim(0, chrsize) - ax.set_ylim(0, mlgsize) - ax.set_xticks([]) - while height / len(ax.get_yticks()) < 0.03 and len(ax.get_yticks()) >= 2: - ax.set_yticks(ax.get_yticks()[::2]) # Sparsify the ticks - yticklabels = [int(x) for x in ax.get_yticks()] - ax.set_yticks(yticklabels) - ax.set_yticklabels(yticklabels, family="Helvetica") - if rho < 0: - ax.invert_yaxis() - - for i, (tlg, ypos, color) in enumerate(tlgs): - ha = "center" - if len(tlgs) > 4: - ha = "right" if i % 2 else "left" - root.text(0.5, ypos, tlg, color=color, rotation=90, ha=ha, va="center") - - if opts.panels: - labels = ((0.04, 0.96, "A"), (0.48, 0.96, "B")) - 
panel_labels(root, labels) - - normalize_axes(ax1, ax2, root) - image_name = seqid + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - plt.close(fig) - return image_name - - -def plotall(xargs): - """ - %prog plotall input.bed - - Plot the matchings between the reconstructed pseudomolecules and the maps. - This command will plot each reconstructed object (non-singleton). - """ - p = OptionParser(plotall.__doc__) - add_allmaps_plot_options(p) - opts, args, iopts = p.set_image_options(xargs, figsize="10x6") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (inputbed,) = args - pf = inputbed.rsplit(".", 1)[0] - agpfile = pf + ".chr.agp" - agp = AGP(agpfile) - objects = [ob for ob, lines in agp.iter_object()] - for seqid in natsorted(objects): - plot(xargs + [seqid]) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/allpaths.py b/jcvi/assembly/allpaths.py deleted file mode 100644 index e95c3e18..00000000 --- a/jcvi/assembly/allpaths.py +++ /dev/null @@ -1,530 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Subroutines to aid ALLPATHS-LG assembly. 
-""" -import os.path as op -import sys - -from struct import pack, unpack -from itertools import islice - -import numpy as np - -from ..formats.base import BaseFile -from ..apps.grid import Jobs -from ..apps.base import ( - ActionDispatcher, - OptionParser, - cleanup, - glob, - logger, - need_update, - sh, -) - -from .base import FastqNamings, Library - - -class PairsFile(BaseFile): - def __init__(self, filename): - super().__init__(filename) - - fp = open(filename, "rb") - (binwrite,) = unpack("8s", fp.read(8)) - assert binwrite == "BINWRITE" - - (self.version,) = unpack("i", fp.read(4)) - assert self.version == 1 - - (self.nreads,) = unpack("Q", fp.read(8)) - (self.nlibs,) = unpack("Q", fp.read(8)) - self.libstats = [] - self.libnames = [] - - for i in range(self.nlibs): - self.libstats.append(unpack("ii", fp.read(8))) - - (nlibs,) = unpack("Q", fp.read(8)) - assert nlibs == self.nlibs - - for i in range(self.nlibs): - (slen,) = unpack("i", fp.read(4)) - libname, nul = unpack("{0}sc".format(slen - 1), fp.read(slen)) - self.libnames.append(libname) - - (npairs,) = unpack("Q", fp.read(8)) - self.r1 = np.fromfile(fp, dtype=np.int64, count=npairs) - - (npairs2,) = unpack("Q", fp.read(8)) - assert npairs2 == npairs - self.r2 = np.fromfile(fp, dtype=np.int64, count=npairs) - - (npairsl,) = unpack("Q", fp.read(8)) - assert npairsl == npairs - self.libs = np.fromfile(fp, dtype=np.int8, count=npairs) - - assert len(fp.read()) == 0 # EOF - self.npairs = npairs - - @property - def header(self): - from jcvi.utils.cbook import percentage - - s = "Number of paired reads: {0}\n".format( - percentage(self.npairs * 2, self.nreads) - ) - s += "Libraries: {0}\n".format(", ".join(self.libnames)) - s += "LibraryStats: {0}\n".format(self.libstats) - s += "r1: {0}\n".format(self.r1) - s += "r2: {0}\n".format(self.r2) - s += "libs: {0}".format(self.libs) - return s - - def fixLibraryStats(self, sep, sd): - libstat = (sep, sd) - logger.debug("New library stat: {0}".format(libstat)) - 
self.libstats = [libstat] * self.nlibs - - def write(self, filename): - fw = open(filename, "wb") - fw.write(pack("8s", "BINWRITE")) - fw.write(pack("i", self.version)) - fw.write(pack("Q", self.nreads)) - fw.write(pack("Q", self.nlibs)) - for a, b in self.libstats: - fw.write(pack("ii", a, b)) - fw.write(pack("Q", self.nlibs)) - for name in self.libnames: - slen = len(name) + 1 - fw.write(pack("i", slen)) - fw.write(pack("{0}s".format(slen), name)) - fw.write(pack("Q", self.npairs)) - self.r1.tofile(fw) - fw.write(pack("Q", self.npairs)) - self.r2.tofile(fw) - fw.write(pack("Q", self.npairs)) - self.libs.tofile(fw) - logger.debug("New pairs file written to `{0}`.".format(filename)) - - -def main(): - - actions = ( - ("prepare", "prepare ALLPATHS csv files and run script"), - ("log", "prepare a log of created files"), - ("pairs", "parse ALLPATHS pairs file"), - ("dump", "export ALLPATHS fastb file to fastq"), - ("fixpairs", "fix pairs library stats"), - ("fill", "run FillFragments on `frag_reads_corr.fastb`"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def dump(args): - """ - %prog dump fastbfile - - Export ALLPATHS fastb file to fastq file. Use --dir to indicate a previously - run allpaths folder. 
- """ - p = OptionParser(dump.__doc__) - p.add_argument("--dir", help="Working directory") - p.add_argument( - "--nosim", - default=False, - action="store_true", - help="Do not simulate qual to 50", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastbfile,) = args - d = opts.dir - if d: - from jcvi.assembly.preprocess import export_fastq - - rc = "jump" in fastbfile - export_fastq(d, fastbfile, rc=rc) - return - - sim = not opts.nosim - pf = "j" if "jump" in fastbfile else "f" - - statsfile = "{0}.lib_stats".format(pf) - cleanup(statsfile) - - cmd = "SplitReadsByLibrary READS_IN={0}".format(fastbfile) - cmd += " READS_OUT={0} QUALS=True".format(pf) - sh(cmd) - - libs = [] - fp = open(statsfile) - next(fp) - next(fp) # skip two rows - for row in fp: - if row.strip() == "": - continue - - libname = row.split()[0] - if libname == "Unpaired": - continue - - libs.append(libname) - - logger.debug("Found libraries: {0}".format(",".join(libs))) - - cmds = [] - for libname in libs: - cmd = "FastbQualbToFastq" - cmd += " HEAD_IN={0}.{1}.AB HEAD_OUT={1}".format(pf, libname) - cmd += " PAIRED=True PHRED_OFFSET=33" - if sim: - cmd += " SIMULATE_QUALS=True" - if pf == "j": - cmd += " FLIP=True" - - cmds.append((cmd,)) - - m = Jobs(target=sh, args=cmds) - m.run() - - for libname in libs: - cmd = "mv {0}.A.fastq {0}.1.fastq".format(libname) - sh(cmd) - cmd = "mv {0}.B.fastq {0}.2.fastq".format(libname) - sh(cmd) - - -def fixpairs(args): - """ - %prog fixpairs pairsfile sep sd - - Fix pairs library stats. This is sometime useful to modify library stats, - for example, the separation between paired reads after importing the data. 
- """ - p = OptionParser(fixpairs.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - pairsfile, sep, sd = args - newpairsfile = pairsfile.rsplit(".", 1)[0] + ".new.pairs" - sep = int(sep) - sd = int(sd) - - p = PairsFile(pairsfile) - p.fixLibraryStats(sep, sd) - p.write(newpairsfile) - - -def fill(args): - """ - %prog fill frag_reads_corr.fastb - - Run FillFragments on `frag_reads_corr.fastb`. - """ - p = OptionParser(fill.__doc__) - p.add_argument( - "--stretch", - default=3, - type=int, - help="MAX_STRETCH to pass to FillFragments", - ) - p.set_cpus() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastb,) = args - assert fastb == "frag_reads_corr.fastb" - - pcfile = "frag_reads_corr.k28.pc.info" - nthreads = " NUM_THREADS={0}".format(opts.cpus) - maxstretch = " MAX_STRETCH={0}".format(opts.stretch) - if need_update(fastb, pcfile): - cmd = "PathReads READS_IN=frag_reads_corr" - cmd += nthreads - sh(cmd) - - filledfastb = "filled_reads.fastb" - if need_update(pcfile, filledfastb): - cmd = "FillFragments PAIRS_OUT=frag_reads_corr_cpd" - cmd += " PRECORRECT_LIBSTATS=True" - cmd += maxstretch - cmd += nthreads - sh(cmd) - - filledfasta = "filled_reads.fasta" - if need_update(filledfastb, filledfasta): - cmd = "Fastb2Fasta IN=filled_reads.fastb OUT=filled_reads.fasta" - sh(cmd) - - -def extract_pairs(fastqfile, p1fw, p2fw, fragsfw, p, suffix=False): - """ - Take fastqfile and array of pair ID, extract adjacent pairs to outfile. - Perform check on numbers when done. p1fw, p2fw is a list of file handles, - each for one end. p is a Pairs instance. 
- """ - fp = open(fastqfile) - current_id = 0 - npairs = nfrags = 0 - for x, lib in zip(p.r1, p.libs): - while current_id != x: - fragsfw.writelines(islice(fp, 4)) # Exhaust the iterator - current_id += 1 - nfrags += 1 - a = list(islice(fp, 4)) - b = list(islice(fp, 4)) - if suffix: - name = a[0].rstrip() - a[0] = name + "/1\n" - b[0] = name + "/2\n" - else: - b[0] = a[0] # Keep same read ID for pairs - - p1fw[lib].writelines(a) - p2fw[lib].writelines(b) - current_id += 2 - npairs += 2 - - # Write the remaining single reads - while True: - contents = list(islice(fp, 4)) - if not contents: - break - fragsfw.writelines(contents) - nfrags += 1 - - logger.debug( - "A total of {0} paired reads written to `{1}`.".format( - npairs, ",".join(x.name for x in p1fw + p2fw) - ) - ) - logger.debug( - "A total of {0} single reads written to `{1}`.".format(nfrags, fragsfw.name) - ) - - # Validate the numbers - expected_pairs = 2 * p.npairs - expected_frags = p.nreads - 2 * p.npairs - assert npairs == expected_pairs, "Expect {0} paired reads, got {1} instead".format( - expected_pairs, npairs - ) - assert nfrags == expected_frags, "Expect {0} single reads, got {1} instead".format( - expected_frags, nfrags - ) - - -def pairs(args): - """ - %prog pairs pairsfile - - Parse ALLPATHS pairs file, and write pairs IDs and single read IDs in - respective ids files: e.g. `lib1.pairs.fastq`, `lib2.pairs.fastq`, - and single `frags.fastq` (with single reads from lib1/2). 
- """ - from jcvi.assembly.preprocess import run_FastbAndQualb2Fastq - - p = OptionParser(pairs.__doc__) - p.add_argument( - "--header", - default=False, - action="store_true", - help="Print header only", - ) - p.add_argument( - "--suffix", - default=False, - action="store_true", - help="Add suffix /1, /2 to read names", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - pairsfile, fastqfile = args - pf = op.basename(fastqfile).split(".")[0] - p = PairsFile(pairsfile) - print(p.header, file=sys.stderr) - - if opts.header: - return - - if fastqfile.endswith(".fastb"): - fastbfile = fastqfile - fastqfile = fastbfile.replace(".fastb", ".fastq") - run_FastbAndQualb2Fastq(infile=fastbfile, outfile=fastqfile) - - p1file = "{0}.1.corr.fastq" - p2file = "{0}.2.corr.fastq" - fragsfile = "{0}.corr.fastq" - p1fw = [open(p1file.format(x), "w") for x in p.libnames] - p2fw = [open(p2file.format(x), "w") for x in p.libnames] - fragsfw = open(fragsfile.format(pf), "w") - - extract_pairs(fastqfile, p1fw, p2fw, fragsfw, p, suffix=opts.suffix) - - -ALLPATHSRUN = r""" -ulimit -s 100000 - -if [ -f frag_reads_orig.fastb ] -then - echo "'frag_reads_orig.fastb' exists. Skip loading reads." -else - mkdir -p $PWD/read_cache - echo "Load reads ..." - CacheLibs.pl CACHE_DIR=$PWD/read_cache \ - ACTION=Add IN_LIBS_CSV=in_libs.csv - if [ -f in_groups_33.csv ] - then - CacheGroups.pl CACHE_DIR=$PWD/read_cache \ - ACTION=Add IN_GROUP_CSV=in_groups_33.csv PHRED_64=0 HOSTS='{1}' - fi - if [ -f in_groups_64.csv ] - then - CacheGroups.pl CACHE_DIR=$PWD/read_cache \ - ACTION=Add IN_GROUP_CSV=in_groups_64.csv PHRED_64=1 HOSTS='{1}' - fi - PrepareAllPathsInputs.pl DATA_DIR=$PWD PLOIDY={0} HOSTS='{1}' -fi - -RunAllPathsLG PRE=. REFERENCE_NAME=. OVERWRITE=True HAPLOIDIFY=False \ - DATA_SUBDIR=. RUN=allpaths SUBDIR=run THREADS={1} MIN_CONTIG=200 \ - {2} | tee allpaths.log""" - - -def prepare(args): - """ - %prog prepare "B. 
oleracea" *.fastq - - Scan input fastq files (see below) and create `in_groups.csv` and - `in_libs.csv`. The species name does not really matter. - """ - from jcvi.utils.table import write_csv - from jcvi.formats.base import write_file - from jcvi.formats.fastq import guessoffset, readlen - - p = OptionParser(prepare.__doc__ + FastqNamings) - p.add_argument( - "--corr", - default=False, - action="store_true", - help="Extra parameters for corrected data", - ) - p.add_argument( - "--norun", - default=False, - action="store_true", - help="Don't write `run.sh` script", - ) - p.add_argument("--ploidy", default="2", choices=("1", "2"), help="Ploidy") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - organism_name = args[0] - project_name = "".join(x[0] for x in organism_name.split()).upper() - fnames = sorted(glob("*.fastq*") if len(args) == 1 else args[1:]) - for x in fnames: - assert op.exists(x), "File `{0}` not found.".format(x) - - groupheader = "group_name library_name file_name".split() - libheader = ( - "library_name project_name organism_name type paired " - "frag_size frag_stddev insert_size insert_stddev read_orientation " - "genomic_start genomic_end".split() - ) - groups_33 = [] - groups_64 = [] - libs = [] - for file_name in fnames: - offset = guessoffset([file_name]) - group_name = op.basename(file_name).split(".")[0] - library_name = "-".join(group_name.split("-")[:2]) - - # Handle paired files and convert to wildcard - if ".1." in file_name: - file_name = file_name.replace(".1.", ".?.") - elif ".2." 
in file_name: - continue - - groupscontents = groups_64 if offset == 64 else groups_33 - groupscontents.append((group_name, library_name, file_name)) - if library_name not in libs: - libs.append(library_name) - - libcontents = [] - for library_name in libs: - L = Library(library_name) - size = L.size - stddev = L.stddev - type = L.type - paired = L.paired - read_orientation = L.read_orientation - - size = size or "" - stddev = stddev or "" - frag_size = size if type == "fragment" else "" - frag_stddev = stddev if type == "fragment" else "" - insert_size = size if type != "fragment" else "" - insert_stddev = stddev if type != "fragment" else "" - genomic_start, genomic_end = "", "" - libcontents.append( - ( - library_name, - project_name, - organism_name, - type, - paired, - frag_size, - frag_stddev, - insert_size, - insert_stddev, - read_orientation, - genomic_start, - genomic_end, - ) - ) - - for groups, csvfile in ( - (groups_33, "in_groups_33.csv"), - (groups_64, "in_groups_64.csv"), - (groups_33 + groups_64, "in_groups.csv"), - ): - if not groups: - continue - write_csv(groupheader, groups, filename=csvfile, tee=True) - logger.debug("`{0}` created (# of groups = {1}).".format(csvfile, len(groups))) - - write_csv(libheader, libcontents, filename="in_libs.csv", tee=True) - logger.debug("`in_libs.csv` created (# of libs = {0}).".format(len(libcontents))) - - runfile = "run.sh" - - # ALLPATHS stalls on reads over 250bp - max_rd_len = max(readlen([f]) for f in fnames) - extra = "CLOSE_UNIPATH_GAPS=False " if max_rd_len > 200 else "" - if opts.corr: - extra += "FE_NUM_CYCLES=1 EC_K=28 FE_QUAL_CEIL_RADIUS=0" - extra += " REMOVE_DODGY_READS_FRAG=False FE_MAX_KMER_FREQ_TO_MARK=1" - - if not opts.norun: - contents = ALLPATHSRUN.format(opts.ploidy, opts.cpus, extra) - write_file(runfile, contents) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/automaton.py b/jcvi/assembly/automaton.py deleted file mode 100644 index 08ca0153..00000000 --- 
a/jcvi/assembly/automaton.py +++ /dev/null @@ -1,482 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Automate genome assembly by iterating assembly on a set of files, individually. -""" -import os -import os.path as op -import sys - -from more_itertools import grouper - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - get_abs_path, - glob, - iglob, - logger, - mkdir, - need_update, - sh, -) -from ..formats.base import LineFile, write_file -from ..formats.fastq import first, pairspf - - -class Meta(object): - def __init__(self, fastq, guess=True): - # Note the guesswork is largely based on JIRA LIMS naming convention - self.fastq = fastq.strip() - self.suffix = op.splitext(fastq)[-1] - if ".1." in fastq or ".2." in fastq: - paired = ".1" if ".1." in fastq else ".2" - elif "_R1_" in fastq or "_R2_" in fastq: - paired = ".1" if "_R1_" in fastq else ".2" - else: - paired = "" - self.paired = paired - if guess: - self.guess() - - def __str__(self): - return "\t".join((self.genome, self.tag, self.fastq)) - - @property - def link(self): - linkname = "{0}{1}{2}".format(self.tag, self.paired, self.suffix) - return op.join(self.genome, linkname) - - def make_link(self, firstN=0): - mkdir(self.genome) - if firstN > 0: - first([str(firstN), self.fastq, "--outfile={0}".format(self.link)]) - return - - if op.islink(self.link): - os.unlink(self.link) - os.symlink(get_abs_path(self.fastq), self.link) - - def guess(self): - # Try to guess library info based on file name - # SUBAC47-MP-IL73-1_CGGAAT_L001_R1_filtered.fastq - basename = op.basename(self.fastq) - baseparts = basename.split("-") - self.genome = baseparts[0] - self.tag = baseparts[1] - - if self.genome.endswith("BP"): - self.genome, bp = self.genome[:-5], self.genome[-5:-2] - self.tag = "-".join((self.tag, bp)) # 500BP - - -class MetaFile(LineFile): - def __init__(self, filename): - super().__init__(filename) - fp = open(filename) - for row in fp: - genome, tag, fastq = row.split() - m = 
Meta(fastq, guess=False) - m.genome, m.tag = genome, tag - self.append(m) - - self.sort(key=lambda x: (x.genome, x.tag, x.fastq)) - - -def main(): - - actions = ( - ("prepare", "parse list of FASTQ files and prepare input"), - ("pairs", "estimate insert sizes for input files"), - ("contamination", "remove contaminated reads"), - ("allpaths", "run automated ALLPATHS"), - ("spades", "run automated SPADES assembly"), - ("allpathsX", "run automated ALLPATHS on list of files"), - ("soapX", "run automated SOAP on list of files"), - ("correctX", "run automated ALLPATHS correction on list of files"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def spades(args): - """ - %prog spades folder - - Run automated SPADES. - """ - from jcvi.formats.fastq import readlen - - p = OptionParser(spades.__doc__) - opts, args = p.parse_args(args) - - if len(args) == 0: - sys.exit(not p.print_help()) - - (folder,) = args - for p, pf in iter_project(folder): - rl = readlen([p[0], "--silent"]) - - # - kmers = None - if rl >= 150: - kmers = "21,33,55,77" - elif rl >= 250: - kmers = "21,33,55,77,99,127" - - cmd = "spades.py" - if kmers: - cmd += " -k {0}".format(kmers) - cmd += " --careful" - cmd += " --pe1-1 {0} --pe1-2 {1}".format(*p) - cmd += " -o {0}_spades".format(pf) - print(cmd) - - -def contamination(args): - """ - %prog contamination folder Ecoli.fasta - - Remove contaminated reads. The FASTQ files in the folder will automatically - pair and filtered against Ecoli.fasta to remove contaminants using BOWTIE2. 
- """ - from jcvi.apps.bowtie import align - - p = OptionParser(contamination.__doc__) - p.add_argument( - "--mapped", - default=False, - action="store_true", - help="Retain contaminated reads instead", - ) - p.set_cutoff(cutoff=800) - p.set_mateorientation(mateorientation="+-") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - folder, ecoli = args - ecoli = get_abs_path(ecoli) - tag = "--mapped" if opts.mapped else "--unmapped" - for p, pf in iter_project(folder): - align_opts = [ecoli] + p + [tag] - align_opts += ["--cutoff={0}".format(opts.cutoff), "--null"] - if opts.mateorientation: - align_opts += ["--mateorientation={0}".format(opts.mateorientation)] - align(align_opts) - - -def pairs(args): - """ - %prog pairs folder reference.fasta - - Estimate insert size distribution. Compatible with a variety of aligners, - including BOWTIE and BWA. - """ - p = OptionParser(pairs.__doc__) - p.set_firstN() - p.set_mates() - p.set_aligner() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - cwd = os.getcwd() - aligner = opts.aligner - work = "-".join(("pairs", aligner)) - mkdir(work) - - from jcvi.formats.sam import pairs as ps - - if aligner == "bowtie": - from jcvi.apps.bowtie import align - elif aligner == "bwa": - from jcvi.apps.bwa import align - - folder, ref = args - ref = get_abs_path(ref) - messages = [] - for p, prefix in iter_project(folder): - samplefq = [] - for i in range(2): - samplefq.append(op.join(work, prefix + "_{0}.first.fastq".format(i + 1))) - first([str(opts.firstN)] + [p[i]] + ["-o", samplefq[i]]) - - os.chdir(work) - align_args = [ref] + [op.basename(fq) for fq in samplefq] - outfile, logfile = align(align_args) - bedfile, stats = ps([outfile, "--rclip={0}".format(opts.rclip)]) - os.chdir(cwd) - - median = stats.median - tag = "MP" if median > 1000 else "PE" - median = str(median) - pf, sf = median[:2], median[2:] - if sf and int(sf) != 0: - pf = str(int(pf) + 1) # 
Get the first two effective digits - lib = "{0}-{1}".format(tag, pf + "0" * len(sf)) - for i, xp in enumerate(p): - suffix = "fastq.gz" if xp.endswith(".gz") else "fastq" - link = "{0}-{1}.{2}.{3}".format(lib, prefix.replace("-", ""), i + 1, suffix) - m = "\t".join(str(x) for x in (xp, link)) - messages.append(m) - - messages = "\n".join(messages) - write_file("f.meta", messages, tee=True) - - -def allpaths(args): - """ - %prog allpaths folder1 folder2 ... - - Run automated ALLPATHS on list of dirs. - """ - p = OptionParser(allpaths.__doc__) - p.add_argument("--ploidy", default="1", choices=("1", "2"), help="Ploidy") - opts, args = p.parse_args(args) - - if len(args) == 0: - sys.exit(not p.print_help()) - - folders = args - for pf in folders: - if not op.isdir(pf): - continue - assemble_dir( - pf, - target=["final.contigs.fasta", "final.assembly.fasta"], - ploidy=opts.ploidy, - ) - - -def prepare(args): - """ - %prog prepare jira.txt - - Parse JIRA report and prepare input. Look for all FASTQ files in the report - and get the prefix. Assign fastq to a folder and a new file name indicating - the library type (e.g. PE-500, MP-5000, etc.). - - Note that JIRA report can also be a list of FASTQ files. 
- """ - p = OptionParser(prepare.__doc__) - p.add_argument( - "--first", - default=0, - type=int, - help="Use only first N reads", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (jfile,) = args - metafile = jfile + ".meta" - - if need_update(jfile, metafile): - fp = open(jfile) - fastqfiles = [x.strip() for x in fp if ".fastq" in x] - metas = [Meta(x) for x in fastqfiles] - - fw = open(metafile, "w") - print("\n".join(str(x) for x in metas), file=fw) - print( - "Now modify `{0}`, and restart this script.".format(metafile), - file=sys.stderr, - ) - print("Each line is : genome library fastqfile", file=sys.stderr) - fw.close() - return - - mf = MetaFile(metafile) - for m in mf: - m.make_link(firstN=opts.first) - - -def slink(p, pf, tag, extra=None): - - mkdir(pf, overwrite=True) - cwd = os.getcwd() - os.chdir(pf) - - # Create sym-links for the input files - i = 1 - for f in sorted(p): - gz = ".gz" if f.endswith(".gz") else "" - if "PE-0" in f: - sh("ln -sf ../{0} PE-0.fastq{1}".format(f, gz)) - continue - for t in tag: - sh("ln -sf ../{0} {1}.{2}.fastq{3}".format(f, t, i, gz)) - i += 1 - - if extra: - for e in extra: - sh("ln -sf {0}".format(e)) - - os.chdir(cwd) - - -def assemble_pairs(p, pf, tag, target=["final.contigs.fasta"]): - """ - Take one pair of reads and assemble to contigs.fasta. 
- """ - slink(p, pf, tag) - assemble_dir(pf, target) - - -def assemble_dir(pf, target, ploidy="1"): - from jcvi.assembly.allpaths import prepare - - logger.debug("Work on {0}".format(pf)) - asm = [x.replace("final", pf) for x in target] - if not need_update(pf, asm, warn=True): - return - - cwd = os.getcwd() - os.chdir(pf) - prepare( - [pf] - + sorted(glob("*.fastq") + glob("*.fastq.gz")) - + ["--ploidy={0}".format(ploidy)] - ) - sh("./run.sh") - - for a, t in zip(asm, target): - sh("cp allpaths/ASSEMBLIES/run/{0} ../{1}".format(t, a)) - - logger.debug("Assembly finished: {0}".format(asm)) - os.chdir(cwd) - - -def correct_pairs(p, pf, tag): - """ - Take one pair of reads and correct to generate *.corr.fastq. - """ - from jcvi.assembly.preprocess import correct as cr - - logger.debug("Work on {0} ({1})".format(pf, ",".join(p))) - itag = tag[0] - cm = ".".join((pf, itag)) - targets = (cm + ".1.corr.fastq", cm + ".2.corr.fastq", pf + ".PE-0.corr.fastq") - if not need_update(p, targets, warn=True): - return - - slink(p, pf, tag) - - cwd = os.getcwd() - os.chdir(pf) - cr(sorted(glob("*.fastq") + glob("*.fastq.gz")) + ["--nofragsdedup"]) - sh("mv {0}.1.corr.fastq ../{1}".format(itag, targets[0])) - sh("mv {0}.2.corr.fastq ../{1}".format(itag, targets[1])) - sh("mv frag_reads_corr.corr.fastq ../{0}".format(targets[2])) - - logger.debug("Correction finished: {0}".format(targets)) - os.chdir(cwd) - - -def soap_trios(p, pf, tag, extra): - """ - Take one pair of reads and 'widow' reads after correction and run SOAP. 
- """ - from jcvi.assembly.soap import prepare - - logger.debug("Work on {0} ({1})".format(pf, ",".join(p))) - asm = "{0}.closed.scafSeq".format(pf) - if not need_update(p, asm, warn=True): - return - - slink(p, pf, tag, extra) - - cwd = os.getcwd() - os.chdir(pf) - prepare( - sorted(glob("*.fastq") + glob("*.fastq.gz")) - + ["--assemble_1st_rank_only", "-K 31"] - ) - sh("./run.sh") - sh("cp asm31.closed.scafSeq ../{0}".format(asm)) - - logger.debug("Assembly finished: {0}".format(asm)) - os.chdir(cwd) - - -def iter_project( - folder, pattern="*.fq,*.fq.gz,*.fastq,*.fastq.gz", n=2, commonprefix=True -): - # Check for paired reads and extract project id - filelist = [x for x in iglob(folder, pattern)] - for p in grouper(filelist, n): - if len(p) != n or None in p: - continue - - pp = [op.basename(x) for x in p] - pf = pairspf(pp, commonprefix=commonprefix) - yield sorted(p), pf - - -def soapX(args): - """ - %prog soapX folder tag [*.fastq] - - Run SOAP on a folder of paired reads and apply tag before assembly. - Optional *.fastq in the argument list will be symlinked in each folder and - co-assembled. - """ - p = OptionParser(soapX.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - folder, tag = args[:2] - extra = args[2:] - extra = [get_abs_path(x) for x in extra] - tag = tag.split(",") - for p, pf in iter_project(folder, n=3): - soap_trios(p, pf, tag, extra) - - -def correctX(args): - """ - %prog correctX folder tag - - Run ALLPATHS correction on a folder of paired reads and apply tag. - """ - p = OptionParser(correctX.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - folder, tag = args - tag = tag.split(",") - for p, pf in iter_project(folder): - correct_pairs(p, pf, tag) - - -def allpathsX(args): - """ - %prog allpathsX folder tag - - Run assembly on a folder of paired reads and apply tag (PE-200, PE-500). - Allow multiple tags separated by comma, e.g. 
PE-350,TT-1050 - """ - p = OptionParser(allpathsX.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - folder, tag = args - tag = tag.split(",") - for p, pf in iter_project(folder): - assemble_pairs(p, pf, tag) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/base.py b/jcvi/assembly/base.py deleted file mode 100644 index 77ff98b6..00000000 --- a/jcvi/assembly/base.py +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Base utilties for genome assembly related calculations and manipulations -""" -import os.path as op -import sys - -from math import log -from bisect import bisect - -import numpy as np - -from ..formats.base import must_open -from ..formats.fasta import Fasta -from ..apps.base import ActionDispatcher, OptionParser, glob - -ln2 = log(2) - - -types = {"PE": "fragment", "MP": "jumping", "TT": "jumping", "LL": "long"} -header = ("Length", "L50", "N50", "Min", "Max", "N") - -FastqNamings = """ - The naming schemes for the fastq files are. - - PE-376.fastq (paired end) - MP-3000.fastq (mate pairs) - TT-3000.fastq (mate pairs, but from 454 data, so expected to be +-) - LL-0.fastq (long reads) - - Paired reads in different files must be in the form of (note the .1. and .2.): - PE-376.1.fastq and PE-376.2.fastq to be considered - - The reads are assumed to be NOT paired if the number after the PE-, MP-, - etc. is 0. Otherwise, they are considered paired at the given distance. -""" - - -class Library(object): - """ - The sequence files define a library. 
- """ - - def __init__(self, library_name): - - self.library_name = library_name - if "-" in library_name: - pf, size = library_name.split("-", 1) - assert pf in types, "Library prefix must be one of {0}".format(types.keys()) - else: - pf, size = "PE", 0 - - self.size = size = int(size) - self.type = types[pf] - self.stddev = size / 6 if self.type == "jumping" else size / 9 - self.paired = 0 if size == 0 else 1 - self.read_orientation = "outward" if pf == "MP" else "inward" - self.reverse_seq = 1 if pf == "MP" else 0 - self.asm_flags = 3 if pf != "MP" else 2 - if not self.paired: - self.read_orientation = "" - - def get_lib_seq(self, wildcard, prefix, readlen, rank): - # lib_seq wildcard prefix insAvg insSdev avgReadLen hasInnieArtifact - # isRevComped useForContigging scaffRound useForGapClosing 5pWiggleRoom - # 3pWiggleRoom (used by MERACULOUS) - useForContigging = useForGapClosing = int(self.asm_flags == 3) - return ( - "lib_seq", - wildcard, - prefix, - self.size, - self.stddev, - readlen, - int(self.type == "jumping"), - self.reverse_seq, - useForContigging, - rank, - useForGapClosing, - 0, - 0, - ) - - -def get_libs(args): - from itertools import groupby - - fnames = args or glob("*.fastq*") - fnames = sorted(fnames) - for x in fnames: - assert op.exists(x), "File `{0}` not found.".format(x) - - library_name = lambda x: "-".join(op.basename(x).split(".")[0].split("-")[:2]) - libs = [(Library(x), sorted(fs)) for x, fs in groupby(fnames, key=library_name)] - - libs.sort(key=lambda x: x[0].size) - return libs - - -def calculate_A50(ctgsizes, cutoff=0, percent=50): - """ - Given an array of contig sizes, produce A50, N50, and L50 values - """ - - ctgsizes = np.array(ctgsizes, dtype=int) - ctgsizes = np.sort(ctgsizes)[::-1] - ctgsizes = ctgsizes[ctgsizes >= cutoff] - - a50 = np.cumsum(ctgsizes) - - total = np.sum(ctgsizes) - idx = bisect(a50, total * percent / 100.0) - l50 = ctgsizes[idx] - n50 = idx + 1 - - return a50, l50, n50 - - -""" -Discriminator 
A-statistics: - -If n reads are uniform sample of the genome of length G, -we expect k = n * delta / G to start in a region of length delta - -Use poisson distribution: -A(delta, k) = ln(prob(1-copy) / prob(2-copies)) = n * delta / G - k * ln2 -""" - - -def Astat(delta, k, G, n): - """ - delta: contig size - k: reads mapped in contig - G: total genome size - n: total reads mapped to genome - """ - return n * delta * 1.0 / G - k * ln2 - - -def main(): - - actions = (("n50", "Given FASTA or a list of contig sizes, calculate N50"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def n50(args): - """ - %prog n50 filename - - Given a file with a list of numbers denoting contig lengths, calculate N50. - Input file can be both FASTA or a list of sizes. - """ - from jcvi.graphics.histogram import loghistogram - - p = OptionParser(n50.__doc__) - p.add_argument( - "--print0", - default=False, - action="store_true", - help="Print size and L50 to stdout", - ) - - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - ctgsizes = [] - - # Guess file format - probe = open(args[0]).readline()[0] - isFasta = probe == ">" - if isFasta: - for filename in args: - f = Fasta(filename) - ctgsizes += list(b for a, b in f.itersizes()) - - else: - for row in must_open(args): - try: - ctgsize = int(float(row.split()[-1])) - except ValueError: - continue - ctgsizes.append(ctgsize) - - a50, l50, nn50 = calculate_A50(ctgsizes) - sumsize = sum(ctgsizes) - minsize = min(ctgsizes) - maxsize = max(ctgsizes) - n = len(ctgsizes) - print(", ".join(args), file=sys.stderr) - - summary = (sumsize, l50, nn50, minsize, maxsize, n) - print( - " ".join("{0}={1}".format(a, b) for a, b in zip(header, summary)), - file=sys.stderr, - ) - loghistogram(ctgsizes) - - if opts.print0: - print("\t".join(str(x) for x in (",".join(args), sumsize, l50))) - - return zip(header, summary) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/chic.c 
b/jcvi/assembly/chic.c deleted file mode 100644 index 96c3e78f..00000000 --- a/jcvi/assembly/chic.c +++ /dev/null @@ -1,14222 +0,0 @@ -/* Generated by Cython 3.0.11 */ - -/* BEGIN: Cython Metadata -{ - "distutils": { - "depends": [ - "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include/numpy/arrayobject.h", - "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include/numpy/arrayscalars.h", - "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include/numpy/ndarrayobject.h", - "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include/numpy/ndarraytypes.h", - "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include/numpy/ufuncobject.h" - ], - "extra_compile_args": [ - "-O3" - ], - "include_dirs": [ - "/private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/_core/include" - ], - "name": "jcvi.assembly.chic", - "sources": [ - "src/jcvi/assembly/chic.pyx" - ] - }, - "module_name": "jcvi.assembly.chic" -} -END: Cython Metadata */ - -#ifndef PY_SSIZE_T_CLEAN -#define PY_SSIZE_T_CLEAN -#endif /* PY_SSIZE_T_CLEAN */ -#if defined(CYTHON_LIMITED_API) && 0 - #ifndef Py_LIMITED_API - #if CYTHON_LIMITED_API+0 > 0x03030000 - #define Py_LIMITED_API CYTHON_LIMITED_API - #else - #define Py_LIMITED_API 0x03030000 - #endif - #endif -#endif - -#include "Python.h" - - #if PY_MAJOR_VERSION >= 3 - #define __Pyx_PyFloat_FromString(obj) PyFloat_FromString(obj) - #else - #define __Pyx_PyFloat_FromString(obj) PyFloat_FromString(obj, NULL) - #endif - - - #if PY_MAJOR_VERSION <= 2 - #define PyDict_GetItemWithError 
_PyDict_GetItemWithError - #endif - - - #if (PY_VERSION_HEX < 0x030700b1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030600)) && !defined(PyContextVar_Get) - #define PyContextVar_Get(var, d, v) ((d) ? ((void)(var), Py_INCREF(d), (v)[0] = (d), 0) : ((v)[0] = NULL, 0) ) - #endif - -#ifndef Py_PYTHON_H - #error Python headers needed to compile C extensions, please install development version of Python. -#elif PY_VERSION_HEX < 0x02070000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) - #error Cython requires Python 2.7+ or Python 3.3+. -#else -#if defined(CYTHON_LIMITED_API) && CYTHON_LIMITED_API -#define __PYX_EXTRA_ABI_MODULE_NAME "limited" -#else -#define __PYX_EXTRA_ABI_MODULE_NAME "" -#endif -#define CYTHON_ABI "3_0_11" __PYX_EXTRA_ABI_MODULE_NAME -#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI -#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "." -#define CYTHON_HEX_VERSION 0x03000BF0 -#define CYTHON_FUTURE_DIVISION 1 -#include -#ifndef offsetof - #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) -#endif -#if !defined(_WIN32) && !defined(WIN32) && !defined(MS_WINDOWS) - #ifndef __stdcall - #define __stdcall - #endif - #ifndef __cdecl - #define __cdecl - #endif - #ifndef __fastcall - #define __fastcall - #endif -#endif -#ifndef DL_IMPORT - #define DL_IMPORT(t) t -#endif -#ifndef DL_EXPORT - #define DL_EXPORT(t) t -#endif -#define __PYX_COMMA , -#ifndef HAVE_LONG_LONG - #define HAVE_LONG_LONG -#endif -#ifndef PY_LONG_LONG - #define PY_LONG_LONG LONG_LONG -#endif -#ifndef Py_HUGE_VAL - #define Py_HUGE_VAL HUGE_VAL -#endif -#define __PYX_LIMITED_VERSION_HEX PY_VERSION_HEX -#if defined(GRAALVM_PYTHON) - /* For very preliminary testing purposes. Most variables are set the same as PyPy. 
- The existence of this section does not imply that anything works or is even tested */ - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 1 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #undef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 0 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #undef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 1 - #undef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 0 - #undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) - #endif - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #undef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif - 
#undef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 -#elif defined(PYPY_VERSION) - #define CYTHON_COMPILING_IN_PYPY 1 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #ifndef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #endif - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 0 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #undef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 1 - #undef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 0 - #undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) - #endif - #if PY_VERSION_HEX < 0x03090000 - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #undef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00) - #undef 
CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif - #undef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 -#elif defined(CYTHON_LIMITED_API) - #ifdef Py_LIMITED_API - #undef __PYX_LIMITED_VERSION_HEX - #define __PYX_LIMITED_VERSION_HEX Py_LIMITED_API - #endif - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 1 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_CLINE_IN_TRACEBACK - #define CYTHON_CLINE_IN_TRACEBACK 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #undef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 1 - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 0 - #ifndef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #endif - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #undef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 0 - #undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS 1 - #endif - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #undef CYTHON_USE_MODULE_STATE - #define 
CYTHON_USE_MODULE_STATE 1 - #ifndef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #endif - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif - #undef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 -#elif defined(Py_GIL_DISABLED) || defined(Py_NOGIL) - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 1 - #ifndef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 1 - #endif - #ifndef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #endif - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #ifndef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #ifndef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #ifndef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 1 - #endif - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #ifndef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 1 - #endif - #ifndef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 1 - #endif - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #ifndef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 1 - #endif - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS 1 - #endif - #ifndef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #ifndef CYTHON_USE_MODULE_STATE - #define 
CYTHON_USE_MODULE_STATE 0 - #endif - #ifndef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 1 - #endif - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 - #endif - #ifndef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 - #endif -#else - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 1 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #ifndef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 1 - #endif - #ifndef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #endif - #ifndef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 1 - #endif - #if PY_MAJOR_VERSION < 3 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #ifndef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 1 - #endif - #ifndef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 1 - #endif - #ifndef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 1 - #endif - #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #elif !defined(CYTHON_USE_UNICODE_WRITER) - #define CYTHON_USE_UNICODE_WRITER 1 - #endif - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #ifndef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 1 - #endif - #ifndef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 1 - #endif - #ifndef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 1 - #endif - #ifndef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL (PY_MAJOR_VERSION < 3 || PY_VERSION_HEX >= 0x03060000 && PY_VERSION_HEX < 0x030C00A6) - #endif - 
#ifndef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL (PY_VERSION_HEX >= 0x030700A1) - #endif - #ifndef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 1 - #endif - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS 1 - #endif - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #ifndef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #endif - #if PY_VERSION_HEX < 0x030400a1 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #elif !defined(CYTHON_USE_TP_FINALIZE) - #define CYTHON_USE_TP_FINALIZE 1 - #endif - #if PY_VERSION_HEX < 0x030600B1 - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #elif !defined(CYTHON_USE_DICT_VERSIONS) - #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX < 0x030C00A5) - #endif - #if PY_VERSION_HEX < 0x030700A3 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #elif !defined(CYTHON_USE_EXC_INFO_STACK) - #define CYTHON_USE_EXC_INFO_STACK 1 - #endif - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 - #endif - #ifndef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 1 - #endif -#endif -#if !defined(CYTHON_FAST_PYCCALL) -#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) -#endif -#if !defined(CYTHON_VECTORCALL) -#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1) -#endif -#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1) -#if CYTHON_USE_PYLONG_INTERNALS - #if PY_MAJOR_VERSION < 3 - #include "longintrepr.h" - #endif - #undef SHIFT - #undef BASE - #undef MASK - #ifdef SIZEOF_VOID_P - enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; - #endif -#endif -#ifndef __has_attribute - #define __has_attribute(x) 0 -#endif -#ifndef 
__has_cpp_attribute - #define __has_cpp_attribute(x) 0 -#endif -#ifndef CYTHON_RESTRICT - #if defined(__GNUC__) - #define CYTHON_RESTRICT __restrict__ - #elif defined(_MSC_VER) && _MSC_VER >= 1400 - #define CYTHON_RESTRICT __restrict - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define CYTHON_RESTRICT restrict - #else - #define CYTHON_RESTRICT - #endif -#endif -#ifndef CYTHON_UNUSED - #if defined(__cplusplus) - /* for clang __has_cpp_attribute(maybe_unused) is true even before C++17 - * but leads to warnings with -pedantic, since it is a C++17 feature */ - #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) - #if __has_cpp_attribute(maybe_unused) - #define CYTHON_UNUSED [[maybe_unused]] - #endif - #endif - #endif -#endif -#ifndef CYTHON_UNUSED -# if defined(__GNUC__) -# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) -# define CYTHON_UNUSED __attribute__ ((__unused__)) -# else -# define CYTHON_UNUSED -# endif -# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) -# define CYTHON_UNUSED __attribute__ ((__unused__)) -# else -# define CYTHON_UNUSED -# endif -#endif -#ifndef CYTHON_UNUSED_VAR -# if defined(__cplusplus) - template void CYTHON_UNUSED_VAR( const T& ) { } -# else -# define CYTHON_UNUSED_VAR(x) (void)(x) -# endif -#endif -#ifndef CYTHON_MAYBE_UNUSED_VAR - #define CYTHON_MAYBE_UNUSED_VAR(x) CYTHON_UNUSED_VAR(x) -#endif -#ifndef CYTHON_NCP_UNUSED -# if CYTHON_COMPILING_IN_CPYTHON -# define CYTHON_NCP_UNUSED -# else -# define CYTHON_NCP_UNUSED CYTHON_UNUSED -# endif -#endif -#ifndef CYTHON_USE_CPP_STD_MOVE - #if defined(__cplusplus) && (\ - __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1600)) - #define CYTHON_USE_CPP_STD_MOVE 1 - #else - #define CYTHON_USE_CPP_STD_MOVE 0 - #endif -#endif -#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) -#ifdef _MSC_VER - #ifndef _MSC_STDINT_H_ - #if _MSC_VER < 
1300 - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - #else - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; - #endif - #endif - #if _MSC_VER < 1300 - #ifdef _WIN64 - typedef unsigned long long __pyx_uintptr_t; - #else - typedef unsigned int __pyx_uintptr_t; - #endif - #else - #ifdef _WIN64 - typedef unsigned __int64 __pyx_uintptr_t; - #else - typedef unsigned __int32 __pyx_uintptr_t; - #endif - #endif -#else - #include - typedef uintptr_t __pyx_uintptr_t; -#endif -#ifndef CYTHON_FALLTHROUGH - #if defined(__cplusplus) - /* for clang __has_cpp_attribute(fallthrough) is true even before C++17 - * but leads to warnings with -pedantic, since it is a C++17 feature */ - #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) - #if __has_cpp_attribute(fallthrough) - #define CYTHON_FALLTHROUGH [[fallthrough]] - #endif - #endif - #ifndef CYTHON_FALLTHROUGH - #if __has_cpp_attribute(clang::fallthrough) - #define CYTHON_FALLTHROUGH [[clang::fallthrough]] - #elif __has_cpp_attribute(gnu::fallthrough) - #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] - #endif - #endif - #endif - #ifndef CYTHON_FALLTHROUGH - #if __has_attribute(fallthrough) - #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) - #else - #define CYTHON_FALLTHROUGH - #endif - #endif - #if defined(__clang__) && defined(__apple_build_version__) - #if __apple_build_version__ < 7000000 - #undef CYTHON_FALLTHROUGH - #define CYTHON_FALLTHROUGH - #endif - #endif -#endif -#ifdef __cplusplus - template - struct __PYX_IS_UNSIGNED_IMPL {static const bool value = T(0) < T(-1);}; - #define __PYX_IS_UNSIGNED(type) (__PYX_IS_UNSIGNED_IMPL::value) -#else - #define __PYX_IS_UNSIGNED(type) (((type)-1) > 0) -#endif -#if CYTHON_COMPILING_IN_PYPY == 1 - #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x030A0000) -#else - #define __PYX_NEED_TP_PRINT_SLOT 
(PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000) -#endif -#define __PYX_REINTERPRET_FUNCION(func_pointer, other_pointer) ((func_pointer)(void(*)(void))(other_pointer)) - -#ifndef CYTHON_INLINE - #if defined(__clang__) - #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) - #elif defined(__GNUC__) - #define CYTHON_INLINE __inline__ - #elif defined(_MSC_VER) - #define CYTHON_INLINE __inline - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define CYTHON_INLINE inline - #else - #define CYTHON_INLINE - #endif -#endif - -#define __PYX_BUILD_PY_SSIZE_T "n" -#define CYTHON_FORMAT_SSIZE_T "z" -#if PY_MAJOR_VERSION < 3 - #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" - #define __Pyx_DefaultClassType PyClass_Type - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#else - #define __Pyx_BUILTIN_MODULE_NAME "builtins" - #define __Pyx_DefaultClassType PyType_Type -#if CYTHON_COMPILING_IN_LIMITED_API - static CYTHON_INLINE PyObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, - PyObject *code, PyObject *c, PyObject* n, PyObject *v, - PyObject *fv, PyObject *cell, PyObject* fn, - PyObject *name, int fline, PyObject *lnos) { - PyObject *exception_table = NULL; - PyObject *types_module=NULL, *code_type=NULL, *result=NULL; - #if __PYX_LIMITED_VERSION_HEX < 0x030B0000 - PyObject *version_info; - PyObject *py_minor_version = NULL; - #endif - long minor_version = 0; - PyObject *type, *value, *traceback; - PyErr_Fetch(&type, &value, &traceback); - #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000 - minor_version = 11; - #else - if (!(version_info = PySys_GetObject("version_info"))) goto end; - if (!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end; - minor_version = PyLong_AsLong(py_minor_version); - Py_DECREF(py_minor_version); - if (minor_version == -1 && PyErr_Occurred()) goto end; - #endif - if 
(!(types_module = PyImport_ImportModule("types"))) goto end; - if (!(code_type = PyObject_GetAttrString(types_module, "CodeType"))) goto end; - if (minor_version <= 7) { - (void)p; - result = PyObject_CallFunction(code_type, "iiiiiOOOOOOiOO", a, k, l, s, f, code, - c, n, v, fn, name, fline, lnos, fv, cell); - } else if (minor_version <= 10) { - result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOiOO", a,p, k, l, s, f, code, - c, n, v, fn, name, fline, lnos, fv, cell); - } else { - if (!(exception_table = PyBytes_FromStringAndSize(NULL, 0))) goto end; - result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOOiOO", a,p, k, l, s, f, code, - c, n, v, fn, name, name, fline, lnos, exception_table, fv, cell); - } - end: - Py_XDECREF(code_type); - Py_XDECREF(exception_table); - Py_XDECREF(types_module); - if (type) { - PyErr_Restore(type, value, traceback); - } - return result; - } - #ifndef CO_OPTIMIZED - #define CO_OPTIMIZED 0x0001 - #endif - #ifndef CO_NEWLOCALS - #define CO_NEWLOCALS 0x0002 - #endif - #ifndef CO_VARARGS - #define CO_VARARGS 0x0004 - #endif - #ifndef CO_VARKEYWORDS - #define CO_VARKEYWORDS 0x0008 - #endif - #ifndef CO_ASYNC_GENERATOR - #define CO_ASYNC_GENERATOR 0x0200 - #endif - #ifndef CO_GENERATOR - #define CO_GENERATOR 0x0020 - #endif - #ifndef CO_COROUTINE - #define CO_COROUTINE 0x0080 - #endif -#elif PY_VERSION_HEX >= 0x030B0000 - static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, - PyObject *code, PyObject *c, PyObject* n, PyObject *v, - PyObject *fv, PyObject *cell, PyObject* fn, - PyObject *name, int fline, PyObject *lnos) { - PyCodeObject *result; - PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); - if (!empty_bytes) return NULL; - result = - #if PY_VERSION_HEX >= 0x030C0000 - PyUnstable_Code_NewWithPosOnlyArgs - #else - PyCode_NewWithPosOnlyArgs - #endif - (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, empty_bytes); - Py_DECREF(empty_bytes); - return result; - } 
-#elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_NewWithPosOnlyArgs(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#else - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#endif -#endif -#if PY_VERSION_HEX >= 0x030900A4 || defined(Py_IS_TYPE) - #define __Pyx_IS_TYPE(ob, type) Py_IS_TYPE(ob, type) -#else - #define __Pyx_IS_TYPE(ob, type) (((const PyObject*)ob)->ob_type == (type)) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_Is) - #define __Pyx_Py_Is(x, y) Py_Is(x, y) -#else - #define __Pyx_Py_Is(x, y) ((x) == (y)) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsNone) - #define __Pyx_Py_IsNone(ob) Py_IsNone(ob) -#else - #define __Pyx_Py_IsNone(ob) __Pyx_Py_Is((ob), Py_None) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsTrue) - #define __Pyx_Py_IsTrue(ob) Py_IsTrue(ob) -#else - #define __Pyx_Py_IsTrue(ob) __Pyx_Py_Is((ob), Py_True) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsFalse) - #define __Pyx_Py_IsFalse(ob) Py_IsFalse(ob) -#else - #define __Pyx_Py_IsFalse(ob) __Pyx_Py_Is((ob), Py_False) -#endif -#define __Pyx_NoneAsNull(obj) (__Pyx_Py_IsNone(obj) ? 
NULL : (obj)) -#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o) -#else - #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o) -#endif -#ifndef CO_COROUTINE - #define CO_COROUTINE 0x80 -#endif -#ifndef CO_ASYNC_GENERATOR - #define CO_ASYNC_GENERATOR 0x200 -#endif -#ifndef Py_TPFLAGS_CHECKTYPES - #define Py_TPFLAGS_CHECKTYPES 0 -#endif -#ifndef Py_TPFLAGS_HAVE_INDEX - #define Py_TPFLAGS_HAVE_INDEX 0 -#endif -#ifndef Py_TPFLAGS_HAVE_NEWBUFFER - #define Py_TPFLAGS_HAVE_NEWBUFFER 0 -#endif -#ifndef Py_TPFLAGS_HAVE_FINALIZE - #define Py_TPFLAGS_HAVE_FINALIZE 0 -#endif -#ifndef Py_TPFLAGS_SEQUENCE - #define Py_TPFLAGS_SEQUENCE 0 -#endif -#ifndef Py_TPFLAGS_MAPPING - #define Py_TPFLAGS_MAPPING 0 -#endif -#ifndef METH_STACKLESS - #define METH_STACKLESS 0 -#endif -#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) - #ifndef METH_FASTCALL - #define METH_FASTCALL 0x80 - #endif - typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); - typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, - Py_ssize_t nargs, PyObject *kwnames); -#else - #if PY_VERSION_HEX >= 0x030d00A4 - # define __Pyx_PyCFunctionFast PyCFunctionFast - # define __Pyx_PyCFunctionFastWithKeywords PyCFunctionFastWithKeywords - #else - # define __Pyx_PyCFunctionFast _PyCFunctionFast - # define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords - #endif -#endif -#if CYTHON_METH_FASTCALL - #define __Pyx_METH_FASTCALL METH_FASTCALL - #define __Pyx_PyCFunction_FastCall __Pyx_PyCFunctionFast - #define __Pyx_PyCFunction_FastCallWithKeywords __Pyx_PyCFunctionFastWithKeywords -#else - #define __Pyx_METH_FASTCALL METH_VARARGS - #define __Pyx_PyCFunction_FastCall PyCFunction - #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords -#endif -#if CYTHON_VECTORCALL - #define __pyx_vectorcallfunc vectorcallfunc - 
#define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET - #define __Pyx_PyVectorcall_NARGS(n) PyVectorcall_NARGS((size_t)(n)) -#elif CYTHON_BACKPORT_VECTORCALL - typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args, - size_t nargsf, PyObject *kwnames); - #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1)) - #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(((size_t)(n)) & ~__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)) -#else - #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET 0 - #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(n)) -#endif -#if PY_MAJOR_VERSION >= 0x030900B1 -#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_CheckExact(func) -#else -#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_Check(func) -#endif -#define __Pyx_CyOrPyCFunction_Check(func) PyCFunction_Check(func) -#if CYTHON_COMPILING_IN_CPYTHON -#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) (((PyCFunctionObject*)(func))->m_ml->ml_meth) -#elif !CYTHON_COMPILING_IN_LIMITED_API -#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) PyCFunction_GET_FUNCTION(func) -#endif -#if CYTHON_COMPILING_IN_CPYTHON -#define __Pyx_CyOrPyCFunction_GET_FLAGS(func) (((PyCFunctionObject*)(func))->m_ml->ml_flags) -static CYTHON_INLINE PyObject* __Pyx_CyOrPyCFunction_GET_SELF(PyObject *func) { - return (__Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_STATIC) ? 
NULL : ((PyCFunctionObject*)func)->m_self; -} -#endif -static CYTHON_INLINE int __Pyx__IsSameCFunction(PyObject *func, void *cfunc) { -#if CYTHON_COMPILING_IN_LIMITED_API - return PyCFunction_Check(func) && PyCFunction_GetFunction(func) == (PyCFunction) cfunc; -#else - return PyCFunction_Check(func) && PyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; -#endif -} -#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCFunction(func, cfunc) -#if __PYX_LIMITED_VERSION_HEX < 0x030900B1 - #define __Pyx_PyType_FromModuleAndSpec(m, s, b) ((void)m, PyType_FromSpecWithBases(s, b)) - typedef PyObject *(*__Pyx_PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, size_t, PyObject *); -#else - #define __Pyx_PyType_FromModuleAndSpec(m, s, b) PyType_FromModuleAndSpec(m, s, b) - #define __Pyx_PyCMethod PyCMethod -#endif -#ifndef METH_METHOD - #define METH_METHOD 0x200 -#endif -#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) - #define PyObject_Malloc(s) PyMem_Malloc(s) - #define PyObject_Free(p) PyMem_Free(p) - #define PyObject_Realloc(p) PyMem_Realloc(p) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) - #define __Pyx_PyFrame_SetLineNumber(frame, lineno) -#else - #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) - #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyThreadState_Current PyThreadState_Get() -#elif !CYTHON_FAST_THREAD_STATE - #define __Pyx_PyThreadState_Current PyThreadState_GET() -#elif PY_VERSION_HEX >= 0x030d00A1 - #define __Pyx_PyThreadState_Current PyThreadState_GetUnchecked() -#elif PY_VERSION_HEX >= 0x03060000 - #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() -#elif PY_VERSION_HEX >= 0x03000000 - #define __Pyx_PyThreadState_Current PyThreadState_GET() -#else - #define __Pyx_PyThreadState_Current _PyThreadState_Current -#endif -#if 
CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_INLINE void *__Pyx_PyModule_GetState(PyObject *op) -{ - void *result; - result = PyModule_GetState(op); - if (!result) - Py_FatalError("Couldn't find the module state"); - return result; -} -#endif -#define __Pyx_PyObject_GetSlot(obj, name, func_ctype) __Pyx_PyType_GetSlot(Py_TYPE(obj), name, func_ctype) -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((func_ctype) PyType_GetSlot((type), Py_##name)) -#else - #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((type)->name) -#endif -#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) -#include "pythread.h" -#define Py_tss_NEEDS_INIT 0 -typedef int Py_tss_t; -static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { - *key = PyThread_create_key(); - return 0; -} -static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { - Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); - *key = Py_tss_NEEDS_INIT; - return key; -} -static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { - PyObject_Free(key); -} -static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { - return *key != Py_tss_NEEDS_INIT; -} -static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { - PyThread_delete_key(*key); - *key = Py_tss_NEEDS_INIT; -} -static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { - return PyThread_set_key_value(*key, value); -} -static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { - return PyThread_get_key_value(*key); -} -#endif -#if PY_MAJOR_VERSION < 3 - #if CYTHON_COMPILING_IN_PYPY - #if PYPY_VERSION_NUM < 0x07030600 - #if defined(__cplusplus) && __cplusplus >= 201402L - [[deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")]] - #elif defined(__GNUC__) || defined(__clang__) - __attribute__ ((__deprecated__("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6"))) - 
#elif defined(_MSC_VER) - __declspec(deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")) - #endif - static CYTHON_INLINE int PyGILState_Check(void) { - return 0; - } - #else // PYPY_VERSION_NUM < 0x07030600 - #endif // PYPY_VERSION_NUM < 0x07030600 - #else - static CYTHON_INLINE int PyGILState_Check(void) { - PyThreadState * tstate = _PyThreadState_Current; - return tstate && (tstate == PyGILState_GetThisThreadState()); - } - #endif -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 || defined(_PyDict_NewPresized) -#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) -#else -#define __Pyx_PyDict_NewPresized(n) PyDict_New() -#endif -#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION - #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) -#else - #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX > 0x030600B4 && PY_VERSION_HEX < 0x030d0000 && CYTHON_USE_UNICODE_INTERNALS -#define __Pyx_PyDict_GetItemStrWithError(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) -static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStr(PyObject *dict, PyObject *name) { - PyObject *res = __Pyx_PyDict_GetItemStrWithError(dict, name); - if (res == NULL) PyErr_Clear(); - return res; -} -#elif PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000) -#define __Pyx_PyDict_GetItemStrWithError PyDict_GetItemWithError -#define __Pyx_PyDict_GetItemStr PyDict_GetItem -#else -static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStrWithError(PyObject *dict, PyObject *name) { -#if CYTHON_COMPILING_IN_PYPY - return PyDict_GetItem(dict, name); -#else - PyDictEntry *ep; - PyDictObject *mp = (PyDictObject*) dict; - long hash = 
((PyStringObject *) name)->ob_shash; - assert(hash != -1); - ep = (mp->ma_lookup)(mp, name, hash); - if (ep == NULL) { - return NULL; - } - return ep->me_value; -#endif -} -#define __Pyx_PyDict_GetItemStr PyDict_GetItem -#endif -#if CYTHON_USE_TYPE_SLOTS - #define __Pyx_PyType_GetFlags(tp) (((PyTypeObject *)tp)->tp_flags) - #define __Pyx_PyType_HasFeature(type, feature) ((__Pyx_PyType_GetFlags(type) & (feature)) != 0) - #define __Pyx_PyObject_GetIterNextFunc(obj) (Py_TYPE(obj)->tp_iternext) -#else - #define __Pyx_PyType_GetFlags(tp) (PyType_GetFlags((PyTypeObject *)tp)) - #define __Pyx_PyType_HasFeature(type, feature) PyType_HasFeature(type, feature) - #define __Pyx_PyObject_GetIterNextFunc(obj) PyIter_Next -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_SetItemOnTypeDict(tp, k, v) PyObject_GenericSetAttr((PyObject*)tp, k, v) -#else - #define __Pyx_SetItemOnTypeDict(tp, k, v) PyDict_SetItem(tp->tp_dict, k, v) -#endif -#if CYTHON_USE_TYPE_SPECS && PY_VERSION_HEX >= 0x03080000 -#define __Pyx_PyHeapTypeObject_GC_Del(obj) {\ - PyTypeObject *type = Py_TYPE((PyObject*)obj);\ - assert(__Pyx_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE));\ - PyObject_GC_Del(obj);\ - Py_DECREF(type);\ -} -#else -#define __Pyx_PyHeapTypeObject_GC_Del(obj) PyObject_GC_Del(obj) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define CYTHON_PEP393_ENABLED 1 - #define __Pyx_PyUnicode_READY(op) (0) - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GetLength(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_ReadChar(u, i) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((void)u, 1114111U) - #define __Pyx_PyUnicode_KIND(u) ((void)u, (0)) - #define __Pyx_PyUnicode_DATA(u) ((void*)u) - #define __Pyx_PyUnicode_READ(k, d, i) ((void)k, PyUnicode_ReadChar((PyObject*)(d), i)) - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GetLength(u)) -#elif PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) - #define CYTHON_PEP393_ENABLED 1 - #if PY_VERSION_HEX >= 0x030C0000 - #define 
__Pyx_PyUnicode_READY(op) (0) - #else - #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ - 0 : _PyUnicode_Ready((PyObject *)(op))) - #endif - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) - #define __Pyx_PyUnicode_KIND(u) ((int)PyUnicode_KIND(u)) - #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) - #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) - #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, (Py_UCS4) ch) - #if PY_VERSION_HEX >= 0x030C0000 - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) - #else - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) - #else - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) - #endif - #endif -#else - #define CYTHON_PEP393_ENABLED 0 - #define PyUnicode_1BYTE_KIND 1 - #define PyUnicode_2BYTE_KIND 2 - #define PyUnicode_4BYTE_KIND 4 - #define __Pyx_PyUnicode_READY(op) (0) - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 
65535U : 1114111U) - #define __Pyx_PyUnicode_KIND(u) ((int)sizeof(Py_UNICODE)) - #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) - #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) - #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = (Py_UNICODE) ch) - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) -#endif -#if CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) - #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) -#else - #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) - #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ - PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) -#endif -#if CYTHON_COMPILING_IN_PYPY - #if !defined(PyUnicode_DecodeUnicodeEscape) - #define PyUnicode_DecodeUnicodeEscape(s, size, errors) PyUnicode_Decode(s, size, "unicode_escape", errors) - #endif - #if !defined(PyUnicode_Contains) || (PY_MAJOR_VERSION == 2 && PYPY_VERSION_NUM < 0x07030500) - #undef PyUnicode_Contains - #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) - #endif - #if !defined(PyByteArray_Check) - #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) - #endif - #if !defined(PyObject_Format) - #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) - #endif -#endif -#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) -#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? 
PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) -#if PY_MAJOR_VERSION >= 3 - #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) -#else - #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) -#endif -#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) - #define PyObject_ASCII(o) PyObject_Repr(o) -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyBaseString_Type PyUnicode_Type - #define PyStringObject PyUnicodeObject - #define PyString_Type PyUnicode_Type - #define PyString_Check PyUnicode_Check - #define PyString_CheckExact PyUnicode_CheckExact -#ifndef PyObject_Unicode - #define PyObject_Unicode PyObject_Str -#endif -#endif -#if PY_MAJOR_VERSION >= 3 - #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) - #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) -#else - #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) - #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) -#endif -#if CYTHON_COMPILING_IN_CPYTHON - #define __Pyx_PySequence_ListKeepNew(obj)\ - (likely(PyList_CheckExact(obj) && Py_REFCNT(obj) == 1) ? 
__Pyx_NewRef(obj) : PySequence_List(obj)) -#else - #define __Pyx_PySequence_ListKeepNew(obj) PySequence_List(obj) -#endif -#ifndef PySet_CheckExact - #define PySet_CheckExact(obj) __Pyx_IS_TYPE(obj, &PySet_Type) -#endif -#if PY_VERSION_HEX >= 0x030900A4 - #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) - #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) -#else - #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) - #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) -#endif -#if CYTHON_ASSUME_SAFE_MACROS - #define __Pyx_PySequence_ITEM(o, i) PySequence_ITEM(o, i) - #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) - #define __Pyx_PyTuple_SET_ITEM(o, i, v) (PyTuple_SET_ITEM(o, i, v), (0)) - #define __Pyx_PyList_SET_ITEM(o, i, v) (PyList_SET_ITEM(o, i, v), (0)) - #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_GET_SIZE(o) - #define __Pyx_PyList_GET_SIZE(o) PyList_GET_SIZE(o) - #define __Pyx_PySet_GET_SIZE(o) PySet_GET_SIZE(o) - #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_GET_SIZE(o) - #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_GET_SIZE(o) -#else - #define __Pyx_PySequence_ITEM(o, i) PySequence_GetItem(o, i) - #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) - #define __Pyx_PyTuple_SET_ITEM(o, i, v) PyTuple_SetItem(o, i, v) - #define __Pyx_PyList_SET_ITEM(o, i, v) PyList_SetItem(o, i, v) - #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_Size(o) - #define __Pyx_PyList_GET_SIZE(o) PyList_Size(o) - #define __Pyx_PySet_GET_SIZE(o) PySet_Size(o) - #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_Size(o) - #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_Size(o) -#endif -#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 - #define __Pyx_PyImport_AddModuleRef(name) PyImport_AddModuleRef(name) -#else - static CYTHON_INLINE PyObject *__Pyx_PyImport_AddModuleRef(const char *name) { - PyObject *module = PyImport_AddModule(name); - Py_XINCREF(module); - return module; - } -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyIntObject PyLongObject - #define 
PyInt_Type PyLong_Type - #define PyInt_Check(op) PyLong_Check(op) - #define PyInt_CheckExact(op) PyLong_CheckExact(op) - #define __Pyx_Py3Int_Check(op) PyLong_Check(op) - #define __Pyx_Py3Int_CheckExact(op) PyLong_CheckExact(op) - #define PyInt_FromString PyLong_FromString - #define PyInt_FromUnicode PyLong_FromUnicode - #define PyInt_FromLong PyLong_FromLong - #define PyInt_FromSize_t PyLong_FromSize_t - #define PyInt_FromSsize_t PyLong_FromSsize_t - #define PyInt_AsLong PyLong_AsLong - #define PyInt_AS_LONG PyLong_AS_LONG - #define PyInt_AsSsize_t PyLong_AsSsize_t - #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask - #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask - #define PyNumber_Int PyNumber_Long -#else - #define __Pyx_Py3Int_Check(op) (PyLong_Check(op) || PyInt_Check(op)) - #define __Pyx_Py3Int_CheckExact(op) (PyLong_CheckExact(op) || PyInt_CheckExact(op)) -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyBoolObject PyLongObject -#endif -#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY - #ifndef PyUnicode_InternFromString - #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) - #endif -#endif -#if PY_VERSION_HEX < 0x030200A4 - typedef long Py_hash_t; - #define __Pyx_PyInt_FromHash_t PyInt_FromLong - #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t -#else - #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t - #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t -#endif -#if CYTHON_USE_ASYNC_SLOTS - #if PY_VERSION_HEX >= 0x030500B1 - #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods - #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) - #else - #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) - #endif -#else - #define __Pyx_PyType_AsAsync(obj) NULL -#endif -#ifndef __Pyx_PyAsyncMethodsStruct - typedef struct { - unaryfunc am_await; - unaryfunc am_aiter; - unaryfunc am_anext; - } __Pyx_PyAsyncMethodsStruct; -#endif - -#if defined(_WIN32) || defined(WIN32) || 
defined(MS_WINDOWS) - #if !defined(_USE_MATH_DEFINES) - #define _USE_MATH_DEFINES - #endif -#endif -#include -#ifdef NAN -#define __PYX_NAN() ((float) NAN) -#else -static CYTHON_INLINE float __PYX_NAN() { - float value; - memset(&value, 0xFF, sizeof(value)); - return value; -} -#endif -#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) -#define __Pyx_truncl trunc -#else -#define __Pyx_truncl truncl -#endif - -#define __PYX_MARK_ERR_POS(f_index, lineno) \ - { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } -#define __PYX_ERR(f_index, lineno, Ln_error) \ - { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } - -#ifdef CYTHON_EXTERN_C - #undef __PYX_EXTERN_C - #define __PYX_EXTERN_C CYTHON_EXTERN_C -#elif defined(__PYX_EXTERN_C) - #ifdef _MSC_VER - #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") - #else - #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. - #endif -#else - #ifdef __cplusplus - #define __PYX_EXTERN_C extern "C" - #else - #define __PYX_EXTERN_C extern - #endif -#endif - -#define __PYX_HAVE__jcvi__assembly__chic -#define __PYX_HAVE_API__jcvi__assembly__chic -/* Early includes */ -#include -#include - - /* Using NumPy API declarations from "numpy/__init__.cython-30.pxd" */ - -#include "numpy/arrayobject.h" -#include "numpy/ndarrayobject.h" -#include "numpy/ndarraytypes.h" -#include "numpy/arrayscalars.h" -#include "numpy/ufuncobject.h" -#include -#include "pythread.h" - - #if CYTHON_COMPILING_IN_PYPY - #ifdef _MSC_VER - #pragma message ("This module uses CPython specific internals of 'array.array', which are not available in PyPy.") - #else - #warning This module uses CPython specific internals of 'array.array', which are not available in PyPy. 
- #endif - #endif - -#ifdef _OPENMP -#include -#endif /* _OPENMP */ - -#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) -#define CYTHON_WITHOUT_ASSERTIONS -#endif - -typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; - const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; - -#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 -#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0 -#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) -#define __PYX_DEFAULT_STRING_ENCODING "" -#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString -#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize -#define __Pyx_uchar_cast(c) ((unsigned char)c) -#define __Pyx_long_cast(x) ((long)x) -#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ - (sizeof(type) < sizeof(Py_ssize_t)) ||\ - (sizeof(type) > sizeof(Py_ssize_t) &&\ - likely(v < (type)PY_SSIZE_T_MAX ||\ - v == (type)PY_SSIZE_T_MAX) &&\ - (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ - v == (type)PY_SSIZE_T_MIN))) ||\ - (sizeof(type) == sizeof(Py_ssize_t) &&\ - (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ - v == (type)PY_SSIZE_T_MAX))) ) -static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { - return (size_t) i < (size_t) limit; -} -#if defined (__cplusplus) && __cplusplus >= 201103L - #include - #define __Pyx_sst_abs(value) std::abs(value) -#elif SIZEOF_INT >= SIZEOF_SIZE_T - #define __Pyx_sst_abs(value) abs(value) -#elif SIZEOF_LONG >= SIZEOF_SIZE_T - #define __Pyx_sst_abs(value) labs(value) -#elif defined (_MSC_VER) - #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) -#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define __Pyx_sst_abs(value) llabs(value) -#elif defined (__GNUC__) - #define __Pyx_sst_abs(value) __builtin_llabs(value) -#else - #define __Pyx_sst_abs(value) ((value<0) ? 
-value : value) -#endif -static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s); -static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); -static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); -static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char*); -#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) -#define __Pyx_PyBytes_FromString PyBytes_FromString -#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); -#if PY_MAJOR_VERSION < 3 - #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString - #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize -#else - #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString - #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize -#endif -#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyObject_AsWritableString(s) ((char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsWritableSString(s) ((signed char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) -#define 
__Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) -#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) -#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) -#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) -#define __Pyx_PyUnicode_FromOrdinal(o) PyUnicode_FromOrdinal((int)o) -#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode -#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) -#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) -static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); -static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); -static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); -static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); -#define __Pyx_PySequence_Tuple(obj)\ - (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) -static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); -static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); -static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); -#if CYTHON_ASSUME_SAFE_MACROS -#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) -#else -#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) -#endif -#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) -#if PY_MAJOR_VERSION >= 3 -#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) -#else -#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? 
__Pyx_NewRef(x) : PyNumber_Int(x)) -#endif -#if CYTHON_USE_PYLONG_INTERNALS - #if PY_VERSION_HEX >= 0x030C00A7 - #ifndef _PyLong_SIGN_MASK - #define _PyLong_SIGN_MASK 3 - #endif - #ifndef _PyLong_NON_SIZE_BITS - #define _PyLong_NON_SIZE_BITS 3 - #endif - #define __Pyx_PyLong_Sign(x) (((PyLongObject*)x)->long_value.lv_tag & _PyLong_SIGN_MASK) - #define __Pyx_PyLong_IsNeg(x) ((__Pyx_PyLong_Sign(x) & 2) != 0) - #define __Pyx_PyLong_IsNonNeg(x) (!__Pyx_PyLong_IsNeg(x)) - #define __Pyx_PyLong_IsZero(x) (__Pyx_PyLong_Sign(x) & 1) - #define __Pyx_PyLong_IsPos(x) (__Pyx_PyLong_Sign(x) == 0) - #define __Pyx_PyLong_CompactValueUnsigned(x) (__Pyx_PyLong_Digits(x)[0]) - #define __Pyx_PyLong_DigitCount(x) ((Py_ssize_t) (((PyLongObject*)x)->long_value.lv_tag >> _PyLong_NON_SIZE_BITS)) - #define __Pyx_PyLong_SignedDigitCount(x)\ - ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * __Pyx_PyLong_DigitCount(x)) - #if defined(PyUnstable_Long_IsCompact) && defined(PyUnstable_Long_CompactValue) - #define __Pyx_PyLong_IsCompact(x) PyUnstable_Long_IsCompact((PyLongObject*) x) - #define __Pyx_PyLong_CompactValue(x) PyUnstable_Long_CompactValue((PyLongObject*) x) - #else - #define __Pyx_PyLong_IsCompact(x) (((PyLongObject*)x)->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS)) - #define __Pyx_PyLong_CompactValue(x) ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * (Py_ssize_t) __Pyx_PyLong_Digits(x)[0]) - #endif - typedef Py_ssize_t __Pyx_compact_pylong; - typedef size_t __Pyx_compact_upylong; - #else - #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0) - #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0) - #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0) - #define __Pyx_PyLong_IsPos(x) (Py_SIZE(x) > 0) - #define __Pyx_PyLong_CompactValueUnsigned(x) ((Py_SIZE(x) == 0) ? 
0 : __Pyx_PyLong_Digits(x)[0]) - #define __Pyx_PyLong_DigitCount(x) __Pyx_sst_abs(Py_SIZE(x)) - #define __Pyx_PyLong_SignedDigitCount(x) Py_SIZE(x) - #define __Pyx_PyLong_IsCompact(x) (Py_SIZE(x) == 0 || Py_SIZE(x) == 1 || Py_SIZE(x) == -1) - #define __Pyx_PyLong_CompactValue(x)\ - ((Py_SIZE(x) == 0) ? (sdigit) 0 : ((Py_SIZE(x) < 0) ? -(sdigit)__Pyx_PyLong_Digits(x)[0] : (sdigit)__Pyx_PyLong_Digits(x)[0])) - typedef sdigit __Pyx_compact_pylong; - typedef digit __Pyx_compact_upylong; - #endif - #if PY_VERSION_HEX >= 0x030C00A5 - #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit) - #else - #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->ob_digit) - #endif -#endif -#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII -#include -static int __Pyx_sys_getdefaultencoding_not_ascii; -static int __Pyx_init_sys_getdefaultencoding_params(void) { - PyObject* sys; - PyObject* default_encoding = NULL; - PyObject* ascii_chars_u = NULL; - PyObject* ascii_chars_b = NULL; - const char* default_encoding_c; - sys = PyImport_ImportModule("sys"); - if (!sys) goto bad; - default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); - Py_DECREF(sys); - if (!default_encoding) goto bad; - default_encoding_c = PyBytes_AsString(default_encoding); - if (!default_encoding_c) goto bad; - if (strcmp(default_encoding_c, "ascii") == 0) { - __Pyx_sys_getdefaultencoding_not_ascii = 0; - } else { - char ascii_chars[128]; - int c; - for (c = 0; c < 128; c++) { - ascii_chars[c] = (char) c; - } - __Pyx_sys_getdefaultencoding_not_ascii = 1; - ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); - if (!ascii_chars_u) goto bad; - ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); - if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { - PyErr_Format( - PyExc_ValueError, - "This module compiled with c_string_encoding=ascii, but default 
encoding '%.200s' is not a superset of ascii.", - default_encoding_c); - goto bad; - } - Py_DECREF(ascii_chars_u); - Py_DECREF(ascii_chars_b); - } - Py_DECREF(default_encoding); - return 0; -bad: - Py_XDECREF(default_encoding); - Py_XDECREF(ascii_chars_u); - Py_XDECREF(ascii_chars_b); - return -1; -} -#endif -#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 -#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) -#else -#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) -#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT -#include -static char* __PYX_DEFAULT_STRING_ENCODING; -static int __Pyx_init_sys_getdefaultencoding_params(void) { - PyObject* sys; - PyObject* default_encoding = NULL; - char* default_encoding_c; - sys = PyImport_ImportModule("sys"); - if (!sys) goto bad; - default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); - Py_DECREF(sys); - if (!default_encoding) goto bad; - default_encoding_c = PyBytes_AsString(default_encoding); - if (!default_encoding_c) goto bad; - __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); - if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; - strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); - Py_DECREF(default_encoding); - return 0; -bad: - Py_XDECREF(default_encoding); - return -1; -} -#endif -#endif - - -/* Test for GCC > 2.95 */ -#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) - #define likely(x) __builtin_expect(!!(x), 1) - #define unlikely(x) __builtin_expect(!!(x), 0) -#else /* !__GNUC__ or GCC < 2.95 */ - #define likely(x) (x) - #define unlikely(x) (x) -#endif /* __GNUC__ */ -static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } - -#if !CYTHON_USE_MODULE_STATE -static PyObject *__pyx_m = NULL; -#endif -static int __pyx_lineno; -static int __pyx_clineno = 0; 
-static const char * __pyx_cfilenm = __FILE__; -static const char *__pyx_filename; - -/* Header.proto */ -#if !defined(CYTHON_CCOMPLEX) - #if defined(__cplusplus) - #define CYTHON_CCOMPLEX 1 - #elif (defined(_Complex_I) && !defined(_MSC_VER)) || ((defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_COMPLEX__) && !defined(_MSC_VER)) - #define CYTHON_CCOMPLEX 1 - #else - #define CYTHON_CCOMPLEX 0 - #endif -#endif -#if CYTHON_CCOMPLEX - #ifdef __cplusplus - #include - #else - #include - #endif -#endif -#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__) - #undef _Complex_I - #define _Complex_I 1.0fj -#endif - -/* #### Code section: filename_table ### */ - -static const char *__pyx_f[] = { - "src/jcvi/assembly/chic.pyx", - "__init__.cython-30.pxd", - "contextvars.pxd", - "array.pxd", - "type.pxd", - "bool.pxd", - "complex.pxd", -}; -/* #### Code section: utility_code_proto_before_types ### */ -/* ForceInitThreads.proto */ -#ifndef __PYX_FORCE_INIT_THREADS - #define __PYX_FORCE_INIT_THREADS 0 -#endif - -/* BufferFormatStructs.proto */ -struct __Pyx_StructField_; -#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0) -typedef struct { - const char* name; - struct __Pyx_StructField_* fields; - size_t size; - size_t arraysize[8]; - int ndim; - char typegroup; - char is_unsigned; - int flags; -} __Pyx_TypeInfo; -typedef struct __Pyx_StructField_ { - __Pyx_TypeInfo* type; - const char* name; - size_t offset; -} __Pyx_StructField; -typedef struct { - __Pyx_StructField* field; - size_t parent_offset; -} __Pyx_BufFmt_StackElem; -typedef struct { - __Pyx_StructField root; - __Pyx_BufFmt_StackElem* head; - size_t fmt_offset; - size_t new_count, enc_count; - size_t struct_alignment; - int is_complex; - char enc_type; - char new_packmode; - char enc_packmode; - char is_valid_array; -} __Pyx_BufFmt_Context; - -/* #### Code section: numeric_typedefs ### */ - -/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":787 - * # in Cython to enable them only on the right systems. - * - * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< - * ctypedef npy_int16 int16_t - * ctypedef npy_int32 int32_t - */ -typedef npy_int8 __pyx_t_5numpy_int8_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":788 - * - * ctypedef npy_int8 int8_t - * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< - * ctypedef npy_int32 int32_t - * ctypedef npy_int64 int64_t - */ -typedef npy_int16 __pyx_t_5numpy_int16_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":789 - * ctypedef npy_int8 int8_t - * ctypedef npy_int16 int16_t - * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< - * ctypedef npy_int64 int64_t - * #ctypedef npy_int96 int96_t - */ -typedef npy_int32 __pyx_t_5numpy_int32_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":790 - * ctypedef npy_int16 int16_t - * ctypedef npy_int32 int32_t - * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< - * #ctypedef npy_int96 int96_t - * #ctypedef npy_int128 int128_t - */ -typedef npy_int64 __pyx_t_5numpy_int64_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":794 - * #ctypedef npy_int128 int128_t - * - * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< - * ctypedef npy_uint16 uint16_t - * ctypedef npy_uint32 uint32_t - */ -typedef npy_uint8 __pyx_t_5numpy_uint8_t; - -/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":795 - * - * ctypedef npy_uint8 uint8_t - * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< - * ctypedef npy_uint32 uint32_t - * ctypedef npy_uint64 uint64_t - */ -typedef npy_uint16 __pyx_t_5numpy_uint16_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":796 - * ctypedef npy_uint8 uint8_t - * ctypedef npy_uint16 uint16_t - * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< - * ctypedef npy_uint64 uint64_t - * #ctypedef npy_uint96 uint96_t - */ -typedef npy_uint32 __pyx_t_5numpy_uint32_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":797 - * ctypedef npy_uint16 uint16_t - * ctypedef npy_uint32 uint32_t - * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< - * #ctypedef npy_uint96 uint96_t - * #ctypedef npy_uint128 uint128_t - */ -typedef npy_uint64 __pyx_t_5numpy_uint64_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":801 - * #ctypedef npy_uint128 uint128_t - * - * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< - * ctypedef npy_float64 float64_t - * #ctypedef npy_float80 float80_t - */ -typedef npy_float32 __pyx_t_5numpy_float32_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":802 - * - * ctypedef npy_float32 float32_t - * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< - * #ctypedef npy_float80 float80_t - * #ctypedef npy_float128 float128_t - */ -typedef npy_float64 __pyx_t_5numpy_float64_t; - -/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":809 - * ctypedef double complex complex128_t - * - * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< - * ctypedef npy_ulonglong ulonglong_t - * - */ -typedef npy_longlong __pyx_t_5numpy_longlong_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":810 - * - * ctypedef npy_longlong longlong_t - * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< - * - * ctypedef npy_intp intp_t - */ -typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":812 - * ctypedef npy_ulonglong ulonglong_t - * - * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< - * ctypedef npy_uintp uintp_t - * - */ -typedef npy_intp __pyx_t_5numpy_intp_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":813 - * - * ctypedef npy_intp intp_t - * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< - * - * ctypedef npy_double float_t - */ -typedef npy_uintp __pyx_t_5numpy_uintp_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":815 - * ctypedef npy_uintp uintp_t - * - * ctypedef npy_double float_t # <<<<<<<<<<<<<< - * ctypedef npy_double double_t - * ctypedef npy_longdouble longdouble_t - */ -typedef npy_double __pyx_t_5numpy_float_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":816 - * - * ctypedef npy_double 
float_t - * ctypedef npy_double double_t # <<<<<<<<<<<<<< - * ctypedef npy_longdouble longdouble_t - * - */ -typedef npy_double __pyx_t_5numpy_double_t; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":817 - * ctypedef npy_double float_t - * ctypedef npy_double double_t - * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< - * - * ctypedef float complex cfloat_t - */ -typedef npy_longdouble __pyx_t_5numpy_longdouble_t; -/* #### Code section: complex_type_declarations ### */ -/* Declarations.proto */ -#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) - #ifdef __cplusplus - typedef ::std::complex< float > __pyx_t_float_complex; - #else - typedef float _Complex __pyx_t_float_complex; - #endif -#else - typedef struct { float real, imag; } __pyx_t_float_complex; -#endif -static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float); - -/* Declarations.proto */ -#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) - #ifdef __cplusplus - typedef ::std::complex< double > __pyx_t_double_complex; - #else - typedef double _Complex __pyx_t_double_complex; - #endif -#else - typedef struct { double real, imag; } __pyx_t_double_complex; -#endif -static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double); - -/* Declarations.proto */ -#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) - #ifdef __cplusplus - typedef ::std::complex< long double > __pyx_t_long_double_complex; - #else - typedef long double _Complex __pyx_t_long_double_complex; - #endif -#else - typedef struct { long double real, imag; } __pyx_t_long_double_complex; -#endif -static CYTHON_INLINE __pyx_t_long_double_complex __pyx_t_long_double_complex_from_parts(long double, long double); - -/* #### Code section: type_declarations ### */ - -/*--- Type declarations ---*/ -#ifndef _ARRAYARRAY_H -struct arrayobject; -typedef struct 
arrayobject arrayobject; -#endif -struct __pyx_opt_args_7cpython_11contextvars_get_value; -struct __pyx_opt_args_7cpython_11contextvars_get_value_no_default; - -/* "cpython/contextvars.pxd":112 - * - * - * cdef inline object get_value(var, default_value=None): # <<<<<<<<<<<<<< - * """Return a new reference to the value of the context variable, - * or the default value of the context variable, - */ -struct __pyx_opt_args_7cpython_11contextvars_get_value { - int __pyx_n; - PyObject *default_value; -}; - -/* "cpython/contextvars.pxd":129 - * - * - * cdef inline object get_value_no_default(var, default_value=None): # <<<<<<<<<<<<<< - * """Return a new reference to the value of the context variable, - * or the provided default value if no such value was found. - */ -struct __pyx_opt_args_7cpython_11contextvars_get_value_no_default { - int __pyx_n; - PyObject *default_value; -}; - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1113 - * - * # Iterator API added in v1.6 - * ctypedef int (*NpyIter_IterNextFunc)(NpyIter* it) noexcept nogil # <<<<<<<<<<<<<< - * ctypedef void (*NpyIter_GetMultiIndexFunc)(NpyIter* it, npy_intp* outcoords) noexcept nogil - * - */ -typedef int (*__pyx_t_5numpy_NpyIter_IterNextFunc)(NpyIter *); - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1114 - * # Iterator API added in v1.6 - * ctypedef int (*NpyIter_IterNextFunc)(NpyIter* it) noexcept nogil - * ctypedef void (*NpyIter_GetMultiIndexFunc)(NpyIter* it, npy_intp* outcoords) noexcept nogil # <<<<<<<<<<<<<< - * - * cdef extern from "numpy/arrayobject.h": - */ -typedef void (*__pyx_t_5numpy_NpyIter_GetMultiIndexFunc)(NpyIter *, npy_intp *); - -/* "jcvi/assembly/chic.pyx":25 - * - * - * ctypedef np.int INT # <<<<<<<<<<<<<< - * DEF LIMIT = 10000000 - * DEF BB 
= 12 - */ -typedef PyObject *__pyx_t_4jcvi_8assembly_4chic_INT; -/* #### Code section: utility_code_proto ### */ - -/* --- Runtime support code (head) --- */ -/* Refnanny.proto */ -#ifndef CYTHON_REFNANNY - #define CYTHON_REFNANNY 0 -#endif -#if CYTHON_REFNANNY - typedef struct { - void (*INCREF)(void*, PyObject*, Py_ssize_t); - void (*DECREF)(void*, PyObject*, Py_ssize_t); - void (*GOTREF)(void*, PyObject*, Py_ssize_t); - void (*GIVEREF)(void*, PyObject*, Py_ssize_t); - void* (*SetupContext)(const char*, Py_ssize_t, const char*); - void (*FinishContext)(void**); - } __Pyx_RefNannyAPIStruct; - static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; - static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); - #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; -#ifdef WITH_THREAD - #define __Pyx_RefNannySetupContext(name, acquire_gil)\ - if (acquire_gil) {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ - PyGILState_Release(__pyx_gilstate_save);\ - } else {\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ - } - #define __Pyx_RefNannyFinishContextNogil() {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __Pyx_RefNannyFinishContext();\ - PyGILState_Release(__pyx_gilstate_save);\ - } -#else - #define __Pyx_RefNannySetupContext(name, acquire_gil)\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__)) - #define __Pyx_RefNannyFinishContextNogil() __Pyx_RefNannyFinishContext() -#endif - #define __Pyx_RefNannyFinishContextNogil() {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __Pyx_RefNannyFinishContext();\ - PyGILState_Release(__pyx_gilstate_save);\ - } - #define __Pyx_RefNannyFinishContext()\ - __Pyx_RefNanny->FinishContext(&__pyx_refnanny) - #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define 
__Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_XINCREF(r) do { if((r) == NULL); else {__Pyx_INCREF(r); }} while(0) - #define __Pyx_XDECREF(r) do { if((r) == NULL); else {__Pyx_DECREF(r); }} while(0) - #define __Pyx_XGOTREF(r) do { if((r) == NULL); else {__Pyx_GOTREF(r); }} while(0) - #define __Pyx_XGIVEREF(r) do { if((r) == NULL); else {__Pyx_GIVEREF(r);}} while(0) -#else - #define __Pyx_RefNannyDeclarations - #define __Pyx_RefNannySetupContext(name, acquire_gil) - #define __Pyx_RefNannyFinishContextNogil() - #define __Pyx_RefNannyFinishContext() - #define __Pyx_INCREF(r) Py_INCREF(r) - #define __Pyx_DECREF(r) Py_DECREF(r) - #define __Pyx_GOTREF(r) - #define __Pyx_GIVEREF(r) - #define __Pyx_XINCREF(r) Py_XINCREF(r) - #define __Pyx_XDECREF(r) Py_XDECREF(r) - #define __Pyx_XGOTREF(r) - #define __Pyx_XGIVEREF(r) -#endif -#define __Pyx_Py_XDECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; Py_XDECREF(tmp);\ - } while (0) -#define __Pyx_XDECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; __Pyx_XDECREF(tmp);\ - } while (0) -#define __Pyx_DECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; __Pyx_DECREF(tmp);\ - } while (0) -#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) -#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) - -/* PyErrExceptionMatches.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) -static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); -#else -#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) -#endif 
- -/* PyThreadStateGet.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; -#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; -#if PY_VERSION_HEX >= 0x030C00A6 -#define __Pyx_PyErr_Occurred() (__pyx_tstate->current_exception != NULL) -#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->current_exception ? (PyObject*) Py_TYPE(__pyx_tstate->current_exception) : (PyObject*) NULL) -#else -#define __Pyx_PyErr_Occurred() (__pyx_tstate->curexc_type != NULL) -#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->curexc_type) -#endif -#else -#define __Pyx_PyThreadState_declare -#define __Pyx_PyThreadState_assign -#define __Pyx_PyErr_Occurred() (PyErr_Occurred() != NULL) -#define __Pyx_PyErr_CurrentExceptionType() PyErr_Occurred() -#endif - -/* PyErrFetchRestore.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) -#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) -#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) -#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) -#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); -static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A6 -#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) -#else -#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) -#endif -#else -#define __Pyx_PyErr_Clear() PyErr_Clear() -#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) -#define __Pyx_ErrRestoreWithState(type, value, tb) 
PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) -#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) -#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) -#endif - -/* PyObjectGetAttrStr.proto */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); -#else -#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) -#endif - -/* PyObjectGetAttrStrNoError.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name); - -/* GetBuiltinName.proto */ -static PyObject *__Pyx_GetBuiltinName(PyObject *name); - -/* GetTopmostException.proto */ -#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE -static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate); -#endif - -/* SaveResetException.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); -#else -#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) -#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) -#endif - -/* GetException.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb) -static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject 
**value, PyObject **tb); -#else -static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); -#endif - -/* PyObjectCall.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); -#else -#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) -#endif - -/* RaiseException.proto */ -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); - -/* TupleAndListFromArray.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n); -static CYTHON_INLINE PyObject* __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n); -#endif - -/* IncludeStringH.proto */ -#include - -/* BytesEquals.proto */ -static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); - -/* UnicodeEquals.proto */ -static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); - -/* fastcall.proto */ -#if CYTHON_AVOID_BORROWED_REFS - #define __Pyx_Arg_VARARGS(args, i) PySequence_GetItem(args, i) -#elif CYTHON_ASSUME_SAFE_MACROS - #define __Pyx_Arg_VARARGS(args, i) PyTuple_GET_ITEM(args, i) -#else - #define __Pyx_Arg_VARARGS(args, i) PyTuple_GetItem(args, i) -#endif -#if CYTHON_AVOID_BORROWED_REFS - #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg) - #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg) -#else - #define __Pyx_Arg_NewRef_VARARGS(arg) arg - #define __Pyx_Arg_XDECREF_VARARGS(arg) -#endif -#define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds) -#define __Pyx_KwValues_VARARGS(args, nargs) NULL -#define __Pyx_GetKwValue_VARARGS(kw, kwvalues, s) __Pyx_PyDict_GetItemStrWithError(kw, s) -#define __Pyx_KwargsAsDict_VARARGS(kw, kwvalues) PyDict_Copy(kw) -#if CYTHON_METH_FASTCALL - #define __Pyx_Arg_FASTCALL(args, i) args[i] - #define __Pyx_NumKwargs_FASTCALL(kwds) PyTuple_GET_SIZE(kwds) - #define 
__Pyx_KwValues_FASTCALL(args, nargs) ((args) + (nargs)) - static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s); -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 - CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues); - #else - #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw) - #endif - #define __Pyx_Arg_NewRef_FASTCALL(arg) arg /* no-op, __Pyx_Arg_FASTCALL is direct and this needs - to have the same reference counting */ - #define __Pyx_Arg_XDECREF_FASTCALL(arg) -#else - #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS - #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS - #define __Pyx_KwValues_FASTCALL __Pyx_KwValues_VARARGS - #define __Pyx_GetKwValue_FASTCALL __Pyx_GetKwValue_VARARGS - #define __Pyx_KwargsAsDict_FASTCALL __Pyx_KwargsAsDict_VARARGS - #define __Pyx_Arg_NewRef_FASTCALL(arg) __Pyx_Arg_NewRef_VARARGS(arg) - #define __Pyx_Arg_XDECREF_FASTCALL(arg) __Pyx_Arg_XDECREF_VARARGS(arg) -#endif -#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS -#define __Pyx_ArgsSlice_VARARGS(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_VARARGS(args, start), stop - start) -#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_FASTCALL(args, start), stop - start) -#else -#define __Pyx_ArgsSlice_VARARGS(args, start, stop) PyTuple_GetSlice(args, start, stop) -#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) PyTuple_GetSlice(args, start, stop) -#endif - -/* RaiseDoubleKeywords.proto */ -static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); - -/* ParseKeywords.proto */ -static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject *const *kwvalues, - PyObject **argnames[], - PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, - const char* function_name); - -/* RaiseArgTupleInvalid.proto */ 
-static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, - Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); - -/* ArgTypeTest.proto */ -#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ - ((likely(__Pyx_IS_TYPE(obj, type) | (none_allowed && (obj == Py_None)))) ? 1 :\ - __Pyx__ArgTypeTest(obj, type, name, exact)) -static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); - -/* IsLittleEndian.proto */ -static CYTHON_INLINE int __Pyx_Is_Little_Endian(void); - -/* BufferFormatCheck.proto */ -static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts); -static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, - __Pyx_BufFmt_StackElem* stack, - __Pyx_TypeInfo* type); - -/* BufferGetAndValidate.proto */ -#define __Pyx_GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)\ - ((obj == Py_None || obj == NULL) ?\ - (__Pyx_ZeroBuffer(buf), 0) :\ - __Pyx__GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)) -static int __Pyx__GetBufferAndValidate(Py_buffer* buf, PyObject* obj, - __Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack); -static void __Pyx_ZeroBuffer(Py_buffer* buf); -static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info); -static Py_ssize_t __Pyx_minusones[] = { -1, -1, -1, -1, -1, -1, -1, -1 }; -static Py_ssize_t __Pyx_zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - -/* GetItemInt.proto */ -#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ - (is_list ? 
(PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ - __Pyx_GetItemInt_Generic(o, to_py_func(i)))) -#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ - (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, - int wraparound, int boundscheck); -#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ - (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, - int wraparound, int boundscheck); -static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, - int is_list, int wraparound, int boundscheck); - -/* PyFunctionFastCall.proto */ -#if CYTHON_FAST_PYCALL -#if !CYTHON_VECTORCALL -#define __Pyx_PyFunction_FastCall(func, args, nargs)\ - __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) -static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); -#endif -#define __Pyx_BUILD_ASSERT_EXPR(cond)\ - (sizeof(char [1 - 2*!(cond)]) - 1) -#ifndef Py_MEMBER_SIZE -#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) -#endif -#if !CYTHON_VECTORCALL -#if PY_VERSION_HEX >= 0x03080000 - #include "frameobject.h" -#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API - #ifndef Py_BUILD_CORE - #define Py_BUILD_CORE 1 - #endif - #include "internal/pycore_frame.h" -#endif - #define __Pxy_PyFrame_Initialize_Offsets() - 
#define __Pyx_PyFrame_GetLocalsplus(frame) ((frame)->f_localsplus) -#else - static size_t __pyx_pyframe_localsplus_offset = 0; - #include "frameobject.h" - #define __Pxy_PyFrame_Initialize_Offsets()\ - ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ - (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) - #define __Pyx_PyFrame_GetLocalsplus(frame)\ - (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) -#endif -#endif -#endif - -/* PyObjectCallMethO.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); -#endif - -/* PyObjectFastCall.proto */ -#define __Pyx_PyObject_FastCall(func, args, nargs) __Pyx_PyObject_FastCallDict(func, args, (size_t)(nargs), NULL) -static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs); - -/* PyObjectCallOneArg.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); - -/* ObjectGetItem.proto */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject *key); -#else -#define __Pyx_PyObject_GetItem(obj, key) PyObject_GetItem(obj, key) -#endif - -/* ExtTypeTest.proto */ -static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); - -/* PyDictVersioning.proto */ -#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS -#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) -#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) -#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ - (version_var) = __PYX_GET_DICT_VERSION(dict);\ - (cache_var) = (value); -#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ - static PY_UINT64_T 
__pyx_dict_version = 0;\ - static PyObject *__pyx_dict_cached_value = NULL;\ - if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ - (VAR) = __pyx_dict_cached_value;\ - } else {\ - (VAR) = __pyx_dict_cached_value = (LOOKUP);\ - __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ - }\ -} -static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); -static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); -static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); -#else -#define __PYX_GET_DICT_VERSION(dict) (0) -#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) -#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); -#endif - -/* GetModuleGlobalName.proto */ -#if CYTHON_USE_DICT_VERSIONS -#define __Pyx_GetModuleGlobalName(var, name) do {\ - static PY_UINT64_T __pyx_dict_version = 0;\ - static PyObject *__pyx_dict_cached_value = NULL;\ - (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\ - (likely(__pyx_dict_cached_value) ? 
__Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\ - __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ -} while(0) -#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\ - PY_UINT64_T __pyx_dict_version;\ - PyObject *__pyx_dict_cached_value;\ - (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ -} while(0) -static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value); -#else -#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name) -#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name) -static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name); -#endif - -#define __Pyx_BufPtrFull1d(type, buf, i0, s0, o0) (type)(__Pyx_BufPtrFull1d_imp(buf, i0, s0, o0)) -static CYTHON_INLINE void* __Pyx_BufPtrFull1d_imp(void* buf, Py_ssize_t i0, Py_ssize_t s0, Py_ssize_t o0); -#define __Pyx_BufPtrStrided2d(type, buf, i0, s0, i1, s1) (type)((char*)buf + i0 * s0 + i1 * s1) -#define __Pyx_BufPtrStrided1d(type, buf, i0, s0) (type)((char*)buf + i0 * s0) -#define __Pyx_BufPtrStrided3d(type, buf, i0, s0, i1, s1, i2, s2) (type)((char*)buf + i0 * s0 + i1 * s1 + i2 * s2) -/* TypeImport.proto */ -#ifndef __PYX_HAVE_RT_ImportType_proto_3_0_11 -#define __PYX_HAVE_RT_ImportType_proto_3_0_11 -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -#include -#endif -#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __cplusplus >= 201103L -#define __PYX_GET_STRUCT_ALIGNMENT_3_0_11(s) alignof(s) -#else -#define __PYX_GET_STRUCT_ALIGNMENT_3_0_11(s) sizeof(void*) -#endif -enum __Pyx_ImportType_CheckSize_3_0_11 { - __Pyx_ImportType_CheckSize_Error_3_0_11 = 0, - __Pyx_ImportType_CheckSize_Warn_3_0_11 = 1, - __Pyx_ImportType_CheckSize_Ignore_3_0_11 = 2 -}; -static PyTypeObject *__Pyx_ImportType_3_0_11(PyObject* module, const char *module_name, const 
char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_0_11 check_size); -#endif - -/* Import.proto */ -static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); - -/* ImportDottedModule.proto */ -static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple); -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple); -#endif - -/* ImportDottedModuleRelFirst.proto */ -static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject *name, PyObject *parts_tuple); - -/* IncludeStructmemberH.proto */ -#include - -/* FixUpExtensionType.proto */ -#if CYTHON_USE_TYPE_SPECS -static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type); -#endif - -/* FetchSharedCythonModule.proto */ -static PyObject *__Pyx_FetchSharedCythonABIModule(void); - -/* FetchCommonType.proto */ -#if !CYTHON_USE_TYPE_SPECS -static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type); -#else -static PyTypeObject* __Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases); -#endif - -/* PyMethodNew.proto */ -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { - PyObject *typesModule=NULL, *methodType=NULL, *result=NULL; - CYTHON_UNUSED_VAR(typ); - if (!self) - return __Pyx_NewRef(func); - typesModule = PyImport_ImportModule("types"); - if (!typesModule) return NULL; - methodType = PyObject_GetAttrString(typesModule, "MethodType"); - Py_DECREF(typesModule); - if (!methodType) return NULL; - result = PyObject_CallFunctionObjArgs(methodType, func, self, NULL); - Py_DECREF(methodType); - return result; -} -#elif PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { - CYTHON_UNUSED_VAR(typ); - if (!self) - return __Pyx_NewRef(func); - return PyMethod_New(func, self); -} -#else - #define 
__Pyx_PyMethod_New PyMethod_New -#endif - -/* PyVectorcallFastCallDict.proto */ -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw); -#endif - -/* CythonFunctionShared.proto */ -#define __Pyx_CyFunction_USED -#define __Pyx_CYFUNCTION_STATICMETHOD 0x01 -#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02 -#define __Pyx_CYFUNCTION_CCLASS 0x04 -#define __Pyx_CYFUNCTION_COROUTINE 0x08 -#define __Pyx_CyFunction_GetClosure(f)\ - (((__pyx_CyFunctionObject *) (f))->func_closure) -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_CyFunction_GetClassObj(f)\ - (((__pyx_CyFunctionObject *) (f))->func_classobj) -#else - #define __Pyx_CyFunction_GetClassObj(f)\ - ((PyObject*) ((PyCMethodObject *) (f))->mm_class) -#endif -#define __Pyx_CyFunction_SetClassObj(f, classobj)\ - __Pyx__CyFunction_SetClassObj((__pyx_CyFunctionObject *) (f), (classobj)) -#define __Pyx_CyFunction_Defaults(type, f)\ - ((type *)(((__pyx_CyFunctionObject *) (f))->defaults)) -#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\ - ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g) -typedef struct { -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject_HEAD - PyObject *func; -#elif PY_VERSION_HEX < 0x030900B1 - PyCFunctionObject func; -#else - PyCMethodObject func; -#endif -#if CYTHON_BACKPORT_VECTORCALL - __pyx_vectorcallfunc func_vectorcall; -#endif -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API - PyObject *func_weakreflist; -#endif - PyObject *func_dict; - PyObject *func_name; - PyObject *func_qualname; - PyObject *func_doc; - PyObject *func_globals; - PyObject *func_code; - PyObject *func_closure; -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - PyObject *func_classobj; -#endif - void *defaults; - int defaults_pyobjects; - size_t defaults_size; - int flags; - PyObject *defaults_tuple; - PyObject *defaults_kwdict; - 
PyObject *(*defaults_getter)(PyObject *); - PyObject *func_annotations; - PyObject *func_is_coroutine; -} __pyx_CyFunctionObject; -#undef __Pyx_CyOrPyCFunction_Check -#define __Pyx_CyFunction_Check(obj) __Pyx_TypeCheck(obj, __pyx_CyFunctionType) -#define __Pyx_CyOrPyCFunction_Check(obj) __Pyx_TypeCheck2(obj, __pyx_CyFunctionType, &PyCFunction_Type) -#define __Pyx_CyFunction_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_CyFunctionType) -static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc); -#undef __Pyx_IsSameCFunction -#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCyOrCFunction(func, cfunc) -static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml, - int flags, PyObject* qualname, - PyObject *closure, - PyObject *module, PyObject *globals, - PyObject* code); -static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj); -static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m, - size_t size, - int pyobjects); -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m, - PyObject *tuple); -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m, - PyObject *dict); -static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m, - PyObject *dict); -static int __pyx_CyFunction_init(PyObject *module); -#if CYTHON_METH_FASTCALL -static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -#if CYTHON_BACKPORT_VECTORCALL -#define 
__Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall) -#else -#define __Pyx_CyFunction_func_vectorcall(f) (((PyCFunctionObject*)f)->vectorcall) -#endif -#endif - -/* CythonFunction.proto */ -static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, - int flags, PyObject* qualname, - PyObject *closure, - PyObject *module, PyObject *globals, - PyObject* code); - -/* CLineInTraceback.proto */ -#ifdef CYTHON_CLINE_IN_TRACEBACK -#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0) -#else -static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); -#endif - -/* CodeObjectCache.proto */ -#if !CYTHON_COMPILING_IN_LIMITED_API -typedef struct { - PyCodeObject* code_object; - int code_line; -} __Pyx_CodeObjectCacheEntry; -struct __Pyx_CodeObjectCache { - int count; - int max_count; - __Pyx_CodeObjectCacheEntry* entries; -}; -static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; -static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); -static PyCodeObject *__pyx_find_code_object(int code_line); -static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); -#endif - -/* AddTraceback.proto */ -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename); - -/* ArrayAPI.proto */ -#ifndef _ARRAYARRAY_H -#define _ARRAYARRAY_H -typedef struct arraydescr { - int typecode; - int itemsize; - PyObject * (*getitem)(struct arrayobject *, Py_ssize_t); - int (*setitem)(struct arrayobject *, Py_ssize_t, PyObject *); -#if PY_MAJOR_VERSION >= 3 - char *formats; -#endif -} arraydescr; -struct arrayobject { - PyObject_HEAD - Py_ssize_t ob_size; - union { - char *ob_item; - float *as_floats; - double *as_doubles; - int *as_ints; - unsigned int *as_uints; - unsigned char *as_uchars; - signed char *as_schars; - char *as_chars; - unsigned long *as_ulongs; - long *as_longs; -#if PY_MAJOR_VERSION >= 3 - 
unsigned long long *as_ulonglongs; - long long *as_longlongs; -#endif - short *as_shorts; - unsigned short *as_ushorts; - Py_UNICODE *as_pyunicodes; - void *as_voidptr; - } data; - Py_ssize_t allocated; - struct arraydescr *ob_descr; - PyObject *weakreflist; -#if PY_MAJOR_VERSION >= 3 - int ob_exports; -#endif -}; -#ifndef NO_NEWARRAY_INLINE -static CYTHON_INLINE PyObject * newarrayobject(PyTypeObject *type, Py_ssize_t size, - struct arraydescr *descr) { - arrayobject *op; - size_t nbytes; - if (size < 0) { - PyErr_BadInternalCall(); - return NULL; - } - nbytes = size * descr->itemsize; - if (nbytes / descr->itemsize != (size_t)size) { - return PyErr_NoMemory(); - } - op = (arrayobject *) type->tp_alloc(type, 0); - if (op == NULL) { - return NULL; - } - op->ob_descr = descr; - op->allocated = size; - op->weakreflist = NULL; - __Pyx_SET_SIZE(op, size); - if (size <= 0) { - op->data.ob_item = NULL; - } - else { - op->data.ob_item = PyMem_NEW(char, nbytes); - if (op->data.ob_item == NULL) { - Py_DECREF(op); - return PyErr_NoMemory(); - } - } - return (PyObject *) op; -} -#else -PyObject* newarrayobject(PyTypeObject *type, Py_ssize_t size, - struct arraydescr *descr); -#endif -static CYTHON_INLINE int resize(arrayobject *self, Py_ssize_t n) { - void *items = (void*) self->data.ob_item; - PyMem_Resize(items, char, (size_t)(n * self->ob_descr->itemsize)); - if (items == NULL) { - PyErr_NoMemory(); - return -1; - } - self->data.ob_item = (char*) items; - __Pyx_SET_SIZE(self, n); - self->allocated = n; - return 0; -} -static CYTHON_INLINE int resize_smart(arrayobject *self, Py_ssize_t n) { - void *items = (void*) self->data.ob_item; - Py_ssize_t newsize; - if (n < self->allocated && n*4 > self->allocated) { - __Pyx_SET_SIZE(self, n); - return 0; - } - newsize = n + (n / 2) + 1; - if (newsize <= n) { - PyErr_NoMemory(); - return -1; - } - PyMem_Resize(items, char, (size_t)(newsize * self->ob_descr->itemsize)); - if (items == NULL) { - PyErr_NoMemory(); - return -1; - } - 
self->data.ob_item = (char*) items; - __Pyx_SET_SIZE(self, n); - self->allocated = newsize; - return 0; -} -#endif - -/* BufferStructDeclare.proto */ -typedef struct { - Py_ssize_t shape, strides, suboffsets; -} __Pyx_Buf_DimInfo; -typedef struct { - size_t refcount; - Py_buffer pybuffer; -} __Pyx_Buffer; -typedef struct { - __Pyx_Buffer *rcbuffer; - char *data; - __Pyx_Buf_DimInfo diminfo[8]; -} __Pyx_LocalBuf_ND; - -#if PY_MAJOR_VERSION < 3 - static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags); - static void __Pyx_ReleaseBuffer(Py_buffer *view); -#else - #define __Pyx_GetBuffer PyObject_GetBuffer - #define __Pyx_ReleaseBuffer PyBuffer_Release -#endif - - -/* GCCDiagnostics.proto */ -#if !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) -#define __Pyx_HAS_GCC_DIAGNOSTIC -#endif - -/* RealImag.proto */ -#if CYTHON_CCOMPLEX - #ifdef __cplusplus - #define __Pyx_CREAL(z) ((z).real()) - #define __Pyx_CIMAG(z) ((z).imag()) - #else - #define __Pyx_CREAL(z) (__real__(z)) - #define __Pyx_CIMAG(z) (__imag__(z)) - #endif -#else - #define __Pyx_CREAL(z) ((z).real) - #define __Pyx_CIMAG(z) ((z).imag) -#endif -#if defined(__cplusplus) && CYTHON_CCOMPLEX\ - && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103) - #define __Pyx_SET_CREAL(z,x) ((z).real(x)) - #define __Pyx_SET_CIMAG(z,y) ((z).imag(y)) -#else - #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x) - #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y) -#endif - -/* Arithmetic.proto */ -#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) - #define __Pyx_c_eq_float(a, b) ((a)==(b)) - #define __Pyx_c_sum_float(a, b) ((a)+(b)) - #define __Pyx_c_diff_float(a, b) ((a)-(b)) - #define __Pyx_c_prod_float(a, b) ((a)*(b)) - #define __Pyx_c_quot_float(a, b) ((a)/(b)) - #define __Pyx_c_neg_float(a) (-(a)) - #ifdef __cplusplus - #define __Pyx_c_is_zero_float(z) 
((z)==(float)0) - #define __Pyx_c_conj_float(z) (::std::conj(z)) - #if 1 - #define __Pyx_c_abs_float(z) (::std::abs(z)) - #define __Pyx_c_pow_float(a, b) (::std::pow(a, b)) - #endif - #else - #define __Pyx_c_is_zero_float(z) ((z)==0) - #define __Pyx_c_conj_float(z) (conjf(z)) - #if 1 - #define __Pyx_c_abs_float(z) (cabsf(z)) - #define __Pyx_c_pow_float(a, b) (cpowf(a, b)) - #endif - #endif -#else - static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex, __pyx_t_float_complex); - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex, __pyx_t_float_complex); - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex, __pyx_t_float_complex); - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex, __pyx_t_float_complex); - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex, __pyx_t_float_complex); - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex); - static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex); - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex); - #if 1 - static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex); - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex, __pyx_t_float_complex); - #endif -#endif - -/* Arithmetic.proto */ -#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) - #define __Pyx_c_eq_double(a, b) ((a)==(b)) - #define __Pyx_c_sum_double(a, b) ((a)+(b)) - #define __Pyx_c_diff_double(a, b) ((a)-(b)) - #define __Pyx_c_prod_double(a, b) ((a)*(b)) - #define __Pyx_c_quot_double(a, b) ((a)/(b)) - #define __Pyx_c_neg_double(a) (-(a)) - #ifdef __cplusplus - #define __Pyx_c_is_zero_double(z) ((z)==(double)0) - #define __Pyx_c_conj_double(z) (::std::conj(z)) - #if 1 - #define __Pyx_c_abs_double(z) (::std::abs(z)) - #define __Pyx_c_pow_double(a, b) (::std::pow(a, b)) - #endif - #else - #define 
__Pyx_c_is_zero_double(z) ((z)==0) - #define __Pyx_c_conj_double(z) (conj(z)) - #if 1 - #define __Pyx_c_abs_double(z) (cabs(z)) - #define __Pyx_c_pow_double(a, b) (cpow(a, b)) - #endif - #endif -#else - static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex, __pyx_t_double_complex); - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex, __pyx_t_double_complex); - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex, __pyx_t_double_complex); - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex, __pyx_t_double_complex); - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex, __pyx_t_double_complex); - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex); - static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex); - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex); - #if 1 - static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex); - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex, __pyx_t_double_complex); - #endif -#endif - -/* Arithmetic.proto */ -#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) - #define __Pyx_c_eq_long__double(a, b) ((a)==(b)) - #define __Pyx_c_sum_long__double(a, b) ((a)+(b)) - #define __Pyx_c_diff_long__double(a, b) ((a)-(b)) - #define __Pyx_c_prod_long__double(a, b) ((a)*(b)) - #define __Pyx_c_quot_long__double(a, b) ((a)/(b)) - #define __Pyx_c_neg_long__double(a) (-(a)) - #ifdef __cplusplus - #define __Pyx_c_is_zero_long__double(z) ((z)==(long double)0) - #define __Pyx_c_conj_long__double(z) (::std::conj(z)) - #if 1 - #define __Pyx_c_abs_long__double(z) (::std::abs(z)) - #define __Pyx_c_pow_long__double(a, b) (::std::pow(a, b)) - #endif - #else - #define __Pyx_c_is_zero_long__double(z) ((z)==0) - #define __Pyx_c_conj_long__double(z) (conjl(z)) - #if 1 - 
#define __Pyx_c_abs_long__double(z) (cabsl(z)) - #define __Pyx_c_pow_long__double(a, b) (cpowl(a, b)) - #endif - #endif -#else - static CYTHON_INLINE int __Pyx_c_eq_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_sum_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_diff_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_prod_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_quot_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_neg_long__double(__pyx_t_long_double_complex); - static CYTHON_INLINE int __Pyx_c_is_zero_long__double(__pyx_t_long_double_complex); - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_conj_long__double(__pyx_t_long_double_complex); - #if 1 - static CYTHON_INLINE long double __Pyx_c_abs_long__double(__pyx_t_long_double_complex); - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_pow_long__double(__pyx_t_long_double_complex, __pyx_t_long_double_complex); - #endif -#endif - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); - -/* CIntFromPy.proto */ -static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); - -/* FormatTypeName.proto */ -#if CYTHON_COMPILING_IN_LIMITED_API -typedef PyObject *__Pyx_TypeName; -#define __Pyx_FMT_TYPENAME "%U" -static __Pyx_TypeName __Pyx_PyType_GetName(PyTypeObject* tp); -#define __Pyx_DECREF_TypeName(obj) Py_XDECREF(obj) -#else -typedef const char *__Pyx_TypeName; -#define __Pyx_FMT_TYPENAME "%.200s" -#define __Pyx_PyType_GetName(tp) ((tp)->tp_name) -#define 
__Pyx_DECREF_TypeName(obj) -#endif - -/* CIntFromPy.proto */ -static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); - -/* FastTypeChecks.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) -#define __Pyx_TypeCheck2(obj, type1, type2) __Pyx_IsAnySubtype2(Py_TYPE(obj), (PyTypeObject *)type1, (PyTypeObject *)type2) -static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); -static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b); -static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); -static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); -#else -#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) -#define __Pyx_TypeCheck2(obj, type1, type2) (PyObject_TypeCheck(obj, (PyTypeObject *)type1) || PyObject_TypeCheck(obj, (PyTypeObject *)type2)) -#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) -#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) -#endif -#define __Pyx_PyErr_ExceptionMatches2(err1, err2) __Pyx_PyErr_GivenExceptionMatches2(__Pyx_PyErr_CurrentExceptionType(), err1, err2) -#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) - -/* CheckBinaryVersion.proto */ -static unsigned long __Pyx_get_runtime_version(void); -static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer); - -/* InitStrings.proto */ -static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); - -/* #### Code section: module_declarations ### */ -static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_Descr *__pyx_v_self); /* proto*/ -static CYTHON_INLINE npy_intp 
__pyx_f_5numpy_5dtype_9alignment_alignment(PyArray_Descr *__pyx_v_self); /* proto*/ -static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_6fields_fields(PyArray_Descr *__pyx_v_self); /* proto*/ -static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr *__pyx_v_self); /* proto*/ -static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarray(PyArray_Descr *__pyx_v_self); /* proto*/ -static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr *__pyx_v_self); /* proto*/ -static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE void **__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiIterObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArrayObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__pyx_v_self); 
/* proto*/ -static CYTHON_INLINE double __pyx_f_7cpython_7complex_7complex_4real_real(PyComplexObject *__pyx_v_self); /* proto*/ -static CYTHON_INLINE double __pyx_f_7cpython_7complex_7complex_4imag_imag(PyComplexObject *__pyx_v_self); /* proto*/ - -/* Module declarations from "libc.string" */ - -/* Module declarations from "libc.stdio" */ - -/* Module declarations from "__builtin__" */ - -/* Module declarations from "cpython.type" */ - -/* Module declarations from "cpython.version" */ - -/* Module declarations from "cpython.exc" */ - -/* Module declarations from "cpython.module" */ - -/* Module declarations from "cpython.mem" */ - -/* Module declarations from "cpython.tuple" */ - -/* Module declarations from "cpython.list" */ - -/* Module declarations from "cpython.sequence" */ - -/* Module declarations from "cpython.mapping" */ - -/* Module declarations from "cpython.iterator" */ - -/* Module declarations from "cpython.number" */ - -/* Module declarations from "cpython.int" */ - -/* Module declarations from "__builtin__" */ - -/* Module declarations from "cpython.bool" */ - -/* Module declarations from "cpython.long" */ - -/* Module declarations from "cpython.float" */ - -/* Module declarations from "__builtin__" */ - -/* Module declarations from "cpython.complex" */ - -/* Module declarations from "cpython.string" */ - -/* Module declarations from "libc.stddef" */ - -/* Module declarations from "cpython.unicode" */ - -/* Module declarations from "cpython.pyport" */ - -/* Module declarations from "cpython.dict" */ - -/* Module declarations from "cpython.instance" */ - -/* Module declarations from "cpython.function" */ - -/* Module declarations from "cpython.method" */ - -/* Module declarations from "cpython.weakref" */ - -/* Module declarations from "cpython.getargs" */ - -/* Module declarations from "cpython.pythread" */ - -/* Module declarations from "cpython.pystate" */ - -/* Module declarations from "cpython.cobject" */ - -/* Module declarations from 
"cpython.oldbuffer" */ - -/* Module declarations from "cpython.set" */ - -/* Module declarations from "cpython.buffer" */ - -/* Module declarations from "cpython.bytes" */ - -/* Module declarations from "cpython.pycapsule" */ - -/* Module declarations from "cpython.contextvars" */ - -/* Module declarations from "cpython" */ - -/* Module declarations from "cpython.object" */ - -/* Module declarations from "cpython.ref" */ - -/* Module declarations from "numpy" */ - -/* Module declarations from "numpy" */ - -/* Module declarations from "cython" */ - -/* Module declarations from "array" */ - -/* Module declarations from "cpython.array" */ -static CYTHON_INLINE int __pyx_f_7cpython_5array_extend_buffer(arrayobject *, char *, Py_ssize_t); /*proto*/ - -/* Module declarations from "jcvi.assembly.chic" */ -static int *__pyx_v_4jcvi_8assembly_4chic_GR; -/* #### Code section: typeinfo ### */ -static __Pyx_TypeInfo __Pyx_TypeInfo_int = { "int", NULL, sizeof(int), { 0 }, 0, __PYX_IS_UNSIGNED(int) ? 
'U' : 'I', __PYX_IS_UNSIGNED(int), 0 }; -static __Pyx_TypeInfo __Pyx_TypeInfo_object = { "INT", NULL, sizeof(__pyx_t_4jcvi_8assembly_4chic_INT), { 0 }, 0, 'O', 0, 0 }; -/* #### Code section: before_global_var ### */ -#define __Pyx_MODULE_NAME "jcvi.assembly.chic" -extern int __pyx_module_is_main_jcvi__assembly__chic; -int __pyx_module_is_main_jcvi__assembly__chic = 0; - -/* Implementation of "jcvi.assembly.chic" */ -/* #### Code section: global_var ### */ -static PyObject *__pyx_builtin_range; -static PyObject *__pyx_builtin_ImportError; -static PyObject *__pyx_builtin_MemoryError; -/* #### Code section: string_decls ### */ -static const char __pyx_k_a[] = "a"; -static const char __pyx_k_b[] = "b"; -static const char __pyx_k_c[] = "c"; -static const char __pyx_k_s[] = "s"; -static const char __pyx_k__3[] = "*"; -static const char __pyx_k_ia[] = "ia"; -static const char __pyx_k_ib[] = "ib"; -static const char __pyx_k_ic[] = "ic"; -static const char __pyx_k_np[] = "np"; -static const char __pyx_k__11[] = "?"; -static const char __pyx_k_dist[] = "dist"; -static const char __pyx_k_main[] = "__main__"; -static const char __pyx_k_name[] = "__name__"; -static const char __pyx_k_size[] = "size"; -static const char __pyx_k_spec[] = "__spec__"; -static const char __pyx_k_test[] = "__test__"; -static const char __pyx_k_tour[] = "tour"; -static const char __pyx_k_array[] = "array"; -static const char __pyx_k_links[] = "links"; -static const char __pyx_k_numpy[] = "numpy"; -static const char __pyx_k_range[] = "range"; -static const char __pyx_k_cumsum[] = "cumsum"; -static const char __pyx_k_import[] = "__import__"; -static const char __pyx_k_tour_M[] = "tour_M"; -static const char __pyx_k_tour_P[] = "tour_P"; -static const char __pyx_k_tour_Q[] = "tour_Q"; -static const char __pyx_k_sizes_oo[] = "sizes_oo"; -static const char __pyx_k_sizes_cum[] = "sizes_cum"; -static const char __pyx_k_tour_sizes[] = "tour_sizes"; -static const char __pyx_k_ImportError[] = "ImportError"; 
-static const char __pyx_k_MemoryError[] = "MemoryError"; -static const char __pyx_k_initializing[] = "_initializing"; -static const char __pyx_k_is_coroutine[] = "_is_coroutine"; -static const char __pyx_k_class_getitem[] = "__class_getitem__"; -static const char __pyx_k_score_evaluate_M[] = "score_evaluate_M"; -static const char __pyx_k_score_evaluate_P[] = "score_evaluate_P"; -static const char __pyx_k_score_evaluate_Q[] = "score_evaluate_Q"; -static const char __pyx_k_asyncio_coroutines[] = "asyncio.coroutines"; -static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; -static const char __pyx_k_jcvi_assembly_chic[] = "jcvi.assembly.chic"; -static const char __pyx_k_src_jcvi_assembly_chic_pyx[] = "src/jcvi/assembly/chic.pyx"; -static const char __pyx_k_Cythonized_version_of_score_eva[] = "\nCythonized version of score_evaluate() in hic.py.\n\nSupport three versions with different objective functions:\n- score_evaluate_M: distance is defined as the distance between mid-points\n between contigs. Maximize Sum(n_links / distance).\n- score_evaluate_P: distance is defined as the sizes of interleaving contigs\n plus the harmonic mean of all link distances. Maximize Sum(n_links / distance).\n- score_evaluate_Q: distance is defined as the sizes of interleaving contigs\n plus the actual link distances. Maximize Sum(1 / distance) for all links.\n For performance consideration, we actually use a histogram to approximate\n all link distances. 
See golden_array() in hic for details.\n"; -static const char __pyx_k_numpy__core_multiarray_failed_to[] = "numpy._core.multiarray failed to import"; -static const char __pyx_k_numpy__core_umath_failed_to_impo[] = "numpy._core.umath failed to import"; -/* #### Code section: decls ### */ -static int __pyx_pf_7cpython_5array_5array___getbuffer__(arrayobject *__pyx_v_self, Py_buffer *__pyx_v_info, CYTHON_UNUSED int __pyx_v_flags); /* proto */ -static void __pyx_pf_7cpython_5array_5array_2__releasebuffer__(CYTHON_UNUSED arrayobject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ -static PyObject *__pyx_pf_4jcvi_8assembly_4chic_score_evaluate_M(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_M); /* proto */ -static PyObject *__pyx_pf_4jcvi_8assembly_4chic_2score_evaluate_P(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_P); /* proto */ -static PyObject *__pyx_pf_4jcvi_8assembly_4chic_4score_evaluate_Q(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_Q); /* proto */ -/* #### Code section: late_includes ### */ -/* #### Code section: module_state ### */ -typedef struct { - PyObject *__pyx_d; - PyObject *__pyx_b; - PyObject *__pyx_cython_runtime; - PyObject *__pyx_empty_tuple; - PyObject *__pyx_empty_bytes; - PyObject *__pyx_empty_unicode; - #ifdef __Pyx_CyFunction_USED - PyTypeObject *__pyx_CyFunctionType; - #endif - #ifdef __Pyx_FusedFunction_USED - PyTypeObject *__pyx_FusedFunctionType; - #endif - #ifdef __Pyx_Generator_USED - PyTypeObject *__pyx_GeneratorType; - #endif - #ifdef __Pyx_IterableCoroutine_USED - PyTypeObject *__pyx_IterableCoroutineType; - #endif - #ifdef __Pyx_Coroutine_USED - PyTypeObject *__pyx_CoroutineAwaitType; - #endif - #ifdef __Pyx_Coroutine_USED - PyTypeObject *__pyx_CoroutineType; - #endif - #if 
CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - PyTypeObject *__pyx_ptype_7cpython_4type_type; - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - PyTypeObject *__pyx_ptype_7cpython_4bool_bool; - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - PyTypeObject *__pyx_ptype_7cpython_7complex_complex; - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - PyTypeObject *__pyx_ptype_5numpy_dtype; - PyTypeObject *__pyx_ptype_5numpy_flatiter; - 
PyTypeObject *__pyx_ptype_5numpy_broadcast; - PyTypeObject *__pyx_ptype_5numpy_ndarray; - PyTypeObject *__pyx_ptype_5numpy_generic; - PyTypeObject *__pyx_ptype_5numpy_number; - PyTypeObject *__pyx_ptype_5numpy_integer; - PyTypeObject *__pyx_ptype_5numpy_signedinteger; - PyTypeObject *__pyx_ptype_5numpy_unsignedinteger; - PyTypeObject *__pyx_ptype_5numpy_inexact; - PyTypeObject *__pyx_ptype_5numpy_floating; - PyTypeObject *__pyx_ptype_5numpy_complexfloating; - PyTypeObject *__pyx_ptype_5numpy_flexible; - PyTypeObject *__pyx_ptype_5numpy_character; - PyTypeObject *__pyx_ptype_5numpy_ufunc; - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - PyTypeObject *__pyx_ptype_7cpython_5array_array; - #if CYTHON_USE_MODULE_STATE - #endif - PyObject *__pyx_n_s_ImportError; - PyObject *__pyx_n_s_MemoryError; - PyObject *__pyx_n_s__11; - PyObject *__pyx_n_s__3; - PyObject *__pyx_n_s_a; - PyObject *__pyx_n_s_array; - PyObject *__pyx_n_s_asyncio_coroutines; - PyObject *__pyx_n_s_b; - PyObject *__pyx_n_s_c; - PyObject *__pyx_n_s_class_getitem; - PyObject *__pyx_n_s_cline_in_traceback; - PyObject *__pyx_n_s_cumsum; - PyObject *__pyx_n_s_dist; - PyObject *__pyx_n_s_ia; - PyObject *__pyx_n_s_ib; - PyObject *__pyx_n_s_ic; - PyObject *__pyx_n_s_import; - PyObject *__pyx_n_s_initializing; - PyObject *__pyx_n_s_is_coroutine; - PyObject *__pyx_n_s_jcvi_assembly_chic; - PyObject *__pyx_n_s_links; - PyObject *__pyx_n_s_main; - PyObject *__pyx_n_s_name; - PyObject *__pyx_n_s_np; - PyObject *__pyx_n_s_numpy; - PyObject *__pyx_kp_s_numpy__core_multiarray_failed_to; - PyObject *__pyx_kp_s_numpy__core_umath_failed_to_impo; - PyObject *__pyx_n_s_range; - PyObject *__pyx_n_s_s; - PyObject *__pyx_n_s_score_evaluate_M; - PyObject *__pyx_n_s_score_evaluate_P; - PyObject *__pyx_n_s_score_evaluate_Q; - PyObject *__pyx_n_s_size; - PyObject *__pyx_n_s_sizes_cum; - PyObject *__pyx_n_s_sizes_oo; - PyObject *__pyx_n_s_spec; - PyObject 
*__pyx_kp_s_src_jcvi_assembly_chic_pyx; - PyObject *__pyx_n_s_test; - PyObject *__pyx_n_s_tour; - PyObject *__pyx_n_s_tour_M; - PyObject *__pyx_n_s_tour_P; - PyObject *__pyx_n_s_tour_Q; - PyObject *__pyx_n_s_tour_sizes; - PyObject *__pyx_int_2; - PyObject *__pyx_int_neg_1; - PyObject *__pyx_tuple_; - PyObject *__pyx_tuple__2; - PyObject *__pyx_tuple__4; - PyObject *__pyx_tuple__6; - PyObject *__pyx_tuple__7; - PyObject *__pyx_tuple__9; - PyObject *__pyx_codeobj__5; - PyObject *__pyx_codeobj__8; - PyObject *__pyx_codeobj__10; -} __pyx_mstate; - -#if CYTHON_USE_MODULE_STATE -#ifdef __cplusplus -namespace { - extern struct PyModuleDef __pyx_moduledef; -} /* anonymous namespace */ -#else -static struct PyModuleDef __pyx_moduledef; -#endif - -#define __pyx_mstate(o) ((__pyx_mstate *)__Pyx_PyModule_GetState(o)) - -#define __pyx_mstate_global (__pyx_mstate(PyState_FindModule(&__pyx_moduledef))) - -#define __pyx_m (PyState_FindModule(&__pyx_moduledef)) -#else -static __pyx_mstate __pyx_mstate_global_static = -#ifdef __cplusplus - {}; -#else - {0}; -#endif -static __pyx_mstate *__pyx_mstate_global = &__pyx_mstate_global_static; -#endif -/* #### Code section: module_state_clear ### */ -#if CYTHON_USE_MODULE_STATE -static int __pyx_m_clear(PyObject *m) { - __pyx_mstate *clear_module_state = __pyx_mstate(m); - if (!clear_module_state) return 0; - Py_CLEAR(clear_module_state->__pyx_d); - Py_CLEAR(clear_module_state->__pyx_b); - Py_CLEAR(clear_module_state->__pyx_cython_runtime); - Py_CLEAR(clear_module_state->__pyx_empty_tuple); - Py_CLEAR(clear_module_state->__pyx_empty_bytes); - Py_CLEAR(clear_module_state->__pyx_empty_unicode); - #ifdef __Pyx_CyFunction_USED - Py_CLEAR(clear_module_state->__pyx_CyFunctionType); - #endif - #ifdef __Pyx_FusedFunction_USED - Py_CLEAR(clear_module_state->__pyx_FusedFunctionType); - #endif - Py_CLEAR(clear_module_state->__pyx_ptype_7cpython_4type_type); - Py_CLEAR(clear_module_state->__pyx_ptype_7cpython_4bool_bool); - 
Py_CLEAR(clear_module_state->__pyx_ptype_7cpython_7complex_complex); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_dtype); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_flatiter); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_broadcast); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_ndarray); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_generic); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_number); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_integer); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_signedinteger); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_unsignedinteger); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_inexact); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_floating); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_complexfloating); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_flexible); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_character); - Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_ufunc); - Py_CLEAR(clear_module_state->__pyx_ptype_7cpython_5array_array); - Py_CLEAR(clear_module_state->__pyx_n_s_ImportError); - Py_CLEAR(clear_module_state->__pyx_n_s_MemoryError); - Py_CLEAR(clear_module_state->__pyx_n_s__11); - Py_CLEAR(clear_module_state->__pyx_n_s__3); - Py_CLEAR(clear_module_state->__pyx_n_s_a); - Py_CLEAR(clear_module_state->__pyx_n_s_array); - Py_CLEAR(clear_module_state->__pyx_n_s_asyncio_coroutines); - Py_CLEAR(clear_module_state->__pyx_n_s_b); - Py_CLEAR(clear_module_state->__pyx_n_s_c); - Py_CLEAR(clear_module_state->__pyx_n_s_class_getitem); - Py_CLEAR(clear_module_state->__pyx_n_s_cline_in_traceback); - Py_CLEAR(clear_module_state->__pyx_n_s_cumsum); - Py_CLEAR(clear_module_state->__pyx_n_s_dist); - Py_CLEAR(clear_module_state->__pyx_n_s_ia); - Py_CLEAR(clear_module_state->__pyx_n_s_ib); - Py_CLEAR(clear_module_state->__pyx_n_s_ic); - Py_CLEAR(clear_module_state->__pyx_n_s_import); - Py_CLEAR(clear_module_state->__pyx_n_s_initializing); - 
Py_CLEAR(clear_module_state->__pyx_n_s_is_coroutine); - Py_CLEAR(clear_module_state->__pyx_n_s_jcvi_assembly_chic); - Py_CLEAR(clear_module_state->__pyx_n_s_links); - Py_CLEAR(clear_module_state->__pyx_n_s_main); - Py_CLEAR(clear_module_state->__pyx_n_s_name); - Py_CLEAR(clear_module_state->__pyx_n_s_np); - Py_CLEAR(clear_module_state->__pyx_n_s_numpy); - Py_CLEAR(clear_module_state->__pyx_kp_s_numpy__core_multiarray_failed_to); - Py_CLEAR(clear_module_state->__pyx_kp_s_numpy__core_umath_failed_to_impo); - Py_CLEAR(clear_module_state->__pyx_n_s_range); - Py_CLEAR(clear_module_state->__pyx_n_s_s); - Py_CLEAR(clear_module_state->__pyx_n_s_score_evaluate_M); - Py_CLEAR(clear_module_state->__pyx_n_s_score_evaluate_P); - Py_CLEAR(clear_module_state->__pyx_n_s_score_evaluate_Q); - Py_CLEAR(clear_module_state->__pyx_n_s_size); - Py_CLEAR(clear_module_state->__pyx_n_s_sizes_cum); - Py_CLEAR(clear_module_state->__pyx_n_s_sizes_oo); - Py_CLEAR(clear_module_state->__pyx_n_s_spec); - Py_CLEAR(clear_module_state->__pyx_kp_s_src_jcvi_assembly_chic_pyx); - Py_CLEAR(clear_module_state->__pyx_n_s_test); - Py_CLEAR(clear_module_state->__pyx_n_s_tour); - Py_CLEAR(clear_module_state->__pyx_n_s_tour_M); - Py_CLEAR(clear_module_state->__pyx_n_s_tour_P); - Py_CLEAR(clear_module_state->__pyx_n_s_tour_Q); - Py_CLEAR(clear_module_state->__pyx_n_s_tour_sizes); - Py_CLEAR(clear_module_state->__pyx_int_2); - Py_CLEAR(clear_module_state->__pyx_int_neg_1); - Py_CLEAR(clear_module_state->__pyx_tuple_); - Py_CLEAR(clear_module_state->__pyx_tuple__2); - Py_CLEAR(clear_module_state->__pyx_tuple__4); - Py_CLEAR(clear_module_state->__pyx_tuple__6); - Py_CLEAR(clear_module_state->__pyx_tuple__7); - Py_CLEAR(clear_module_state->__pyx_tuple__9); - Py_CLEAR(clear_module_state->__pyx_codeobj__5); - Py_CLEAR(clear_module_state->__pyx_codeobj__8); - Py_CLEAR(clear_module_state->__pyx_codeobj__10); - return 0; -} -#endif -/* #### Code section: module_state_traverse ### */ -#if CYTHON_USE_MODULE_STATE -static 
int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) { - __pyx_mstate *traverse_module_state = __pyx_mstate(m); - if (!traverse_module_state) return 0; - Py_VISIT(traverse_module_state->__pyx_d); - Py_VISIT(traverse_module_state->__pyx_b); - Py_VISIT(traverse_module_state->__pyx_cython_runtime); - Py_VISIT(traverse_module_state->__pyx_empty_tuple); - Py_VISIT(traverse_module_state->__pyx_empty_bytes); - Py_VISIT(traverse_module_state->__pyx_empty_unicode); - #ifdef __Pyx_CyFunction_USED - Py_VISIT(traverse_module_state->__pyx_CyFunctionType); - #endif - #ifdef __Pyx_FusedFunction_USED - Py_VISIT(traverse_module_state->__pyx_FusedFunctionType); - #endif - Py_VISIT(traverse_module_state->__pyx_ptype_7cpython_4type_type); - Py_VISIT(traverse_module_state->__pyx_ptype_7cpython_4bool_bool); - Py_VISIT(traverse_module_state->__pyx_ptype_7cpython_7complex_complex); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_dtype); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_flatiter); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_broadcast); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_ndarray); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_generic); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_number); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_integer); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_signedinteger); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_unsignedinteger); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_inexact); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_floating); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_complexfloating); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_flexible); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_character); - Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_ufunc); - Py_VISIT(traverse_module_state->__pyx_ptype_7cpython_5array_array); - Py_VISIT(traverse_module_state->__pyx_n_s_ImportError); - 
Py_VISIT(traverse_module_state->__pyx_n_s_MemoryError); - Py_VISIT(traverse_module_state->__pyx_n_s__11); - Py_VISIT(traverse_module_state->__pyx_n_s__3); - Py_VISIT(traverse_module_state->__pyx_n_s_a); - Py_VISIT(traverse_module_state->__pyx_n_s_array); - Py_VISIT(traverse_module_state->__pyx_n_s_asyncio_coroutines); - Py_VISIT(traverse_module_state->__pyx_n_s_b); - Py_VISIT(traverse_module_state->__pyx_n_s_c); - Py_VISIT(traverse_module_state->__pyx_n_s_class_getitem); - Py_VISIT(traverse_module_state->__pyx_n_s_cline_in_traceback); - Py_VISIT(traverse_module_state->__pyx_n_s_cumsum); - Py_VISIT(traverse_module_state->__pyx_n_s_dist); - Py_VISIT(traverse_module_state->__pyx_n_s_ia); - Py_VISIT(traverse_module_state->__pyx_n_s_ib); - Py_VISIT(traverse_module_state->__pyx_n_s_ic); - Py_VISIT(traverse_module_state->__pyx_n_s_import); - Py_VISIT(traverse_module_state->__pyx_n_s_initializing); - Py_VISIT(traverse_module_state->__pyx_n_s_is_coroutine); - Py_VISIT(traverse_module_state->__pyx_n_s_jcvi_assembly_chic); - Py_VISIT(traverse_module_state->__pyx_n_s_links); - Py_VISIT(traverse_module_state->__pyx_n_s_main); - Py_VISIT(traverse_module_state->__pyx_n_s_name); - Py_VISIT(traverse_module_state->__pyx_n_s_np); - Py_VISIT(traverse_module_state->__pyx_n_s_numpy); - Py_VISIT(traverse_module_state->__pyx_kp_s_numpy__core_multiarray_failed_to); - Py_VISIT(traverse_module_state->__pyx_kp_s_numpy__core_umath_failed_to_impo); - Py_VISIT(traverse_module_state->__pyx_n_s_range); - Py_VISIT(traverse_module_state->__pyx_n_s_s); - Py_VISIT(traverse_module_state->__pyx_n_s_score_evaluate_M); - Py_VISIT(traverse_module_state->__pyx_n_s_score_evaluate_P); - Py_VISIT(traverse_module_state->__pyx_n_s_score_evaluate_Q); - Py_VISIT(traverse_module_state->__pyx_n_s_size); - Py_VISIT(traverse_module_state->__pyx_n_s_sizes_cum); - Py_VISIT(traverse_module_state->__pyx_n_s_sizes_oo); - Py_VISIT(traverse_module_state->__pyx_n_s_spec); - 
Py_VISIT(traverse_module_state->__pyx_kp_s_src_jcvi_assembly_chic_pyx); - Py_VISIT(traverse_module_state->__pyx_n_s_test); - Py_VISIT(traverse_module_state->__pyx_n_s_tour); - Py_VISIT(traverse_module_state->__pyx_n_s_tour_M); - Py_VISIT(traverse_module_state->__pyx_n_s_tour_P); - Py_VISIT(traverse_module_state->__pyx_n_s_tour_Q); - Py_VISIT(traverse_module_state->__pyx_n_s_tour_sizes); - Py_VISIT(traverse_module_state->__pyx_int_2); - Py_VISIT(traverse_module_state->__pyx_int_neg_1); - Py_VISIT(traverse_module_state->__pyx_tuple_); - Py_VISIT(traverse_module_state->__pyx_tuple__2); - Py_VISIT(traverse_module_state->__pyx_tuple__4); - Py_VISIT(traverse_module_state->__pyx_tuple__6); - Py_VISIT(traverse_module_state->__pyx_tuple__7); - Py_VISIT(traverse_module_state->__pyx_tuple__9); - Py_VISIT(traverse_module_state->__pyx_codeobj__5); - Py_VISIT(traverse_module_state->__pyx_codeobj__8); - Py_VISIT(traverse_module_state->__pyx_codeobj__10); - return 0; -} -#endif -/* #### Code section: module_state_defines ### */ -#define __pyx_d __pyx_mstate_global->__pyx_d -#define __pyx_b __pyx_mstate_global->__pyx_b -#define __pyx_cython_runtime __pyx_mstate_global->__pyx_cython_runtime -#define __pyx_empty_tuple __pyx_mstate_global->__pyx_empty_tuple -#define __pyx_empty_bytes __pyx_mstate_global->__pyx_empty_bytes -#define __pyx_empty_unicode __pyx_mstate_global->__pyx_empty_unicode -#ifdef __Pyx_CyFunction_USED -#define __pyx_CyFunctionType __pyx_mstate_global->__pyx_CyFunctionType -#endif -#ifdef __Pyx_FusedFunction_USED -#define __pyx_FusedFunctionType __pyx_mstate_global->__pyx_FusedFunctionType -#endif -#ifdef __Pyx_Generator_USED -#define __pyx_GeneratorType __pyx_mstate_global->__pyx_GeneratorType -#endif -#ifdef __Pyx_IterableCoroutine_USED -#define __pyx_IterableCoroutineType __pyx_mstate_global->__pyx_IterableCoroutineType -#endif -#ifdef __Pyx_Coroutine_USED -#define __pyx_CoroutineAwaitType __pyx_mstate_global->__pyx_CoroutineAwaitType -#endif -#ifdef 
__Pyx_Coroutine_USED -#define __pyx_CoroutineType __pyx_mstate_global->__pyx_CoroutineType -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#define __pyx_ptype_7cpython_4type_type __pyx_mstate_global->__pyx_ptype_7cpython_4type_type -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#define __pyx_ptype_7cpython_4bool_bool __pyx_mstate_global->__pyx_ptype_7cpython_4bool_bool -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#define __pyx_ptype_7cpython_7complex_complex __pyx_mstate_global->__pyx_ptype_7cpython_7complex_complex -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if 
CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#define __pyx_ptype_5numpy_dtype __pyx_mstate_global->__pyx_ptype_5numpy_dtype -#define __pyx_ptype_5numpy_flatiter __pyx_mstate_global->__pyx_ptype_5numpy_flatiter -#define __pyx_ptype_5numpy_broadcast __pyx_mstate_global->__pyx_ptype_5numpy_broadcast -#define __pyx_ptype_5numpy_ndarray __pyx_mstate_global->__pyx_ptype_5numpy_ndarray -#define __pyx_ptype_5numpy_generic __pyx_mstate_global->__pyx_ptype_5numpy_generic -#define __pyx_ptype_5numpy_number __pyx_mstate_global->__pyx_ptype_5numpy_number -#define __pyx_ptype_5numpy_integer __pyx_mstate_global->__pyx_ptype_5numpy_integer -#define __pyx_ptype_5numpy_signedinteger __pyx_mstate_global->__pyx_ptype_5numpy_signedinteger -#define __pyx_ptype_5numpy_unsignedinteger __pyx_mstate_global->__pyx_ptype_5numpy_unsignedinteger -#define __pyx_ptype_5numpy_inexact __pyx_mstate_global->__pyx_ptype_5numpy_inexact -#define __pyx_ptype_5numpy_floating __pyx_mstate_global->__pyx_ptype_5numpy_floating -#define __pyx_ptype_5numpy_complexfloating __pyx_mstate_global->__pyx_ptype_5numpy_complexfloating -#define __pyx_ptype_5numpy_flexible __pyx_mstate_global->__pyx_ptype_5numpy_flexible -#define __pyx_ptype_5numpy_character __pyx_mstate_global->__pyx_ptype_5numpy_character -#define __pyx_ptype_5numpy_ufunc __pyx_mstate_global->__pyx_ptype_5numpy_ufunc -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#define __pyx_ptype_7cpython_5array_array __pyx_mstate_global->__pyx_ptype_7cpython_5array_array -#if CYTHON_USE_MODULE_STATE -#endif -#define __pyx_n_s_ImportError __pyx_mstate_global->__pyx_n_s_ImportError -#define __pyx_n_s_MemoryError __pyx_mstate_global->__pyx_n_s_MemoryError -#define __pyx_n_s__11 __pyx_mstate_global->__pyx_n_s__11 -#define __pyx_n_s__3 __pyx_mstate_global->__pyx_n_s__3 -#define __pyx_n_s_a __pyx_mstate_global->__pyx_n_s_a -#define __pyx_n_s_array 
__pyx_mstate_global->__pyx_n_s_array -#define __pyx_n_s_asyncio_coroutines __pyx_mstate_global->__pyx_n_s_asyncio_coroutines -#define __pyx_n_s_b __pyx_mstate_global->__pyx_n_s_b -#define __pyx_n_s_c __pyx_mstate_global->__pyx_n_s_c -#define __pyx_n_s_class_getitem __pyx_mstate_global->__pyx_n_s_class_getitem -#define __pyx_n_s_cline_in_traceback __pyx_mstate_global->__pyx_n_s_cline_in_traceback -#define __pyx_n_s_cumsum __pyx_mstate_global->__pyx_n_s_cumsum -#define __pyx_n_s_dist __pyx_mstate_global->__pyx_n_s_dist -#define __pyx_n_s_ia __pyx_mstate_global->__pyx_n_s_ia -#define __pyx_n_s_ib __pyx_mstate_global->__pyx_n_s_ib -#define __pyx_n_s_ic __pyx_mstate_global->__pyx_n_s_ic -#define __pyx_n_s_import __pyx_mstate_global->__pyx_n_s_import -#define __pyx_n_s_initializing __pyx_mstate_global->__pyx_n_s_initializing -#define __pyx_n_s_is_coroutine __pyx_mstate_global->__pyx_n_s_is_coroutine -#define __pyx_n_s_jcvi_assembly_chic __pyx_mstate_global->__pyx_n_s_jcvi_assembly_chic -#define __pyx_n_s_links __pyx_mstate_global->__pyx_n_s_links -#define __pyx_n_s_main __pyx_mstate_global->__pyx_n_s_main -#define __pyx_n_s_name __pyx_mstate_global->__pyx_n_s_name -#define __pyx_n_s_np __pyx_mstate_global->__pyx_n_s_np -#define __pyx_n_s_numpy __pyx_mstate_global->__pyx_n_s_numpy -#define __pyx_kp_s_numpy__core_multiarray_failed_to __pyx_mstate_global->__pyx_kp_s_numpy__core_multiarray_failed_to -#define __pyx_kp_s_numpy__core_umath_failed_to_impo __pyx_mstate_global->__pyx_kp_s_numpy__core_umath_failed_to_impo -#define __pyx_n_s_range __pyx_mstate_global->__pyx_n_s_range -#define __pyx_n_s_s __pyx_mstate_global->__pyx_n_s_s -#define __pyx_n_s_score_evaluate_M __pyx_mstate_global->__pyx_n_s_score_evaluate_M -#define __pyx_n_s_score_evaluate_P __pyx_mstate_global->__pyx_n_s_score_evaluate_P -#define __pyx_n_s_score_evaluate_Q __pyx_mstate_global->__pyx_n_s_score_evaluate_Q -#define __pyx_n_s_size __pyx_mstate_global->__pyx_n_s_size -#define __pyx_n_s_sizes_cum 
__pyx_mstate_global->__pyx_n_s_sizes_cum -#define __pyx_n_s_sizes_oo __pyx_mstate_global->__pyx_n_s_sizes_oo -#define __pyx_n_s_spec __pyx_mstate_global->__pyx_n_s_spec -#define __pyx_kp_s_src_jcvi_assembly_chic_pyx __pyx_mstate_global->__pyx_kp_s_src_jcvi_assembly_chic_pyx -#define __pyx_n_s_test __pyx_mstate_global->__pyx_n_s_test -#define __pyx_n_s_tour __pyx_mstate_global->__pyx_n_s_tour -#define __pyx_n_s_tour_M __pyx_mstate_global->__pyx_n_s_tour_M -#define __pyx_n_s_tour_P __pyx_mstate_global->__pyx_n_s_tour_P -#define __pyx_n_s_tour_Q __pyx_mstate_global->__pyx_n_s_tour_Q -#define __pyx_n_s_tour_sizes __pyx_mstate_global->__pyx_n_s_tour_sizes -#define __pyx_int_2 __pyx_mstate_global->__pyx_int_2 -#define __pyx_int_neg_1 __pyx_mstate_global->__pyx_int_neg_1 -#define __pyx_tuple_ __pyx_mstate_global->__pyx_tuple_ -#define __pyx_tuple__2 __pyx_mstate_global->__pyx_tuple__2 -#define __pyx_tuple__4 __pyx_mstate_global->__pyx_tuple__4 -#define __pyx_tuple__6 __pyx_mstate_global->__pyx_tuple__6 -#define __pyx_tuple__7 __pyx_mstate_global->__pyx_tuple__7 -#define __pyx_tuple__9 __pyx_mstate_global->__pyx_tuple__9 -#define __pyx_codeobj__5 __pyx_mstate_global->__pyx_codeobj__5 -#define __pyx_codeobj__8 __pyx_mstate_global->__pyx_codeobj__8 -#define __pyx_codeobj__10 __pyx_mstate_global->__pyx_codeobj__10 -/* #### Code section: module_code ### */ - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":287 - * - * @property - * cdef inline npy_intp itemsize(self) noexcept nogil: # <<<<<<<<<<<<<< - * return PyDataType_ELSIZE(self) - * - */ - -static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_Descr *__pyx_v_self) { - npy_intp __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":288 - * @property - * 
cdef inline npy_intp itemsize(self) noexcept nogil: - * return PyDataType_ELSIZE(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = PyDataType_ELSIZE(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":287 - * - * @property - * cdef inline npy_intp itemsize(self) noexcept nogil: # <<<<<<<<<<<<<< - * return PyDataType_ELSIZE(self) - * - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":291 - * - * @property - * cdef inline npy_intp alignment(self) noexcept nogil: # <<<<<<<<<<<<<< - * return PyDataType_ALIGNMENT(self) - * - */ - -static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_9alignment_alignment(PyArray_Descr *__pyx_v_self) { - npy_intp __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":292 - * @property - * cdef inline npy_intp alignment(self) noexcept nogil: - * return PyDataType_ALIGNMENT(self) # <<<<<<<<<<<<<< - * - * # Use fields/names with care as they may be NULL. 
You must check - */ - __pyx_r = PyDataType_ALIGNMENT(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":291 - * - * @property - * cdef inline npy_intp alignment(self) noexcept nogil: # <<<<<<<<<<<<<< - * return PyDataType_ALIGNMENT(self) - * - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":297 - * # for this using PyDataType_HASFIELDS. - * @property - * cdef inline object fields(self): # <<<<<<<<<<<<<< - * return PyDataType_FIELDS(self) - * - */ - -static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_6fields_fields(PyArray_Descr *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1; - __Pyx_RefNannySetupContext("fields", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":298 - * @property - * cdef inline object fields(self): - * return PyDataType_FIELDS(self) # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyDataType_FIELDS(__pyx_v_self); - __Pyx_INCREF(((PyObject *)__pyx_t_1)); - __pyx_r = ((PyObject *)__pyx_t_1); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":297 - * # for this using PyDataType_HASFIELDS. 
- * @property - * cdef inline object fields(self): # <<<<<<<<<<<<<< - * return PyDataType_FIELDS(self) - * - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":301 - * - * @property - * cdef inline tuple names(self): # <<<<<<<<<<<<<< - * return PyDataType_NAMES(self) - * - */ - -static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1; - __Pyx_RefNannySetupContext("names", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":302 - * @property - * cdef inline tuple names(self): - * return PyDataType_NAMES(self) # <<<<<<<<<<<<<< - * - * # Use PyDataType_HASSUBARRAY to test whether this field is - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyDataType_NAMES(__pyx_v_self); - __Pyx_INCREF(((PyObject*)__pyx_t_1)); - __pyx_r = ((PyObject*)__pyx_t_1); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":301 - * - * @property - * cdef inline tuple names(self): # <<<<<<<<<<<<<< - * return PyDataType_NAMES(self) - * - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":308 - * # this field via the inline helper method PyDataType_SHAPE. 
- * @property - * cdef inline PyArray_ArrayDescr* subarray(self) noexcept nogil: # <<<<<<<<<<<<<< - * return PyDataType_SUBARRAY(self) - * - */ - -static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarray(PyArray_Descr *__pyx_v_self) { - PyArray_ArrayDescr *__pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":309 - * @property - * cdef inline PyArray_ArrayDescr* subarray(self) noexcept nogil: - * return PyDataType_SUBARRAY(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = PyDataType_SUBARRAY(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":308 - * # this field via the inline helper method PyDataType_SHAPE. - * @property - * cdef inline PyArray_ArrayDescr* subarray(self) noexcept nogil: # <<<<<<<<<<<<<< - * return PyDataType_SUBARRAY(self) - * - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":312 - * - * @property - * cdef inline npy_uint64 flags(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The data types flags.""" - * return PyDataType_FLAGS(self) - */ - -static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr *__pyx_v_self) { - npy_uint64 __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":314 - * cdef inline npy_uint64 flags(self) noexcept nogil: - * """The data types flags.""" - * return PyDataType_FLAGS(self) # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = PyDataType_FLAGS(__pyx_v_self); - goto __pyx_L0; - - /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":312 - * - * @property - * cdef inline npy_uint64 flags(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The data types flags.""" - * return PyDataType_FLAGS(self) - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":324 - * - * @property - * cdef inline int numiter(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The number of arrays that need to be broadcast to the same shape.""" - * return PyArray_MultiIter_NUMITER(self) - */ - -static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMultiIterObject *__pyx_v_self) { - int __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":326 - * cdef inline int numiter(self) noexcept nogil: - * """The number of arrays that need to be broadcast to the same shape.""" - * return PyArray_MultiIter_NUMITER(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = PyArray_MultiIter_NUMITER(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":324 - * - * @property - * cdef inline int numiter(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The number of arrays that need to be broadcast to the same shape.""" - * return PyArray_MultiIter_NUMITER(self) - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":329 - * - * @property 
- * cdef inline npy_intp size(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The total broadcasted size.""" - * return PyArray_MultiIter_SIZE(self) - */ - -static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiIterObject *__pyx_v_self) { - npy_intp __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":331 - * cdef inline npy_intp size(self) noexcept nogil: - * """The total broadcasted size.""" - * return PyArray_MultiIter_SIZE(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = PyArray_MultiIter_SIZE(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":329 - * - * @property - * cdef inline npy_intp size(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The total broadcasted size.""" - * return PyArray_MultiIter_SIZE(self) - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":334 - * - * @property - * cdef inline npy_intp index(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The current (1-d) index into the broadcasted result.""" - * return PyArray_MultiIter_INDEX(self) - */ - -static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMultiIterObject *__pyx_v_self) { - npy_intp __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":336 - * cdef inline npy_intp index(self) noexcept nogil: - * """The current (1-d) index into the broadcasted result.""" - * return PyArray_MultiIter_INDEX(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = 
PyArray_MultiIter_INDEX(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":334 - * - * @property - * cdef inline npy_intp index(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The current (1-d) index into the broadcasted result.""" - * return PyArray_MultiIter_INDEX(self) - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":339 - * - * @property - * cdef inline int nd(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The number of dimensions in the broadcasted result.""" - * return PyArray_MultiIter_NDIM(self) - */ - -static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject *__pyx_v_self) { - int __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":341 - * cdef inline int nd(self) noexcept nogil: - * """The number of dimensions in the broadcasted result.""" - * return PyArray_MultiIter_NDIM(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = PyArray_MultiIter_NDIM(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":339 - * - * @property - * cdef inline int nd(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The number of dimensions in the broadcasted result.""" - * return PyArray_MultiIter_NDIM(self) - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":344 - * - * @property - * cdef inline npy_intp* dimensions(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The shape of the broadcasted result.""" - * return PyArray_MultiIter_DIMS(self) - */ - -static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions(PyArrayMultiIterObject *__pyx_v_self) { - npy_intp *__pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":346 - * cdef inline npy_intp* dimensions(self) noexcept nogil: - * """The shape of the broadcasted result.""" - * return PyArray_MultiIter_DIMS(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = PyArray_MultiIter_DIMS(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":344 - * - * @property - * cdef inline npy_intp* dimensions(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The shape of the broadcasted result.""" - * return PyArray_MultiIter_DIMS(self) - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":349 - * - * @property - * cdef inline void** iters(self) noexcept nogil: # <<<<<<<<<<<<<< - * """An array of iterator objects that holds the iterators for the arrays to be broadcast together. 
- * On return, the iterators are adjusted for broadcasting.""" - */ - -static CYTHON_INLINE void **__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiIterObject *__pyx_v_self) { - void **__pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":352 - * """An array of iterator objects that holds the iterators for the arrays to be broadcast together. - * On return, the iterators are adjusted for broadcasting.""" - * return PyArray_MultiIter_ITERS(self) # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = PyArray_MultiIter_ITERS(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":349 - * - * @property - * cdef inline void** iters(self) noexcept nogil: # <<<<<<<<<<<<<< - * """An array of iterator objects that holds the iterators for the arrays to be broadcast together. - * On return, the iterators are adjusted for broadcasting.""" - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":367 - * - * @property - * cdef inline PyObject* base(self) noexcept nogil: # <<<<<<<<<<<<<< - * """Returns a borrowed reference to the object owning the data/memory. - * """ - */ - -static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject *__pyx_v_self) { - PyObject *__pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":370 - * """Returns a borrowed reference to the object owning the data/memory. 
- * """ - * return PyArray_BASE(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = PyArray_BASE(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":367 - * - * @property - * cdef inline PyObject* base(self) noexcept nogil: # <<<<<<<<<<<<<< - * """Returns a borrowed reference to the object owning the data/memory. - * """ - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":373 - * - * @property - * cdef inline dtype descr(self): # <<<<<<<<<<<<<< - * """Returns an owned reference to the dtype of the array. - * """ - */ - -static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArrayObject *__pyx_v_self) { - PyArray_Descr *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyArray_Descr *__pyx_t_1; - __Pyx_RefNannySetupContext("descr", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":376 - * """Returns an owned reference to the dtype of the array. - * """ - * return PyArray_DESCR(self) # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF((PyObject *)__pyx_r); - __pyx_t_1 = PyArray_DESCR(__pyx_v_self); - __Pyx_INCREF((PyObject *)((PyArray_Descr *)__pyx_t_1)); - __pyx_r = ((PyArray_Descr *)__pyx_t_1); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":373 - * - * @property - * cdef inline dtype descr(self): # <<<<<<<<<<<<<< - * """Returns an owned reference to the dtype of the array. 
- * """ - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF((PyObject *)__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":379 - * - * @property - * cdef inline int ndim(self) noexcept nogil: # <<<<<<<<<<<<<< - * """Returns the number of dimensions in the array. - * """ - */ - -static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx_v_self) { - int __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":382 - * """Returns the number of dimensions in the array. - * """ - * return PyArray_NDIM(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = PyArray_NDIM(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":379 - * - * @property - * cdef inline int ndim(self) noexcept nogil: # <<<<<<<<<<<<<< - * """Returns the number of dimensions in the array. - * """ - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":385 - * - * @property - * cdef inline npy_intp *shape(self) noexcept nogil: # <<<<<<<<<<<<<< - * """Returns a pointer to the dimensions/shape of the array. - * The number of elements matches the number of dimensions of the array (ndim). 
- */ - -static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObject *__pyx_v_self) { - npy_intp *__pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":390 - * Can return NULL for 0-dimensional arrays. - * """ - * return PyArray_DIMS(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = PyArray_DIMS(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":385 - * - * @property - * cdef inline npy_intp *shape(self) noexcept nogil: # <<<<<<<<<<<<<< - * """Returns a pointer to the dimensions/shape of the array. - * The number of elements matches the number of dimensions of the array (ndim). - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":393 - * - * @property - * cdef inline npy_intp *strides(self) noexcept nogil: # <<<<<<<<<<<<<< - * """Returns a pointer to the strides of the array. - * The number of elements matches the number of dimensions of the array (ndim). - */ - -static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayObject *__pyx_v_self) { - npy_intp *__pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":397 - * The number of elements matches the number of dimensions of the array (ndim). 
- * """ - * return PyArray_STRIDES(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = PyArray_STRIDES(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":393 - * - * @property - * cdef inline npy_intp *strides(self) noexcept nogil: # <<<<<<<<<<<<<< - * """Returns a pointer to the strides of the array. - * The number of elements matches the number of dimensions of the array (ndim). - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":400 - * - * @property - * cdef inline npy_intp size(self) noexcept nogil: # <<<<<<<<<<<<<< - * """Returns the total size (in number of elements) of the array. - * """ - */ - -static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject *__pyx_v_self) { - npy_intp __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":403 - * """Returns the total size (in number of elements) of the array. - * """ - * return PyArray_SIZE(self) # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = PyArray_SIZE(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":400 - * - * @property - * cdef inline npy_intp size(self) noexcept nogil: # <<<<<<<<<<<<<< - * """Returns the total size (in number of elements) of the array. 
- * """ - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":406 - * - * @property - * cdef inline char* data(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The pointer to the data buffer as a char*. - * This is provided for legacy reasons to avoid direct struct field access. - */ - -static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__pyx_v_self) { - char *__pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":412 - * of `PyArray_DATA()` instead, which returns a 'void*'. - * """ - * return PyArray_BYTES(self) # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = PyArray_BYTES(__pyx_v_self); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":406 - * - * @property - * cdef inline char* data(self) noexcept nogil: # <<<<<<<<<<<<<< - * """The pointer to the data buffer as a char*. - * This is provided for legacy reasons to avoid direct struct field access. 
- */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":824 - * ctypedef long double complex clongdouble_t - * - * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< - * return PyArray_MultiIterNew(1, a) - * - */ - -static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":825 - * - * cdef inline object PyArray_MultiIterNew1(a): - * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< - * - * cdef inline object PyArray_MultiIterNew2(a, b): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 825, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":824 - * ctypedef long double complex clongdouble_t - * - * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< - * return PyArray_MultiIterNew(1, a) - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":827 - * return PyArray_MultiIterNew(1, a) - * - * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< - * return PyArray_MultiIterNew(2, a, b) - * - */ - -static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":828 - * - * cdef inline object PyArray_MultiIterNew2(a, b): - * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< - * - * cdef inline object PyArray_MultiIterNew3(a, b, c): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 828, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":827 - * return PyArray_MultiIterNew(1, a) - * - * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< - * return PyArray_MultiIterNew(2, a, b) - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":830 - * return PyArray_MultiIterNew(2, a, b) - * - * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< - * return PyArray_MultiIterNew(3, a, b, c) - * - */ - -static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":831 - * - * cdef inline object PyArray_MultiIterNew3(a, b, c): - * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< - * - * cdef inline object PyArray_MultiIterNew4(a, b, c, d): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 831, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":830 - * return PyArray_MultiIterNew(2, a, b) - * - * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< - * return PyArray_MultiIterNew(3, a, b, c) - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":833 - * return PyArray_MultiIterNew(3, a, b, c) - * - * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< - * return PyArray_MultiIterNew(4, a, b, c, d) - * - */ - -static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":834 - * - * cdef inline object PyArray_MultiIterNew4(a, b, c, d): - * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< - * - * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 834, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":833 - * return PyArray_MultiIterNew(3, a, b, c) - * - * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< - * return PyArray_MultiIterNew(4, a, b, c, d) - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - 
__Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":836 - * return PyArray_MultiIterNew(4, a, b, c, d) - * - * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< - * return PyArray_MultiIterNew(5, a, b, c, d, e) - * - */ - -static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":837 - * - * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): - * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< - * - * cdef inline tuple PyDataType_SHAPE(dtype d): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 837, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":836 - * return PyArray_MultiIterNew(4, a, b, c, d) - * - * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< - * return PyArray_MultiIterNew(5, a, b, c, d, e) - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", 
__pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":839 - * return PyArray_MultiIterNew(5, a, b, c, d, e) - * - * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< - * if PyDataType_HASSUBARRAY(d): - * return d.subarray.shape - */ - -static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__pyx_v_d) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2; - __Pyx_RefNannySetupContext("PyDataType_SHAPE", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":840 - * - * cdef inline tuple PyDataType_SHAPE(dtype d): - * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< - * return d.subarray.shape - * else: - */ - __pyx_t_1 = PyDataType_HASSUBARRAY(__pyx_v_d); - if (__pyx_t_1) { - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":841 - * cdef inline tuple PyDataType_SHAPE(dtype d): - * if PyDataType_HASSUBARRAY(d): - * return d.subarray.shape # <<<<<<<<<<<<<< - * else: - * return () - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __pyx_f_5numpy_5dtype_8subarray_subarray(__pyx_v_d)->shape; - __Pyx_INCREF(((PyObject*)__pyx_t_2)); - __pyx_r = ((PyObject*)__pyx_t_2); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":840 - * - * cdef inline tuple PyDataType_SHAPE(dtype d): - * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< - * return d.subarray.shape - * else: - 
*/ - } - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":843 - * return d.subarray.shape - * else: - * return () # <<<<<<<<<<<<<< - * - * - */ - /*else*/ { - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_empty_tuple); - __pyx_r = __pyx_empty_tuple; - goto __pyx_L0; - } - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":839 - * return PyArray_MultiIterNew(5, a, b, c, d, e) - * - * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< - * if PyDataType_HASSUBARRAY(d): - * return d.subarray.shape - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1027 - * int _import_umath() except -1 - * - * cdef inline void set_array_base(ndarray arr, object base) except *: # <<<<<<<<<<<<<< - * Py_INCREF(base) # important to do this before stealing the reference below! - * PyArray_SetBaseObject(arr, base) - */ - -static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) { - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1028 - * - * cdef inline void set_array_base(ndarray arr, object base) except *: - * Py_INCREF(base) # important to do this before stealing the reference below! 
# <<<<<<<<<<<<<< - * PyArray_SetBaseObject(arr, base) - * - */ - Py_INCREF(__pyx_v_base); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1029 - * cdef inline void set_array_base(ndarray arr, object base) except *: - * Py_INCREF(base) # important to do this before stealing the reference below! - * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<< - * - * cdef inline object get_array_base(ndarray arr): - */ - __pyx_t_1 = PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(1, 1029, __pyx_L1_error) - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1027 - * int _import_umath() except -1 - * - * cdef inline void set_array_base(ndarray arr, object base) except *: # <<<<<<<<<<<<<< - * Py_INCREF(base) # important to do this before stealing the reference below! 
- * PyArray_SetBaseObject(arr, base) - */ - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("numpy.set_array_base", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_L0:; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1031 - * PyArray_SetBaseObject(arr, base) - * - * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< - * base = PyArray_BASE(arr) - * if base is NULL: - */ - -static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) { - PyObject *__pyx_v_base; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - __Pyx_RefNannySetupContext("get_array_base", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1032 - * - * cdef inline object get_array_base(ndarray arr): - * base = PyArray_BASE(arr) # <<<<<<<<<<<<<< - * if base is NULL: - * return None - */ - __pyx_v_base = PyArray_BASE(__pyx_v_arr); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1033 - * cdef inline object get_array_base(ndarray arr): - * base = PyArray_BASE(arr) - * if base is NULL: # <<<<<<<<<<<<<< - * return None - * return base - */ - __pyx_t_1 = (__pyx_v_base == NULL); - if (__pyx_t_1) { - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1034 - * base = PyArray_BASE(arr) - * if base is NULL: - * return None # <<<<<<<<<<<<<< - * return base - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - - /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1033 - * cdef inline object get_array_base(ndarray arr): - * base = PyArray_BASE(arr) - * if base is NULL: # <<<<<<<<<<<<<< - * return None - * return base - */ - } - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1035 - * if base is NULL: - * return None - * return base # <<<<<<<<<<<<<< - * - * # Versions of the import_* functions which are more suitable for - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(((PyObject *)__pyx_v_base)); - __pyx_r = ((PyObject *)__pyx_v_base); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1031 - * PyArray_SetBaseObject(arr, base) - * - * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< - * base = PyArray_BASE(arr) - * if base is NULL: - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1039 - * # Versions of the import_* functions which are more suitable for - * # Cython code. 
- * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< - * try: - * __pyx_import_array() - */ - -static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - PyObject *__pyx_t_5 = NULL; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("import_array", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1040 - * # Cython code. - * cdef inline int import_array() except -1: - * try: # <<<<<<<<<<<<<< - * __pyx_import_array() - * except Exception: - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_1); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_3); - /*try:*/ { - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1041 - * cdef inline int import_array() except -1: - * try: - * __pyx_import_array() # <<<<<<<<<<<<<< - * except Exception: - * raise ImportError("numpy._core.multiarray failed to import") - */ - __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1041, __pyx_L3_error) - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1040 - * # Cython code. 
- * cdef inline int import_array() except -1: - * try: # <<<<<<<<<<<<<< - * __pyx_import_array() - * except Exception: - */ - } - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - goto __pyx_L8_try_end; - __pyx_L3_error:; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1042 - * try: - * __pyx_import_array() - * except Exception: # <<<<<<<<<<<<<< - * raise ImportError("numpy._core.multiarray failed to import") - * - */ - __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); - if (__pyx_t_4) { - __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 1042, __pyx_L5_except_error) - __Pyx_XGOTREF(__pyx_t_5); - __Pyx_XGOTREF(__pyx_t_6); - __Pyx_XGOTREF(__pyx_t_7); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1043 - * __pyx_import_array() - * except Exception: - * raise ImportError("numpy._core.multiarray failed to import") # <<<<<<<<<<<<<< - * - * cdef inline int import_umath() except -1: - */ - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1043, __pyx_L5_except_error) - __Pyx_GOTREF(__pyx_t_8); - __Pyx_Raise(__pyx_t_8, 0, 0, 0); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __PYX_ERR(1, 1043, __pyx_L5_except_error) - } - goto __pyx_L5_except_error; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1040 - * # Cython code. 
- * cdef inline int import_array() except -1: - * try: # <<<<<<<<<<<<<< - * __pyx_import_array() - * except Exception: - */ - __pyx_L5_except_error:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - goto __pyx_L1_error; - __pyx_L8_try_end:; - } - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1039 - * # Versions of the import_* functions which are more suitable for - * # Cython code. - * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< - * try: - * __pyx_import_array() - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1045 - * raise ImportError("numpy._core.multiarray failed to import") - * - * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< - * try: - * _import_umath() - */ - -static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - PyObject *__pyx_t_5 = NULL; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("import_umath", 1); - - /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1046 - * - * cdef inline int import_umath() except -1: - * try: # <<<<<<<<<<<<<< - * _import_umath() - * except Exception: - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_1); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_3); - /*try:*/ { - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1047 - * cdef inline int import_umath() except -1: - * try: - * _import_umath() # <<<<<<<<<<<<<< - * except Exception: - * raise ImportError("numpy._core.umath failed to import") - */ - __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1047, __pyx_L3_error) - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1046 - * - * cdef inline int import_umath() except -1: - * try: # <<<<<<<<<<<<<< - * _import_umath() - * except Exception: - */ - } - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - goto __pyx_L8_try_end; - __pyx_L3_error:; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1048 - * try: - * _import_umath() - * except Exception: # <<<<<<<<<<<<<< - * raise ImportError("numpy._core.umath failed to import") - * - */ - __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); - if (__pyx_t_4) { - __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); - if 
(__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 1048, __pyx_L5_except_error) - __Pyx_XGOTREF(__pyx_t_5); - __Pyx_XGOTREF(__pyx_t_6); - __Pyx_XGOTREF(__pyx_t_7); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1049 - * _import_umath() - * except Exception: - * raise ImportError("numpy._core.umath failed to import") # <<<<<<<<<<<<<< - * - * cdef inline int import_ufunc() except -1: - */ - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1049, __pyx_L5_except_error) - __Pyx_GOTREF(__pyx_t_8); - __Pyx_Raise(__pyx_t_8, 0, 0, 0); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __PYX_ERR(1, 1049, __pyx_L5_except_error) - } - goto __pyx_L5_except_error; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1046 - * - * cdef inline int import_umath() except -1: - * try: # <<<<<<<<<<<<<< - * _import_umath() - * except Exception: - */ - __pyx_L5_except_error:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - goto __pyx_L1_error; - __pyx_L8_try_end:; - } - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1045 - * raise ImportError("numpy._core.multiarray failed to import") - * - * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< - * try: - * _import_umath() - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, 
__pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1051 - * raise ImportError("numpy._core.umath failed to import") - * - * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< - * try: - * _import_umath() - */ - -static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - PyObject *__pyx_t_5 = NULL; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("import_ufunc", 1); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1052 - * - * cdef inline int import_ufunc() except -1: - * try: # <<<<<<<<<<<<<< - * _import_umath() - * except Exception: - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_1); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_3); - /*try:*/ { - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1053 - * cdef inline int import_ufunc() except -1: - * try: - * _import_umath() # <<<<<<<<<<<<<< - * except Exception: - * raise ImportError("numpy._core.umath failed to import") - */ - __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1053, __pyx_L3_error) - - /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1052 - * - * cdef inline int import_ufunc() except -1: - * try: # <<<<<<<<<<<<<< - * _import_umath() - * except Exception: - */ - } - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - goto __pyx_L8_try_end; - __pyx_L3_error:; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1054 - * try: - * _import_umath() - * except Exception: # <<<<<<<<<<<<<< - * raise ImportError("numpy._core.umath failed to import") - * - */ - __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); - if (__pyx_t_4) { - __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 1054, __pyx_L5_except_error) - __Pyx_XGOTREF(__pyx_t_5); - __Pyx_XGOTREF(__pyx_t_6); - __Pyx_XGOTREF(__pyx_t_7); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1055 - * _import_umath() - * except Exception: - * raise ImportError("numpy._core.umath failed to import") # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1055, __pyx_L5_except_error) - __Pyx_GOTREF(__pyx_t_8); - __Pyx_Raise(__pyx_t_8, 0, 0, 0); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __PYX_ERR(1, 1055, __pyx_L5_except_error) - } - goto __pyx_L5_except_error; - - /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1052 - * - * cdef inline int import_ufunc() except -1: - * try: # <<<<<<<<<<<<<< - * _import_umath() - * except Exception: - */ - __pyx_L5_except_error:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - goto __pyx_L1_error; - __pyx_L8_try_end:; - } - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1051 - * raise ImportError("numpy._core.umath failed to import") - * - * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< - * try: - * _import_umath() - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1058 - * - * - * cdef inline bint is_timedelta64_object(object obj) noexcept: # <<<<<<<<<<<<<< - * """ - * Cython equivalent of `isinstance(obj, np.timedelta64)` - */ - -static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_obj) { - int __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1070 - * bool - * """ - * return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = PyObject_TypeCheck(__pyx_v_obj, 
(&PyTimedeltaArrType_Type)); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1058 - * - * - * cdef inline bint is_timedelta64_object(object obj) noexcept: # <<<<<<<<<<<<<< - * """ - * Cython equivalent of `isinstance(obj, np.timedelta64)` - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1073 - * - * - * cdef inline bint is_datetime64_object(object obj) noexcept: # <<<<<<<<<<<<<< - * """ - * Cython equivalent of `isinstance(obj, np.datetime64)` - */ - -static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_obj) { - int __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1085 - * bool - * """ - * return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyDatetimeArrType_Type)); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1073 - * - * - * cdef inline bint is_datetime64_object(object obj) noexcept: # <<<<<<<<<<<<<< - * """ - * Cython equivalent of `isinstance(obj, np.datetime64)` - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1088 - * - * - * cdef inline npy_datetime get_datetime64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< - * """ - * returns the int64 value underlying scalar numpy 
datetime64 object - */ - -static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject *__pyx_v_obj) { - npy_datetime __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1095 - * also needed. That can be found using `get_datetime64_unit`. - * """ - * return (obj).obval # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = ((PyDatetimeScalarObject *)__pyx_v_obj)->obval; - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1088 - * - * - * cdef inline npy_datetime get_datetime64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< - * """ - * returns the int64 value underlying scalar numpy datetime64 object - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1098 - * - * - * cdef inline npy_timedelta get_timedelta64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< - * """ - * returns the int64 value underlying scalar numpy timedelta64 object - */ - -static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject *__pyx_v_obj) { - npy_timedelta __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1102 - * returns the int64 value underlying scalar numpy timedelta64 object - * """ - * return (obj).obval # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = ((PyTimedeltaScalarObject *)__pyx_v_obj)->obval; - goto __pyx_L0; - - /* 
"../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1098 - * - * - * cdef inline npy_timedelta get_timedelta64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< - * """ - * returns the int64 value underlying scalar numpy timedelta64 object - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1105 - * - * - * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) noexcept nogil: # <<<<<<<<<<<<<< - * """ - * returns the unit part of the dtype for a numpy datetime64 object. - */ - -static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit(PyObject *__pyx_v_obj) { - NPY_DATETIMEUNIT __pyx_r; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1109 - * returns the unit part of the dtype for a numpy datetime64 object. - * """ - * return (obj).obmeta.base # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = ((NPY_DATETIMEUNIT)((PyDatetimeScalarObject *)__pyx_v_obj)->obmeta.base); - goto __pyx_L0; - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1105 - * - * - * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) noexcept nogil: # <<<<<<<<<<<<<< - * """ - * returns the unit part of the dtype for a numpy datetime64 object. 
- */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "cpython/complex.pxd":19 - * - * @property - * cdef inline double real(self) noexcept: # <<<<<<<<<<<<<< - * return self.cval.real - * - */ - -static CYTHON_INLINE double __pyx_f_7cpython_7complex_7complex_4real_real(PyComplexObject *__pyx_v_self) { - double __pyx_r; - - /* "cpython/complex.pxd":20 - * @property - * cdef inline double real(self) noexcept: - * return self.cval.real # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_r = __pyx_v_self->cval.real; - goto __pyx_L0; - - /* "cpython/complex.pxd":19 - * - * @property - * cdef inline double real(self) noexcept: # <<<<<<<<<<<<<< - * return self.cval.real - * - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "cpython/complex.pxd":23 - * - * @property - * cdef inline double imag(self) noexcept: # <<<<<<<<<<<<<< - * return self.cval.imag - * - */ - -static CYTHON_INLINE double __pyx_f_7cpython_7complex_7complex_4imag_imag(PyComplexObject *__pyx_v_self) { - double __pyx_r; - - /* "cpython/complex.pxd":24 - * @property - * cdef inline double imag(self) noexcept: - * return self.cval.imag # <<<<<<<<<<<<<< - * - * # PyTypeObject PyComplex_Type - */ - __pyx_r = __pyx_v_self->cval.imag; - goto __pyx_L0; - - /* "cpython/complex.pxd":23 - * - * @property - * cdef inline double imag(self) noexcept: # <<<<<<<<<<<<<< - * return self.cval.imag - * - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "cpython/contextvars.pxd":112 - * - * - * cdef inline object get_value(var, default_value=None): # <<<<<<<<<<<<<< - * """Return a new reference to the value of the context variable, - * or the default value of the context variable, - */ - -static CYTHON_INLINE PyObject *__pyx_f_7cpython_11contextvars_get_value(PyObject *__pyx_v_var, struct __pyx_opt_args_7cpython_11contextvars_get_value *__pyx_optional_args) { - PyObject *__pyx_v_default_value = ((PyObject *)Py_None); - PyObject *__pyx_v_value; - PyObject 
*__pyx_v_pyvalue = NULL; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("get_value", 1); - if (__pyx_optional_args) { - if (__pyx_optional_args->__pyx_n > 0) { - __pyx_v_default_value = __pyx_optional_args->default_value; - } - } - - /* "cpython/contextvars.pxd":117 - * or None if no such value or default was found. - * """ - * cdef PyObject *value = NULL # <<<<<<<<<<<<<< - * PyContextVar_Get(var, NULL, &value) - * if value is NULL: - */ - __pyx_v_value = NULL; - - /* "cpython/contextvars.pxd":118 - * """ - * cdef PyObject *value = NULL - * PyContextVar_Get(var, NULL, &value) # <<<<<<<<<<<<<< - * if value is NULL: - * # context variable does not have a default - */ - __pyx_t_1 = PyContextVar_Get(__pyx_v_var, NULL, (&__pyx_v_value)); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(2, 118, __pyx_L1_error) - - /* "cpython/contextvars.pxd":119 - * cdef PyObject *value = NULL - * PyContextVar_Get(var, NULL, &value) - * if value is NULL: # <<<<<<<<<<<<<< - * # context variable does not have a default - * pyvalue = default_value - */ - __pyx_t_2 = (__pyx_v_value == NULL); - if (__pyx_t_2) { - - /* "cpython/contextvars.pxd":121 - * if value is NULL: - * # context variable does not have a default - * pyvalue = default_value # <<<<<<<<<<<<<< - * else: - * # value or default value of context variable - */ - __Pyx_INCREF(__pyx_v_default_value); - __pyx_v_pyvalue = __pyx_v_default_value; - - /* "cpython/contextvars.pxd":119 - * cdef PyObject *value = NULL - * PyContextVar_Get(var, NULL, &value) - * if value is NULL: # <<<<<<<<<<<<<< - * # context variable does not have a default - * pyvalue = default_value - */ - goto __pyx_L3; - } - - /* "cpython/contextvars.pxd":124 - * else: - * # value or default value of context variable - * pyvalue = value # <<<<<<<<<<<<<< - * Py_XDECREF(value) # 
PyContextVar_Get() returned an owned reference as 'PyObject*' - * return pyvalue - */ - /*else*/ { - __pyx_t_3 = ((PyObject *)__pyx_v_value); - __Pyx_INCREF(__pyx_t_3); - __pyx_v_pyvalue = __pyx_t_3; - __pyx_t_3 = 0; - - /* "cpython/contextvars.pxd":125 - * # value or default value of context variable - * pyvalue = value - * Py_XDECREF(value) # PyContextVar_Get() returned an owned reference as 'PyObject*' # <<<<<<<<<<<<<< - * return pyvalue - * - */ - Py_XDECREF(__pyx_v_value); - } - __pyx_L3:; - - /* "cpython/contextvars.pxd":126 - * pyvalue = value - * Py_XDECREF(value) # PyContextVar_Get() returned an owned reference as 'PyObject*' - * return pyvalue # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_pyvalue); - __pyx_r = __pyx_v_pyvalue; - goto __pyx_L0; - - /* "cpython/contextvars.pxd":112 - * - * - * cdef inline object get_value(var, default_value=None): # <<<<<<<<<<<<<< - * """Return a new reference to the value of the context variable, - * or the default value of the context variable, - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("cpython.contextvars.get_value", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_pyvalue); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "cpython/contextvars.pxd":129 - * - * - * cdef inline object get_value_no_default(var, default_value=None): # <<<<<<<<<<<<<< - * """Return a new reference to the value of the context variable, - * or the provided default value if no such value was found. 
- */ - -static CYTHON_INLINE PyObject *__pyx_f_7cpython_11contextvars_get_value_no_default(PyObject *__pyx_v_var, struct __pyx_opt_args_7cpython_11contextvars_get_value_no_default *__pyx_optional_args) { - PyObject *__pyx_v_default_value = ((PyObject *)Py_None); - PyObject *__pyx_v_value; - PyObject *__pyx_v_pyvalue = NULL; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("get_value_no_default", 1); - if (__pyx_optional_args) { - if (__pyx_optional_args->__pyx_n > 0) { - __pyx_v_default_value = __pyx_optional_args->default_value; - } - } - - /* "cpython/contextvars.pxd":135 - * Ignores the default value of the context variable, if any. - * """ - * cdef PyObject *value = NULL # <<<<<<<<<<<<<< - * PyContextVar_Get(var, default_value, &value) - * # value of context variable or 'default_value' - */ - __pyx_v_value = NULL; - - /* "cpython/contextvars.pxd":136 - * """ - * cdef PyObject *value = NULL - * PyContextVar_Get(var, default_value, &value) # <<<<<<<<<<<<<< - * # value of context variable or 'default_value' - * pyvalue = value - */ - __pyx_t_1 = PyContextVar_Get(__pyx_v_var, ((PyObject *)__pyx_v_default_value), (&__pyx_v_value)); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(2, 136, __pyx_L1_error) - - /* "cpython/contextvars.pxd":138 - * PyContextVar_Get(var, default_value, &value) - * # value of context variable or 'default_value' - * pyvalue = value # <<<<<<<<<<<<<< - * Py_XDECREF(value) # PyContextVar_Get() returned an owned reference as 'PyObject*' - * return pyvalue - */ - __pyx_t_2 = ((PyObject *)__pyx_v_value); - __Pyx_INCREF(__pyx_t_2); - __pyx_v_pyvalue = __pyx_t_2; - __pyx_t_2 = 0; - - /* "cpython/contextvars.pxd":139 - * # value of context variable or 'default_value' - * pyvalue = value - * Py_XDECREF(value) # PyContextVar_Get() returned an owned reference as 'PyObject*' # 
<<<<<<<<<<<<<< - * return pyvalue - */ - Py_XDECREF(__pyx_v_value); - - /* "cpython/contextvars.pxd":140 - * pyvalue = value - * Py_XDECREF(value) # PyContextVar_Get() returned an owned reference as 'PyObject*' - * return pyvalue # <<<<<<<<<<<<<< - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_pyvalue); - __pyx_r = __pyx_v_pyvalue; - goto __pyx_L0; - - /* "cpython/contextvars.pxd":129 - * - * - * cdef inline object get_value_no_default(var, default_value=None): # <<<<<<<<<<<<<< - * """Return a new reference to the value of the context variable, - * or the provided default value if no such value was found. - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("cpython.contextvars.get_value_no_default", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_pyvalue); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "array.pxd":104 - * __data_union data - * - * def __getbuffer__(self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< - * # This implementation of getbuffer is geared towards Cython - * # requirements, and does not yet fulfill the PEP. 
- */ - -/* Python wrapper */ -CYTHON_UNUSED static int __pyx_pw_7cpython_5array_5array_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/ -CYTHON_UNUSED static int __pyx_pw_7cpython_5array_5array_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_7cpython_5array_5array___getbuffer__(((arrayobject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_7cpython_5array_5array___getbuffer__(arrayobject *__pyx_v_self, Py_buffer *__pyx_v_info, CYTHON_UNUSED int __pyx_v_flags) { - PyObject *__pyx_v_item_count = NULL; - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - char *__pyx_t_2; - int __pyx_t_3; - PyObject *__pyx_t_4 = NULL; - Py_ssize_t __pyx_t_5; - int __pyx_t_6; - char __pyx_t_7; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - if (unlikely(__pyx_v_info == NULL)) { - PyErr_SetString(PyExc_BufferError, "PyObject_GetBuffer: view==NULL argument is obsolete"); - return -1; - } - __Pyx_RefNannySetupContext("__getbuffer__", 0); - __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(__pyx_v_info->obj); - - /* "array.pxd":109 - * # In particular strided access is always provided regardless - * # of flags - * item_count = Py_SIZE(self) # <<<<<<<<<<<<<< - * - * info.suboffsets = NULL - */ - __pyx_t_1 = PyInt_FromSsize_t(Py_SIZE(((PyObject *)__pyx_v_self))); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 109, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_item_count = __pyx_t_1; - __pyx_t_1 = 0; - - /* "array.pxd":111 - * item_count = Py_SIZE(self) - * - * 
info.suboffsets = NULL # <<<<<<<<<<<<<< - * info.buf = self.data.as_chars - * info.readonly = 0 - */ - __pyx_v_info->suboffsets = NULL; - - /* "array.pxd":112 - * - * info.suboffsets = NULL - * info.buf = self.data.as_chars # <<<<<<<<<<<<<< - * info.readonly = 0 - * info.ndim = 1 - */ - __pyx_t_2 = __pyx_v_self->data.as_chars; - __pyx_v_info->buf = __pyx_t_2; - - /* "array.pxd":113 - * info.suboffsets = NULL - * info.buf = self.data.as_chars - * info.readonly = 0 # <<<<<<<<<<<<<< - * info.ndim = 1 - * info.itemsize = self.ob_descr.itemsize # e.g. sizeof(float) - */ - __pyx_v_info->readonly = 0; - - /* "array.pxd":114 - * info.buf = self.data.as_chars - * info.readonly = 0 - * info.ndim = 1 # <<<<<<<<<<<<<< - * info.itemsize = self.ob_descr.itemsize # e.g. sizeof(float) - * info.len = info.itemsize * item_count - */ - __pyx_v_info->ndim = 1; - - /* "array.pxd":115 - * info.readonly = 0 - * info.ndim = 1 - * info.itemsize = self.ob_descr.itemsize # e.g. sizeof(float) # <<<<<<<<<<<<<< - * info.len = info.itemsize * item_count - * - */ - __pyx_t_3 = __pyx_v_self->ob_descr->itemsize; - __pyx_v_info->itemsize = __pyx_t_3; - - /* "array.pxd":116 - * info.ndim = 1 - * info.itemsize = self.ob_descr.itemsize # e.g. 
sizeof(float) - * info.len = info.itemsize * item_count # <<<<<<<<<<<<<< - * - * info.shape = PyObject_Malloc(sizeof(Py_ssize_t) + 2) - */ - __pyx_t_1 = PyInt_FromSsize_t(__pyx_v_info->itemsize); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 116, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyNumber_Multiply(__pyx_t_1, __pyx_v_item_count); if (unlikely(!__pyx_t_4)) __PYX_ERR(3, 116, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_5 = __Pyx_PyIndex_AsSsize_t(__pyx_t_4); if (unlikely((__pyx_t_5 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(3, 116, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_v_info->len = __pyx_t_5; - - /* "array.pxd":118 - * info.len = info.itemsize * item_count - * - * info.shape = PyObject_Malloc(sizeof(Py_ssize_t) + 2) # <<<<<<<<<<<<<< - * if not info.shape: - * raise MemoryError() - */ - __pyx_v_info->shape = ((Py_ssize_t *)PyObject_Malloc(((sizeof(Py_ssize_t)) + 2))); - - /* "array.pxd":119 - * - * info.shape = PyObject_Malloc(sizeof(Py_ssize_t) + 2) - * if not info.shape: # <<<<<<<<<<<<<< - * raise MemoryError() - * info.shape[0] = item_count # constant regardless of resizing - */ - __pyx_t_6 = (!(__pyx_v_info->shape != 0)); - if (unlikely(__pyx_t_6)) { - - /* "array.pxd":120 - * info.shape = PyObject_Malloc(sizeof(Py_ssize_t) + 2) - * if not info.shape: - * raise MemoryError() # <<<<<<<<<<<<<< - * info.shape[0] = item_count # constant regardless of resizing - * info.strides = &info.itemsize - */ - PyErr_NoMemory(); __PYX_ERR(3, 120, __pyx_L1_error) - - /* "array.pxd":119 - * - * info.shape = PyObject_Malloc(sizeof(Py_ssize_t) + 2) - * if not info.shape: # <<<<<<<<<<<<<< - * raise MemoryError() - * info.shape[0] = item_count # constant regardless of resizing - */ - } - - /* "array.pxd":121 - * if not info.shape: - * raise MemoryError() - * info.shape[0] = item_count # constant regardless of resizing # <<<<<<<<<<<<<< - * info.strides = &info.itemsize - * - */ - 
__pyx_t_5 = __Pyx_PyIndex_AsSsize_t(__pyx_v_item_count); if (unlikely((__pyx_t_5 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(3, 121, __pyx_L1_error) - (__pyx_v_info->shape[0]) = __pyx_t_5; - - /* "array.pxd":122 - * raise MemoryError() - * info.shape[0] = item_count # constant regardless of resizing - * info.strides = &info.itemsize # <<<<<<<<<<<<<< - * - * info.format = (info.shape + 1) - */ - __pyx_v_info->strides = (&__pyx_v_info->itemsize); - - /* "array.pxd":124 - * info.strides = &info.itemsize - * - * info.format = (info.shape + 1) # <<<<<<<<<<<<<< - * info.format[0] = self.ob_descr.typecode - * info.format[1] = 0 - */ - __pyx_v_info->format = ((char *)(__pyx_v_info->shape + 1)); - - /* "array.pxd":125 - * - * info.format = (info.shape + 1) - * info.format[0] = self.ob_descr.typecode # <<<<<<<<<<<<<< - * info.format[1] = 0 - * info.obj = self - */ - __pyx_t_7 = __pyx_v_self->ob_descr->typecode; - (__pyx_v_info->format[0]) = __pyx_t_7; - - /* "array.pxd":126 - * info.format = (info.shape + 1) - * info.format[0] = self.ob_descr.typecode - * info.format[1] = 0 # <<<<<<<<<<<<<< - * info.obj = self - * - */ - (__pyx_v_info->format[1]) = 0; - - /* "array.pxd":127 - * info.format[0] = self.ob_descr.typecode - * info.format[1] = 0 - * info.obj = self # <<<<<<<<<<<<<< - * - * def __releasebuffer__(self, Py_buffer* info): - */ - __Pyx_INCREF((PyObject *)__pyx_v_self); - __Pyx_GIVEREF((PyObject *)__pyx_v_self); - __Pyx_GOTREF(__pyx_v_info->obj); - __Pyx_DECREF(__pyx_v_info->obj); - __pyx_v_info->obj = ((PyObject *)__pyx_v_self); - - /* "array.pxd":104 - * __data_union data - * - * def __getbuffer__(self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< - * # This implementation of getbuffer is geared towards Cython - * # requirements, and does not yet fulfill the PEP. 
- */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("cpython.array.array.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - if (__pyx_v_info->obj != NULL) { - __Pyx_GOTREF(__pyx_v_info->obj); - __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0; - } - goto __pyx_L2; - __pyx_L0:; - if (__pyx_v_info->obj == Py_None) { - __Pyx_GOTREF(__pyx_v_info->obj); - __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0; - } - __pyx_L2:; - __Pyx_XDECREF(__pyx_v_item_count); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "array.pxd":129 - * info.obj = self - * - * def __releasebuffer__(self, Py_buffer* info): # <<<<<<<<<<<<<< - * PyObject_Free(info.shape) - * - */ - -/* Python wrapper */ -CYTHON_UNUSED static void __pyx_pw_7cpython_5array_5array_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info); /*proto*/ -CYTHON_UNUSED static void __pyx_pw_7cpython_5array_5array_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__releasebuffer__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_pf_7cpython_5array_5array_2__releasebuffer__(((arrayobject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -static void __pyx_pf_7cpython_5array_5array_2__releasebuffer__(CYTHON_UNUSED arrayobject *__pyx_v_self, Py_buffer *__pyx_v_info) { - - /* "array.pxd":130 - * - * def __releasebuffer__(self, Py_buffer* info): - * PyObject_Free(info.shape) # <<<<<<<<<<<<<< - * - * array newarrayobject(PyTypeObject* type, Py_ssize_t size, arraydescr *descr) - */ - PyObject_Free(__pyx_v_info->shape); - - /* "array.pxd":129 - * info.obj = self - * - * def __releasebuffer__(self, Py_buffer* info): # <<<<<<<<<<<<<< - * 
PyObject_Free(info.shape) - * - */ - - /* function exit code */ -} - -/* "array.pxd":141 - * - * - * cdef inline array clone(array template, Py_ssize_t length, bint zero): # <<<<<<<<<<<<<< - * """ fast creation of a new array, given a template array. - * type will be same as template. - */ - -static CYTHON_INLINE arrayobject *__pyx_f_7cpython_5array_clone(arrayobject *__pyx_v_template, Py_ssize_t __pyx_v_length, int __pyx_v_zero) { - arrayobject *__pyx_v_op = 0; - arrayobject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("clone", 1); - - /* "array.pxd":145 - * type will be same as template. - * if zero is true, new array will be initialized with zeroes.""" - * cdef array op = newarrayobject(Py_TYPE(template), length, template.ob_descr) # <<<<<<<<<<<<<< - * if zero and op is not None: - * memset(op.data.as_chars, 0, length * op.ob_descr.itemsize) - */ - __pyx_t_1 = ((PyObject *)newarrayobject(Py_TYPE(((PyObject *)__pyx_v_template)), __pyx_v_length, __pyx_v_template->ob_descr)); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_op = ((arrayobject *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "array.pxd":146 - * if zero is true, new array will be initialized with zeroes.""" - * cdef array op = newarrayobject(Py_TYPE(template), length, template.ob_descr) - * if zero and op is not None: # <<<<<<<<<<<<<< - * memset(op.data.as_chars, 0, length * op.ob_descr.itemsize) - * return op - */ - if (__pyx_v_zero) { - } else { - __pyx_t_2 = __pyx_v_zero; - goto __pyx_L4_bool_binop_done; - } - __pyx_t_3 = (((PyObject *)__pyx_v_op) != Py_None); - __pyx_t_2 = __pyx_t_3; - __pyx_L4_bool_binop_done:; - if (__pyx_t_2) { - - /* "array.pxd":147 - * cdef array op = newarrayobject(Py_TYPE(template), length, template.ob_descr) - * if zero and op is not None: - * 
memset(op.data.as_chars, 0, length * op.ob_descr.itemsize) # <<<<<<<<<<<<<< - * return op - * - */ - (void)(memset(__pyx_v_op->data.as_chars, 0, (__pyx_v_length * __pyx_v_op->ob_descr->itemsize))); - - /* "array.pxd":146 - * if zero is true, new array will be initialized with zeroes.""" - * cdef array op = newarrayobject(Py_TYPE(template), length, template.ob_descr) - * if zero and op is not None: # <<<<<<<<<<<<<< - * memset(op.data.as_chars, 0, length * op.ob_descr.itemsize) - * return op - */ - } - - /* "array.pxd":148 - * if zero and op is not None: - * memset(op.data.as_chars, 0, length * op.ob_descr.itemsize) - * return op # <<<<<<<<<<<<<< - * - * cdef inline array copy(array self): - */ - __Pyx_XDECREF((PyObject *)__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_op); - __pyx_r = __pyx_v_op; - goto __pyx_L0; - - /* "array.pxd":141 - * - * - * cdef inline array clone(array template, Py_ssize_t length, bint zero): # <<<<<<<<<<<<<< - * """ fast creation of a new array, given a template array. - * type will be same as template. - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("cpython.array.clone", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_op); - __Pyx_XGIVEREF((PyObject *)__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "array.pxd":150 - * return op - * - * cdef inline array copy(array self): # <<<<<<<<<<<<<< - * """ make a copy of an array. 
""" - * cdef array op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr) - */ - -static CYTHON_INLINE arrayobject *__pyx_f_7cpython_5array_copy(arrayobject *__pyx_v_self) { - arrayobject *__pyx_v_op = 0; - arrayobject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("copy", 1); - - /* "array.pxd":152 - * cdef inline array copy(array self): - * """ make a copy of an array. """ - * cdef array op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr) # <<<<<<<<<<<<<< - * memcpy(op.data.as_chars, self.data.as_chars, Py_SIZE(op) * op.ob_descr.itemsize) - * return op - */ - __pyx_t_1 = ((PyObject *)newarrayobject(Py_TYPE(((PyObject *)__pyx_v_self)), Py_SIZE(((PyObject *)__pyx_v_self)), __pyx_v_self->ob_descr)); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 152, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_op = ((arrayobject *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "array.pxd":153 - * """ make a copy of an array. """ - * cdef array op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr) - * memcpy(op.data.as_chars, self.data.as_chars, Py_SIZE(op) * op.ob_descr.itemsize) # <<<<<<<<<<<<<< - * return op - * - */ - (void)(memcpy(__pyx_v_op->data.as_chars, __pyx_v_self->data.as_chars, (Py_SIZE(((PyObject *)__pyx_v_op)) * __pyx_v_op->ob_descr->itemsize))); - - /* "array.pxd":154 - * cdef array op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr) - * memcpy(op.data.as_chars, self.data.as_chars, Py_SIZE(op) * op.ob_descr.itemsize) - * return op # <<<<<<<<<<<<<< - * - * cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n) except -1: - */ - __Pyx_XDECREF((PyObject *)__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_op); - __pyx_r = __pyx_v_op; - goto __pyx_L0; - - /* "array.pxd":150 - * return op - * - * cdef inline array copy(array self): # <<<<<<<<<<<<<< - * """ make a copy of an array. 
""" - * cdef array op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("cpython.array.copy", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_op); - __Pyx_XGIVEREF((PyObject *)__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "array.pxd":156 - * return op - * - * cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n) except -1: # <<<<<<<<<<<<<< - * """ efficient appending of new stuff of same type - * (e.g. of same array type) - */ - -static CYTHON_INLINE int __pyx_f_7cpython_5array_extend_buffer(arrayobject *__pyx_v_self, char *__pyx_v_stuff, Py_ssize_t __pyx_v_n) { - Py_ssize_t __pyx_v_itemsize; - Py_ssize_t __pyx_v_origsize; - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - - /* "array.pxd":160 - * (e.g. of same array type) - * n: number of elements (not number of bytes!) """ - * cdef Py_ssize_t itemsize = self.ob_descr.itemsize # <<<<<<<<<<<<<< - * cdef Py_ssize_t origsize = Py_SIZE(self) - * resize_smart(self, origsize + n) - */ - __pyx_t_1 = __pyx_v_self->ob_descr->itemsize; - __pyx_v_itemsize = __pyx_t_1; - - /* "array.pxd":161 - * n: number of elements (not number of bytes!) 
""" - * cdef Py_ssize_t itemsize = self.ob_descr.itemsize - * cdef Py_ssize_t origsize = Py_SIZE(self) # <<<<<<<<<<<<<< - * resize_smart(self, origsize + n) - * memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize) - */ - __pyx_v_origsize = Py_SIZE(((PyObject *)__pyx_v_self)); - - /* "array.pxd":162 - * cdef Py_ssize_t itemsize = self.ob_descr.itemsize - * cdef Py_ssize_t origsize = Py_SIZE(self) - * resize_smart(self, origsize + n) # <<<<<<<<<<<<<< - * memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize) - * return 0 - */ - __pyx_t_1 = resize_smart(__pyx_v_self, (__pyx_v_origsize + __pyx_v_n)); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(3, 162, __pyx_L1_error) - - /* "array.pxd":163 - * cdef Py_ssize_t origsize = Py_SIZE(self) - * resize_smart(self, origsize + n) - * memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize) # <<<<<<<<<<<<<< - * return 0 - * - */ - (void)(memcpy((__pyx_v_self->data.as_chars + (__pyx_v_origsize * __pyx_v_itemsize)), __pyx_v_stuff, (__pyx_v_n * __pyx_v_itemsize))); - - /* "array.pxd":164 - * resize_smart(self, origsize + n) - * memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize) - * return 0 # <<<<<<<<<<<<<< - * - * cdef inline int extend(array self, array other) except -1: - */ - __pyx_r = 0; - goto __pyx_L0; - - /* "array.pxd":156 - * return op - * - * cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n) except -1: # <<<<<<<<<<<<<< - * """ efficient appending of new stuff of same type - * (e.g. of same array type) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("cpython.array.extend_buffer", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "array.pxd":166 - * return 0 - * - * cdef inline int extend(array self, array other) except -1: # <<<<<<<<<<<<<< - * """ extend array with data from another array; types must match. 
""" - * if self.ob_descr.typecode != other.ob_descr.typecode: - */ - -static CYTHON_INLINE int __pyx_f_7cpython_5array_extend(arrayobject *__pyx_v_self, arrayobject *__pyx_v_other) { - int __pyx_r; - int __pyx_t_1; - int __pyx_t_2; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - - /* "array.pxd":168 - * cdef inline int extend(array self, array other) except -1: - * """ extend array with data from another array; types must match. """ - * if self.ob_descr.typecode != other.ob_descr.typecode: # <<<<<<<<<<<<<< - * PyErr_BadArgument() - * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) - */ - __pyx_t_1 = (__pyx_v_self->ob_descr->typecode != __pyx_v_other->ob_descr->typecode); - if (__pyx_t_1) { - - /* "array.pxd":169 - * """ extend array with data from another array; types must match. """ - * if self.ob_descr.typecode != other.ob_descr.typecode: - * PyErr_BadArgument() # <<<<<<<<<<<<<< - * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) - * - */ - __pyx_t_2 = PyErr_BadArgument(); if (unlikely(__pyx_t_2 == ((int)0))) __PYX_ERR(3, 169, __pyx_L1_error) - - /* "array.pxd":168 - * cdef inline int extend(array self, array other) except -1: - * """ extend array with data from another array; types must match. 
""" - * if self.ob_descr.typecode != other.ob_descr.typecode: # <<<<<<<<<<<<<< - * PyErr_BadArgument() - * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) - */ - } - - /* "array.pxd":170 - * if self.ob_descr.typecode != other.ob_descr.typecode: - * PyErr_BadArgument() - * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) # <<<<<<<<<<<<<< - * - * cdef inline void zero(array self) noexcept: - */ - __pyx_t_2 = __pyx_f_7cpython_5array_extend_buffer(__pyx_v_self, __pyx_v_other->data.as_chars, Py_SIZE(((PyObject *)__pyx_v_other))); if (unlikely(__pyx_t_2 == ((int)-1))) __PYX_ERR(3, 170, __pyx_L1_error) - __pyx_r = __pyx_t_2; - goto __pyx_L0; - - /* "array.pxd":166 - * return 0 - * - * cdef inline int extend(array self, array other) except -1: # <<<<<<<<<<<<<< - * """ extend array with data from another array; types must match. """ - * if self.ob_descr.typecode != other.ob_descr.typecode: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("cpython.array.extend", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "array.pxd":172 - * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) - * - * cdef inline void zero(array self) noexcept: # <<<<<<<<<<<<<< - * """ set all elements of array to zero. """ - * memset(self.data.as_chars, 0, Py_SIZE(self) * self.ob_descr.itemsize) - */ - -static CYTHON_INLINE void __pyx_f_7cpython_5array_zero(arrayobject *__pyx_v_self) { - - /* "array.pxd":174 - * cdef inline void zero(array self) noexcept: - * """ set all elements of array to zero. 
""" - * memset(self.data.as_chars, 0, Py_SIZE(self) * self.ob_descr.itemsize) # <<<<<<<<<<<<<< - */ - (void)(memset(__pyx_v_self->data.as_chars, 0, (Py_SIZE(((PyObject *)__pyx_v_self)) * __pyx_v_self->ob_descr->itemsize))); - - /* "array.pxd":172 - * return extend_buffer(self, other.data.as_chars, Py_SIZE(other)) - * - * cdef inline void zero(array self) noexcept: # <<<<<<<<<<<<<< - * """ set all elements of array to zero. """ - * memset(self.data.as_chars, 0, Py_SIZE(self) * self.ob_descr.itemsize) - */ - - /* function exit code */ -} - -/* "jcvi/assembly/chic.pyx":34 - * - * - * def score_evaluate_M(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=2] tour_M=None): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_8assembly_4chic_1score_evaluate_M(PyObject *__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyMethodDef __pyx_mdef_4jcvi_8assembly_4chic_1score_evaluate_M = {"score_evaluate_M", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_8assembly_4chic_1score_evaluate_M, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_4jcvi_8assembly_4chic_1score_evaluate_M(PyObject *__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - arrayobject *__pyx_v_tour = 0; - PyArrayObject *__pyx_v_tour_sizes = 0; - PyArrayObject *__pyx_v_tour_M = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[3] = {0,0,0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("score_evaluate_M (wrapper)", 0); - 
#if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_tour,&__pyx_n_s_tour_sizes,&__pyx_n_s_tour_M,0}; - - /* "jcvi/assembly/chic.pyx":35 - * - * def score_evaluate_M(array.array[int] tour, - * np.ndarray[INT, ndim=1] tour_sizes=None, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=2] tour_M=None): - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - */ - values[1] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); - - /* "jcvi/assembly/chic.pyx":36 - * def score_evaluate_M(array.array[int] tour, - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=2] tour_M=None): # <<<<<<<<<<<<<< - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - sizes_oo // 2 - */ - values[2] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 34, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, 
__pyx_n_s_tour_sizes); - if (value) { values[1] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 34, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 2: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour_M); - if (value) { values[2] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 34, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "score_evaluate_M") < 0)) __PYX_ERR(0, 34, __pyx_L3_error) - } - } else { - switch (__pyx_nargs) { - case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - __pyx_v_tour = ((arrayobject *)values[0]); - __pyx_v_tour_sizes = ((PyArrayObject *)values[1]); - __pyx_v_tour_M = ((PyArrayObject *)values[2]); - } - goto __pyx_L6_skip; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("score_evaluate_M", 0, 1, 3, __pyx_nargs); __PYX_ERR(0, 34, __pyx_L3_error) - __pyx_L6_skip:; - goto __pyx_L4_argument_unpacking_done; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_M", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour), __pyx_ptype_7cpython_5array_array, 1, "tour", 0))) __PYX_ERR(0, 34, __pyx_L1_error) - if (unlikely(!__Pyx_ArgTypeTest(((PyObject 
*)__pyx_v_tour_sizes), __pyx_ptype_5numpy_ndarray, 1, "tour_sizes", 0))) __PYX_ERR(0, 35, __pyx_L1_error) - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour_M), __pyx_ptype_5numpy_ndarray, 1, "tour_M", 0))) __PYX_ERR(0, 36, __pyx_L1_error) - __pyx_r = __pyx_pf_4jcvi_8assembly_4chic_score_evaluate_M(__pyx_self, __pyx_v_tour, __pyx_v_tour_sizes, __pyx_v_tour_M); - - /* "jcvi/assembly/chic.pyx":34 - * - * - * def score_evaluate_M(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=2] tour_M=None): - */ - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = NULL; - __pyx_L0:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_8assembly_4chic_score_evaluate_M(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_M) { - PyArrayObject *__pyx_v_sizes_oo = 0; - PyArrayObject *__pyx_v_sizes_cum = 0; - double __pyx_v_s; - int __pyx_v_size; - int __pyx_v_a; - int __pyx_v_b; - int __pyx_v_ia; - int __pyx_v_ib; - int __pyx_v_links; - double __pyx_v_dist; - __Pyx_LocalBuf_ND __pyx_pybuffernd_sizes_cum; - __Pyx_Buffer __pyx_pybuffer_sizes_cum; - __Pyx_LocalBuf_ND __pyx_pybuffernd_sizes_oo; - __Pyx_Buffer __pyx_pybuffer_sizes_oo; - __Pyx_LocalBuf_ND __pyx_pybuffernd_tour; - __Pyx_Buffer __pyx_pybuffer_tour; - __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_M; - __Pyx_Buffer __pyx_pybuffer_tour_M; - __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_sizes; - __Pyx_Buffer __pyx_pybuffer_tour_sizes; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyArrayObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - unsigned int __pyx_t_5; - PyArrayObject 
*__pyx_t_6 = NULL; - Py_ssize_t __pyx_t_7; - int __pyx_t_8; - int __pyx_t_9; - int __pyx_t_10; - Py_ssize_t __pyx_t_11; - int __pyx_t_12; - int __pyx_t_13; - int __pyx_t_14; - Py_ssize_t __pyx_t_15; - int __pyx_t_16; - int __pyx_t_17; - double __pyx_t_18; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("score_evaluate_M", 1); - __pyx_pybuffer_sizes_oo.pybuffer.buf = NULL; - __pyx_pybuffer_sizes_oo.refcount = 0; - __pyx_pybuffernd_sizes_oo.data = NULL; - __pyx_pybuffernd_sizes_oo.rcbuffer = &__pyx_pybuffer_sizes_oo; - __pyx_pybuffer_sizes_cum.pybuffer.buf = NULL; - __pyx_pybuffer_sizes_cum.refcount = 0; - __pyx_pybuffernd_sizes_cum.data = NULL; - __pyx_pybuffernd_sizes_cum.rcbuffer = &__pyx_pybuffer_sizes_cum; - __pyx_pybuffer_tour.pybuffer.buf = NULL; - __pyx_pybuffer_tour.refcount = 0; - __pyx_pybuffernd_tour.data = NULL; - __pyx_pybuffernd_tour.rcbuffer = &__pyx_pybuffer_tour; - __pyx_pybuffer_tour_sizes.pybuffer.buf = NULL; - __pyx_pybuffer_tour_sizes.refcount = 0; - __pyx_pybuffernd_tour_sizes.data = NULL; - __pyx_pybuffernd_tour_sizes.rcbuffer = &__pyx_pybuffer_tour_sizes; - __pyx_pybuffer_tour_M.pybuffer.buf = NULL; - __pyx_pybuffer_tour_M.refcount = 0; - __pyx_pybuffernd_tour_M.data = NULL; - __pyx_pybuffernd_tour_M.rcbuffer = &__pyx_pybuffer_tour_M; - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour, &__Pyx_TypeInfo_int, PyBUF_FORMAT| PyBUF_INDIRECT, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 34, __pyx_L1_error) - } - __pyx_pybuffernd_tour.diminfo[0].strides = __pyx_pybuffernd_tour.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour.diminfo[0].shape = __pyx_pybuffernd_tour.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour.diminfo[0].suboffsets = __pyx_pybuffernd_tour.rcbuffer->pybuffer.suboffsets[0]; - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if 
(unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_sizes, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 34, __pyx_L1_error) - } - __pyx_pybuffernd_tour_sizes.diminfo[0].strides = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_sizes.diminfo[0].shape = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.shape[0]; - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_M.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_M, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 34, __pyx_L1_error) - } - __pyx_pybuffernd_tour_M.diminfo[0].strides = __pyx_pybuffernd_tour_M.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_M.diminfo[0].shape = __pyx_pybuffernd_tour_M.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour_M.diminfo[1].strides = __pyx_pybuffernd_tour_M.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_tour_M.diminfo[1].shape = __pyx_pybuffernd_tour_M.rcbuffer->pybuffer.shape[1]; - - /* "jcvi/assembly/chic.pyx":37 - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=2] tour_M=None): - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] # <<<<<<<<<<<<<< - * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - sizes_oo // 2 - * - */ - __pyx_t_1 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_tour_sizes), ((PyObject *)__pyx_v_tour)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 37, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 37, __pyx_L1_error) - __pyx_t_2 = ((PyArrayObject *)__pyx_t_1); - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer, (PyObject*)__pyx_t_2, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, 
__pyx_stack) == -1)) { - __pyx_v_sizes_oo = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.buf = NULL; - __PYX_ERR(0, 37, __pyx_L1_error) - } else {__pyx_pybuffernd_sizes_oo.diminfo[0].strides = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_oo.diminfo[0].shape = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.shape[0]; - } - } - __pyx_t_2 = 0; - __pyx_v_sizes_oo = ((PyArrayObject *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/assembly/chic.pyx":38 - * np.ndarray[INT, ndim=2] tour_M=None): - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - sizes_oo // 2 # <<<<<<<<<<<<<< - * - * cdef double s = 0.0 - */ - __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 38, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cumsum); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 38, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = NULL; - __pyx_t_5 = 0; - #if CYTHON_UNPACK_METHODS - if (unlikely(PyMethod_Check(__pyx_t_4))) { - __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4); - if (likely(__pyx_t_3)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); - __Pyx_INCREF(__pyx_t_3); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_4, function); - __pyx_t_5 = 1; - } - } - #endif - { - PyObject *__pyx_callargs[2] = {__pyx_t_3, ((PyObject *)__pyx_v_sizes_oo)}; - __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5); - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 38, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - } - __pyx_t_4 = PyNumber_FloorDivide(((PyObject *)__pyx_v_sizes_oo), __pyx_int_2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 38, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = 
PyNumber_Subtract(__pyx_t_1, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 38, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 38, __pyx_L1_error) - __pyx_t_6 = ((PyArrayObject *)__pyx_t_3); - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { - __pyx_v_sizes_cum = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf = NULL; - __PYX_ERR(0, 38, __pyx_L1_error) - } else {__pyx_pybuffernd_sizes_cum.diminfo[0].strides = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_cum.diminfo[0].shape = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.shape[0]; - } - } - __pyx_t_6 = 0; - __pyx_v_sizes_cum = ((PyArrayObject *)__pyx_t_3); - __pyx_t_3 = 0; - - /* "jcvi/assembly/chic.pyx":40 - * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - sizes_oo // 2 - * - * cdef double s = 0.0 # <<<<<<<<<<<<<< - * cdef int size = len(tour) - * cdef int a, b, ia, ib - */ - __pyx_v_s = 0.0; - - /* "jcvi/assembly/chic.pyx":41 - * - * cdef double s = 0.0 - * cdef int size = len(tour) # <<<<<<<<<<<<<< - * cdef int a, b, ia, ib - * cdef int links - */ - if (unlikely(((PyObject *)__pyx_v_tour) == Py_None)) { - PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); - __PYX_ERR(0, 41, __pyx_L1_error) - } - __pyx_t_7 = Py_SIZE(((PyObject *)__pyx_v_tour)); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 41, __pyx_L1_error) - __pyx_v_size = __pyx_t_7; - - /* "jcvi/assembly/chic.pyx":45 - * cdef int links - * cdef double dist - * for ia in range(size): # <<<<<<<<<<<<<< - * a = tour[ia] - * for ib in 
range(ia + 1, size): - */ - __pyx_t_8 = __pyx_v_size; - __pyx_t_9 = __pyx_t_8; - for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) { - __pyx_v_ia = __pyx_t_10; - - /* "jcvi/assembly/chic.pyx":46 - * cdef double dist - * for ia in range(size): - * a = tour[ia] # <<<<<<<<<<<<<< - * for ib in range(ia + 1, size): - * b = tour[ib] - */ - __pyx_t_11 = __pyx_v_ia; - __pyx_v_a = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); - - /* "jcvi/assembly/chic.pyx":47 - * for ia in range(size): - * a = tour[ia] - * for ib in range(ia + 1, size): # <<<<<<<<<<<<<< - * b = tour[ib] - * links = tour_M[a, b] - */ - __pyx_t_12 = __pyx_v_size; - __pyx_t_13 = __pyx_t_12; - for (__pyx_t_14 = (__pyx_v_ia + 1); __pyx_t_14 < __pyx_t_13; __pyx_t_14+=1) { - __pyx_v_ib = __pyx_t_14; - - /* "jcvi/assembly/chic.pyx":48 - * a = tour[ia] - * for ib in range(ia + 1, size): - * b = tour[ib] # <<<<<<<<<<<<<< - * links = tour_M[a, b] - * if links == 0: - */ - __pyx_t_11 = __pyx_v_ib; - __pyx_v_b = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); - - /* "jcvi/assembly/chic.pyx":49 - * for ib in range(ia + 1, size): - * b = tour[ib] - * links = tour_M[a, b] # <<<<<<<<<<<<<< - * if links == 0: - * continue - */ - __pyx_t_11 = __pyx_v_a; - __pyx_t_15 = __pyx_v_b; - __pyx_t_3 = (PyObject *) *__Pyx_BufPtrStrided2d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_tour_M.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour_M.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_tour_M.diminfo[1].strides); - if (unlikely(__pyx_t_3 == NULL)) __pyx_t_3 = Py_None; - __Pyx_INCREF((PyObject*)__pyx_t_3); - __pyx_t_16 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_16 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 49, __pyx_L1_error) - 
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_links = __pyx_t_16; - - /* "jcvi/assembly/chic.pyx":50 - * b = tour[ib] - * links = tour_M[a, b] - * if links == 0: # <<<<<<<<<<<<<< - * continue - * dist = sizes_cum[ib] - sizes_cum[ia] - */ - __pyx_t_17 = (__pyx_v_links == 0); - if (__pyx_t_17) { - - /* "jcvi/assembly/chic.pyx":51 - * links = tour_M[a, b] - * if links == 0: - * continue # <<<<<<<<<<<<<< - * dist = sizes_cum[ib] - sizes_cum[ia] - * if dist > LIMIT: - */ - goto __pyx_L5_continue; - - /* "jcvi/assembly/chic.pyx":50 - * b = tour[ib] - * links = tour_M[a, b] - * if links == 0: # <<<<<<<<<<<<<< - * continue - * dist = sizes_cum[ib] - sizes_cum[ia] - */ - } - - /* "jcvi/assembly/chic.pyx":52 - * if links == 0: - * continue - * dist = sizes_cum[ib] - sizes_cum[ia] # <<<<<<<<<<<<<< - * if dist > LIMIT: - * break - */ - __pyx_t_15 = __pyx_v_ib; - __pyx_t_3 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_15, __pyx_pybuffernd_sizes_cum.diminfo[0].strides); - if (unlikely(__pyx_t_3 == NULL)) __pyx_t_3 = Py_None; - __Pyx_INCREF((PyObject*)__pyx_t_3); - __pyx_t_15 = __pyx_v_ia; - __pyx_t_4 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_15, __pyx_pybuffernd_sizes_cum.diminfo[0].strides); - if (unlikely(__pyx_t_4 == NULL)) __pyx_t_4 = Py_None; - __Pyx_INCREF((PyObject*)__pyx_t_4); - __pyx_t_1 = PyNumber_Subtract(__pyx_t_3, __pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 52, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_18 = __pyx_PyFloat_AsDouble(__pyx_t_1); if (unlikely((__pyx_t_18 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 52, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_dist = __pyx_t_18; - - /* "jcvi/assembly/chic.pyx":53 - * continue - * dist = sizes_cum[ib] - 
sizes_cum[ia] - * if dist > LIMIT: # <<<<<<<<<<<<<< - * break - * s += links / dist - */ - __pyx_t_17 = (__pyx_v_dist > 10000000.0); - if (__pyx_t_17) { - - /* "jcvi/assembly/chic.pyx":54 - * dist = sizes_cum[ib] - sizes_cum[ia] - * if dist > LIMIT: - * break # <<<<<<<<<<<<<< - * s += links / dist - * return s, - */ - goto __pyx_L6_break; - - /* "jcvi/assembly/chic.pyx":53 - * continue - * dist = sizes_cum[ib] - sizes_cum[ia] - * if dist > LIMIT: # <<<<<<<<<<<<<< - * break - * s += links / dist - */ - } - - /* "jcvi/assembly/chic.pyx":55 - * if dist > LIMIT: - * break - * s += links / dist # <<<<<<<<<<<<<< - * return s, - * - */ - __pyx_v_s = (__pyx_v_s + (((double)__pyx_v_links) / __pyx_v_dist)); - __pyx_L5_continue:; - } - __pyx_L6_break:; - } - - /* "jcvi/assembly/chic.pyx":56 - * break - * s += links / dist - * return s, # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyFloat_FromDouble(__pyx_v_s); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 56, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 56, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1)) __PYX_ERR(0, 56, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L0; - - /* "jcvi/assembly/chic.pyx":34 - * - * - * def score_evaluate_M(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=2] tour_M=None): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); - 
__Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_M.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); - __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} - __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_M", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - goto __pyx_L2; - __pyx_L0:; - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_M.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); - __pyx_L2:; - __Pyx_XDECREF((PyObject *)__pyx_v_sizes_oo); - __Pyx_XDECREF((PyObject *)__pyx_v_sizes_cum); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/assembly/chic.pyx":59 - * - * - * def score_evaluate_P(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_P=None): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_8assembly_4chic_3score_evaluate_P(PyObject *__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyMethodDef __pyx_mdef_4jcvi_8assembly_4chic_3score_evaluate_P = {"score_evaluate_P", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_8assembly_4chic_3score_evaluate_P, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_4jcvi_8assembly_4chic_3score_evaluate_P(PyObject *__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - arrayobject *__pyx_v_tour = 0; - 
PyArrayObject *__pyx_v_tour_sizes = 0; - PyArrayObject *__pyx_v_tour_P = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[3] = {0,0,0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("score_evaluate_P (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_tour,&__pyx_n_s_tour_sizes,&__pyx_n_s_tour_P,0}; - - /* "jcvi/assembly/chic.pyx":60 - * - * def score_evaluate_P(array.array[int] tour, - * np.ndarray[INT, ndim=1] tour_sizes=None, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=3] tour_P=None): - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - */ - values[1] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); - - /* "jcvi/assembly/chic.pyx":61 - * def score_evaluate_P(array.array[int] tour, - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_P=None): # <<<<<<<<<<<<<< - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - */ - values[2] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) 
{ - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 59, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour_sizes); - if (value) { values[1] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 59, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 2: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour_P); - if (value) { values[2] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 59, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "score_evaluate_P") < 0)) __PYX_ERR(0, 59, __pyx_L3_error) - } - } else { - switch (__pyx_nargs) { - case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - __pyx_v_tour = ((arrayobject *)values[0]); - __pyx_v_tour_sizes = ((PyArrayObject *)values[1]); - __pyx_v_tour_P = ((PyArrayObject *)values[2]); - } - goto __pyx_L6_skip; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("score_evaluate_P", 0, 1, 3, __pyx_nargs); __PYX_ERR(0, 59, __pyx_L3_error) - __pyx_L6_skip:; - goto __pyx_L4_argument_unpacking_done; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - 
__Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_P", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour), __pyx_ptype_7cpython_5array_array, 1, "tour", 0))) __PYX_ERR(0, 59, __pyx_L1_error) - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour_sizes), __pyx_ptype_5numpy_ndarray, 1, "tour_sizes", 0))) __PYX_ERR(0, 60, __pyx_L1_error) - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour_P), __pyx_ptype_5numpy_ndarray, 1, "tour_P", 0))) __PYX_ERR(0, 61, __pyx_L1_error) - __pyx_r = __pyx_pf_4jcvi_8assembly_4chic_2score_evaluate_P(__pyx_self, __pyx_v_tour, __pyx_v_tour_sizes, __pyx_v_tour_P); - - /* "jcvi/assembly/chic.pyx":59 - * - * - * def score_evaluate_P(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_P=None): - */ - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = NULL; - __pyx_L0:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_8assembly_4chic_2score_evaluate_P(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_P) { - PyArrayObject *__pyx_v_sizes_oo = 0; - PyArrayObject *__pyx_v_sizes_cum = 0; - double __pyx_v_s; - int __pyx_v_size; - int __pyx_v_a; - int __pyx_v_b; - int __pyx_v_c; - int __pyx_v_ia; - int __pyx_v_ib; - double __pyx_v_dist; - __Pyx_LocalBuf_ND __pyx_pybuffernd_sizes_cum; - __Pyx_Buffer __pyx_pybuffer_sizes_cum; - __Pyx_LocalBuf_ND __pyx_pybuffernd_sizes_oo; - __Pyx_Buffer __pyx_pybuffer_sizes_oo; - __Pyx_LocalBuf_ND __pyx_pybuffernd_tour; - 
__Pyx_Buffer __pyx_pybuffer_tour; - __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_P; - __Pyx_Buffer __pyx_pybuffer_tour_P; - __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_sizes; - __Pyx_Buffer __pyx_pybuffer_tour_sizes; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyArrayObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - unsigned int __pyx_t_5; - PyArrayObject *__pyx_t_6 = NULL; - Py_ssize_t __pyx_t_7; - int __pyx_t_8; - int __pyx_t_9; - int __pyx_t_10; - Py_ssize_t __pyx_t_11; - int __pyx_t_12; - int __pyx_t_13; - int __pyx_t_14; - double __pyx_t_15; - int __pyx_t_16; - Py_ssize_t __pyx_t_17; - Py_ssize_t __pyx_t_18; - int __pyx_t_19; - PyObject *__pyx_t_20 = NULL; - PyObject *__pyx_t_21 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("score_evaluate_P", 1); - __pyx_pybuffer_sizes_oo.pybuffer.buf = NULL; - __pyx_pybuffer_sizes_oo.refcount = 0; - __pyx_pybuffernd_sizes_oo.data = NULL; - __pyx_pybuffernd_sizes_oo.rcbuffer = &__pyx_pybuffer_sizes_oo; - __pyx_pybuffer_sizes_cum.pybuffer.buf = NULL; - __pyx_pybuffer_sizes_cum.refcount = 0; - __pyx_pybuffernd_sizes_cum.data = NULL; - __pyx_pybuffernd_sizes_cum.rcbuffer = &__pyx_pybuffer_sizes_cum; - __pyx_pybuffer_tour.pybuffer.buf = NULL; - __pyx_pybuffer_tour.refcount = 0; - __pyx_pybuffernd_tour.data = NULL; - __pyx_pybuffernd_tour.rcbuffer = &__pyx_pybuffer_tour; - __pyx_pybuffer_tour_sizes.pybuffer.buf = NULL; - __pyx_pybuffer_tour_sizes.refcount = 0; - __pyx_pybuffernd_tour_sizes.data = NULL; - __pyx_pybuffernd_tour_sizes.rcbuffer = &__pyx_pybuffer_tour_sizes; - __pyx_pybuffer_tour_P.pybuffer.buf = NULL; - __pyx_pybuffer_tour_P.refcount = 0; - __pyx_pybuffernd_tour_P.data = NULL; - __pyx_pybuffernd_tour_P.rcbuffer = &__pyx_pybuffer_tour_P; - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour.rcbuffer->pybuffer, 
(PyObject*)__pyx_v_tour, &__Pyx_TypeInfo_int, PyBUF_FORMAT| PyBUF_INDIRECT, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 59, __pyx_L1_error) - } - __pyx_pybuffernd_tour.diminfo[0].strides = __pyx_pybuffernd_tour.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour.diminfo[0].shape = __pyx_pybuffernd_tour.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour.diminfo[0].suboffsets = __pyx_pybuffernd_tour.rcbuffer->pybuffer.suboffsets[0]; - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_sizes, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 59, __pyx_L1_error) - } - __pyx_pybuffernd_tour_sizes.diminfo[0].strides = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_sizes.diminfo[0].shape = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.shape[0]; - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_P.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_P, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 3, 0, __pyx_stack) == -1)) __PYX_ERR(0, 59, __pyx_L1_error) - } - __pyx_pybuffernd_tour_P.diminfo[0].strides = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_P.diminfo[0].shape = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour_P.diminfo[1].strides = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_tour_P.diminfo[1].shape = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.shape[1]; __pyx_pybuffernd_tour_P.diminfo[2].strides = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.strides[2]; __pyx_pybuffernd_tour_P.diminfo[2].shape = __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.shape[2]; - - /* "jcvi/assembly/chic.pyx":62 - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_P=None): - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] # <<<<<<<<<<<<<< - * 
cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - * - */ - __pyx_t_1 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_tour_sizes), ((PyObject *)__pyx_v_tour)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 62, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 62, __pyx_L1_error) - __pyx_t_2 = ((PyArrayObject *)__pyx_t_1); - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer, (PyObject*)__pyx_t_2, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { - __pyx_v_sizes_oo = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.buf = NULL; - __PYX_ERR(0, 62, __pyx_L1_error) - } else {__pyx_pybuffernd_sizes_oo.diminfo[0].strides = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_oo.diminfo[0].shape = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.shape[0]; - } - } - __pyx_t_2 = 0; - __pyx_v_sizes_oo = ((PyArrayObject *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/assembly/chic.pyx":63 - * np.ndarray[INT, ndim=3] tour_P=None): - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) # <<<<<<<<<<<<<< - * - * cdef double s = 0.0 - */ - __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 63, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cumsum); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 63, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = NULL; - __pyx_t_5 = 0; - #if CYTHON_UNPACK_METHODS - if (unlikely(PyMethod_Check(__pyx_t_4))) { - __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4); - if (likely(__pyx_t_3)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); - 
__Pyx_INCREF(__pyx_t_3); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_4, function); - __pyx_t_5 = 1; - } - } - #endif - { - PyObject *__pyx_callargs[2] = {__pyx_t_3, ((PyObject *)__pyx_v_sizes_oo)}; - __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5); - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 63, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - } - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 63, __pyx_L1_error) - __pyx_t_6 = ((PyArrayObject *)__pyx_t_1); - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { - __pyx_v_sizes_cum = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf = NULL; - __PYX_ERR(0, 63, __pyx_L1_error) - } else {__pyx_pybuffernd_sizes_cum.diminfo[0].strides = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_cum.diminfo[0].shape = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.shape[0]; - } - } - __pyx_t_6 = 0; - __pyx_v_sizes_cum = ((PyArrayObject *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/assembly/chic.pyx":65 - * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - * - * cdef double s = 0.0 # <<<<<<<<<<<<<< - * cdef int size = len(tour) - * cdef int a, b, c, ia, ib - */ - __pyx_v_s = 0.0; - - /* "jcvi/assembly/chic.pyx":66 - * - * cdef double s = 0.0 - * cdef int size = len(tour) # <<<<<<<<<<<<<< - * cdef int a, b, c, ia, ib - * cdef double dist - */ - if (unlikely(((PyObject *)__pyx_v_tour) == Py_None)) { - PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); - __PYX_ERR(0, 66, __pyx_L1_error) - } - __pyx_t_7 = Py_SIZE(((PyObject *)__pyx_v_tour)); if 
(unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 66, __pyx_L1_error) - __pyx_v_size = __pyx_t_7; - - /* "jcvi/assembly/chic.pyx":69 - * cdef int a, b, c, ia, ib - * cdef double dist - * for ia in range(size): # <<<<<<<<<<<<<< - * a = tour[ia] - * for ib in range(ia + 1, size): - */ - __pyx_t_8 = __pyx_v_size; - __pyx_t_9 = __pyx_t_8; - for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) { - __pyx_v_ia = __pyx_t_10; - - /* "jcvi/assembly/chic.pyx":70 - * cdef double dist - * for ia in range(size): - * a = tour[ia] # <<<<<<<<<<<<<< - * for ib in range(ia + 1, size): - * b = tour[ib] - */ - __pyx_t_11 = __pyx_v_ia; - __pyx_v_a = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); - - /* "jcvi/assembly/chic.pyx":71 - * for ia in range(size): - * a = tour[ia] - * for ib in range(ia + 1, size): # <<<<<<<<<<<<<< - * b = tour[ib] - * dist = sizes_cum[ib - 1] - sizes_cum[ia] - */ - __pyx_t_12 = __pyx_v_size; - __pyx_t_13 = __pyx_t_12; - for (__pyx_t_14 = (__pyx_v_ia + 1); __pyx_t_14 < __pyx_t_13; __pyx_t_14+=1) { - __pyx_v_ib = __pyx_t_14; - - /* "jcvi/assembly/chic.pyx":72 - * a = tour[ia] - * for ib in range(ia + 1, size): - * b = tour[ib] # <<<<<<<<<<<<<< - * dist = sizes_cum[ib - 1] - sizes_cum[ia] - * if dist > LIMIT: - */ - __pyx_t_11 = __pyx_v_ib; - __pyx_v_b = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); - - /* "jcvi/assembly/chic.pyx":73 - * for ib in range(ia + 1, size): - * b = tour[ib] - * dist = sizes_cum[ib - 1] - sizes_cum[ia] # <<<<<<<<<<<<<< - * if dist > LIMIT: - * break - */ - __pyx_t_11 = (__pyx_v_ib - 1); - __pyx_t_1 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_11, 
__pyx_pybuffernd_sizes_cum.diminfo[0].strides); - if (unlikely(__pyx_t_1 == NULL)) __pyx_t_1 = Py_None; - __Pyx_INCREF((PyObject*)__pyx_t_1); - __pyx_t_11 = __pyx_v_ia; - __pyx_t_4 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_sizes_cum.diminfo[0].strides); - if (unlikely(__pyx_t_4 == NULL)) __pyx_t_4 = Py_None; - __Pyx_INCREF((PyObject*)__pyx_t_4); - __pyx_t_3 = PyNumber_Subtract(__pyx_t_1, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 73, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_15 = __pyx_PyFloat_AsDouble(__pyx_t_3); if (unlikely((__pyx_t_15 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 73, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_dist = __pyx_t_15; - - /* "jcvi/assembly/chic.pyx":74 - * b = tour[ib] - * dist = sizes_cum[ib - 1] - sizes_cum[ia] - * if dist > LIMIT: # <<<<<<<<<<<<<< - * break - * c = tour_P[a, b, 0] - */ - __pyx_t_16 = (__pyx_v_dist > 10000000.0); - if (__pyx_t_16) { - - /* "jcvi/assembly/chic.pyx":75 - * dist = sizes_cum[ib - 1] - sizes_cum[ia] - * if dist > LIMIT: - * break # <<<<<<<<<<<<<< - * c = tour_P[a, b, 0] - * if c == 0: - */ - goto __pyx_L6_break; - - /* "jcvi/assembly/chic.pyx":74 - * b = tour[ib] - * dist = sizes_cum[ib - 1] - sizes_cum[ia] - * if dist > LIMIT: # <<<<<<<<<<<<<< - * break - * c = tour_P[a, b, 0] - */ - } - - /* "jcvi/assembly/chic.pyx":76 - * if dist > LIMIT: - * break - * c = tour_P[a, b, 0] # <<<<<<<<<<<<<< - * if c == 0: - * continue - */ - __pyx_t_11 = __pyx_v_a; - __pyx_t_17 = __pyx_v_b; - __pyx_t_18 = 0; - __pyx_t_3 = (PyObject *) *__Pyx_BufPtrStrided3d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour_P.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_tour_P.diminfo[1].strides, __pyx_t_18, 
__pyx_pybuffernd_tour_P.diminfo[2].strides); - if (unlikely(__pyx_t_3 == NULL)) __pyx_t_3 = Py_None; - __Pyx_INCREF((PyObject*)__pyx_t_3); - __pyx_t_19 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 76, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_c = __pyx_t_19; - - /* "jcvi/assembly/chic.pyx":77 - * break - * c = tour_P[a, b, 0] - * if c == 0: # <<<<<<<<<<<<<< - * continue - * s += c / (tour_P[a, b, 1] + dist) - */ - __pyx_t_16 = (__pyx_v_c == 0); - if (__pyx_t_16) { - - /* "jcvi/assembly/chic.pyx":78 - * c = tour_P[a, b, 0] - * if c == 0: - * continue # <<<<<<<<<<<<<< - * s += c / (tour_P[a, b, 1] + dist) - * return s, - */ - goto __pyx_L5_continue; - - /* "jcvi/assembly/chic.pyx":77 - * break - * c = tour_P[a, b, 0] - * if c == 0: # <<<<<<<<<<<<<< - * continue - * s += c / (tour_P[a, b, 1] + dist) - */ - } - - /* "jcvi/assembly/chic.pyx":79 - * if c == 0: - * continue - * s += c / (tour_P[a, b, 1] + dist) # <<<<<<<<<<<<<< - * return s, - * - */ - __pyx_t_3 = PyFloat_FromDouble(__pyx_v_s); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 79, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_c); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 79, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_18 = __pyx_v_a; - __pyx_t_17 = __pyx_v_b; - __pyx_t_11 = 1; - __pyx_t_1 = (PyObject *) *__Pyx_BufPtrStrided3d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_tour_P.rcbuffer->pybuffer.buf, __pyx_t_18, __pyx_pybuffernd_tour_P.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_tour_P.diminfo[1].strides, __pyx_t_11, __pyx_pybuffernd_tour_P.diminfo[2].strides); - if (unlikely(__pyx_t_1 == NULL)) __pyx_t_1 = Py_None; - __Pyx_INCREF((PyObject*)__pyx_t_1); - __pyx_t_20 = PyFloat_FromDouble(__pyx_v_dist); if (unlikely(!__pyx_t_20)) __PYX_ERR(0, 79, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_20); - __pyx_t_21 = PyNumber_Add(__pyx_t_1, __pyx_t_20); if (unlikely(!__pyx_t_21)) 
__PYX_ERR(0, 79, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_21); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_20); __pyx_t_20 = 0; - __pyx_t_20 = __Pyx_PyNumber_Divide(__pyx_t_4, __pyx_t_21); if (unlikely(!__pyx_t_20)) __PYX_ERR(0, 79, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_20); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_21); __pyx_t_21 = 0; - __pyx_t_21 = PyNumber_InPlaceAdd(__pyx_t_3, __pyx_t_20); if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 79, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_21); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_20); __pyx_t_20 = 0; - __pyx_t_15 = __pyx_PyFloat_AsDouble(__pyx_t_21); if (unlikely((__pyx_t_15 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 79, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_21); __pyx_t_21 = 0; - __pyx_v_s = __pyx_t_15; - __pyx_L5_continue:; - } - __pyx_L6_break:; - } - - /* "jcvi/assembly/chic.pyx":80 - * continue - * s += c / (tour_P[a, b, 1] + dist) - * return s, # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_21 = PyFloat_FromDouble(__pyx_v_s); if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 80, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_21); - __pyx_t_20 = PyTuple_New(1); if (unlikely(!__pyx_t_20)) __PYX_ERR(0, 80, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_20); - __Pyx_GIVEREF(__pyx_t_21); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_20, 0, __pyx_t_21)) __PYX_ERR(0, 80, __pyx_L1_error); - __pyx_t_21 = 0; - __pyx_r = __pyx_t_20; - __pyx_t_20 = 0; - goto __pyx_L0; - - /* "jcvi/assembly/chic.pyx":59 - * - * - * def score_evaluate_P(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_P=None): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_20); - __Pyx_XDECREF(__pyx_t_21); - { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - 
__Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_P.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); - __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} - __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_P", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - goto __pyx_L2; - __pyx_L0:; - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_P.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); - __pyx_L2:; - __Pyx_XDECREF((PyObject *)__pyx_v_sizes_oo); - __Pyx_XDECREF((PyObject *)__pyx_v_sizes_cum); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/assembly/chic.pyx":83 - * - * - * def score_evaluate_Q(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_Q=None): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_8assembly_4chic_5score_evaluate_Q(PyObject *__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyMethodDef __pyx_mdef_4jcvi_8assembly_4chic_5score_evaluate_Q = {"score_evaluate_Q", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_8assembly_4chic_5score_evaluate_Q, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_4jcvi_8assembly_4chic_5score_evaluate_Q(PyObject 
*__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - arrayobject *__pyx_v_tour = 0; - PyArrayObject *__pyx_v_tour_sizes = 0; - PyArrayObject *__pyx_v_tour_Q = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[3] = {0,0,0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("score_evaluate_Q (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_tour,&__pyx_n_s_tour_sizes,&__pyx_n_s_tour_Q,0}; - - /* "jcvi/assembly/chic.pyx":84 - * - * def score_evaluate_Q(array.array[int] tour, - * np.ndarray[INT, ndim=1] tour_sizes=None, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=3] tour_Q=None): - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - */ - values[1] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); - - /* "jcvi/assembly/chic.pyx":85 - * def score_evaluate_Q(array.array[int] tour, - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_Q=None): # <<<<<<<<<<<<<< - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - */ - values[2] = __Pyx_Arg_NewRef_FASTCALL((PyObject *)((PyArrayObject *)Py_None)); - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; 
- case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 83, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour_sizes); - if (value) { values[1] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 83, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 2: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_tour_Q); - if (value) { values[2] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 83, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "score_evaluate_Q") < 0)) __PYX_ERR(0, 83, __pyx_L3_error) - } - } else { - switch (__pyx_nargs) { - case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - __pyx_v_tour = ((arrayobject *)values[0]); - __pyx_v_tour_sizes = ((PyArrayObject *)values[1]); - __pyx_v_tour_Q = ((PyArrayObject *)values[2]); - } - goto __pyx_L6_skip; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("score_evaluate_Q", 0, 1, 3, __pyx_nargs); __PYX_ERR(0, 83, __pyx_L3_error) - __pyx_L6_skip:; - 
goto __pyx_L4_argument_unpacking_done; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_Q", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour), __pyx_ptype_7cpython_5array_array, 1, "tour", 0))) __PYX_ERR(0, 83, __pyx_L1_error) - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour_sizes), __pyx_ptype_5numpy_ndarray, 1, "tour_sizes", 0))) __PYX_ERR(0, 84, __pyx_L1_error) - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_tour_Q), __pyx_ptype_5numpy_ndarray, 1, "tour_Q", 0))) __PYX_ERR(0, 85, __pyx_L1_error) - __pyx_r = __pyx_pf_4jcvi_8assembly_4chic_4score_evaluate_Q(__pyx_self, __pyx_v_tour, __pyx_v_tour_sizes, __pyx_v_tour_Q); - - /* "jcvi/assembly/chic.pyx":83 - * - * - * def score_evaluate_Q(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_Q=None): - */ - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = NULL; - __pyx_L0:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_8assembly_4chic_4score_evaluate_Q(CYTHON_UNUSED PyObject *__pyx_self, arrayobject *__pyx_v_tour, PyArrayObject *__pyx_v_tour_sizes, PyArrayObject *__pyx_v_tour_Q) { - PyArrayObject *__pyx_v_sizes_oo = 0; - PyArrayObject *__pyx_v_sizes_cum = 0; - double __pyx_v_s; - int __pyx_v_size; - int __pyx_v_a; - int __pyx_v_b; - int __pyx_v_c; - int __pyx_v_ia; - int __pyx_v_ib; - int __pyx_v_ic; - double __pyx_v_dist; - __Pyx_LocalBuf_ND 
__pyx_pybuffernd_sizes_cum; - __Pyx_Buffer __pyx_pybuffer_sizes_cum; - __Pyx_LocalBuf_ND __pyx_pybuffernd_sizes_oo; - __Pyx_Buffer __pyx_pybuffer_sizes_oo; - __Pyx_LocalBuf_ND __pyx_pybuffernd_tour; - __Pyx_Buffer __pyx_pybuffer_tour; - __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_Q; - __Pyx_Buffer __pyx_pybuffer_tour_Q; - __Pyx_LocalBuf_ND __pyx_pybuffernd_tour_sizes; - __Pyx_Buffer __pyx_pybuffer_tour_sizes; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyArrayObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - unsigned int __pyx_t_5; - PyArrayObject *__pyx_t_6 = NULL; - Py_ssize_t __pyx_t_7; - int __pyx_t_8; - int __pyx_t_9; - int __pyx_t_10; - Py_ssize_t __pyx_t_11; - int __pyx_t_12; - int __pyx_t_13; - int __pyx_t_14; - Py_ssize_t __pyx_t_15; - Py_ssize_t __pyx_t_16; - int __pyx_t_17; - double __pyx_t_18; - int __pyx_t_19; - int __pyx_t_20; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("score_evaluate_Q", 1); - __pyx_pybuffer_sizes_oo.pybuffer.buf = NULL; - __pyx_pybuffer_sizes_oo.refcount = 0; - __pyx_pybuffernd_sizes_oo.data = NULL; - __pyx_pybuffernd_sizes_oo.rcbuffer = &__pyx_pybuffer_sizes_oo; - __pyx_pybuffer_sizes_cum.pybuffer.buf = NULL; - __pyx_pybuffer_sizes_cum.refcount = 0; - __pyx_pybuffernd_sizes_cum.data = NULL; - __pyx_pybuffernd_sizes_cum.rcbuffer = &__pyx_pybuffer_sizes_cum; - __pyx_pybuffer_tour.pybuffer.buf = NULL; - __pyx_pybuffer_tour.refcount = 0; - __pyx_pybuffernd_tour.data = NULL; - __pyx_pybuffernd_tour.rcbuffer = &__pyx_pybuffer_tour; - __pyx_pybuffer_tour_sizes.pybuffer.buf = NULL; - __pyx_pybuffer_tour_sizes.refcount = 0; - __pyx_pybuffernd_tour_sizes.data = NULL; - __pyx_pybuffernd_tour_sizes.rcbuffer = &__pyx_pybuffer_tour_sizes; - __pyx_pybuffer_tour_Q.pybuffer.buf = NULL; - __pyx_pybuffer_tour_Q.refcount = 0; - __pyx_pybuffernd_tour_Q.data = NULL; - __pyx_pybuffernd_tour_Q.rcbuffer = 
&__pyx_pybuffer_tour_Q; - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour, &__Pyx_TypeInfo_int, PyBUF_FORMAT| PyBUF_INDIRECT, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 83, __pyx_L1_error) - } - __pyx_pybuffernd_tour.diminfo[0].strides = __pyx_pybuffernd_tour.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour.diminfo[0].shape = __pyx_pybuffernd_tour.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour.diminfo[0].suboffsets = __pyx_pybuffernd_tour.rcbuffer->pybuffer.suboffsets[0]; - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_sizes, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) __PYX_ERR(0, 83, __pyx_L1_error) - } - __pyx_pybuffernd_tour_sizes.diminfo[0].strides = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_sizes.diminfo[0].shape = __pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer.shape[0]; - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_tour_Q.rcbuffer->pybuffer, (PyObject*)__pyx_v_tour_Q, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 3, 0, __pyx_stack) == -1)) __PYX_ERR(0, 83, __pyx_L1_error) - } - __pyx_pybuffernd_tour_Q.diminfo[0].strides = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_tour_Q.diminfo[0].shape = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_tour_Q.diminfo[1].strides = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_tour_Q.diminfo[1].shape = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.shape[1]; __pyx_pybuffernd_tour_Q.diminfo[2].strides = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.strides[2]; __pyx_pybuffernd_tour_Q.diminfo[2].shape = __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.shape[2]; - - /* "jcvi/assembly/chic.pyx":86 - * 
np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_Q=None): - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] # <<<<<<<<<<<<<< - * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - * - */ - __pyx_t_1 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_tour_sizes), ((PyObject *)__pyx_v_tour)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 86, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 86, __pyx_L1_error) - __pyx_t_2 = ((PyArrayObject *)__pyx_t_1); - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer, (PyObject*)__pyx_t_2, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { - __pyx_v_sizes_oo = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.buf = NULL; - __PYX_ERR(0, 86, __pyx_L1_error) - } else {__pyx_pybuffernd_sizes_oo.diminfo[0].strides = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_oo.diminfo[0].shape = __pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer.shape[0]; - } - } - __pyx_t_2 = 0; - __pyx_v_sizes_oo = ((PyArrayObject *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/assembly/chic.pyx":87 - * np.ndarray[INT, ndim=3] tour_Q=None): - * cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) # <<<<<<<<<<<<<< - * - * cdef double s = 0.0 - */ - __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_cumsum); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = NULL; - __pyx_t_5 = 0; - #if CYTHON_UNPACK_METHODS - if 
(unlikely(PyMethod_Check(__pyx_t_4))) { - __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4); - if (likely(__pyx_t_3)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); - __Pyx_INCREF(__pyx_t_3); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_4, function); - __pyx_t_5 = 1; - } - } - #endif - { - PyObject *__pyx_callargs[2] = {__pyx_t_3, ((PyObject *)__pyx_v_sizes_oo)}; - __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5); - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - } - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_t_6 = ((PyArrayObject *)__pyx_t_1); - { - __Pyx_BufFmt_StackElem __pyx_stack[1]; - if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_object, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { - __pyx_v_sizes_cum = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf = NULL; - __PYX_ERR(0, 87, __pyx_L1_error) - } else {__pyx_pybuffernd_sizes_cum.diminfo[0].strides = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sizes_cum.diminfo[0].shape = __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.shape[0]; - } - } - __pyx_t_6 = 0; - __pyx_v_sizes_cum = ((PyArrayObject *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/assembly/chic.pyx":89 - * cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - * - * cdef double s = 0.0 # <<<<<<<<<<<<<< - * cdef int size = len(tour) - * cdef int a, b, c, ia, ib, ic - */ - __pyx_v_s = 0.0; - - /* "jcvi/assembly/chic.pyx":90 - * - * cdef double s = 0.0 - * cdef int size = len(tour) # <<<<<<<<<<<<<< - * cdef int a, b, c, ia, ib, ic - * cdef double dist - */ - if (unlikely(((PyObject *)__pyx_v_tour) == 
Py_None)) { - PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); - __PYX_ERR(0, 90, __pyx_L1_error) - } - __pyx_t_7 = Py_SIZE(((PyObject *)__pyx_v_tour)); if (unlikely(__pyx_t_7 == ((Py_ssize_t)-1))) __PYX_ERR(0, 90, __pyx_L1_error) - __pyx_v_size = __pyx_t_7; - - /* "jcvi/assembly/chic.pyx":93 - * cdef int a, b, c, ia, ib, ic - * cdef double dist - * for ia in range(size): # <<<<<<<<<<<<<< - * a = tour[ia] - * for ib in range(ia + 1, size): - */ - __pyx_t_8 = __pyx_v_size; - __pyx_t_9 = __pyx_t_8; - for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) { - __pyx_v_ia = __pyx_t_10; - - /* "jcvi/assembly/chic.pyx":94 - * cdef double dist - * for ia in range(size): - * a = tour[ia] # <<<<<<<<<<<<<< - * for ib in range(ia + 1, size): - * b = tour[ib] - */ - __pyx_t_11 = __pyx_v_ia; - __pyx_v_a = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); - - /* "jcvi/assembly/chic.pyx":95 - * for ia in range(size): - * a = tour[ia] - * for ib in range(ia + 1, size): # <<<<<<<<<<<<<< - * b = tour[ib] - * if tour_Q[a, b, 0] == -1: - */ - __pyx_t_12 = __pyx_v_size; - __pyx_t_13 = __pyx_t_12; - for (__pyx_t_14 = (__pyx_v_ia + 1); __pyx_t_14 < __pyx_t_13; __pyx_t_14+=1) { - __pyx_v_ib = __pyx_t_14; - - /* "jcvi/assembly/chic.pyx":96 - * a = tour[ia] - * for ib in range(ia + 1, size): - * b = tour[ib] # <<<<<<<<<<<<<< - * if tour_Q[a, b, 0] == -1: - * continue - */ - __pyx_t_11 = __pyx_v_ib; - __pyx_v_b = (*__Pyx_BufPtrFull1d(int *, __pyx_pybuffernd_tour.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour.diminfo[0].strides, __pyx_pybuffernd_tour.diminfo[0].suboffsets)); - - /* "jcvi/assembly/chic.pyx":97 - * for ib in range(ia + 1, size): - * b = tour[ib] - * if tour_Q[a, b, 0] == -1: # <<<<<<<<<<<<<< - * continue - * dist = sizes_cum[ib - 1] - sizes_cum[ia] - */ - __pyx_t_11 = __pyx_v_a; - __pyx_t_15 = __pyx_v_b; - 
__pyx_t_16 = 0; - __pyx_t_1 = (PyObject *) *__Pyx_BufPtrStrided3d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.buf, __pyx_t_11, __pyx_pybuffernd_tour_Q.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_tour_Q.diminfo[1].strides, __pyx_t_16, __pyx_pybuffernd_tour_Q.diminfo[2].strides); - if (unlikely(__pyx_t_1 == NULL)) __pyx_t_1 = Py_None; - __Pyx_INCREF((PyObject*)__pyx_t_1); - __pyx_t_4 = PyObject_RichCompare(__pyx_t_1, __pyx_int_neg_1, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 97, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_17 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely((__pyx_t_17 < 0))) __PYX_ERR(0, 97, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (__pyx_t_17) { - - /* "jcvi/assembly/chic.pyx":98 - * b = tour[ib] - * if tour_Q[a, b, 0] == -1: - * continue # <<<<<<<<<<<<<< - * dist = sizes_cum[ib - 1] - sizes_cum[ia] - * if dist > LIMIT: - */ - goto __pyx_L5_continue; - - /* "jcvi/assembly/chic.pyx":97 - * for ib in range(ia + 1, size): - * b = tour[ib] - * if tour_Q[a, b, 0] == -1: # <<<<<<<<<<<<<< - * continue - * dist = sizes_cum[ib - 1] - sizes_cum[ia] - */ - } - - /* "jcvi/assembly/chic.pyx":99 - * if tour_Q[a, b, 0] == -1: - * continue - * dist = sizes_cum[ib - 1] - sizes_cum[ia] # <<<<<<<<<<<<<< - * if dist > LIMIT: - * break - */ - __pyx_t_16 = (__pyx_v_ib - 1); - __pyx_t_4 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_sizes_cum.diminfo[0].strides); - if (unlikely(__pyx_t_4 == NULL)) __pyx_t_4 = Py_None; - __Pyx_INCREF((PyObject*)__pyx_t_4); - __pyx_t_16 = __pyx_v_ia; - __pyx_t_1 = (PyObject *) *__Pyx_BufPtrStrided1d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_sizes_cum.diminfo[0].strides); - if (unlikely(__pyx_t_1 == NULL)) __pyx_t_1 = Py_None; - 
__Pyx_INCREF((PyObject*)__pyx_t_1); - __pyx_t_3 = PyNumber_Subtract(__pyx_t_4, __pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 99, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_18 = __pyx_PyFloat_AsDouble(__pyx_t_3); if (unlikely((__pyx_t_18 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 99, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_dist = __pyx_t_18; - - /* "jcvi/assembly/chic.pyx":100 - * continue - * dist = sizes_cum[ib - 1] - sizes_cum[ia] - * if dist > LIMIT: # <<<<<<<<<<<<<< - * break - * for ic in range(BB): - */ - __pyx_t_17 = (__pyx_v_dist > 10000000.0); - if (__pyx_t_17) { - - /* "jcvi/assembly/chic.pyx":101 - * dist = sizes_cum[ib - 1] - sizes_cum[ia] - * if dist > LIMIT: - * break # <<<<<<<<<<<<<< - * for ic in range(BB): - * c = tour_Q[a, b, ic] - */ - goto __pyx_L6_break; - - /* "jcvi/assembly/chic.pyx":100 - * continue - * dist = sizes_cum[ib - 1] - sizes_cum[ia] - * if dist > LIMIT: # <<<<<<<<<<<<<< - * break - * for ic in range(BB): - */ - } - - /* "jcvi/assembly/chic.pyx":102 - * if dist > LIMIT: - * break - * for ic in range(BB): # <<<<<<<<<<<<<< - * c = tour_Q[a, b, ic] - * s += c / (GR[ic] + dist) - */ - for (__pyx_t_19 = 0; __pyx_t_19 < 12; __pyx_t_19+=1) { - __pyx_v_ic = __pyx_t_19; - - /* "jcvi/assembly/chic.pyx":103 - * break - * for ic in range(BB): - * c = tour_Q[a, b, ic] # <<<<<<<<<<<<<< - * s += c / (GR[ic] + dist) - * return s, - */ - __pyx_t_16 = __pyx_v_a; - __pyx_t_15 = __pyx_v_b; - __pyx_t_11 = __pyx_v_ic; - __pyx_t_3 = (PyObject *) *__Pyx_BufPtrStrided3d(__pyx_t_4jcvi_8assembly_4chic_INT *, __pyx_pybuffernd_tour_Q.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_tour_Q.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_tour_Q.diminfo[1].strides, __pyx_t_11, __pyx_pybuffernd_tour_Q.diminfo[2].strides); - if (unlikely(__pyx_t_3 == NULL)) __pyx_t_3 = Py_None; - __Pyx_INCREF((PyObject*)__pyx_t_3); - __pyx_t_20 
= __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_20 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 103, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_c = __pyx_t_20; - - /* "jcvi/assembly/chic.pyx":104 - * for ic in range(BB): - * c = tour_Q[a, b, ic] - * s += c / (GR[ic] + dist) # <<<<<<<<<<<<<< - * return s, - */ - __pyx_v_s = (__pyx_v_s + (((double)__pyx_v_c) / ((__pyx_v_4jcvi_8assembly_4chic_GR[__pyx_v_ic]) + __pyx_v_dist))); - } - __pyx_L5_continue:; - } - __pyx_L6_break:; - } - - /* "jcvi/assembly/chic.pyx":105 - * c = tour_Q[a, b, ic] - * s += c / (GR[ic] + dist) - * return s, # <<<<<<<<<<<<<< - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = PyFloat_FromDouble(__pyx_v_s); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 105, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 105, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_3)) __PYX_ERR(0, 105, __pyx_L1_error); - __pyx_t_3 = 0; - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "jcvi/assembly/chic.pyx":83 - * - * - * def score_evaluate_Q(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_Q=None): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_Q.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); - 
__Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} - __Pyx_AddTraceback("jcvi.assembly.chic.score_evaluate_Q", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - goto __pyx_L2; - __pyx_L0:; - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_cum.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sizes_oo.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_Q.rcbuffer->pybuffer); - __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_tour_sizes.rcbuffer->pybuffer); - __pyx_L2:; - __Pyx_XDECREF((PyObject *)__pyx_v_sizes_oo); - __Pyx_XDECREF((PyObject *)__pyx_v_sizes_cum); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyMethodDef __pyx_methods[] = { - {0, 0, 0, 0} -}; -#ifndef CYTHON_SMALL_CODE -#if defined(__clang__) - #define CYTHON_SMALL_CODE -#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) - #define CYTHON_SMALL_CODE __attribute__((cold)) -#else - #define CYTHON_SMALL_CODE -#endif -#endif -/* #### Code section: pystring_table ### */ - -static int __Pyx_CreateStringTabAndInitStrings(void) { - __Pyx_StringTabEntry __pyx_string_tab[] = { - {&__pyx_n_s_ImportError, __pyx_k_ImportError, sizeof(__pyx_k_ImportError), 0, 0, 1, 1}, - {&__pyx_n_s_MemoryError, __pyx_k_MemoryError, sizeof(__pyx_k_MemoryError), 0, 0, 1, 1}, - {&__pyx_n_s__11, __pyx_k__11, sizeof(__pyx_k__11), 0, 0, 1, 1}, - {&__pyx_n_s__3, __pyx_k__3, sizeof(__pyx_k__3), 0, 0, 1, 1}, - {&__pyx_n_s_a, __pyx_k_a, sizeof(__pyx_k_a), 0, 0, 1, 1}, - {&__pyx_n_s_array, __pyx_k_array, sizeof(__pyx_k_array), 0, 0, 1, 1}, - {&__pyx_n_s_asyncio_coroutines, __pyx_k_asyncio_coroutines, sizeof(__pyx_k_asyncio_coroutines), 0, 0, 1, 1}, - {&__pyx_n_s_b, __pyx_k_b, sizeof(__pyx_k_b), 0, 0, 1, 1}, - {&__pyx_n_s_c, __pyx_k_c, sizeof(__pyx_k_c), 0, 0, 1, 1}, - {&__pyx_n_s_class_getitem, __pyx_k_class_getitem, sizeof(__pyx_k_class_getitem), 0, 0, 
1, 1}, - {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, - {&__pyx_n_s_cumsum, __pyx_k_cumsum, sizeof(__pyx_k_cumsum), 0, 0, 1, 1}, - {&__pyx_n_s_dist, __pyx_k_dist, sizeof(__pyx_k_dist), 0, 0, 1, 1}, - {&__pyx_n_s_ia, __pyx_k_ia, sizeof(__pyx_k_ia), 0, 0, 1, 1}, - {&__pyx_n_s_ib, __pyx_k_ib, sizeof(__pyx_k_ib), 0, 0, 1, 1}, - {&__pyx_n_s_ic, __pyx_k_ic, sizeof(__pyx_k_ic), 0, 0, 1, 1}, - {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, - {&__pyx_n_s_initializing, __pyx_k_initializing, sizeof(__pyx_k_initializing), 0, 0, 1, 1}, - {&__pyx_n_s_is_coroutine, __pyx_k_is_coroutine, sizeof(__pyx_k_is_coroutine), 0, 0, 1, 1}, - {&__pyx_n_s_jcvi_assembly_chic, __pyx_k_jcvi_assembly_chic, sizeof(__pyx_k_jcvi_assembly_chic), 0, 0, 1, 1}, - {&__pyx_n_s_links, __pyx_k_links, sizeof(__pyx_k_links), 0, 0, 1, 1}, - {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, - {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, - {&__pyx_n_s_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 0, 1, 1}, - {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1}, - {&__pyx_kp_s_numpy__core_multiarray_failed_to, __pyx_k_numpy__core_multiarray_failed_to, sizeof(__pyx_k_numpy__core_multiarray_failed_to), 0, 0, 1, 0}, - {&__pyx_kp_s_numpy__core_umath_failed_to_impo, __pyx_k_numpy__core_umath_failed_to_impo, sizeof(__pyx_k_numpy__core_umath_failed_to_impo), 0, 0, 1, 0}, - {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, - {&__pyx_n_s_s, __pyx_k_s, sizeof(__pyx_k_s), 0, 0, 1, 1}, - {&__pyx_n_s_score_evaluate_M, __pyx_k_score_evaluate_M, sizeof(__pyx_k_score_evaluate_M), 0, 0, 1, 1}, - {&__pyx_n_s_score_evaluate_P, __pyx_k_score_evaluate_P, sizeof(__pyx_k_score_evaluate_P), 0, 0, 1, 1}, - {&__pyx_n_s_score_evaluate_Q, __pyx_k_score_evaluate_Q, sizeof(__pyx_k_score_evaluate_Q), 0, 0, 1, 1}, - {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1}, - 
{&__pyx_n_s_sizes_cum, __pyx_k_sizes_cum, sizeof(__pyx_k_sizes_cum), 0, 0, 1, 1}, - {&__pyx_n_s_sizes_oo, __pyx_k_sizes_oo, sizeof(__pyx_k_sizes_oo), 0, 0, 1, 1}, - {&__pyx_n_s_spec, __pyx_k_spec, sizeof(__pyx_k_spec), 0, 0, 1, 1}, - {&__pyx_kp_s_src_jcvi_assembly_chic_pyx, __pyx_k_src_jcvi_assembly_chic_pyx, sizeof(__pyx_k_src_jcvi_assembly_chic_pyx), 0, 0, 1, 0}, - {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, - {&__pyx_n_s_tour, __pyx_k_tour, sizeof(__pyx_k_tour), 0, 0, 1, 1}, - {&__pyx_n_s_tour_M, __pyx_k_tour_M, sizeof(__pyx_k_tour_M), 0, 0, 1, 1}, - {&__pyx_n_s_tour_P, __pyx_k_tour_P, sizeof(__pyx_k_tour_P), 0, 0, 1, 1}, - {&__pyx_n_s_tour_Q, __pyx_k_tour_Q, sizeof(__pyx_k_tour_Q), 0, 0, 1, 1}, - {&__pyx_n_s_tour_sizes, __pyx_k_tour_sizes, sizeof(__pyx_k_tour_sizes), 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 0, 0} - }; - return __Pyx_InitStrings(__pyx_string_tab); -} -/* #### Code section: cached_builtins ### */ -static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 45, __pyx_L1_error) - __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(1, 1043, __pyx_L1_error) - __pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) __PYX_ERR(3, 120, __pyx_L1_error) - return 0; - __pyx_L1_error:; - return -1; -} -/* #### Code section: cached_constants ### */ - -static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1043 - * __pyx_import_array() - * except Exception: - * raise ImportError("numpy._core.multiarray failed to import") # <<<<<<<<<<<<<< - * - * cdef inline int 
import_umath() except -1: - */ - __pyx_tuple_ = PyTuple_Pack(1, __pyx_kp_s_numpy__core_multiarray_failed_to); if (unlikely(!__pyx_tuple_)) __PYX_ERR(1, 1043, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple_); - __Pyx_GIVEREF(__pyx_tuple_); - - /* "../../../../../../private/var/folders/ht/f4psx_9j31934bxvs87wqk1h0000gn/T/pip-build-env-3euuub5s/overlay/lib/python3.12/site-packages/numpy/__init__.cython-30.pxd":1049 - * _import_umath() - * except Exception: - * raise ImportError("numpy._core.umath failed to import") # <<<<<<<<<<<<<< - * - * cdef inline int import_ufunc() except -1: - */ - __pyx_tuple__2 = PyTuple_Pack(1, __pyx_kp_s_numpy__core_umath_failed_to_impo); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(1, 1049, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__2); - __Pyx_GIVEREF(__pyx_tuple__2); - - /* "jcvi/assembly/chic.pyx":34 - * - * - * def score_evaluate_M(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=2] tour_M=None): - */ - __pyx_tuple__4 = PyTuple_Pack(13, __pyx_n_s_tour, __pyx_n_s_tour_sizes, __pyx_n_s_tour_M, __pyx_n_s_sizes_oo, __pyx_n_s_sizes_cum, __pyx_n_s_s, __pyx_n_s_size, __pyx_n_s_a, __pyx_n_s_b, __pyx_n_s_ia, __pyx_n_s_ib, __pyx_n_s_links, __pyx_n_s_dist); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 34, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__4); - __Pyx_GIVEREF(__pyx_tuple__4); - __pyx_codeobj__5 = (PyObject*)__Pyx_PyCode_New(3, 0, 0, 13, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__4, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_jcvi_assembly_chic_pyx, __pyx_n_s_score_evaluate_M, 34, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__5)) __PYX_ERR(0, 34, __pyx_L1_error) - __pyx_tuple__6 = PyTuple_Pack(2, Py_None, Py_None); if (unlikely(!__pyx_tuple__6)) __PYX_ERR(0, 34, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__6); - __Pyx_GIVEREF(__pyx_tuple__6); - - /* "jcvi/assembly/chic.pyx":59 - * - * - * def 
score_evaluate_P(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_P=None): - */ - __pyx_tuple__7 = PyTuple_Pack(13, __pyx_n_s_tour, __pyx_n_s_tour_sizes, __pyx_n_s_tour_P, __pyx_n_s_sizes_oo, __pyx_n_s_sizes_cum, __pyx_n_s_s, __pyx_n_s_size, __pyx_n_s_a, __pyx_n_s_b, __pyx_n_s_c, __pyx_n_s_ia, __pyx_n_s_ib, __pyx_n_s_dist); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(0, 59, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__7); - __Pyx_GIVEREF(__pyx_tuple__7); - __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(3, 0, 0, 13, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_jcvi_assembly_chic_pyx, __pyx_n_s_score_evaluate_P, 59, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(0, 59, __pyx_L1_error) - - /* "jcvi/assembly/chic.pyx":83 - * - * - * def score_evaluate_Q(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_Q=None): - */ - __pyx_tuple__9 = PyTuple_Pack(14, __pyx_n_s_tour, __pyx_n_s_tour_sizes, __pyx_n_s_tour_Q, __pyx_n_s_sizes_oo, __pyx_n_s_sizes_cum, __pyx_n_s_s, __pyx_n_s_size, __pyx_n_s_a, __pyx_n_s_b, __pyx_n_s_c, __pyx_n_s_ia, __pyx_n_s_ib, __pyx_n_s_ic, __pyx_n_s_dist); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(0, 83, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__9); - __Pyx_GIVEREF(__pyx_tuple__9); - __pyx_codeobj__10 = (PyObject*)__Pyx_PyCode_New(3, 0, 0, 14, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__9, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_jcvi_assembly_chic_pyx, __pyx_n_s_score_evaluate_Q, 83, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__10)) __PYX_ERR(0, 83, __pyx_L1_error) - __Pyx_RefNannyFinishContext(); - return 0; - __pyx_L1_error:; - __Pyx_RefNannyFinishContext(); - return -1; -} -/* #### Code section: init_constants ### */ - 
-static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) { - if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error); - __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_int_neg_1 = PyInt_FromLong(-1); if (unlikely(!__pyx_int_neg_1)) __PYX_ERR(0, 1, __pyx_L1_error) - return 0; - __pyx_L1_error:; - return -1; -} -/* #### Code section: init_globals ### */ - -static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { - /* NumpyImportArray.init */ - /* - * Cython has automatically inserted a call to _import_array since - * you didn't include one when you cimported numpy. To disable this - * add the line - * numpy._import_array - */ -#ifdef NPY_FEATURE_VERSION -#ifndef NO_IMPORT_ARRAY -if (unlikely(_import_array() == -1)) { - PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import " - "(auto-generated because you didn't call 'numpy.import_array()' after cimporting numpy; " - "use 'numpy._import_array' to disable if you are certain you don't need it)."); -} -#endif -#endif - -if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error) - - return 0; - __pyx_L1_error:; - return -1; -} -/* #### Code section: init_module ### */ - -static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ - -static int __Pyx_modinit_global_init_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); - /*--- Global init code ---*/ - 
__Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_variable_export_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); - /*--- Variable export code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_function_export_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); - /*--- Function export code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_type_init_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); - /*--- Type init code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_type_import_code(void) { - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); - /*--- Type import code ---*/ - __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(4, 9, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_0_11(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", - #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyTypeObject), - #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyTypeObject), - #else - sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyHeapTypeObject), - #endif - __Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_7cpython_4type_type) __PYX_ERR(4, 9, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(5, 8, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_ptype_7cpython_4bool_bool = 
__Pyx_ImportType_3_0_11(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "bool", sizeof(PyBoolObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyBoolObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_7cpython_4bool_bool) __PYX_ERR(5, 8, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(6, 15, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_ptype_7cpython_7complex_complex = __Pyx_ImportType_3_0_11(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "complex", sizeof(PyComplexObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyComplexObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_7cpython_7complex_complex) __PYX_ERR(6, 15, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyImport_ImportModule("numpy"); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 272, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_ptype_5numpy_dtype = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "dtype", sizeof(PyArray_Descr), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArray_Descr),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_dtype) __PYX_ERR(1, 272, __pyx_L1_error) - __pyx_ptype_5numpy_flatiter = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "flatiter", sizeof(PyArrayIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayIterObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_flatiter) __PYX_ERR(1, 317, __pyx_L1_error) - __pyx_ptype_5numpy_broadcast = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "broadcast", sizeof(PyArrayMultiIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayMultiIterObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_broadcast) __PYX_ERR(1, 321, __pyx_L1_error) - __pyx_ptype_5numpy_ndarray = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "ndarray", sizeof(PyArrayObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_ndarray) 
__PYX_ERR(1, 360, __pyx_L1_error) - __pyx_ptype_5numpy_generic = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "generic", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_generic) __PYX_ERR(1, 865, __pyx_L1_error) - __pyx_ptype_5numpy_number = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "number", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_number) __PYX_ERR(1, 867, __pyx_L1_error) - __pyx_ptype_5numpy_integer = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "integer", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_integer) __PYX_ERR(1, 869, __pyx_L1_error) - __pyx_ptype_5numpy_signedinteger = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "signedinteger", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_signedinteger) __PYX_ERR(1, 871, __pyx_L1_error) - __pyx_ptype_5numpy_unsignedinteger = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "unsignedinteger", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_unsignedinteger) __PYX_ERR(1, 873, __pyx_L1_error) - __pyx_ptype_5numpy_inexact = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "inexact", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_inexact) __PYX_ERR(1, 875, __pyx_L1_error) - __pyx_ptype_5numpy_floating = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "floating", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_floating) __PYX_ERR(1, 877, __pyx_L1_error) - __pyx_ptype_5numpy_complexfloating = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "complexfloating", sizeof(PyObject), 
__PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_complexfloating) __PYX_ERR(1, 879, __pyx_L1_error) - __pyx_ptype_5numpy_flexible = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "flexible", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_flexible) __PYX_ERR(1, 881, __pyx_L1_error) - __pyx_ptype_5numpy_character = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "character", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_character) __PYX_ERR(1, 883, __pyx_L1_error) - __pyx_ptype_5numpy_ufunc = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "ufunc", sizeof(PyUFuncObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyUFuncObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_ufunc) __PYX_ERR(1, 947, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyImport_ImportModule("array"); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 69, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_ptype_7cpython_5array_array = __Pyx_ImportType_3_0_11(__pyx_t_1, "array", "array", sizeof(arrayobject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(arrayobject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_7cpython_5array_array) __PYX_ERR(3, 69, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_RefNannyFinishContext(); - return 0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_RefNannyFinishContext(); - return -1; -} - -static int __Pyx_modinit_variable_import_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); - /*--- Variable import code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_function_import_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); - /*--- Function import code 
---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - - -#if PY_MAJOR_VERSION >= 3 -#if CYTHON_PEP489_MULTI_PHASE_INIT -static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ -static int __pyx_pymod_exec_chic(PyObject* module); /*proto*/ -static PyModuleDef_Slot __pyx_moduledef_slots[] = { - {Py_mod_create, (void*)__pyx_pymod_create}, - {Py_mod_exec, (void*)__pyx_pymod_exec_chic}, - {0, NULL} -}; -#endif - -#ifdef __cplusplus -namespace { - struct PyModuleDef __pyx_moduledef = - #else - static struct PyModuleDef __pyx_moduledef = - #endif - { - PyModuleDef_HEAD_INIT, - "chic", - __pyx_k_Cythonized_version_of_score_eva, /* m_doc */ - #if CYTHON_PEP489_MULTI_PHASE_INIT - 0, /* m_size */ - #elif CYTHON_USE_MODULE_STATE - sizeof(__pyx_mstate), /* m_size */ - #else - -1, /* m_size */ - #endif - __pyx_methods /* m_methods */, - #if CYTHON_PEP489_MULTI_PHASE_INIT - __pyx_moduledef_slots, /* m_slots */ - #else - NULL, /* m_reload */ - #endif - #if CYTHON_USE_MODULE_STATE - __pyx_m_traverse, /* m_traverse */ - __pyx_m_clear, /* m_clear */ - NULL /* m_free */ - #else - NULL, /* m_traverse */ - NULL, /* m_clear */ - NULL /* m_free */ - #endif - }; - #ifdef __cplusplus -} /* anonymous namespace */ -#endif -#endif - -#ifndef CYTHON_NO_PYINIT_EXPORT -#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC -#elif PY_MAJOR_VERSION < 3 -#ifdef __cplusplus -#define __Pyx_PyMODINIT_FUNC extern "C" void -#else -#define __Pyx_PyMODINIT_FUNC void -#endif -#else -#ifdef __cplusplus -#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * -#else -#define __Pyx_PyMODINIT_FUNC PyObject * -#endif -#endif - - -#if PY_MAJOR_VERSION < 3 -__Pyx_PyMODINIT_FUNC initchic(void) CYTHON_SMALL_CODE; /*proto*/ -__Pyx_PyMODINIT_FUNC initchic(void) -#else -__Pyx_PyMODINIT_FUNC PyInit_chic(void) CYTHON_SMALL_CODE; /*proto*/ -__Pyx_PyMODINIT_FUNC PyInit_chic(void) -#if CYTHON_PEP489_MULTI_PHASE_INIT -{ - return PyModuleDef_Init(&__pyx_moduledef); -} -static CYTHON_SMALL_CODE int 
__Pyx_check_single_interpreter(void) { - #if PY_VERSION_HEX >= 0x030700A1 - static PY_INT64_T main_interpreter_id = -1; - PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); - if (main_interpreter_id == -1) { - main_interpreter_id = current_id; - return (unlikely(current_id == -1)) ? -1 : 0; - } else if (unlikely(main_interpreter_id != current_id)) - #else - static PyInterpreterState *main_interpreter = NULL; - PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; - if (!main_interpreter) { - main_interpreter = current_interpreter; - } else if (unlikely(main_interpreter != current_interpreter)) - #endif - { - PyErr_SetString( - PyExc_ImportError, - "Interpreter change detected - this module can only be loaded into one interpreter per process."); - return -1; - } - return 0; -} -#if CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *module, const char* from_name, const char* to_name, int allow_none) -#else -static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) -#endif -{ - PyObject *value = PyObject_GetAttrString(spec, from_name); - int result = 0; - if (likely(value)) { - if (allow_none || value != Py_None) { -#if CYTHON_COMPILING_IN_LIMITED_API - result = PyModule_AddObject(module, to_name, value); -#else - result = PyDict_SetItemString(moddict, to_name, value); -#endif - } - Py_DECREF(value); - } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_Clear(); - } else { - result = -1; - } - return result; -} -static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def) { - PyObject *module = NULL, *moddict, *modname; - CYTHON_UNUSED_VAR(def); - if (__Pyx_check_single_interpreter()) - return NULL; - if (__pyx_m) - return __Pyx_NewRef(__pyx_m); - modname = PyObject_GetAttrString(spec, "name"); - if (unlikely(!modname)) goto bad; 
- module = PyModule_NewObject(modname); - Py_DECREF(modname); - if (unlikely(!module)) goto bad; -#if CYTHON_COMPILING_IN_LIMITED_API - moddict = module; -#else - moddict = PyModule_GetDict(module); - if (unlikely(!moddict)) goto bad; -#endif - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; - return module; -bad: - Py_XDECREF(module); - return NULL; -} - - -static CYTHON_SMALL_CODE int __pyx_pymod_exec_chic(PyObject *__pyx_pyinit_module) -#endif -#endif -{ - int stringtab_initialized = 0; - #if CYTHON_USE_MODULE_STATE - int pystate_addmodule_run = 0; - #endif - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - static int __pyx_t_3[12]; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannyDeclarations - #if CYTHON_PEP489_MULTI_PHASE_INIT - if (__pyx_m) { - if (__pyx_m == __pyx_pyinit_module) return 0; - PyErr_SetString(PyExc_RuntimeError, "Module 'chic' has already been imported. 
Re-initialisation is not supported."); - return -1; - } - #elif PY_MAJOR_VERSION >= 3 - if (__pyx_m) return __Pyx_NewRef(__pyx_m); - #endif - /*--- Module creation code ---*/ - #if CYTHON_PEP489_MULTI_PHASE_INIT - __pyx_m = __pyx_pyinit_module; - Py_INCREF(__pyx_m); - #else - #if PY_MAJOR_VERSION < 3 - __pyx_m = Py_InitModule4("chic", __pyx_methods, __pyx_k_Cythonized_version_of_score_eva, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); - if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) - #elif CYTHON_USE_MODULE_STATE - __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) - { - int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef); - __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to "chic" pseudovariable */ - if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - pystate_addmodule_run = 1; - } - #else - __pyx_m = PyModule_Create(&__pyx_moduledef); - if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #endif - CYTHON_UNUSED_VAR(__pyx_t_1); - __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) - Py_INCREF(__pyx_d); - __pyx_b = __Pyx_PyImport_AddModuleRef(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_cython_runtime = __Pyx_PyImport_AddModuleRef((const char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) - if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #if CYTHON_REFNANNY -__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); -if (!__Pyx_RefNanny) { - PyErr_Clear(); - __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); - if (!__Pyx_RefNanny) - Py_FatalError("failed to import 'refnanny' module"); -} -#endif - __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_chic(void)", 0); - if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, 
__Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #ifdef __Pxy_PyFrame_Initialize_Offsets - __Pxy_PyFrame_Initialize_Offsets(); - #endif - __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) - #ifdef __Pyx_CyFunction_USED - if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_FusedFunction_USED - if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_Coroutine_USED - if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_Generator_USED - if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_AsyncGen_USED - if (__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_StopAsyncIteration_USED - if (__pyx_StopAsyncIteration_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - /*--- Library function declarations ---*/ - /*--- Threads initialization code ---*/ - #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS - PyEval_InitThreads(); - #endif - /*--- Initialize various global constants etc. 
---*/ - if (__Pyx_InitConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - stringtab_initialized = 1; - if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) - if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - if (__pyx_module_is_main_jcvi__assembly__chic) { - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - } - #if PY_MAJOR_VERSION >= 3 - { - PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) - if (!PyDict_GetItemString(modules, "jcvi.assembly.chic")) { - if (unlikely((PyDict_SetItemString(modules, "jcvi.assembly.chic", __pyx_m) < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - } - } - #endif - /*--- Builtin init code ---*/ - if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - /*--- Constants init code ---*/ - if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - /*--- Global type/function init code ---*/ - (void)__Pyx_modinit_global_init_code(); - (void)__Pyx_modinit_variable_export_code(); - (void)__Pyx_modinit_function_export_code(); - (void)__Pyx_modinit_type_init_code(); - if (unlikely((__Pyx_modinit_type_import_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - (void)__Pyx_modinit_variable_import_code(); - (void)__Pyx_modinit_function_import_code(); - /*--- Execution code ---*/ - #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) - if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - - /* "jcvi/assembly/chic.pyx":18 - * - * from __future__ import division - * import numpy as np # <<<<<<<<<<<<<< - * cimport numpy as np - * cimport cython - */ - __pyx_t_2 = __Pyx_ImportDottedModuleRelFirst(__pyx_n_s_numpy, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 18, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, 
__pyx_t_2) < 0) __PYX_ERR(0, 18, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "jcvi/assembly/chic.pyx":22 - * cimport cython - * from cpython cimport array - * import array # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_2 = __Pyx_ImportDottedModuleRelFirst(__pyx_n_s_array, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 22, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_array, __pyx_t_2) < 0) __PYX_ERR(0, 22, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "jcvi/assembly/chic.pyx":29 - * DEF BB = 12 - * cdef int *GR = \ - * [ 5778, 9349, 15127, 24476, # <<<<<<<<<<<<<< - * 39603, 64079, 103682, 167761, - * 271443, 439204, 710647, 1149851] - */ - __pyx_t_3[0] = 0x1692; - __pyx_t_3[1] = 0x2485; - __pyx_t_3[2] = 0x3B17; - __pyx_t_3[3] = 0x5F9C; - __pyx_t_3[4] = 0x9AB3; - __pyx_t_3[5] = 0xFA4F; - __pyx_t_3[6] = 0x19502; - __pyx_t_3[7] = 0x28F51; - __pyx_t_3[8] = 0x42453; - __pyx_t_3[9] = 0x6B3A4; - __pyx_t_3[10] = 0xAD7F7; - __pyx_t_3[11] = 0x118B9B; - __pyx_v_4jcvi_8assembly_4chic_GR = __pyx_t_3; - - /* "jcvi/assembly/chic.pyx":34 - * - * - * def score_evaluate_M(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=2] tour_M=None): - */ - __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_8assembly_4chic_1score_evaluate_M, 0, __pyx_n_s_score_evaluate_M, NULL, __pyx_n_s_jcvi_assembly_chic, __pyx_d, ((PyObject *)__pyx_codeobj__5)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 34, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_tuple__6); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_score_evaluate_M, __pyx_t_2) < 0) __PYX_ERR(0, 34, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "jcvi/assembly/chic.pyx":59 - * - * - * def score_evaluate_P(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_P=None): - */ - __pyx_t_2 = 
__Pyx_CyFunction_New(&__pyx_mdef_4jcvi_8assembly_4chic_3score_evaluate_P, 0, __pyx_n_s_score_evaluate_P, NULL, __pyx_n_s_jcvi_assembly_chic, __pyx_d, ((PyObject *)__pyx_codeobj__8)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 59, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_tuple__6); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_score_evaluate_P, __pyx_t_2) < 0) __PYX_ERR(0, 59, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "jcvi/assembly/chic.pyx":83 - * - * - * def score_evaluate_Q(array.array[int] tour, # <<<<<<<<<<<<<< - * np.ndarray[INT, ndim=1] tour_sizes=None, - * np.ndarray[INT, ndim=3] tour_Q=None): - */ - __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_8assembly_4chic_5score_evaluate_Q, 0, __pyx_n_s_score_evaluate_Q, NULL, __pyx_n_s_jcvi_assembly_chic, __pyx_d, ((PyObject *)__pyx_codeobj__10)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 83, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_tuple__6); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_score_evaluate_Q, __pyx_t_2) < 0) __PYX_ERR(0, 83, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "jcvi/assembly/chic.pyx":1 - * #cython: language_level=2, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True # <<<<<<<<<<<<<< - * - * """ - */ - __pyx_t_2 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_2) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /*--- Wrapped vars code ---*/ - - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - if (__pyx_m) { - if (__pyx_d && stringtab_initialized) { - __Pyx_AddTraceback("init jcvi.assembly.chic", __pyx_clineno, __pyx_lineno, __pyx_filename); - } - #if !CYTHON_USE_MODULE_STATE - Py_CLEAR(__pyx_m); - #else - Py_DECREF(__pyx_m); - if (pystate_addmodule_run) { - PyObject 
*tp, *value, *tb; - PyErr_Fetch(&tp, &value, &tb); - PyState_RemoveModule(&__pyx_moduledef); - PyErr_Restore(tp, value, tb); - } - #endif - } else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ImportError, "init jcvi.assembly.chic"); - } - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - #if CYTHON_PEP489_MULTI_PHASE_INIT - return (__pyx_m != NULL) ? 0 : -1; - #elif PY_MAJOR_VERSION >= 3 - return __pyx_m; - #else - return; - #endif -} -/* #### Code section: cleanup_globals ### */ -/* #### Code section: cleanup_module ### */ -/* #### Code section: main_method ### */ -/* #### Code section: utility_code_pragmas ### */ -#ifdef _MSC_VER -#pragma warning( push ) -/* Warning 4127: conditional expression is constant - * Cython uses constant conditional expressions to allow in inline functions to be optimized at - * compile-time, so this warning is not useful - */ -#pragma warning( disable : 4127 ) -#endif - - - -/* #### Code section: utility_code_def ### */ - -/* --- Runtime support code --- */ -/* Refnanny */ -#if CYTHON_REFNANNY -static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { - PyObject *m = NULL, *p = NULL; - void *r = NULL; - m = PyImport_ImportModule(modname); - if (!m) goto end; - p = PyObject_GetAttrString(m, "RefNannyAPI"); - if (!p) goto end; - r = PyLong_AsVoidPtr(p); -end: - Py_XDECREF(p); - Py_XDECREF(m); - return (__Pyx_RefNannyAPIStruct *)r; -} -#endif - -/* PyErrExceptionMatches */ -#if CYTHON_FAST_THREAD_STATE -static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(tuple); -#if PY_MAJOR_VERSION >= 3 - for (i=0; i= 0x030C00A6 - PyObject *current_exception = tstate->current_exception; - if (unlikely(!current_exception)) return 0; - exc_type = (PyObject*) Py_TYPE(current_exception); - if (exc_type == err) return 1; -#else - exc_type = tstate->curexc_type; - if (exc_type == err) return 1; - if (unlikely(!exc_type)) return 0; -#endif - #if 
CYTHON_AVOID_BORROWED_REFS - Py_INCREF(exc_type); - #endif - if (unlikely(PyTuple_Check(err))) { - result = __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); - } else { - result = __Pyx_PyErr_GivenExceptionMatches(exc_type, err); - } - #if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(exc_type); - #endif - return result; -} -#endif - -/* PyErrFetchRestore */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { -#if PY_VERSION_HEX >= 0x030C00A6 - PyObject *tmp_value; - assert(type == NULL || (value != NULL && type == (PyObject*) Py_TYPE(value))); - if (value) { - #if CYTHON_COMPILING_IN_CPYTHON - if (unlikely(((PyBaseExceptionObject*) value)->traceback != tb)) - #endif - PyException_SetTraceback(value, tb); - } - tmp_value = tstate->current_exception; - tstate->current_exception = value; - Py_XDECREF(tmp_value); - Py_XDECREF(type); - Py_XDECREF(tb); -#else - PyObject *tmp_type, *tmp_value, *tmp_tb; - tmp_type = tstate->curexc_type; - tmp_value = tstate->curexc_value; - tmp_tb = tstate->curexc_traceback; - tstate->curexc_type = type; - tstate->curexc_value = value; - tstate->curexc_traceback = tb; - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); -#endif -} -static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { -#if PY_VERSION_HEX >= 0x030C00A6 - PyObject* exc_value; - exc_value = tstate->current_exception; - tstate->current_exception = 0; - *value = exc_value; - *type = NULL; - *tb = NULL; - if (exc_value) { - *type = (PyObject*) Py_TYPE(exc_value); - Py_INCREF(*type); - #if CYTHON_COMPILING_IN_CPYTHON - *tb = ((PyBaseExceptionObject*) exc_value)->traceback; - Py_XINCREF(*tb); - #else - *tb = PyException_GetTraceback(exc_value); - #endif - } -#else - *type = tstate->curexc_type; - *value = tstate->curexc_value; - *tb = tstate->curexc_traceback; - tstate->curexc_type = 0; - 
tstate->curexc_value = 0; - tstate->curexc_traceback = 0; -#endif -} -#endif - -/* PyObjectGetAttrStr */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { - PyTypeObject* tp = Py_TYPE(obj); - if (likely(tp->tp_getattro)) - return tp->tp_getattro(obj, attr_name); -#if PY_MAJOR_VERSION < 3 - if (likely(tp->tp_getattr)) - return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); -#endif - return PyObject_GetAttr(obj, attr_name); -} -#endif - -/* PyObjectGetAttrStrNoError */ -#if __PYX_LIMITED_VERSION_HEX < 0x030d00A1 -static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError))) - __Pyx_PyErr_Clear(); -} -#endif -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) { - PyObject *result; -#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 - (void) PyObject_GetOptionalAttr(obj, attr_name, &result); - return result; -#else -#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1 - PyTypeObject* tp = Py_TYPE(obj); - if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) { - return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1); - } -#endif - result = __Pyx_PyObject_GetAttrStr(obj, attr_name); - if (unlikely(!result)) { - __Pyx_PyObject_GetAttrStr_ClearAttributeError(); - } - return result; -#endif -} - -/* GetBuiltinName */ -static PyObject *__Pyx_GetBuiltinName(PyObject *name) { - PyObject* result = __Pyx_PyObject_GetAttrStrNoError(__pyx_b, name); - if (unlikely(!result) && !PyErr_Occurred()) { - PyErr_Format(PyExc_NameError, -#if PY_MAJOR_VERSION >= 3 - "name '%U' is not defined", name); -#else - "name '%.200s' is not defined", PyString_AS_STRING(name)); -#endif - } - return result; -} - -/* GetTopmostException */ -#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE -static 
_PyErr_StackItem * -__Pyx_PyErr_GetTopmostException(PyThreadState *tstate) -{ - _PyErr_StackItem *exc_info = tstate->exc_info; - while ((exc_info->exc_value == NULL || exc_info->exc_value == Py_None) && - exc_info->previous_item != NULL) - { - exc_info = exc_info->previous_item; - } - return exc_info; -} -#endif - -/* SaveResetException */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { - #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 - _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); - PyObject *exc_value = exc_info->exc_value; - if (exc_value == NULL || exc_value == Py_None) { - *value = NULL; - *type = NULL; - *tb = NULL; - } else { - *value = exc_value; - Py_INCREF(*value); - *type = (PyObject*) Py_TYPE(exc_value); - Py_INCREF(*type); - *tb = PyException_GetTraceback(exc_value); - } - #elif CYTHON_USE_EXC_INFO_STACK - _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); - *type = exc_info->exc_type; - *value = exc_info->exc_value; - *tb = exc_info->exc_traceback; - Py_XINCREF(*type); - Py_XINCREF(*value); - Py_XINCREF(*tb); - #else - *type = tstate->exc_type; - *value = tstate->exc_value; - *tb = tstate->exc_traceback; - Py_XINCREF(*type); - Py_XINCREF(*value); - Py_XINCREF(*tb); - #endif -} -static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { - #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 - _PyErr_StackItem *exc_info = tstate->exc_info; - PyObject *tmp_value = exc_info->exc_value; - exc_info->exc_value = value; - Py_XDECREF(tmp_value); - Py_XDECREF(type); - Py_XDECREF(tb); - #else - PyObject *tmp_type, *tmp_value, *tmp_tb; - #if CYTHON_USE_EXC_INFO_STACK - _PyErr_StackItem *exc_info = tstate->exc_info; - tmp_type = exc_info->exc_type; - tmp_value = exc_info->exc_value; - tmp_tb = exc_info->exc_traceback; - 
exc_info->exc_type = type; - exc_info->exc_value = value; - exc_info->exc_traceback = tb; - #else - tmp_type = tstate->exc_type; - tmp_value = tstate->exc_value; - tmp_tb = tstate->exc_traceback; - tstate->exc_type = type; - tstate->exc_value = value; - tstate->exc_traceback = tb; - #endif - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); - #endif -} -#endif - -/* GetException */ -#if CYTHON_FAST_THREAD_STATE -static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) -#else -static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) -#endif -{ - PyObject *local_type = NULL, *local_value, *local_tb = NULL; -#if CYTHON_FAST_THREAD_STATE - PyObject *tmp_type, *tmp_value, *tmp_tb; - #if PY_VERSION_HEX >= 0x030C00A6 - local_value = tstate->current_exception; - tstate->current_exception = 0; - if (likely(local_value)) { - local_type = (PyObject*) Py_TYPE(local_value); - Py_INCREF(local_type); - local_tb = PyException_GetTraceback(local_value); - } - #else - local_type = tstate->curexc_type; - local_value = tstate->curexc_value; - local_tb = tstate->curexc_traceback; - tstate->curexc_type = 0; - tstate->curexc_value = 0; - tstate->curexc_traceback = 0; - #endif -#else - PyErr_Fetch(&local_type, &local_value, &local_tb); -#endif - PyErr_NormalizeException(&local_type, &local_value, &local_tb); -#if CYTHON_FAST_THREAD_STATE && PY_VERSION_HEX >= 0x030C00A6 - if (unlikely(tstate->current_exception)) -#elif CYTHON_FAST_THREAD_STATE - if (unlikely(tstate->curexc_type)) -#else - if (unlikely(PyErr_Occurred())) -#endif - goto bad; - #if PY_MAJOR_VERSION >= 3 - if (local_tb) { - if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0)) - goto bad; - } - #endif - Py_XINCREF(local_tb); - Py_XINCREF(local_type); - Py_XINCREF(local_value); - *type = local_type; - *value = local_value; - *tb = local_tb; -#if CYTHON_FAST_THREAD_STATE - #if CYTHON_USE_EXC_INFO_STACK - { - _PyErr_StackItem 
*exc_info = tstate->exc_info; - #if PY_VERSION_HEX >= 0x030B00a4 - tmp_value = exc_info->exc_value; - exc_info->exc_value = local_value; - tmp_type = NULL; - tmp_tb = NULL; - Py_XDECREF(local_type); - Py_XDECREF(local_tb); - #else - tmp_type = exc_info->exc_type; - tmp_value = exc_info->exc_value; - tmp_tb = exc_info->exc_traceback; - exc_info->exc_type = local_type; - exc_info->exc_value = local_value; - exc_info->exc_traceback = local_tb; - #endif - } - #else - tmp_type = tstate->exc_type; - tmp_value = tstate->exc_value; - tmp_tb = tstate->exc_traceback; - tstate->exc_type = local_type; - tstate->exc_value = local_value; - tstate->exc_traceback = local_tb; - #endif - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); -#else - PyErr_SetExcInfo(local_type, local_value, local_tb); -#endif - return 0; -bad: - *type = 0; - *value = 0; - *tb = 0; - Py_XDECREF(local_type); - Py_XDECREF(local_value); - Py_XDECREF(local_tb); - return -1; -} - -/* PyObjectCall */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { - PyObject *result; - ternaryfunc call = Py_TYPE(func)->tp_call; - if (unlikely(!call)) - return PyObject_Call(func, arg, kw); - #if PY_MAJOR_VERSION < 3 - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - #else - if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) - return NULL; - #endif - result = (*call)(func, arg, kw); - Py_LeaveRecursiveCall(); - if (unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -/* RaiseException */ -#if PY_MAJOR_VERSION < 3 -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { - __Pyx_PyThreadState_declare - CYTHON_UNUSED_VAR(cause); - Py_XINCREF(type); - if (!value || value == Py_None) - value = NULL; - else - 
Py_INCREF(value); - if (!tb || tb == Py_None) - tb = NULL; - else { - Py_INCREF(tb); - if (!PyTraceBack_Check(tb)) { - PyErr_SetString(PyExc_TypeError, - "raise: arg 3 must be a traceback or None"); - goto raise_error; - } - } - if (PyType_Check(type)) { -#if CYTHON_COMPILING_IN_PYPY - if (!value) { - Py_INCREF(Py_None); - value = Py_None; - } -#endif - PyErr_NormalizeException(&type, &value, &tb); - } else { - if (value) { - PyErr_SetString(PyExc_TypeError, - "instance exception may not have a separate value"); - goto raise_error; - } - value = type; - type = (PyObject*) Py_TYPE(type); - Py_INCREF(type); - if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { - PyErr_SetString(PyExc_TypeError, - "raise: exception class must be a subclass of BaseException"); - goto raise_error; - } - } - __Pyx_PyThreadState_assign - __Pyx_ErrRestore(type, value, tb); - return; -raise_error: - Py_XDECREF(value); - Py_XDECREF(type); - Py_XDECREF(tb); - return; -} -#else -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { - PyObject* owned_instance = NULL; - if (tb == Py_None) { - tb = 0; - } else if (tb && !PyTraceBack_Check(tb)) { - PyErr_SetString(PyExc_TypeError, - "raise: arg 3 must be a traceback or None"); - goto bad; - } - if (value == Py_None) - value = 0; - if (PyExceptionInstance_Check(type)) { - if (value) { - PyErr_SetString(PyExc_TypeError, - "instance exception may not have a separate value"); - goto bad; - } - value = type; - type = (PyObject*) Py_TYPE(value); - } else if (PyExceptionClass_Check(type)) { - PyObject *instance_class = NULL; - if (value && PyExceptionInstance_Check(value)) { - instance_class = (PyObject*) Py_TYPE(value); - if (instance_class != type) { - int is_subclass = PyObject_IsSubclass(instance_class, type); - if (!is_subclass) { - instance_class = NULL; - } else if (unlikely(is_subclass == -1)) { - goto bad; - } else { - type = instance_class; - } - } - } - if (!instance_class) { 
- PyObject *args; - if (!value) - args = PyTuple_New(0); - else if (PyTuple_Check(value)) { - Py_INCREF(value); - args = value; - } else - args = PyTuple_Pack(1, value); - if (!args) - goto bad; - owned_instance = PyObject_Call(type, args, NULL); - Py_DECREF(args); - if (!owned_instance) - goto bad; - value = owned_instance; - if (!PyExceptionInstance_Check(value)) { - PyErr_Format(PyExc_TypeError, - "calling %R should have returned an instance of " - "BaseException, not %R", - type, Py_TYPE(value)); - goto bad; - } - } - } else { - PyErr_SetString(PyExc_TypeError, - "raise: exception class must be a subclass of BaseException"); - goto bad; - } - if (cause) { - PyObject *fixed_cause; - if (cause == Py_None) { - fixed_cause = NULL; - } else if (PyExceptionClass_Check(cause)) { - fixed_cause = PyObject_CallObject(cause, NULL); - if (fixed_cause == NULL) - goto bad; - } else if (PyExceptionInstance_Check(cause)) { - fixed_cause = cause; - Py_INCREF(fixed_cause); - } else { - PyErr_SetString(PyExc_TypeError, - "exception causes must derive from " - "BaseException"); - goto bad; - } - PyException_SetCause(value, fixed_cause); - } - PyErr_SetObject(type, value); - if (tb) { - #if PY_VERSION_HEX >= 0x030C00A6 - PyException_SetTraceback(value, tb); - #elif CYTHON_FAST_THREAD_STATE - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject* tmp_tb = tstate->curexc_traceback; - if (tb != tmp_tb) { - Py_INCREF(tb); - tstate->curexc_traceback = tb; - Py_XDECREF(tmp_tb); - } -#else - PyObject *tmp_type, *tmp_value, *tmp_tb; - PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); - Py_INCREF(tb); - PyErr_Restore(tmp_type, tmp_value, tb); - Py_XDECREF(tmp_tb); -#endif - } -bad: - Py_XDECREF(owned_instance); - return; -} -#endif - -/* TupleAndListFromArray */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE void __Pyx_copy_object_array(PyObject *const *CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) { - PyObject *v; - Py_ssize_t i; - for (i = 0; i < 
length; i++) { - v = dest[i] = src[i]; - Py_INCREF(v); - } -} -static CYTHON_INLINE PyObject * -__Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) -{ - PyObject *res; - if (n <= 0) { - Py_INCREF(__pyx_empty_tuple); - return __pyx_empty_tuple; - } - res = PyTuple_New(n); - if (unlikely(res == NULL)) return NULL; - __Pyx_copy_object_array(src, ((PyTupleObject*)res)->ob_item, n); - return res; -} -static CYTHON_INLINE PyObject * -__Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n) -{ - PyObject *res; - if (n <= 0) { - return PyList_New(0); - } - res = PyList_New(n); - if (unlikely(res == NULL)) return NULL; - __Pyx_copy_object_array(src, ((PyListObject*)res)->ob_item, n); - return res; -} -#endif - -/* BytesEquals */ -static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { -#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API - return PyObject_RichCompareBool(s1, s2, equals); -#else - if (s1 == s2) { - return (equals == Py_EQ); - } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { - const char *ps1, *ps2; - Py_ssize_t length = PyBytes_GET_SIZE(s1); - if (length != PyBytes_GET_SIZE(s2)) - return (equals == Py_NE); - ps1 = PyBytes_AS_STRING(s1); - ps2 = PyBytes_AS_STRING(s2); - if (ps1[0] != ps2[0]) { - return (equals == Py_NE); - } else if (length == 1) { - return (equals == Py_EQ); - } else { - int result; -#if CYTHON_USE_UNICODE_INTERNALS && (PY_VERSION_HEX < 0x030B0000) - Py_hash_t hash1, hash2; - hash1 = ((PyBytesObject*)s1)->ob_shash; - hash2 = ((PyBytesObject*)s2)->ob_shash; - if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { - return (equals == Py_NE); - } -#endif - result = memcmp(ps1, ps2, (size_t)length); - return (equals == Py_EQ) ? 
(result == 0) : (result != 0); - } - } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { - return (equals == Py_NE); - } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { - return (equals == Py_NE); - } else { - int result; - PyObject* py_result = PyObject_RichCompare(s1, s2, equals); - if (!py_result) - return -1; - result = __Pyx_PyObject_IsTrue(py_result); - Py_DECREF(py_result); - return result; - } -#endif -} - -/* UnicodeEquals */ -static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { -#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API - return PyObject_RichCompareBool(s1, s2, equals); -#else -#if PY_MAJOR_VERSION < 3 - PyObject* owned_ref = NULL; -#endif - int s1_is_unicode, s2_is_unicode; - if (s1 == s2) { - goto return_eq; - } - s1_is_unicode = PyUnicode_CheckExact(s1); - s2_is_unicode = PyUnicode_CheckExact(s2); -#if PY_MAJOR_VERSION < 3 - if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { - owned_ref = PyUnicode_FromObject(s2); - if (unlikely(!owned_ref)) - return -1; - s2 = owned_ref; - s2_is_unicode = 1; - } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) { - owned_ref = PyUnicode_FromObject(s1); - if (unlikely(!owned_ref)) - return -1; - s1 = owned_ref; - s1_is_unicode = 1; - } else if (((!s2_is_unicode) & (!s1_is_unicode))) { - return __Pyx_PyBytes_Equals(s1, s2, equals); - } -#endif - if (s1_is_unicode & s2_is_unicode) { - Py_ssize_t length; - int kind; - void *data1, *data2; - if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0)) - return -1; - length = __Pyx_PyUnicode_GET_LENGTH(s1); - if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { - goto return_ne; - } -#if CYTHON_USE_UNICODE_INTERNALS - { - Py_hash_t hash1, hash2; - #if CYTHON_PEP393_ENABLED - hash1 = ((PyASCIIObject*)s1)->hash; - hash2 = ((PyASCIIObject*)s2)->hash; - #else - hash1 = ((PyUnicodeObject*)s1)->hash; - hash2 = ((PyUnicodeObject*)s2)->hash; - #endif - if 
(hash1 != hash2 && hash1 != -1 && hash2 != -1) { - goto return_ne; - } - } -#endif - kind = __Pyx_PyUnicode_KIND(s1); - if (kind != __Pyx_PyUnicode_KIND(s2)) { - goto return_ne; - } - data1 = __Pyx_PyUnicode_DATA(s1); - data2 = __Pyx_PyUnicode_DATA(s2); - if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { - goto return_ne; - } else if (length == 1) { - goto return_eq; - } else { - int result = memcmp(data1, data2, (size_t)(length * kind)); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_EQ) ? (result == 0) : (result != 0); - } - } else if ((s1 == Py_None) & s2_is_unicode) { - goto return_ne; - } else if ((s2 == Py_None) & s1_is_unicode) { - goto return_ne; - } else { - int result; - PyObject* py_result = PyObject_RichCompare(s1, s2, equals); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - if (!py_result) - return -1; - result = __Pyx_PyObject_IsTrue(py_result); - Py_DECREF(py_result); - return result; - } -return_eq: - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_EQ); -return_ne: - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_NE); -#endif -} - -/* fastcall */ -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s) -{ - Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames); - for (i = 0; i < n; i++) - { - if (s == PyTuple_GET_ITEM(kwnames, i)) return kwvalues[i]; - } - for (i = 0; i < n; i++) - { - int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ); - if (unlikely(eq != 0)) { - if (unlikely(eq < 0)) return NULL; - return kwvalues[i]; - } - } - return NULL; -} -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 -CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues) { - Py_ssize_t i, nkwargs = PyTuple_GET_SIZE(kwnames); - PyObject *dict; - dict = 
PyDict_New(); - if (unlikely(!dict)) - return NULL; - for (i=0; i= 3 - "%s() got multiple values for keyword argument '%U'", func_name, kw_name); - #else - "%s() got multiple values for keyword argument '%s'", func_name, - PyString_AsString(kw_name)); - #endif -} - -/* ParseKeywords */ -static int __Pyx_ParseOptionalKeywords( - PyObject *kwds, - PyObject *const *kwvalues, - PyObject **argnames[], - PyObject *kwds2, - PyObject *values[], - Py_ssize_t num_pos_args, - const char* function_name) -{ - PyObject *key = 0, *value = 0; - Py_ssize_t pos = 0; - PyObject*** name; - PyObject*** first_kw_arg = argnames + num_pos_args; - int kwds_is_tuple = CYTHON_METH_FASTCALL && likely(PyTuple_Check(kwds)); - while (1) { - Py_XDECREF(key); key = NULL; - Py_XDECREF(value); value = NULL; - if (kwds_is_tuple) { - Py_ssize_t size; -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(kwds); -#else - size = PyTuple_Size(kwds); - if (size < 0) goto bad; -#endif - if (pos >= size) break; -#if CYTHON_AVOID_BORROWED_REFS - key = __Pyx_PySequence_ITEM(kwds, pos); - if (!key) goto bad; -#elif CYTHON_ASSUME_SAFE_MACROS - key = PyTuple_GET_ITEM(kwds, pos); -#else - key = PyTuple_GetItem(kwds, pos); - if (!key) goto bad; -#endif - value = kwvalues[pos]; - pos++; - } - else - { - if (!PyDict_Next(kwds, &pos, &key, &value)) break; -#if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(key); -#endif - } - name = first_kw_arg; - while (*name && (**name != key)) name++; - if (*name) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(value); - Py_DECREF(key); -#endif - key = NULL; - value = NULL; - continue; - } -#if !CYTHON_AVOID_BORROWED_REFS - Py_INCREF(key); -#endif - Py_INCREF(value); - name = first_kw_arg; - #if PY_MAJOR_VERSION < 3 - if (likely(PyString_Check(key))) { - while (*name) { - if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) - && _PyString_Eq(**name, key)) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - 
value = NULL; -#endif - break; - } - name++; - } - if (*name) continue; - else { - PyObject*** argname = argnames; - while (argname != first_kw_arg) { - if ((**argname == key) || ( - (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) - && _PyString_Eq(**argname, key))) { - goto arg_passed_twice; - } - argname++; - } - } - } else - #endif - if (likely(PyUnicode_Check(key))) { - while (*name) { - int cmp = ( - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 - (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : - #endif - PyUnicode_Compare(**name, key) - ); - if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; - if (cmp == 0) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - value = NULL; -#endif - break; - } - name++; - } - if (*name) continue; - else { - PyObject*** argname = argnames; - while (argname != first_kw_arg) { - int cmp = (**argname == key) ? 0 : - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 - (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 
1 : - #endif - PyUnicode_Compare(**argname, key); - if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; - if (cmp == 0) goto arg_passed_twice; - argname++; - } - } - } else - goto invalid_keyword_type; - if (kwds2) { - if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; - } else { - goto invalid_keyword; - } - } - Py_XDECREF(key); - Py_XDECREF(value); - return 0; -arg_passed_twice: - __Pyx_RaiseDoubleKeywordsError(function_name, key); - goto bad; -invalid_keyword_type: - PyErr_Format(PyExc_TypeError, - "%.200s() keywords must be strings", function_name); - goto bad; -invalid_keyword: - #if PY_MAJOR_VERSION < 3 - PyErr_Format(PyExc_TypeError, - "%.200s() got an unexpected keyword argument '%.200s'", - function_name, PyString_AsString(key)); - #else - PyErr_Format(PyExc_TypeError, - "%s() got an unexpected keyword argument '%U'", - function_name, key); - #endif -bad: - Py_XDECREF(key); - Py_XDECREF(value); - return -1; -} - -/* RaiseArgTupleInvalid */ -static void __Pyx_RaiseArgtupleInvalid( - const char* func_name, - int exact, - Py_ssize_t num_min, - Py_ssize_t num_max, - Py_ssize_t num_found) -{ - Py_ssize_t num_expected; - const char *more_or_less; - if (num_found < num_min) { - num_expected = num_min; - more_or_less = "at least"; - } else { - num_expected = num_max; - more_or_less = "at most"; - } - if (exact) { - more_or_less = "exactly"; - } - PyErr_Format(PyExc_TypeError, - "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", - func_name, more_or_less, num_expected, - (num_expected == 1) ? 
"" : "s", num_found); -} - -/* ArgTypeTest */ -static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) -{ - __Pyx_TypeName type_name; - __Pyx_TypeName obj_type_name; - if (unlikely(!type)) { - PyErr_SetString(PyExc_SystemError, "Missing type object"); - return 0; - } - else if (exact) { - #if PY_MAJOR_VERSION == 2 - if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; - #endif - } - else { - if (likely(__Pyx_TypeCheck(obj, type))) return 1; - } - type_name = __Pyx_PyType_GetName(type); - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "Argument '%.200s' has incorrect type (expected " __Pyx_FMT_TYPENAME - ", got " __Pyx_FMT_TYPENAME ")", name, type_name, obj_type_name); - __Pyx_DECREF_TypeName(type_name); - __Pyx_DECREF_TypeName(obj_type_name); - return 0; -} - -/* IsLittleEndian */ -static CYTHON_INLINE int __Pyx_Is_Little_Endian(void) -{ - union { - uint32_t u32; - uint8_t u8[4]; - } S; - S.u32 = 0x01020304; - return S.u8[0] == 4; -} - -/* BufferFormatCheck */ -static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, - __Pyx_BufFmt_StackElem* stack, - __Pyx_TypeInfo* type) { - stack[0].field = &ctx->root; - stack[0].parent_offset = 0; - ctx->root.type = type; - ctx->root.name = "buffer dtype"; - ctx->root.offset = 0; - ctx->head = stack; - ctx->head->field = &ctx->root; - ctx->fmt_offset = 0; - ctx->head->parent_offset = 0; - ctx->new_packmode = '@'; - ctx->enc_packmode = '@'; - ctx->new_count = 1; - ctx->enc_count = 0; - ctx->enc_type = 0; - ctx->is_complex = 0; - ctx->is_valid_array = 0; - ctx->struct_alignment = 0; - while (type->typegroup == 'S') { - ++ctx->head; - ctx->head->field = type->fields; - ctx->head->parent_offset = 0; - type = type->fields->type; - } -} -static int __Pyx_BufFmt_ParseNumber(const char** ts) { - int count; - const char* t = *ts; - if (*t < '0' || *t > '9') { - return -1; - } else { - count = *t++ - '0'; - while (*t >= '0' 
&& *t <= '9') { - count *= 10; - count += *t++ - '0'; - } - } - *ts = t; - return count; -} -static int __Pyx_BufFmt_ExpectNumber(const char **ts) { - int number = __Pyx_BufFmt_ParseNumber(ts); - if (number == -1) - PyErr_Format(PyExc_ValueError,\ - "Does not understand character buffer dtype format string ('%c')", **ts); - return number; -} -static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) { - PyErr_Format(PyExc_ValueError, - "Unexpected format string character: '%c'", ch); -} -static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) { - switch (ch) { - case '?': return "'bool'"; - case 'c': return "'char'"; - case 'b': return "'signed char'"; - case 'B': return "'unsigned char'"; - case 'h': return "'short'"; - case 'H': return "'unsigned short'"; - case 'i': return "'int'"; - case 'I': return "'unsigned int'"; - case 'l': return "'long'"; - case 'L': return "'unsigned long'"; - case 'q': return "'long long'"; - case 'Q': return "'unsigned long long'"; - case 'f': return (is_complex ? "'complex float'" : "'float'"); - case 'd': return (is_complex ? "'complex double'" : "'double'"); - case 'g': return (is_complex ? "'complex long double'" : "'long double'"); - case 'T': return "a struct"; - case 'O': return "Python object"; - case 'P': return "a pointer"; - case 's': case 'p': return "a string"; - case 0: return "end"; - default: return "unparsable format string"; - } -} -static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) { - switch (ch) { - case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; - case 'h': case 'H': return 2; - case 'i': case 'I': case 'l': case 'L': return 4; - case 'q': case 'Q': return 8; - case 'f': return (is_complex ? 8 : 4); - case 'd': return (is_complex ? 
16 : 8); - case 'g': { - PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g').."); - return 0; - } - case 'O': case 'P': return sizeof(void*); - default: - __Pyx_BufFmt_RaiseUnexpectedChar(ch); - return 0; - } -} -static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) { - switch (ch) { - case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; - case 'h': case 'H': return sizeof(short); - case 'i': case 'I': return sizeof(int); - case 'l': case 'L': return sizeof(long); - #ifdef HAVE_LONG_LONG - case 'q': case 'Q': return sizeof(PY_LONG_LONG); - #endif - case 'f': return sizeof(float) * (is_complex ? 2 : 1); - case 'd': return sizeof(double) * (is_complex ? 2 : 1); - case 'g': return sizeof(long double) * (is_complex ? 2 : 1); - case 'O': case 'P': return sizeof(void*); - default: { - __Pyx_BufFmt_RaiseUnexpectedChar(ch); - return 0; - } - } -} -typedef struct { char c; short x; } __Pyx_st_short; -typedef struct { char c; int x; } __Pyx_st_int; -typedef struct { char c; long x; } __Pyx_st_long; -typedef struct { char c; float x; } __Pyx_st_float; -typedef struct { char c; double x; } __Pyx_st_double; -typedef struct { char c; long double x; } __Pyx_st_longdouble; -typedef struct { char c; void *x; } __Pyx_st_void_p; -#ifdef HAVE_LONG_LONG -typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong; -#endif -static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, int is_complex) { - CYTHON_UNUSED_VAR(is_complex); - switch (ch) { - case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; - case 'h': case 'H': return sizeof(__Pyx_st_short) - sizeof(short); - case 'i': case 'I': return sizeof(__Pyx_st_int) - sizeof(int); - case 'l': case 'L': return sizeof(__Pyx_st_long) - sizeof(long); -#ifdef HAVE_LONG_LONG - case 'q': case 'Q': return sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG); -#endif - case 'f': return sizeof(__Pyx_st_float) - sizeof(float); - case 'd': 
return sizeof(__Pyx_st_double) - sizeof(double); - case 'g': return sizeof(__Pyx_st_longdouble) - sizeof(long double); - case 'P': case 'O': return sizeof(__Pyx_st_void_p) - sizeof(void*); - default: - __Pyx_BufFmt_RaiseUnexpectedChar(ch); - return 0; - } -} -/* These are for computing the padding at the end of the struct to align - on the first member of the struct. This will probably the same as above, - but we don't have any guarantees. - */ -typedef struct { short x; char c; } __Pyx_pad_short; -typedef struct { int x; char c; } __Pyx_pad_int; -typedef struct { long x; char c; } __Pyx_pad_long; -typedef struct { float x; char c; } __Pyx_pad_float; -typedef struct { double x; char c; } __Pyx_pad_double; -typedef struct { long double x; char c; } __Pyx_pad_longdouble; -typedef struct { void *x; char c; } __Pyx_pad_void_p; -#ifdef HAVE_LONG_LONG -typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong; -#endif -static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, int is_complex) { - CYTHON_UNUSED_VAR(is_complex); - switch (ch) { - case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; - case 'h': case 'H': return sizeof(__Pyx_pad_short) - sizeof(short); - case 'i': case 'I': return sizeof(__Pyx_pad_int) - sizeof(int); - case 'l': case 'L': return sizeof(__Pyx_pad_long) - sizeof(long); -#ifdef HAVE_LONG_LONG - case 'q': case 'Q': return sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG); -#endif - case 'f': return sizeof(__Pyx_pad_float) - sizeof(float); - case 'd': return sizeof(__Pyx_pad_double) - sizeof(double); - case 'g': return sizeof(__Pyx_pad_longdouble) - sizeof(long double); - case 'P': case 'O': return sizeof(__Pyx_pad_void_p) - sizeof(void*); - default: - __Pyx_BufFmt_RaiseUnexpectedChar(ch); - return 0; - } -} -static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) { - switch (ch) { - case 'c': - return 'H'; - case 'b': case 'h': case 'i': - case 'l': case 'q': case 's': case 'p': - return 'I'; - case '?': case 'B': case 
'H': case 'I': case 'L': case 'Q': - return 'U'; - case 'f': case 'd': case 'g': - return (is_complex ? 'C' : 'R'); - case 'O': - return 'O'; - case 'P': - return 'P'; - default: { - __Pyx_BufFmt_RaiseUnexpectedChar(ch); - return 0; - } - } -} -static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) { - if (ctx->head == NULL || ctx->head->field == &ctx->root) { - const char* expected; - const char* quote; - if (ctx->head == NULL) { - expected = "end"; - quote = ""; - } else { - expected = ctx->head->field->type->name; - quote = "'"; - } - PyErr_Format(PyExc_ValueError, - "Buffer dtype mismatch, expected %s%s%s but got %s", - quote, expected, quote, - __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex)); - } else { - __Pyx_StructField* field = ctx->head->field; - __Pyx_StructField* parent = (ctx->head - 1)->field; - PyErr_Format(PyExc_ValueError, - "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'", - field->type->name, __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex), - parent->type->name, field->name); - } -} -static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) { - char group; - size_t size, offset, arraysize = 1; - if (ctx->enc_type == 0) return 0; - if (ctx->head->field->type->arraysize[0]) { - int i, ndim = 0; - if (ctx->enc_type == 's' || ctx->enc_type == 'p') { - ctx->is_valid_array = ctx->head->field->type->ndim == 1; - ndim = 1; - if (ctx->enc_count != ctx->head->field->type->arraysize[0]) { - PyErr_Format(PyExc_ValueError, - "Expected a dimension of size %zu, got %zu", - ctx->head->field->type->arraysize[0], ctx->enc_count); - return -1; - } - } - if (!ctx->is_valid_array) { - PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d", - ctx->head->field->type->ndim, ndim); - return -1; - } - for (i = 0; i < ctx->head->field->type->ndim; i++) { - arraysize *= ctx->head->field->type->arraysize[i]; - } - ctx->is_valid_array = 0; - ctx->enc_count = 1; - } - group = 
__Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex); - do { - __Pyx_StructField* field = ctx->head->field; - __Pyx_TypeInfo* type = field->type; - if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') { - size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex); - } else { - size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex); - } - if (ctx->enc_packmode == '@') { - size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex); - size_t align_mod_offset; - if (align_at == 0) return -1; - align_mod_offset = ctx->fmt_offset % align_at; - if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset; - if (ctx->struct_alignment == 0) - ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type, - ctx->is_complex); - } - if (type->size != size || type->typegroup != group) { - if (type->typegroup == 'C' && type->fields != NULL) { - size_t parent_offset = ctx->head->parent_offset + field->offset; - ++ctx->head; - ctx->head->field = type->fields; - ctx->head->parent_offset = parent_offset; - continue; - } - if ((type->typegroup == 'H' || group == 'H') && type->size == size) { - } else { - __Pyx_BufFmt_RaiseExpected(ctx); - return -1; - } - } - offset = ctx->head->parent_offset + field->offset; - if (ctx->fmt_offset != offset) { - PyErr_Format(PyExc_ValueError, - "Buffer dtype mismatch; next field is at offset %" CYTHON_FORMAT_SSIZE_T "d but %" CYTHON_FORMAT_SSIZE_T "d expected", - (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset); - return -1; - } - ctx->fmt_offset += size; - if (arraysize) - ctx->fmt_offset += (arraysize - 1) * size; - --ctx->enc_count; - while (1) { - if (field == &ctx->root) { - ctx->head = NULL; - if (ctx->enc_count != 0) { - __Pyx_BufFmt_RaiseExpected(ctx); - return -1; - } - break; - } - ctx->head->field = ++field; - if (field->type == NULL) { - --ctx->head; - field = ctx->head->field; - continue; - } else if (field->type->typegroup == 'S') { - size_t 
parent_offset = ctx->head->parent_offset + field->offset; - if (field->type->fields->type == NULL) continue; - field = field->type->fields; - ++ctx->head; - ctx->head->field = field; - ctx->head->parent_offset = parent_offset; - break; - } else { - break; - } - } - } while (ctx->enc_count); - ctx->enc_type = 0; - ctx->is_complex = 0; - return 0; -} -static int -__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp) -{ - const char *ts = *tsp; - int i = 0, number, ndim; - ++ts; - if (ctx->new_count != 1) { - PyErr_SetString(PyExc_ValueError, - "Cannot handle repeated arrays in format string"); - return -1; - } - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return -1; - ndim = ctx->head->field->type->ndim; - while (*ts && *ts != ')') { - switch (*ts) { - case ' ': case '\f': case '\r': case '\n': case '\t': case '\v': continue; - default: break; - } - number = __Pyx_BufFmt_ExpectNumber(&ts); - if (number == -1) return -1; - if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i]) { - PyErr_Format(PyExc_ValueError, - "Expected a dimension of size %zu, got %d", - ctx->head->field->type->arraysize[i], number); - return -1; - } - if (*ts != ',' && *ts != ')') { - PyErr_Format(PyExc_ValueError, - "Expected a comma in format string, got '%c'", *ts); - return -1; - } - if (*ts == ',') ts++; - i++; - } - if (i != ndim) { - PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d", - ctx->head->field->type->ndim, i); - return -1; - } - if (!*ts) { - PyErr_SetString(PyExc_ValueError, - "Unexpected end of format string, expected ')'"); - return -1; - } - ctx->is_valid_array = 1; - ctx->new_count = 1; - *tsp = ++ts; - return 0; -} -static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) { - int got_Z = 0; - while (1) { - switch(*ts) { - case 0: - if (ctx->enc_type != 0 && ctx->head == NULL) { - __Pyx_BufFmt_RaiseExpected(ctx); - return NULL; - } - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; - 
if (ctx->head != NULL) { - __Pyx_BufFmt_RaiseExpected(ctx); - return NULL; - } - return ts; - case ' ': - case '\r': - case '\n': - ++ts; - break; - case '<': - if (!__Pyx_Is_Little_Endian()) { - PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler"); - return NULL; - } - ctx->new_packmode = '='; - ++ts; - break; - case '>': - case '!': - if (__Pyx_Is_Little_Endian()) { - PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler"); - return NULL; - } - ctx->new_packmode = '='; - ++ts; - break; - case '=': - case '@': - case '^': - ctx->new_packmode = *ts++; - break; - case 'T': - { - const char* ts_after_sub; - size_t i, struct_count = ctx->new_count; - size_t struct_alignment = ctx->struct_alignment; - ctx->new_count = 1; - ++ts; - if (*ts != '{') { - PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'"); - return NULL; - } - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; - ctx->enc_type = 0; - ctx->enc_count = 0; - ctx->struct_alignment = 0; - ++ts; - ts_after_sub = ts; - for (i = 0; i != struct_count; ++i) { - ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts); - if (!ts_after_sub) return NULL; - } - ts = ts_after_sub; - if (struct_alignment) ctx->struct_alignment = struct_alignment; - } - break; - case '}': - { - size_t alignment = ctx->struct_alignment; - ++ts; - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; - ctx->enc_type = 0; - if (alignment && ctx->fmt_offset % alignment) { - ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment); - } - } - return ts; - case 'x': - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; - ctx->fmt_offset += ctx->new_count; - ctx->new_count = 1; - ctx->enc_count = 0; - ctx->enc_type = 0; - ctx->enc_packmode = ctx->new_packmode; - ++ts; - break; - case 'Z': - got_Z = 1; - ++ts; - if (*ts != 'f' && *ts != 'd' && *ts != 'g') { - __Pyx_BufFmt_RaiseUnexpectedChar('Z'); - return NULL; - } - 
CYTHON_FALLTHROUGH; - case '?': case 'c': case 'b': case 'B': case 'h': case 'H': case 'i': case 'I': - case 'l': case 'L': case 'q': case 'Q': - case 'f': case 'd': case 'g': - case 'O': case 'p': - if ((ctx->enc_type == *ts) && (got_Z == ctx->is_complex) && - (ctx->enc_packmode == ctx->new_packmode) && (!ctx->is_valid_array)) { - ctx->enc_count += ctx->new_count; - ctx->new_count = 1; - got_Z = 0; - ++ts; - break; - } - CYTHON_FALLTHROUGH; - case 's': - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; - ctx->enc_count = ctx->new_count; - ctx->enc_packmode = ctx->new_packmode; - ctx->enc_type = *ts; - ctx->is_complex = got_Z; - ++ts; - ctx->new_count = 1; - got_Z = 0; - break; - case ':': - ++ts; - while(*ts != ':') ++ts; - ++ts; - break; - case '(': - if (__pyx_buffmt_parse_array(ctx, &ts) < 0) return NULL; - break; - default: - { - int number = __Pyx_BufFmt_ExpectNumber(&ts); - if (number == -1) return NULL; - ctx->new_count = (size_t)number; - } - } - } -} - -/* BufferGetAndValidate */ - static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info) { - if (unlikely(info->buf == NULL)) return; - if (info->suboffsets == __Pyx_minusones) info->suboffsets = NULL; - __Pyx_ReleaseBuffer(info); -} -static void __Pyx_ZeroBuffer(Py_buffer* buf) { - buf->buf = NULL; - buf->obj = NULL; - buf->strides = __Pyx_zeros; - buf->shape = __Pyx_zeros; - buf->suboffsets = __Pyx_minusones; -} -static int __Pyx__GetBufferAndValidate( - Py_buffer* buf, PyObject* obj, __Pyx_TypeInfo* dtype, int flags, - int nd, int cast, __Pyx_BufFmt_StackElem* stack) -{ - buf->buf = NULL; - if (unlikely(__Pyx_GetBuffer(obj, buf, flags) == -1)) { - __Pyx_ZeroBuffer(buf); - return -1; - } - if (unlikely(buf->ndim != nd)) { - PyErr_Format(PyExc_ValueError, - "Buffer has wrong number of dimensions (expected %d, got %d)", - nd, buf->ndim); - goto fail; - } - if (!cast) { - __Pyx_BufFmt_Context ctx; - __Pyx_BufFmt_Init(&ctx, stack, dtype); - if (!__Pyx_BufFmt_CheckString(&ctx, buf->format)) 
goto fail; - } - if (unlikely((size_t)buf->itemsize != dtype->size)) { - PyErr_Format(PyExc_ValueError, - "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "d byte%s) does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "d byte%s)", - buf->itemsize, (buf->itemsize > 1) ? "s" : "", - dtype->name, (Py_ssize_t)dtype->size, (dtype->size > 1) ? "s" : ""); - goto fail; - } - if (buf->suboffsets == NULL) buf->suboffsets = __Pyx_minusones; - return 0; -fail:; - __Pyx_SafeReleaseBuffer(buf); - return -1; -} - -/* GetItemInt */ - static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { - PyObject *r; - if (unlikely(!j)) return NULL; - r = PyObject_GetItem(o, j); - Py_DECREF(j); - return r; -} -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, - CYTHON_NCP_UNUSED int wraparound, - CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - Py_ssize_t wrapped_i = i; - if (wraparound & unlikely(i < 0)) { - wrapped_i += PyList_GET_SIZE(o); - } - if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyList_GET_SIZE(o)))) { - PyObject *r = PyList_GET_ITEM(o, wrapped_i); - Py_INCREF(r); - return r; - } - return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); -#else - return PySequence_GetItem(o, i); -#endif -} -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, - CYTHON_NCP_UNUSED int wraparound, - CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - Py_ssize_t wrapped_i = i; - if (wraparound & unlikely(i < 0)) { - wrapped_i += PyTuple_GET_SIZE(o); - } - if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyTuple_GET_SIZE(o)))) { - PyObject *r = PyTuple_GET_ITEM(o, wrapped_i); - Py_INCREF(r); - return r; - } - return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); -#else - return PySequence_GetItem(o, i); -#endif -} -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t 
i, int is_list, - CYTHON_NCP_UNUSED int wraparound, - CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS - if (is_list || PyList_CheckExact(o)) { - Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); - if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) { - PyObject *r = PyList_GET_ITEM(o, n); - Py_INCREF(r); - return r; - } - } - else if (PyTuple_CheckExact(o)) { - Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); - if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) { - PyObject *r = PyTuple_GET_ITEM(o, n); - Py_INCREF(r); - return r; - } - } else { - PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; - PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; - if (mm && mm->mp_subscript) { - PyObject *r, *key = PyInt_FromSsize_t(i); - if (unlikely(!key)) return NULL; - r = mm->mp_subscript(o, key); - Py_DECREF(key); - return r; - } - if (likely(sm && sm->sq_item)) { - if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { - Py_ssize_t l = sm->sq_length(o); - if (likely(l >= 0)) { - i += l; - } else { - if (!PyErr_ExceptionMatches(PyExc_OverflowError)) - return NULL; - PyErr_Clear(); - } - } - return sm->sq_item(o, i); - } - } -#else - if (is_list || !PyMapping_Check(o)) { - return PySequence_GetItem(o, i); - } -#endif - return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); -} - -/* PyFunctionFastCall */ - #if CYTHON_FAST_PYCALL && !CYTHON_VECTORCALL -static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, - PyObject *globals) { - PyFrameObject *f; - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject **fastlocals; - Py_ssize_t i; - PyObject *result; - assert(globals != NULL); - /* XXX Perhaps we should create a specialized - PyFrame_New() that doesn't take locals, but does - take builtins without sanity checking them. 
- */ - assert(tstate != NULL); - f = PyFrame_New(tstate, co, globals, NULL); - if (f == NULL) { - return NULL; - } - fastlocals = __Pyx_PyFrame_GetLocalsplus(f); - for (i = 0; i < na; i++) { - Py_INCREF(*args); - fastlocals[i] = *args++; - } - result = PyEval_EvalFrameEx(f,0); - ++tstate->recursion_depth; - Py_DECREF(f); - --tstate->recursion_depth; - return result; -} -static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { - PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); - PyObject *globals = PyFunction_GET_GLOBALS(func); - PyObject *argdefs = PyFunction_GET_DEFAULTS(func); - PyObject *closure; -#if PY_MAJOR_VERSION >= 3 - PyObject *kwdefs; -#endif - PyObject *kwtuple, **k; - PyObject **d; - Py_ssize_t nd; - Py_ssize_t nk; - PyObject *result; - assert(kwargs == NULL || PyDict_Check(kwargs)); - nk = kwargs ? PyDict_Size(kwargs) : 0; - #if PY_MAJOR_VERSION < 3 - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) { - return NULL; - } - #else - if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) { - return NULL; - } - #endif - if ( -#if PY_MAJOR_VERSION >= 3 - co->co_kwonlyargcount == 0 && -#endif - likely(kwargs == NULL || nk == 0) && - co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { - if (argdefs == NULL && co->co_argcount == nargs) { - result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); - goto done; - } - else if (nargs == 0 && argdefs != NULL - && co->co_argcount == Py_SIZE(argdefs)) { - /* function called with no arguments, but all parameters have - a default value: use default values as arguments .*/ - args = &PyTuple_GET_ITEM(argdefs, 0); - result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); - goto done; - } - } - if (kwargs != NULL) { - Py_ssize_t pos, i; - kwtuple = PyTuple_New(2 * nk); - if (kwtuple == NULL) { - result = NULL; - goto done; - } - k = &PyTuple_GET_ITEM(kwtuple, 0); - pos 
= i = 0; - while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { - Py_INCREF(k[i]); - Py_INCREF(k[i+1]); - i += 2; - } - nk = i / 2; - } - else { - kwtuple = NULL; - k = NULL; - } - closure = PyFunction_GET_CLOSURE(func); -#if PY_MAJOR_VERSION >= 3 - kwdefs = PyFunction_GET_KW_DEFAULTS(func); -#endif - if (argdefs != NULL) { - d = &PyTuple_GET_ITEM(argdefs, 0); - nd = Py_SIZE(argdefs); - } - else { - d = NULL; - nd = 0; - } -#if PY_MAJOR_VERSION >= 3 - result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, - args, (int)nargs, - k, (int)nk, - d, (int)nd, kwdefs, closure); -#else - result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, - args, (int)nargs, - k, (int)nk, - d, (int)nd, closure); -#endif - Py_XDECREF(kwtuple); -done: - Py_LeaveRecursiveCall(); - return result; -} -#endif - -/* PyObjectCallMethO */ - #if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { - PyObject *self, *result; - PyCFunction cfunc; - cfunc = __Pyx_CyOrPyCFunction_GET_FUNCTION(func); - self = __Pyx_CyOrPyCFunction_GET_SELF(func); - #if PY_MAJOR_VERSION < 3 - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - #else - if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) - return NULL; - #endif - result = cfunc(self, arg); - Py_LeaveRecursiveCall(); - if (unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -/* PyObjectFastCall */ - #if PY_VERSION_HEX < 0x03090000 || CYTHON_COMPILING_IN_LIMITED_API -static PyObject* __Pyx_PyObject_FastCall_fallback(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs) { - PyObject *argstuple; - PyObject *result = 0; - size_t i; - argstuple = PyTuple_New((Py_ssize_t)nargs); - if (unlikely(!argstuple)) return NULL; - for (i = 0; i < nargs; i++) { - Py_INCREF(args[i]); - if 
(__Pyx_PyTuple_SET_ITEM(argstuple, (Py_ssize_t)i, args[i]) < 0) goto bad; - } - result = __Pyx_PyObject_Call(func, argstuple, kwargs); - bad: - Py_DECREF(argstuple); - return result; -} -#endif -static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t _nargs, PyObject *kwargs) { - Py_ssize_t nargs = __Pyx_PyVectorcall_NARGS(_nargs); -#if CYTHON_COMPILING_IN_CPYTHON - if (nargs == 0 && kwargs == NULL) { - if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_NOARGS)) - return __Pyx_PyObject_CallMethO(func, NULL); - } - else if (nargs == 1 && kwargs == NULL) { - if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_O)) - return __Pyx_PyObject_CallMethO(func, args[0]); - } -#endif - #if PY_VERSION_HEX < 0x030800B1 - #if CYTHON_FAST_PYCCALL - if (PyCFunction_Check(func)) { - if (kwargs) { - return _PyCFunction_FastCallDict(func, args, nargs, kwargs); - } else { - return _PyCFunction_FastCallKeywords(func, args, nargs, NULL); - } - } - #if PY_VERSION_HEX >= 0x030700A1 - if (!kwargs && __Pyx_IS_TYPE(func, &PyMethodDescr_Type)) { - return _PyMethodDescr_FastCallKeywords(func, args, nargs, NULL); - } - #endif - #endif - #if CYTHON_FAST_PYCALL - if (PyFunction_Check(func)) { - return __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs); - } - #endif - #endif - if (kwargs == NULL) { - #if CYTHON_VECTORCALL - #if PY_VERSION_HEX < 0x03090000 - vectorcallfunc f = _PyVectorcall_Function(func); - #else - vectorcallfunc f = PyVectorcall_Function(func); - #endif - if (f) { - return f(func, args, (size_t)nargs, NULL); - } - #elif defined(__Pyx_CyFunction_USED) && CYTHON_BACKPORT_VECTORCALL - if (__Pyx_CyFunction_CheckExact(func)) { - __pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func); - if (f) return f(func, args, (size_t)nargs, NULL); - } - #endif - } - if (nargs == 0) { - return __Pyx_PyObject_Call(func, __pyx_empty_tuple, kwargs); - } - #if 
PY_VERSION_HEX >= 0x03090000 && !CYTHON_COMPILING_IN_LIMITED_API - return PyObject_VectorcallDict(func, args, (size_t)nargs, kwargs); - #else - return __Pyx_PyObject_FastCall_fallback(func, args, (size_t)nargs, kwargs); - #endif -} - -/* PyObjectCallOneArg */ - static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject *args[2] = {NULL, arg}; - return __Pyx_PyObject_FastCall(func, args+1, 1 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); -} - -/* ObjectGetItem */ - #if CYTHON_USE_TYPE_SLOTS -static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject *index) { - PyObject *runerr = NULL; - Py_ssize_t key_value; - key_value = __Pyx_PyIndex_AsSsize_t(index); - if (likely(key_value != -1 || !(runerr = PyErr_Occurred()))) { - return __Pyx_GetItemInt_Fast(obj, key_value, 0, 1, 1); - } - if (PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) { - __Pyx_TypeName index_type_name = __Pyx_PyType_GetName(Py_TYPE(index)); - PyErr_Clear(); - PyErr_Format(PyExc_IndexError, - "cannot fit '" __Pyx_FMT_TYPENAME "' into an index-sized integer", index_type_name); - __Pyx_DECREF_TypeName(index_type_name); - } - return NULL; -} -static PyObject *__Pyx_PyObject_GetItem_Slow(PyObject *obj, PyObject *key) { - __Pyx_TypeName obj_type_name; - if (likely(PyType_Check(obj))) { - PyObject *meth = __Pyx_PyObject_GetAttrStrNoError(obj, __pyx_n_s_class_getitem); - if (!meth) { - PyErr_Clear(); - } else { - PyObject *result = __Pyx_PyObject_CallOneArg(meth, key); - Py_DECREF(meth); - return result; - } - } - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "'" __Pyx_FMT_TYPENAME "' object is not subscriptable", obj_type_name); - __Pyx_DECREF_TypeName(obj_type_name); - return NULL; -} -static PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject *key) { - PyTypeObject *tp = Py_TYPE(obj); - PyMappingMethods *mm = tp->tp_as_mapping; - PySequenceMethods *sm = tp->tp_as_sequence; - if (likely(mm && mm->mp_subscript)) { 
- return mm->mp_subscript(obj, key); - } - if (likely(sm && sm->sq_item)) { - return __Pyx_PyObject_GetIndex(obj, key); - } - return __Pyx_PyObject_GetItem_Slow(obj, key); -} -#endif - -/* ExtTypeTest */ - static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) { - __Pyx_TypeName obj_type_name; - __Pyx_TypeName type_name; - if (unlikely(!type)) { - PyErr_SetString(PyExc_SystemError, "Missing type object"); - return 0; - } - if (likely(__Pyx_TypeCheck(obj, type))) - return 1; - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - type_name = __Pyx_PyType_GetName(type); - PyErr_Format(PyExc_TypeError, - "Cannot convert " __Pyx_FMT_TYPENAME " to " __Pyx_FMT_TYPENAME, - obj_type_name, type_name); - __Pyx_DECREF_TypeName(obj_type_name); - __Pyx_DECREF_TypeName(type_name); - return 0; -} - -/* PyDictVersioning */ - #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { - PyObject *dict = Py_TYPE(obj)->tp_dict; - return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0; -} -static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { - PyObject **dictptr = NULL; - Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; - if (offset) { -#if CYTHON_COMPILING_IN_CPYTHON - dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); -#else - dictptr = _PyObject_GetDictPtr(obj); -#endif - } - return (dictptr && *dictptr) ? 
__PYX_GET_DICT_VERSION(*dictptr) : 0; -} -static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { - PyObject *dict = Py_TYPE(obj)->tp_dict; - if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) - return 0; - return obj_dict_version == __Pyx_get_object_dict_version(obj); -} -#endif - -/* GetModuleGlobalName */ - #if CYTHON_USE_DICT_VERSIONS -static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value) -#else -static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name) -#endif -{ - PyObject *result; -#if !CYTHON_AVOID_BORROWED_REFS -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && PY_VERSION_HEX < 0x030d0000 - result = _PyDict_GetItem_KnownHash(__pyx_d, name, ((PyASCIIObject *) name)->hash); - __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) - if (likely(result)) { - return __Pyx_NewRef(result); - } else if (unlikely(PyErr_Occurred())) { - return NULL; - } -#elif CYTHON_COMPILING_IN_LIMITED_API - if (unlikely(!__pyx_m)) { - return NULL; - } - result = PyObject_GetAttr(__pyx_m, name); - if (likely(result)) { - return result; - } -#else - result = PyDict_GetItem(__pyx_d, name); - __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) - if (likely(result)) { - return __Pyx_NewRef(result); - } -#endif -#else - result = PyObject_GetItem(__pyx_d, name); - __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) - if (likely(result)) { - return __Pyx_NewRef(result); - } - PyErr_Clear(); -#endif - return __Pyx_GetBuiltinName(name); -} - - -static CYTHON_INLINE void* __Pyx_BufPtrFull1d_imp(void* buf, Py_ssize_t i0, Py_ssize_t s0, Py_ssize_t o0) { - char* ptr = (char*)buf; -ptr += s0 * i0; -if (o0 >= 0) ptr = *((char**)ptr) + o0; - -return ptr; -} - /* TypeImport */ - #ifndef __PYX_HAVE_RT_ImportType_3_0_11 -#define 
__PYX_HAVE_RT_ImportType_3_0_11 -static PyTypeObject *__Pyx_ImportType_3_0_11(PyObject *module, const char *module_name, const char *class_name, - size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_0_11 check_size) -{ - PyObject *result = 0; - char warning[200]; - Py_ssize_t basicsize; - Py_ssize_t itemsize; -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject *py_basicsize; - PyObject *py_itemsize; -#endif - result = PyObject_GetAttrString(module, class_name); - if (!result) - goto bad; - if (!PyType_Check(result)) { - PyErr_Format(PyExc_TypeError, - "%.200s.%.200s is not a type object", - module_name, class_name); - goto bad; - } -#if !CYTHON_COMPILING_IN_LIMITED_API - basicsize = ((PyTypeObject *)result)->tp_basicsize; - itemsize = ((PyTypeObject *)result)->tp_itemsize; -#else - py_basicsize = PyObject_GetAttrString(result, "__basicsize__"); - if (!py_basicsize) - goto bad; - basicsize = PyLong_AsSsize_t(py_basicsize); - Py_DECREF(py_basicsize); - py_basicsize = 0; - if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred()) - goto bad; - py_itemsize = PyObject_GetAttrString(result, "__itemsize__"); - if (!py_itemsize) - goto bad; - itemsize = PyLong_AsSsize_t(py_itemsize); - Py_DECREF(py_itemsize); - py_itemsize = 0; - if (itemsize == (Py_ssize_t)-1 && PyErr_Occurred()) - goto bad; -#endif - if (itemsize) { - if (size % alignment) { - alignment = size % alignment; - } - if (itemsize < (Py_ssize_t)alignment) - itemsize = (Py_ssize_t)alignment; - } - if ((size_t)(basicsize + itemsize) < size) { - PyErr_Format(PyExc_ValueError, - "%.200s.%.200s size changed, may indicate binary incompatibility. " - "Expected %zd from C header, got %zd from PyObject", - module_name, class_name, size, basicsize+itemsize); - goto bad; - } - if (check_size == __Pyx_ImportType_CheckSize_Error_3_0_11 && - ((size_t)basicsize > size || (size_t)(basicsize + itemsize) < size)) { - PyErr_Format(PyExc_ValueError, - "%.200s.%.200s size changed, may indicate binary incompatibility. 
" - "Expected %zd from C header, got %zd-%zd from PyObject", - module_name, class_name, size, basicsize, basicsize+itemsize); - goto bad; - } - else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_0_11 && (size_t)basicsize > size) { - PyOS_snprintf(warning, sizeof(warning), - "%s.%s size changed, may indicate binary incompatibility. " - "Expected %zd from C header, got %zd from PyObject", - module_name, class_name, size, basicsize); - if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad; - } - return (PyTypeObject *)result; -bad: - Py_XDECREF(result); - return NULL; -} -#endif - -/* Import */ - static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { - PyObject *module = 0; - PyObject *empty_dict = 0; - PyObject *empty_list = 0; - #if PY_MAJOR_VERSION < 3 - PyObject *py_import; - py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); - if (unlikely(!py_import)) - goto bad; - if (!from_list) { - empty_list = PyList_New(0); - if (unlikely(!empty_list)) - goto bad; - from_list = empty_list; - } - #endif - empty_dict = PyDict_New(); - if (unlikely(!empty_dict)) - goto bad; - { - #if PY_MAJOR_VERSION >= 3 - if (level == -1) { - if (strchr(__Pyx_MODULE_NAME, '.') != NULL) { - module = PyImport_ImportModuleLevelObject( - name, __pyx_d, empty_dict, from_list, 1); - if (unlikely(!module)) { - if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) - goto bad; - PyErr_Clear(); - } - } - level = 0; - } - #endif - if (!module) { - #if PY_MAJOR_VERSION < 3 - PyObject *py_level = PyInt_FromLong(level); - if (unlikely(!py_level)) - goto bad; - module = PyObject_CallFunctionObjArgs(py_import, - name, __pyx_d, empty_dict, from_list, py_level, (PyObject *)NULL); - Py_DECREF(py_level); - #else - module = PyImport_ImportModuleLevelObject( - name, __pyx_d, empty_dict, from_list, level); - #endif - } - } -bad: - Py_XDECREF(empty_dict); - Py_XDECREF(empty_list); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(py_import); - #endif - return module; -} - -/* 
ImportDottedModule */ - #if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx__ImportDottedModule_Error(PyObject *name, PyObject *parts_tuple, Py_ssize_t count) { - PyObject *partial_name = NULL, *slice = NULL, *sep = NULL; - if (unlikely(PyErr_Occurred())) { - PyErr_Clear(); - } - if (likely(PyTuple_GET_SIZE(parts_tuple) == count)) { - partial_name = name; - } else { - slice = PySequence_GetSlice(parts_tuple, 0, count); - if (unlikely(!slice)) - goto bad; - sep = PyUnicode_FromStringAndSize(".", 1); - if (unlikely(!sep)) - goto bad; - partial_name = PyUnicode_Join(sep, slice); - } - PyErr_Format( -#if PY_MAJOR_VERSION < 3 - PyExc_ImportError, - "No module named '%s'", PyString_AS_STRING(partial_name)); -#else -#if PY_VERSION_HEX >= 0x030600B1 - PyExc_ModuleNotFoundError, -#else - PyExc_ImportError, -#endif - "No module named '%U'", partial_name); -#endif -bad: - Py_XDECREF(sep); - Py_XDECREF(slice); - Py_XDECREF(partial_name); - return NULL; -} -#endif -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx__ImportDottedModule_Lookup(PyObject *name) { - PyObject *imported_module; -#if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) - PyObject *modules = PyImport_GetModuleDict(); - if (unlikely(!modules)) - return NULL; - imported_module = __Pyx_PyDict_GetItemStr(modules, name); - Py_XINCREF(imported_module); -#else - imported_module = PyImport_GetModule(name); -#endif - return imported_module; -} -#endif -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple) { - Py_ssize_t i, nparts; - nparts = PyTuple_GET_SIZE(parts_tuple); - for (i=1; i < nparts && module; i++) { - PyObject *part, *submodule; -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - part = PyTuple_GET_ITEM(parts_tuple, i); -#else - part = PySequence_ITEM(parts_tuple, i); -#endif - submodule = __Pyx_PyObject_GetAttrStrNoError(module, part); -#if !(CYTHON_ASSUME_SAFE_MACROS && 
!CYTHON_AVOID_BORROWED_REFS) - Py_DECREF(part); -#endif - Py_DECREF(module); - module = submodule; - } - if (unlikely(!module)) { - return __Pyx__ImportDottedModule_Error(name, parts_tuple, i); - } - return module; -} -#endif -static PyObject *__Pyx__ImportDottedModule(PyObject *name, PyObject *parts_tuple) { -#if PY_MAJOR_VERSION < 3 - PyObject *module, *from_list, *star = __pyx_n_s__3; - CYTHON_UNUSED_VAR(parts_tuple); - from_list = PyList_New(1); - if (unlikely(!from_list)) - return NULL; - Py_INCREF(star); - PyList_SET_ITEM(from_list, 0, star); - module = __Pyx_Import(name, from_list, 0); - Py_DECREF(from_list); - return module; -#else - PyObject *imported_module; - PyObject *module = __Pyx_Import(name, NULL, 0); - if (!parts_tuple || unlikely(!module)) - return module; - imported_module = __Pyx__ImportDottedModule_Lookup(name); - if (likely(imported_module)) { - Py_DECREF(module); - return imported_module; - } - PyErr_Clear(); - return __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); -#endif -} -static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple) { -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030400B1 - PyObject *module = __Pyx__ImportDottedModule_Lookup(name); - if (likely(module)) { - PyObject *spec = __Pyx_PyObject_GetAttrStrNoError(module, __pyx_n_s_spec); - if (likely(spec)) { - PyObject *unsafe = __Pyx_PyObject_GetAttrStrNoError(spec, __pyx_n_s_initializing); - if (likely(!unsafe || !__Pyx_PyObject_IsTrue(unsafe))) { - Py_DECREF(spec); - spec = NULL; - } - Py_XDECREF(unsafe); - } - if (likely(!spec)) { - PyErr_Clear(); - return module; - } - Py_DECREF(spec); - Py_DECREF(module); - } else if (PyErr_Occurred()) { - PyErr_Clear(); - } -#endif - return __Pyx__ImportDottedModule(name, parts_tuple); -} - -/* ImportDottedModuleRelFirst */ - static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject *name, PyObject *parts_tuple) { - PyObject *module; - PyObject *from_list = NULL; -#if PY_MAJOR_VERSION < 3 - 
PyObject *star = __pyx_n_s__3; - from_list = PyList_New(1); - if (unlikely(!from_list)) - return NULL; - Py_INCREF(star); - PyList_SET_ITEM(from_list, 0, star); -#endif - module = __Pyx_Import(name, from_list, -1); - Py_XDECREF(from_list); - if (module) { - #if PY_MAJOR_VERSION >= 3 - if (parts_tuple) { - module = __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); - } - #endif - return module; - } - if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) - return NULL; - PyErr_Clear(); - return __Pyx_ImportDottedModule(name, parts_tuple); -} - -/* FixUpExtensionType */ - #if CYTHON_USE_TYPE_SPECS -static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type) { -#if PY_VERSION_HEX > 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - CYTHON_UNUSED_VAR(spec); - CYTHON_UNUSED_VAR(type); -#else - const PyType_Slot *slot = spec->slots; - while (slot && slot->slot && slot->slot != Py_tp_members) - slot++; - if (slot && slot->slot == Py_tp_members) { - int changed = 0; -#if !(PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON) - const -#endif - PyMemberDef *memb = (PyMemberDef*) slot->pfunc; - while (memb && memb->name) { - if (memb->name[0] == '_' && memb->name[1] == '_') { -#if PY_VERSION_HEX < 0x030900b1 - if (strcmp(memb->name, "__weaklistoffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); - type->tp_weaklistoffset = memb->offset; - changed = 1; - } - else if (strcmp(memb->name, "__dictoffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); - type->tp_dictoffset = memb->offset; - changed = 1; - } -#if CYTHON_METH_FASTCALL - else if (strcmp(memb->name, "__vectorcalloffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); -#if PY_VERSION_HEX >= 0x030800b4 - type->tp_vectorcall_offset = memb->offset; -#else - type->tp_print = (printfunc) memb->offset; -#endif - changed = 1; - } -#endif -#else - if ((0)); -#endif -#if 
PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON - else if (strcmp(memb->name, "__module__") == 0) { - PyObject *descr; - assert(memb->type == T_OBJECT); - assert(memb->flags == 0 || memb->flags == READONLY); - descr = PyDescr_NewMember(type, memb); - if (unlikely(!descr)) - return -1; - if (unlikely(PyDict_SetItem(type->tp_dict, PyDescr_NAME(descr), descr) < 0)) { - Py_DECREF(descr); - return -1; - } - Py_DECREF(descr); - changed = 1; - } -#endif - } - memb++; - } - if (changed) - PyType_Modified(type); - } -#endif - return 0; -} -#endif - -/* FetchSharedCythonModule */ - static PyObject *__Pyx_FetchSharedCythonABIModule(void) { - return __Pyx_PyImport_AddModuleRef((char*) __PYX_ABI_MODULE_NAME); -} - -/* FetchCommonType */ - static int __Pyx_VerifyCachedType(PyObject *cached_type, - const char *name, - Py_ssize_t basicsize, - Py_ssize_t expected_basicsize) { - if (!PyType_Check(cached_type)) { - PyErr_Format(PyExc_TypeError, - "Shared Cython type %.200s is not a type object", name); - return -1; - } - if (basicsize != expected_basicsize) { - PyErr_Format(PyExc_TypeError, - "Shared Cython type %.200s has the wrong size, try recompiling", - name); - return -1; - } - return 0; -} -#if !CYTHON_USE_TYPE_SPECS -static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) { - PyObject* abi_module; - const char* object_name; - PyTypeObject *cached_type = NULL; - abi_module = __Pyx_FetchSharedCythonABIModule(); - if (!abi_module) return NULL; - object_name = strrchr(type->tp_name, '.'); - object_name = object_name ? 
object_name+1 : type->tp_name; - cached_type = (PyTypeObject*) PyObject_GetAttrString(abi_module, object_name); - if (cached_type) { - if (__Pyx_VerifyCachedType( - (PyObject *)cached_type, - object_name, - cached_type->tp_basicsize, - type->tp_basicsize) < 0) { - goto bad; - } - goto done; - } - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; - PyErr_Clear(); - if (PyType_Ready(type) < 0) goto bad; - if (PyObject_SetAttrString(abi_module, object_name, (PyObject *)type) < 0) - goto bad; - Py_INCREF(type); - cached_type = type; -done: - Py_DECREF(abi_module); - return cached_type; -bad: - Py_XDECREF(cached_type); - cached_type = NULL; - goto done; -} -#else -static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) { - PyObject *abi_module, *cached_type = NULL; - const char* object_name = strrchr(spec->name, '.'); - object_name = object_name ? object_name+1 : spec->name; - abi_module = __Pyx_FetchSharedCythonABIModule(); - if (!abi_module) return NULL; - cached_type = PyObject_GetAttrString(abi_module, object_name); - if (cached_type) { - Py_ssize_t basicsize; -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject *py_basicsize; - py_basicsize = PyObject_GetAttrString(cached_type, "__basicsize__"); - if (unlikely(!py_basicsize)) goto bad; - basicsize = PyLong_AsSsize_t(py_basicsize); - Py_DECREF(py_basicsize); - py_basicsize = 0; - if (unlikely(basicsize == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; -#else - basicsize = likely(PyType_Check(cached_type)) ? 
((PyTypeObject*) cached_type)->tp_basicsize : -1; -#endif - if (__Pyx_VerifyCachedType( - cached_type, - object_name, - basicsize, - spec->basicsize) < 0) { - goto bad; - } - goto done; - } - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; - PyErr_Clear(); - CYTHON_UNUSED_VAR(module); - cached_type = __Pyx_PyType_FromModuleAndSpec(abi_module, spec, bases); - if (unlikely(!cached_type)) goto bad; - if (unlikely(__Pyx_fix_up_extension_type_from_spec(spec, (PyTypeObject *) cached_type) < 0)) goto bad; - if (PyObject_SetAttrString(abi_module, object_name, cached_type) < 0) goto bad; -done: - Py_DECREF(abi_module); - assert(cached_type == NULL || PyType_Check(cached_type)); - return (PyTypeObject *) cached_type; -bad: - Py_XDECREF(cached_type); - cached_type = NULL; - goto done; -} -#endif - -/* PyVectorcallFastCallDict */ - #if CYTHON_METH_FASTCALL -static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) -{ - PyObject *res = NULL; - PyObject *kwnames; - PyObject **newargs; - PyObject **kwvalues; - Py_ssize_t i, pos; - size_t j; - PyObject *key, *value; - unsigned long keys_are_strings; - Py_ssize_t nkw = PyDict_GET_SIZE(kw); - newargs = (PyObject **)PyMem_Malloc((nargs + (size_t)nkw) * sizeof(args[0])); - if (unlikely(newargs == NULL)) { - PyErr_NoMemory(); - return NULL; - } - for (j = 0; j < nargs; j++) newargs[j] = args[j]; - kwnames = PyTuple_New(nkw); - if (unlikely(kwnames == NULL)) { - PyMem_Free(newargs); - return NULL; - } - kwvalues = newargs + nargs; - pos = i = 0; - keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; - while (PyDict_Next(kw, &pos, &key, &value)) { - keys_are_strings &= Py_TYPE(key)->tp_flags; - Py_INCREF(key); - Py_INCREF(value); - PyTuple_SET_ITEM(kwnames, i, key); - kwvalues[i] = value; - i++; - } - if (unlikely(!keys_are_strings)) { - PyErr_SetString(PyExc_TypeError, "keywords must be strings"); - goto cleanup; - } - res = vc(func, newargs, 
nargs, kwnames); -cleanup: - Py_DECREF(kwnames); - for (i = 0; i < nkw; i++) - Py_DECREF(kwvalues[i]); - PyMem_Free(newargs); - return res; -} -static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) -{ - if (likely(kw == NULL) || PyDict_GET_SIZE(kw) == 0) { - return vc(func, args, nargs, NULL); - } - return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw); -} -#endif - -/* CythonFunctionShared */ - #if CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { - if (__Pyx_CyFunction_Check(func)) { - return PyCFunction_GetFunction(((__pyx_CyFunctionObject*)func)->func) == (PyCFunction) cfunc; - } else if (PyCFunction_Check(func)) { - return PyCFunction_GetFunction(func) == (PyCFunction) cfunc; - } - return 0; -} -#else -static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { - return __Pyx_CyOrPyCFunction_Check(func) && __Pyx_CyOrPyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; -} -#endif -static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj) { -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - __Pyx_Py_XDECREF_SET( - __Pyx_CyFunction_GetClassObj(f), - ((classobj) ? __Pyx_NewRef(classobj) : NULL)); -#else - __Pyx_Py_XDECREF_SET( - ((PyCMethodObject *) (f))->mm_class, - (PyTypeObject*)((classobj) ? 
__Pyx_NewRef(classobj) : NULL)); -#endif -} -static PyObject * -__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, void *closure) -{ - CYTHON_UNUSED_VAR(closure); - if (unlikely(op->func_doc == NULL)) { -#if CYTHON_COMPILING_IN_LIMITED_API - op->func_doc = PyObject_GetAttrString(op->func, "__doc__"); - if (unlikely(!op->func_doc)) return NULL; -#else - if (((PyCFunctionObject*)op)->m_ml->ml_doc) { -#if PY_MAJOR_VERSION >= 3 - op->func_doc = PyUnicode_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); -#else - op->func_doc = PyString_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); -#endif - if (unlikely(op->func_doc == NULL)) - return NULL; - } else { - Py_INCREF(Py_None); - return Py_None; - } -#endif - } - Py_INCREF(op->func_doc); - return op->func_doc; -} -static int -__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (value == NULL) { - value = Py_None; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_doc, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(op->func_name == NULL)) { -#if CYTHON_COMPILING_IN_LIMITED_API - op->func_name = PyObject_GetAttrString(op->func, "__name__"); -#elif PY_MAJOR_VERSION >= 3 - op->func_name = PyUnicode_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); -#else - op->func_name = PyString_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); -#endif - if (unlikely(op->func_name == NULL)) - return NULL; - } - Py_INCREF(op->func_name); - return op->func_name; -} -static int -__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__name__ must be set to a string object"); 
- return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_name, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - Py_INCREF(op->func_qualname); - return op->func_qualname; -} -static int -__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__qualname__ must be set to a string object"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_qualname, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(op->func_dict == NULL)) { - op->func_dict = PyDict_New(); - if (unlikely(op->func_dict == NULL)) - return NULL; - } - Py_INCREF(op->func_dict); - return op->func_dict; -} -static int -__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(value == NULL)) { - PyErr_SetString(PyExc_TypeError, - "function's dictionary may not be deleted"); - return -1; - } - if (unlikely(!PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "setting function's dictionary to a non-dict"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_dict, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - Py_INCREF(op->func_globals); - return op->func_globals; -} -static PyObject * -__Pyx_CyFunction_get_closure(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(op); - CYTHON_UNUSED_VAR(context); - Py_INCREF(Py_None); - return Py_None; -} -static PyObject * 
-__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, void *context) -{ - PyObject* result = (op->func_code) ? op->func_code : Py_None; - CYTHON_UNUSED_VAR(context); - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) { - int result = 0; - PyObject *res = op->defaults_getter((PyObject *) op); - if (unlikely(!res)) - return -1; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - op->defaults_tuple = PyTuple_GET_ITEM(res, 0); - Py_INCREF(op->defaults_tuple); - op->defaults_kwdict = PyTuple_GET_ITEM(res, 1); - Py_INCREF(op->defaults_kwdict); - #else - op->defaults_tuple = __Pyx_PySequence_ITEM(res, 0); - if (unlikely(!op->defaults_tuple)) result = -1; - else { - op->defaults_kwdict = __Pyx_PySequence_ITEM(res, 1); - if (unlikely(!op->defaults_kwdict)) result = -1; - } - #endif - Py_DECREF(res); - return result; -} -static int -__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - if (!value) { - value = Py_None; - } else if (unlikely(value != Py_None && !PyTuple_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__defaults__ must be set to a tuple object"); - return -1; - } - PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__defaults__ will not " - "currently affect the values used in function calls", 1); - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->defaults_tuple, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->defaults_tuple; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - if (op->defaults_getter) { - if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; - result = op->defaults_tuple; - } else { - result = Py_None; - } - } - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - 
CYTHON_UNUSED_VAR(context); - if (!value) { - value = Py_None; - } else if (unlikely(value != Py_None && !PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__kwdefaults__ must be set to a dict object"); - return -1; - } - PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__kwdefaults__ will not " - "currently affect the values used in function calls", 1); - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->defaults_kwdict, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->defaults_kwdict; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - if (op->defaults_getter) { - if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; - result = op->defaults_kwdict; - } else { - result = Py_None; - } - } - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - if (!value || value == Py_None) { - value = NULL; - } else if (unlikely(!PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__annotations__ must be set to a dict object"); - return -1; - } - Py_XINCREF(value); - __Pyx_Py_XDECREF_SET(op->func_annotations, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->func_annotations; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - result = PyDict_New(); - if (unlikely(!result)) return NULL; - op->func_annotations = result; - } - Py_INCREF(result); - return result; -} -static PyObject * -__Pyx_CyFunction_get_is_coroutine(__pyx_CyFunctionObject *op, void *context) { - int is_coroutine; - CYTHON_UNUSED_VAR(context); - if (op->func_is_coroutine) { - return __Pyx_NewRef(op->func_is_coroutine); - } - is_coroutine = op->flags & __Pyx_CYFUNCTION_COROUTINE; -#if PY_VERSION_HEX >= 0x03050000 - if (is_coroutine) { - PyObject 
*module, *fromlist, *marker = __pyx_n_s_is_coroutine; - fromlist = PyList_New(1); - if (unlikely(!fromlist)) return NULL; - Py_INCREF(marker); -#if CYTHON_ASSUME_SAFE_MACROS - PyList_SET_ITEM(fromlist, 0, marker); -#else - if (unlikely(PyList_SetItem(fromlist, 0, marker) < 0)) { - Py_DECREF(marker); - Py_DECREF(fromlist); - return NULL; - } -#endif - module = PyImport_ImportModuleLevelObject(__pyx_n_s_asyncio_coroutines, NULL, NULL, fromlist, 0); - Py_DECREF(fromlist); - if (unlikely(!module)) goto ignore; - op->func_is_coroutine = __Pyx_PyObject_GetAttrStr(module, marker); - Py_DECREF(module); - if (likely(op->func_is_coroutine)) { - return __Pyx_NewRef(op->func_is_coroutine); - } -ignore: - PyErr_Clear(); - } -#endif - op->func_is_coroutine = __Pyx_PyBool_FromLong(is_coroutine); - return __Pyx_NewRef(op->func_is_coroutine); -} -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject * -__Pyx_CyFunction_get_module(__pyx_CyFunctionObject *op, void *context) { - CYTHON_UNUSED_VAR(context); - return PyObject_GetAttrString(op->func, "__module__"); -} -static int -__Pyx_CyFunction_set_module(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - return PyObject_SetAttrString(op->func, "__module__", value); -} -#endif -static PyGetSetDef __pyx_CyFunction_getsets[] = { - {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, - {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, - {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, - {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, - {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0}, - {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, - {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, 
(setter)__Pyx_CyFunction_set_dict, 0, 0}, - {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, - {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, - {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, - {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, - {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, - {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, - {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, - {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, - {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0}, - {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0}, - {(char *) "_is_coroutine", (getter)__Pyx_CyFunction_get_is_coroutine, 0, 0, 0}, -#if CYTHON_COMPILING_IN_LIMITED_API - {"__module__", (getter)__Pyx_CyFunction_get_module, (setter)__Pyx_CyFunction_set_module, 0, 0}, -#endif - {0, 0, 0, 0, 0} -}; -static PyMemberDef __pyx_CyFunction_members[] = { -#if !CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0}, -#endif -#if CYTHON_USE_TYPE_SPECS - {(char *) "__dictoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_dict), READONLY, 0}, -#if CYTHON_METH_FASTCALL -#if CYTHON_BACKPORT_VECTORCALL - {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_vectorcall), READONLY, 0}, -#else -#if !CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0}, -#endif -#endif -#endif -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, 
func_weakreflist), READONLY, 0}, -#else - {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0}, -#endif -#endif - {0, 0, 0, 0, 0} -}; -static PyObject * -__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, PyObject *args) -{ - CYTHON_UNUSED_VAR(args); -#if PY_MAJOR_VERSION >= 3 - Py_INCREF(m->func_qualname); - return m->func_qualname; -#else - return PyString_FromString(((PyCFunctionObject*)m)->m_ml->ml_name); -#endif -} -static PyMethodDef __pyx_CyFunction_methods[] = { - {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0}, - {0, 0, 0, 0} -}; -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API -#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist) -#else -#define __Pyx_CyFunction_weakreflist(cyfunc) (((PyCFunctionObject*)cyfunc)->m_weakreflist) -#endif -static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname, - PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { -#if !CYTHON_COMPILING_IN_LIMITED_API - PyCFunctionObject *cf = (PyCFunctionObject*) op; -#endif - if (unlikely(op == NULL)) - return NULL; -#if CYTHON_COMPILING_IN_LIMITED_API - op->func = PyCFunction_NewEx(ml, (PyObject*)op, module); - if (unlikely(!op->func)) return NULL; -#endif - op->flags = flags; - __Pyx_CyFunction_weakreflist(op) = NULL; -#if !CYTHON_COMPILING_IN_LIMITED_API - cf->m_ml = ml; - cf->m_self = (PyObject *) op; -#endif - Py_XINCREF(closure); - op->func_closure = closure; -#if !CYTHON_COMPILING_IN_LIMITED_API - Py_XINCREF(module); - cf->m_module = module; -#endif - op->func_dict = NULL; - op->func_name = NULL; - Py_INCREF(qualname); - op->func_qualname = qualname; - op->func_doc = NULL; -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - op->func_classobj = NULL; -#else - ((PyCMethodObject*)op)->mm_class = NULL; -#endif - op->func_globals = globals; - Py_INCREF(op->func_globals); - 
Py_XINCREF(code); - op->func_code = code; - op->defaults_pyobjects = 0; - op->defaults_size = 0; - op->defaults = NULL; - op->defaults_tuple = NULL; - op->defaults_kwdict = NULL; - op->defaults_getter = NULL; - op->func_annotations = NULL; - op->func_is_coroutine = NULL; -#if CYTHON_METH_FASTCALL - switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { - case METH_NOARGS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS; - break; - case METH_O: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O; - break; - case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD; - break; - case METH_FASTCALL | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS; - break; - case METH_VARARGS | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = NULL; - break; - default: - PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); - Py_DECREF(op); - return NULL; - } -#endif - return (PyObject *) op; -} -static int -__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m) -{ - Py_CLEAR(m->func_closure); -#if CYTHON_COMPILING_IN_LIMITED_API - Py_CLEAR(m->func); -#else - Py_CLEAR(((PyCFunctionObject*)m)->m_module); -#endif - Py_CLEAR(m->func_dict); - Py_CLEAR(m->func_name); - Py_CLEAR(m->func_qualname); - Py_CLEAR(m->func_doc); - Py_CLEAR(m->func_globals); - Py_CLEAR(m->func_code); -#if !CYTHON_COMPILING_IN_LIMITED_API -#if PY_VERSION_HEX < 0x030900B1 - Py_CLEAR(__Pyx_CyFunction_GetClassObj(m)); -#else - { - PyObject *cls = (PyObject*) ((PyCMethodObject *) (m))->mm_class; - ((PyCMethodObject *) (m))->mm_class = NULL; - Py_XDECREF(cls); - } -#endif -#endif - Py_CLEAR(m->defaults_tuple); - Py_CLEAR(m->defaults_kwdict); - Py_CLEAR(m->func_annotations); - Py_CLEAR(m->func_is_coroutine); - if (m->defaults) { - PyObject **pydefaults = 
__Pyx_CyFunction_Defaults(PyObject *, m); - int i; - for (i = 0; i < m->defaults_pyobjects; i++) - Py_XDECREF(pydefaults[i]); - PyObject_Free(m->defaults); - m->defaults = NULL; - } - return 0; -} -static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m) -{ - if (__Pyx_CyFunction_weakreflist(m) != NULL) - PyObject_ClearWeakRefs((PyObject *) m); - __Pyx_CyFunction_clear(m); - __Pyx_PyHeapTypeObject_GC_Del(m); -} -static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m) -{ - PyObject_GC_UnTrack(m); - __Pyx__CyFunction_dealloc(m); -} -static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg) -{ - Py_VISIT(m->func_closure); -#if CYTHON_COMPILING_IN_LIMITED_API - Py_VISIT(m->func); -#else - Py_VISIT(((PyCFunctionObject*)m)->m_module); -#endif - Py_VISIT(m->func_dict); - Py_VISIT(m->func_name); - Py_VISIT(m->func_qualname); - Py_VISIT(m->func_doc); - Py_VISIT(m->func_globals); - Py_VISIT(m->func_code); -#if !CYTHON_COMPILING_IN_LIMITED_API - Py_VISIT(__Pyx_CyFunction_GetClassObj(m)); -#endif - Py_VISIT(m->defaults_tuple); - Py_VISIT(m->defaults_kwdict); - Py_VISIT(m->func_is_coroutine); - if (m->defaults) { - PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); - int i; - for (i = 0; i < m->defaults_pyobjects; i++) - Py_VISIT(pydefaults[i]); - } - return 0; -} -static PyObject* -__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op) -{ -#if PY_MAJOR_VERSION >= 3 - return PyUnicode_FromFormat("", - op->func_qualname, (void *)op); -#else - return PyString_FromFormat("", - PyString_AsString(op->func_qualname), (void *)op); -#endif -} -static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) { -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject *f = ((__pyx_CyFunctionObject*)func)->func; - PyObject *py_name = NULL; - PyCFunction meth; - int flags; - meth = PyCFunction_GetFunction(f); - if (unlikely(!meth)) return NULL; - flags = PyCFunction_GetFlags(f); - if (unlikely(flags 
< 0)) return NULL; -#else - PyCFunctionObject* f = (PyCFunctionObject*)func; - PyCFunction meth = f->m_ml->ml_meth; - int flags = f->m_ml->ml_flags; -#endif - Py_ssize_t size; - switch (flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) { - case METH_VARARGS: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) - return (*meth)(self, arg); - break; - case METH_VARARGS | METH_KEYWORDS: - return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw); - case METH_NOARGS: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) { -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(arg); -#else - size = PyTuple_Size(arg); - if (unlikely(size < 0)) return NULL; -#endif - if (likely(size == 0)) - return (*meth)(self, NULL); -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, - "%.200S() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - py_name, size); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, - "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - f->m_ml->ml_name, size); -#endif - return NULL; - } - break; - case METH_O: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) { -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(arg); -#else - size = PyTuple_Size(arg); - if (unlikely(size < 0)) return NULL; -#endif - if (likely(size == 1)) { - PyObject *result, *arg0; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - arg0 = PyTuple_GET_ITEM(arg, 0); - #else - arg0 = __Pyx_PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL; - #endif - result = (*meth)(self, arg0); - #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) - Py_DECREF(arg0); - #endif - return result; - } -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, - "%.200S() takes exactly one 
argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - py_name, size); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, - "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - f->m_ml->ml_name, size); -#endif - return NULL; - } - break; - default: - PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); - return NULL; - } -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, "%.200S() takes no keyword arguments", - py_name); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", - f->m_ml->ml_name); -#endif - return NULL; -} -static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) { - PyObject *self, *result; -#if CYTHON_COMPILING_IN_LIMITED_API - self = PyCFunction_GetSelf(((__pyx_CyFunctionObject*)func)->func); - if (unlikely(!self) && PyErr_Occurred()) return NULL; -#else - self = ((PyCFunctionObject*)func)->m_self; -#endif - result = __Pyx_CyFunction_CallMethod(func, self, arg, kw); - return result; -} -static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) { - PyObject *result; - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func; -#if CYTHON_METH_FASTCALL - __pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc); - if (vc) { -#if CYTHON_ASSUME_SAFE_MACROS - return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), (size_t)PyTuple_GET_SIZE(args), kw); -#else - (void) &__Pyx_PyVectorcall_FastCallDict; - return PyVectorcall_Call(func, args, kw); -#endif - } -#endif - if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { - Py_ssize_t argc; - PyObject *new_args; - PyObject *self; -#if CYTHON_ASSUME_SAFE_MACROS - argc = PyTuple_GET_SIZE(args); -#else - argc = PyTuple_Size(args); - if 
(unlikely(!argc) < 0) return NULL; -#endif - new_args = PyTuple_GetSlice(args, 1, argc); - if (unlikely(!new_args)) - return NULL; - self = PyTuple_GetItem(args, 0); - if (unlikely(!self)) { - Py_DECREF(new_args); -#if PY_MAJOR_VERSION > 2 - PyErr_Format(PyExc_TypeError, - "unbound method %.200S() needs an argument", - cyfunc->func_qualname); -#else - PyErr_SetString(PyExc_TypeError, - "unbound method needs an argument"); -#endif - return NULL; - } - result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw); - Py_DECREF(new_args); - } else { - result = __Pyx_CyFunction_Call(func, args, kw); - } - return result; -} -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames) -{ - int ret = 0; - if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { - if (unlikely(nargs < 1)) { - PyErr_Format(PyExc_TypeError, "%.200s() needs an argument", - ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); - return -1; - } - ret = 1; - } - if (unlikely(kwnames) && unlikely(PyTuple_GET_SIZE(kwnames))) { - PyErr_Format(PyExc_TypeError, - "%.200s() takes no keyword arguments", ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); - return -1; - } - return ret; -} -static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - if (unlikely(nargs != 0)) { - 
PyErr_Format(PyExc_TypeError, - "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - def->ml_name, nargs); - return NULL; - } - return def->ml_meth(self, NULL); -} -static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - if (unlikely(nargs != 1)) { - PyErr_Format(PyExc_TypeError, - "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - def->ml_name, nargs); - return NULL; - } - return def->ml_meth(self, args[0]); -} -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - return ((__Pyx_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames); -} -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = 
(__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; - PyTypeObject *cls = (PyTypeObject *) __Pyx_CyFunction_GetClassObj(cyfunc); -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - return ((__Pyx_PyCMethod)(void(*)(void))def->ml_meth)(self, cls, args, (size_t)nargs, kwnames); -} -#endif -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_CyFunctionType_slots[] = { - {Py_tp_dealloc, (void *)__Pyx_CyFunction_dealloc}, - {Py_tp_repr, (void *)__Pyx_CyFunction_repr}, - {Py_tp_call, (void *)__Pyx_CyFunction_CallAsMethod}, - {Py_tp_traverse, (void *)__Pyx_CyFunction_traverse}, - {Py_tp_clear, (void *)__Pyx_CyFunction_clear}, - {Py_tp_methods, (void *)__pyx_CyFunction_methods}, - {Py_tp_members, (void *)__pyx_CyFunction_members}, - {Py_tp_getset, (void *)__pyx_CyFunction_getsets}, - {Py_tp_descr_get, (void *)__Pyx_PyMethod_New}, - {0, 0}, -}; -static PyType_Spec __pyx_CyFunctionType_spec = { - __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", - sizeof(__pyx_CyFunctionObject), - 0, -#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR - Py_TPFLAGS_METHOD_DESCRIPTOR | -#endif -#if (defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL) - _Py_TPFLAGS_HAVE_VECTORCALL | -#endif - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, - __pyx_CyFunctionType_slots -}; -#else -static PyTypeObject __pyx_CyFunctionType_type = { - PyVarObject_HEAD_INIT(0, 0) - __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", - sizeof(__pyx_CyFunctionObject), - 0, - (destructor) __Pyx_CyFunction_dealloc, -#if !CYTHON_METH_FASTCALL - 0, -#elif CYTHON_BACKPORT_VECTORCALL - (printfunc)offsetof(__pyx_CyFunctionObject, 
func_vectorcall), -#else - offsetof(PyCFunctionObject, vectorcall), -#endif - 0, - 0, -#if PY_MAJOR_VERSION < 3 - 0, -#else - 0, -#endif - (reprfunc) __Pyx_CyFunction_repr, - 0, - 0, - 0, - 0, - __Pyx_CyFunction_CallAsMethod, - 0, - 0, - 0, - 0, -#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR - Py_TPFLAGS_METHOD_DESCRIPTOR | -#endif -#if defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL - _Py_TPFLAGS_HAVE_VECTORCALL | -#endif - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, - 0, - (traverseproc) __Pyx_CyFunction_traverse, - (inquiry) __Pyx_CyFunction_clear, - 0, -#if PY_VERSION_HEX < 0x030500A0 - offsetof(__pyx_CyFunctionObject, func_weakreflist), -#else - offsetof(PyCFunctionObject, m_weakreflist), -#endif - 0, - 0, - __pyx_CyFunction_methods, - __pyx_CyFunction_members, - __pyx_CyFunction_getsets, - 0, - 0, - __Pyx_PyMethod_New, - 0, - offsetof(__pyx_CyFunctionObject, func_dict), - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -#if PY_VERSION_HEX >= 0x030400a1 - 0, -#endif -#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, -#endif -#if __PYX_NEED_TP_PRINT_SLOT - 0, -#endif -#if PY_VERSION_HEX >= 0x030C0000 - 0, -#endif -#if PY_VERSION_HEX >= 0x030d00A4 - 0, -#endif -#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, -#endif -}; -#endif -static int __pyx_CyFunction_init(PyObject *module) { -#if CYTHON_USE_TYPE_SPECS - __pyx_CyFunctionType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_CyFunctionType_spec, NULL); -#else - CYTHON_UNUSED_VAR(module); - __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type); -#endif - if (unlikely(__pyx_CyFunctionType == NULL)) { - return -1; - } - return 0; -} -static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults = PyObject_Malloc(size); - if (unlikely(!m->defaults)) 
- return PyErr_NoMemory(); - memset(m->defaults, 0, size); - m->defaults_pyobjects = pyobjects; - m->defaults_size = size; - return m->defaults; -} -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults_tuple = tuple; - Py_INCREF(tuple); -} -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults_kwdict = dict; - Py_INCREF(dict); -} -static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->func_annotations = dict; - Py_INCREF(dict); -} - -/* CythonFunction */ - static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname, - PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { - PyObject *op = __Pyx_CyFunction_Init( - PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType), - ml, flags, qualname, closure, module, globals, code - ); - if (likely(op)) { - PyObject_GC_Track(op); - } - return op; -} - -/* CLineInTraceback */ - #ifndef CYTHON_CLINE_IN_TRACEBACK -static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) { - PyObject *use_cline; - PyObject *ptype, *pvalue, *ptraceback; -#if CYTHON_COMPILING_IN_CPYTHON - PyObject **cython_runtime_dict; -#endif - CYTHON_MAYBE_UNUSED_VAR(tstate); - if (unlikely(!__pyx_cython_runtime)) { - return c_line; - } - __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); -#if CYTHON_COMPILING_IN_CPYTHON - cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); - if (likely(cython_runtime_dict)) { - __PYX_PY_DICT_LOOKUP_IF_MODIFIED( - use_cline, *cython_runtime_dict, - __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) - } else -#endif - { - PyObject *use_cline_obj = 
__Pyx_PyObject_GetAttrStrNoError(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); - if (use_cline_obj) { - use_cline = PyObject_Not(use_cline_obj) ? Py_False : Py_True; - Py_DECREF(use_cline_obj); - } else { - PyErr_Clear(); - use_cline = NULL; - } - } - if (!use_cline) { - c_line = 0; - (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); - } - else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { - c_line = 0; - } - __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); - return c_line; -} -#endif - -/* CodeObjectCache */ - #if !CYTHON_COMPILING_IN_LIMITED_API -static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { - int start = 0, mid = 0, end = count - 1; - if (end >= 0 && code_line > entries[end].code_line) { - return count; - } - while (start < end) { - mid = start + (end - start) / 2; - if (code_line < entries[mid].code_line) { - end = mid; - } else if (code_line > entries[mid].code_line) { - start = mid + 1; - } else { - return mid; - } - } - if (code_line <= entries[mid].code_line) { - return mid; - } else { - return mid + 1; - } -} -static PyCodeObject *__pyx_find_code_object(int code_line) { - PyCodeObject* code_object; - int pos; - if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { - return NULL; - } - pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); - if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { - return NULL; - } - code_object = __pyx_code_cache.entries[pos].code_object; - Py_INCREF(code_object); - return code_object; -} -static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { - int pos, i; - __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; - if (unlikely(!code_line)) { - return; - } - if (unlikely(!entries)) { - entries = 
(__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); - if (likely(entries)) { - __pyx_code_cache.entries = entries; - __pyx_code_cache.max_count = 64; - __pyx_code_cache.count = 1; - entries[0].code_line = code_line; - entries[0].code_object = code_object; - Py_INCREF(code_object); - } - return; - } - pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); - if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { - PyCodeObject* tmp = entries[pos].code_object; - entries[pos].code_object = code_object; - Py_DECREF(tmp); - return; - } - if (__pyx_code_cache.count == __pyx_code_cache.max_count) { - int new_max = __pyx_code_cache.max_count + 64; - entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( - __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); - if (unlikely(!entries)) { - return; - } - __pyx_code_cache.entries = entries; - __pyx_code_cache.max_count = new_max; - } - for (i=__pyx_code_cache.count; i>pos; i--) { - entries[i] = entries[i-1]; - } - entries[pos].code_line = code_line; - entries[pos].code_object = code_object; - __pyx_code_cache.count++; - Py_INCREF(code_object); -} -#endif - -/* AddTraceback */ - #include "compile.h" -#include "frameobject.h" -#include "traceback.h" -#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API - #ifndef Py_BUILD_CORE - #define Py_BUILD_CORE 1 - #endif - #include "internal/pycore_frame.h" -#endif -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject *__Pyx_PyCode_Replace_For_AddTraceback(PyObject *code, PyObject *scratch_dict, - PyObject *firstlineno, PyObject *name) { - PyObject *replace = NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "co_firstlineno", firstlineno))) return NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "co_name", name))) return NULL; - replace = PyObject_GetAttrString(code, "replace"); - if (likely(replace)) { - PyObject 
*result; - result = PyObject_Call(replace, __pyx_empty_tuple, scratch_dict); - Py_DECREF(replace); - return result; - } - PyErr_Clear(); - #if __PYX_LIMITED_VERSION_HEX < 0x030780000 - { - PyObject *compiled = NULL, *result = NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "code", code))) return NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "type", (PyObject*)(&PyType_Type)))) return NULL; - compiled = Py_CompileString( - "out = type(code)(\n" - " code.co_argcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize,\n" - " code.co_flags, code.co_code, code.co_consts, code.co_names,\n" - " code.co_varnames, code.co_filename, co_name, co_firstlineno,\n" - " code.co_lnotab)\n", "", Py_file_input); - if (!compiled) return NULL; - result = PyEval_EvalCode(compiled, scratch_dict, scratch_dict); - Py_DECREF(compiled); - if (!result) PyErr_Print(); - Py_DECREF(result); - result = PyDict_GetItemString(scratch_dict, "out"); - if (result) Py_INCREF(result); - return result; - } - #else - return NULL; - #endif -} -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename) { - PyObject *code_object = NULL, *py_py_line = NULL, *py_funcname = NULL, *dict = NULL; - PyObject *replace = NULL, *getframe = NULL, *frame = NULL; - PyObject *exc_type, *exc_value, *exc_traceback; - int success = 0; - if (c_line) { - (void) __pyx_cfilenm; - (void) __Pyx_CLineForTraceback(__Pyx_PyThreadState_Current, c_line); - } - PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); - code_object = Py_CompileString("_getframe()", filename, Py_eval_input); - if (unlikely(!code_object)) goto bad; - py_py_line = PyLong_FromLong(py_line); - if (unlikely(!py_py_line)) goto bad; - py_funcname = PyUnicode_FromString(funcname); - if (unlikely(!py_funcname)) goto bad; - dict = PyDict_New(); - if (unlikely(!dict)) goto bad; - { - PyObject *old_code_object = code_object; - code_object = __Pyx_PyCode_Replace_For_AddTraceback(code_object, dict, 
py_py_line, py_funcname); - Py_DECREF(old_code_object); - } - if (unlikely(!code_object)) goto bad; - getframe = PySys_GetObject("_getframe"); - if (unlikely(!getframe)) goto bad; - if (unlikely(PyDict_SetItemString(dict, "_getframe", getframe))) goto bad; - frame = PyEval_EvalCode(code_object, dict, dict); - if (unlikely(!frame) || frame == Py_None) goto bad; - success = 1; - bad: - PyErr_Restore(exc_type, exc_value, exc_traceback); - Py_XDECREF(code_object); - Py_XDECREF(py_py_line); - Py_XDECREF(py_funcname); - Py_XDECREF(dict); - Py_XDECREF(replace); - if (success) { - PyTraceBack_Here( - (struct _frame*)frame); - } - Py_XDECREF(frame); -} -#else -static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( - const char *funcname, int c_line, - int py_line, const char *filename) { - PyCodeObject *py_code = NULL; - PyObject *py_funcname = NULL; - #if PY_MAJOR_VERSION < 3 - PyObject *py_srcfile = NULL; - py_srcfile = PyString_FromString(filename); - if (!py_srcfile) goto bad; - #endif - if (c_line) { - #if PY_MAJOR_VERSION < 3 - py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); - if (!py_funcname) goto bad; - #else - py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); - if (!py_funcname) goto bad; - funcname = PyUnicode_AsUTF8(py_funcname); - if (!funcname) goto bad; - #endif - } - else { - #if PY_MAJOR_VERSION < 3 - py_funcname = PyString_FromString(funcname); - if (!py_funcname) goto bad; - #endif - } - #if PY_MAJOR_VERSION < 3 - py_code = __Pyx_PyCode_New( - 0, - 0, - 0, - 0, - 0, - 0, - __pyx_empty_bytes, /*PyObject *code,*/ - __pyx_empty_tuple, /*PyObject *consts,*/ - __pyx_empty_tuple, /*PyObject *names,*/ - __pyx_empty_tuple, /*PyObject *varnames,*/ - __pyx_empty_tuple, /*PyObject *freevars,*/ - __pyx_empty_tuple, /*PyObject *cellvars,*/ - py_srcfile, /*PyObject *filename,*/ - py_funcname, /*PyObject *name,*/ - py_line, - __pyx_empty_bytes /*PyObject *lnotab*/ - ); - Py_DECREF(py_srcfile); - 
#else - py_code = PyCode_NewEmpty(filename, funcname, py_line); - #endif - Py_XDECREF(py_funcname); - return py_code; -bad: - Py_XDECREF(py_funcname); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(py_srcfile); - #endif - return NULL; -} -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename) { - PyCodeObject *py_code = 0; - PyFrameObject *py_frame = 0; - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject *ptype, *pvalue, *ptraceback; - if (c_line) { - c_line = __Pyx_CLineForTraceback(tstate, c_line); - } - py_code = __pyx_find_code_object(c_line ? -c_line : py_line); - if (!py_code) { - __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); - py_code = __Pyx_CreateCodeObjectForTraceback( - funcname, c_line, py_line, filename); - if (!py_code) { - /* If the code object creation fails, then we should clear the - fetched exception references and propagate the new exception */ - Py_XDECREF(ptype); - Py_XDECREF(pvalue); - Py_XDECREF(ptraceback); - goto bad; - } - __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); - __pyx_insert_code_object(c_line ? 
-c_line : py_line, py_code); - } - py_frame = PyFrame_New( - tstate, /*PyThreadState *tstate,*/ - py_code, /*PyCodeObject *code,*/ - __pyx_d, /*PyObject *globals,*/ - 0 /*PyObject *locals*/ - ); - if (!py_frame) goto bad; - __Pyx_PyFrame_SetLineNumber(py_frame, py_line); - PyTraceBack_Here(py_frame); -bad: - Py_XDECREF(py_code); - Py_XDECREF(py_frame); -} -#endif - -#if PY_MAJOR_VERSION < 3 -static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) { - __Pyx_TypeName obj_type_name; - if (PyObject_CheckBuffer(obj)) return PyObject_GetBuffer(obj, view, flags); - if (__Pyx_TypeCheck(obj, __pyx_ptype_7cpython_5array_array)) return __pyx_pw_7cpython_5array_5array_1__getbuffer__(obj, view, flags); - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "'" __Pyx_FMT_TYPENAME "' does not have the buffer interface", - obj_type_name); - __Pyx_DECREF_TypeName(obj_type_name); - return -1; -} -static void __Pyx_ReleaseBuffer(Py_buffer *view) { - PyObject *obj = view->obj; - if (!obj) return; - if (PyObject_CheckBuffer(obj)) { - PyBuffer_Release(view); - return; - } - if ((0)) {} - else if (__Pyx_TypeCheck(obj, __pyx_ptype_7cpython_5array_array)) __pyx_pw_7cpython_5array_5array_3__releasebuffer__(obj, view); - view->obj = NULL; - Py_DECREF(obj); -} -#endif - - - /* CIntFromPyVerify */ - #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) -#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) -#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ - {\ - func_type value = func_value;\ - if (sizeof(target_type) < sizeof(func_type)) {\ - if (unlikely(value != (func_type) (target_type) value)) {\ - func_type zero = 0;\ - if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ - return (target_type) -1;\ - if (is_unsigned && unlikely(value < 
zero))\ - goto raise_neg_overflow;\ - else\ - goto raise_overflow;\ - }\ - }\ - return (target_type) value;\ - } - -/* Declarations */ - #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) - #ifdef __cplusplus - static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { - return ::std::complex< float >(x, y); - } - #else - static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { - return x + y*(__pyx_t_float_complex)_Complex_I; - } - #endif -#else - static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { - __pyx_t_float_complex z; - z.real = x; - z.imag = y; - return z; - } -#endif - -/* Arithmetic */ - #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) -#else - static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { - return (a.real == b.real) && (a.imag == b.imag); - } - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { - __pyx_t_float_complex z; - z.real = a.real + b.real; - z.imag = a.imag + b.imag; - return z; - } - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { - __pyx_t_float_complex z; - z.real = a.real - b.real; - z.imag = a.imag - b.imag; - return z; - } - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { - __pyx_t_float_complex z; - z.real = a.real * b.real - a.imag * b.imag; - z.imag = a.real * b.imag + a.imag * b.real; - return z; - } - #if 1 - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { - if (b.imag == 0) { - return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); - } else if (fabsf(b.real) >= fabsf(b.imag)) { - if (b.real == 0 && b.imag == 0) { - return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.imag); - } 
else { - float r = b.imag / b.real; - float s = (float)(1.0) / (b.real + b.imag * r); - return __pyx_t_float_complex_from_parts( - (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); - } - } else { - float r = b.real / b.imag; - float s = (float)(1.0) / (b.imag + b.real * r); - return __pyx_t_float_complex_from_parts( - (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); - } - } - #else - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { - if (b.imag == 0) { - return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); - } else { - float denom = b.real * b.real + b.imag * b.imag; - return __pyx_t_float_complex_from_parts( - (a.real * b.real + a.imag * b.imag) / denom, - (a.imag * b.real - a.real * b.imag) / denom); - } - } - #endif - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex a) { - __pyx_t_float_complex z; - z.real = -a.real; - z.imag = -a.imag; - return z; - } - static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex a) { - return (a.real == 0) && (a.imag == 0); - } - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex a) { - __pyx_t_float_complex z; - z.real = a.real; - z.imag = -a.imag; - return z; - } - #if 1 - static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex z) { - #if !defined(HAVE_HYPOT) || defined(_MSC_VER) - return sqrtf(z.real*z.real + z.imag*z.imag); - #else - return hypotf(z.real, z.imag); - #endif - } - static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { - __pyx_t_float_complex z; - float r, lnr, theta, z_r, z_theta; - if (b.imag == 0 && b.real == (int)b.real) { - if (b.real < 0) { - float denom = a.real * a.real + a.imag * a.imag; - a.real = a.real / denom; - a.imag = -a.imag / denom; - b.real = -b.real; - } - switch ((int)b.real) { - case 0: - z.real = 1; - z.imag = 0; - return z; - case 1: - return a; - 
case 2: - return __Pyx_c_prod_float(a, a); - case 3: - z = __Pyx_c_prod_float(a, a); - return __Pyx_c_prod_float(z, a); - case 4: - z = __Pyx_c_prod_float(a, a); - return __Pyx_c_prod_float(z, z); - } - } - if (a.imag == 0) { - if (a.real == 0) { - return a; - } else if ((b.imag == 0) && (a.real >= 0)) { - z.real = powf(a.real, b.real); - z.imag = 0; - return z; - } else if (a.real > 0) { - r = a.real; - theta = 0; - } else { - r = -a.real; - theta = atan2f(0.0, -1.0); - } - } else { - r = __Pyx_c_abs_float(a); - theta = atan2f(a.imag, a.real); - } - lnr = logf(r); - z_r = expf(lnr * b.real - theta * b.imag); - z_theta = theta * b.real + lnr * b.imag; - z.real = z_r * cosf(z_theta); - z.imag = z_r * sinf(z_theta); - return z; - } - #endif -#endif - -/* Declarations */ - #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) - #ifdef __cplusplus - static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { - return ::std::complex< double >(x, y); - } - #else - static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { - return x + y*(__pyx_t_double_complex)_Complex_I; - } - #endif -#else - static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { - __pyx_t_double_complex z; - z.real = x; - z.imag = y; - return z; - } -#endif - -/* Arithmetic */ - #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) -#else - static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { - return (a.real == b.real) && (a.imag == b.imag); - } - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { - __pyx_t_double_complex z; - z.real = a.real + b.real; - z.imag = a.imag + b.imag; - return z; - } - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { - __pyx_t_double_complex z; - z.real = a.real - b.real; - z.imag 
= a.imag - b.imag; - return z; - } - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { - __pyx_t_double_complex z; - z.real = a.real * b.real - a.imag * b.imag; - z.imag = a.real * b.imag + a.imag * b.real; - return z; - } - #if 1 - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { - if (b.imag == 0) { - return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); - } else if (fabs(b.real) >= fabs(b.imag)) { - if (b.real == 0 && b.imag == 0) { - return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.imag); - } else { - double r = b.imag / b.real; - double s = (double)(1.0) / (b.real + b.imag * r); - return __pyx_t_double_complex_from_parts( - (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); - } - } else { - double r = b.real / b.imag; - double s = (double)(1.0) / (b.imag + b.real * r); - return __pyx_t_double_complex_from_parts( - (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); - } - } - #else - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { - if (b.imag == 0) { - return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); - } else { - double denom = b.real * b.real + b.imag * b.imag; - return __pyx_t_double_complex_from_parts( - (a.real * b.real + a.imag * b.imag) / denom, - (a.imag * b.real - a.real * b.imag) / denom); - } - } - #endif - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex a) { - __pyx_t_double_complex z; - z.real = -a.real; - z.imag = -a.imag; - return z; - } - static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex a) { - return (a.real == 0) && (a.imag == 0); - } - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex a) { - __pyx_t_double_complex z; - z.real = a.real; - z.imag = -a.imag; - return z; - } 
- #if 1 - static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex z) { - #if !defined(HAVE_HYPOT) || defined(_MSC_VER) - return sqrt(z.real*z.real + z.imag*z.imag); - #else - return hypot(z.real, z.imag); - #endif - } - static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { - __pyx_t_double_complex z; - double r, lnr, theta, z_r, z_theta; - if (b.imag == 0 && b.real == (int)b.real) { - if (b.real < 0) { - double denom = a.real * a.real + a.imag * a.imag; - a.real = a.real / denom; - a.imag = -a.imag / denom; - b.real = -b.real; - } - switch ((int)b.real) { - case 0: - z.real = 1; - z.imag = 0; - return z; - case 1: - return a; - case 2: - return __Pyx_c_prod_double(a, a); - case 3: - z = __Pyx_c_prod_double(a, a); - return __Pyx_c_prod_double(z, a); - case 4: - z = __Pyx_c_prod_double(a, a); - return __Pyx_c_prod_double(z, z); - } - } - if (a.imag == 0) { - if (a.real == 0) { - return a; - } else if ((b.imag == 0) && (a.real >= 0)) { - z.real = pow(a.real, b.real); - z.imag = 0; - return z; - } else if (a.real > 0) { - r = a.real; - theta = 0; - } else { - r = -a.real; - theta = atan2(0.0, -1.0); - } - } else { - r = __Pyx_c_abs_double(a); - theta = atan2(a.imag, a.real); - } - lnr = log(r); - z_r = exp(lnr * b.real - theta * b.imag); - z_theta = theta * b.real + lnr * b.imag; - z.real = z_r * cos(z_theta); - z.imag = z_r * sin(z_theta); - return z; - } - #endif -#endif - -/* Declarations */ - #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) - #ifdef __cplusplus - static CYTHON_INLINE __pyx_t_long_double_complex __pyx_t_long_double_complex_from_parts(long double x, long double y) { - return ::std::complex< long double >(x, y); - } - #else - static CYTHON_INLINE __pyx_t_long_double_complex __pyx_t_long_double_complex_from_parts(long double x, long double y) { - return x + y*(__pyx_t_long_double_complex)_Complex_I; - } - #endif -#else - static CYTHON_INLINE __pyx_t_long_double_complex 
__pyx_t_long_double_complex_from_parts(long double x, long double y) { - __pyx_t_long_double_complex z; - z.real = x; - z.imag = y; - return z; - } -#endif - -/* Arithmetic */ - #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) -#else - static CYTHON_INLINE int __Pyx_c_eq_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { - return (a.real == b.real) && (a.imag == b.imag); - } - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_sum_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { - __pyx_t_long_double_complex z; - z.real = a.real + b.real; - z.imag = a.imag + b.imag; - return z; - } - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_diff_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { - __pyx_t_long_double_complex z; - z.real = a.real - b.real; - z.imag = a.imag - b.imag; - return z; - } - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_prod_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { - __pyx_t_long_double_complex z; - z.real = a.real * b.real - a.imag * b.imag; - z.imag = a.real * b.imag + a.imag * b.real; - return z; - } - #if 1 - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_quot_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { - if (b.imag == 0) { - return __pyx_t_long_double_complex_from_parts(a.real / b.real, a.imag / b.real); - } else if (fabsl(b.real) >= fabsl(b.imag)) { - if (b.real == 0 && b.imag == 0) { - return __pyx_t_long_double_complex_from_parts(a.real / b.real, a.imag / b.imag); - } else { - long double r = b.imag / b.real; - long double s = (long double)(1.0) / (b.real + b.imag * r); - return __pyx_t_long_double_complex_from_parts( - (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); - } - } else { - long double r = b.real / b.imag; - long double s = (long double)(1.0) / (b.imag + b.real * r); - return __pyx_t_long_double_complex_from_parts( - (a.real * r + a.imag) * 
s, (a.imag * r - a.real) * s); - } - } - #else - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_quot_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { - if (b.imag == 0) { - return __pyx_t_long_double_complex_from_parts(a.real / b.real, a.imag / b.real); - } else { - long double denom = b.real * b.real + b.imag * b.imag; - return __pyx_t_long_double_complex_from_parts( - (a.real * b.real + a.imag * b.imag) / denom, - (a.imag * b.real - a.real * b.imag) / denom); - } - } - #endif - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_neg_long__double(__pyx_t_long_double_complex a) { - __pyx_t_long_double_complex z; - z.real = -a.real; - z.imag = -a.imag; - return z; - } - static CYTHON_INLINE int __Pyx_c_is_zero_long__double(__pyx_t_long_double_complex a) { - return (a.real == 0) && (a.imag == 0); - } - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_conj_long__double(__pyx_t_long_double_complex a) { - __pyx_t_long_double_complex z; - z.real = a.real; - z.imag = -a.imag; - return z; - } - #if 1 - static CYTHON_INLINE long double __Pyx_c_abs_long__double(__pyx_t_long_double_complex z) { - #if !defined(HAVE_HYPOT) || defined(_MSC_VER) - return sqrtl(z.real*z.real + z.imag*z.imag); - #else - return hypotl(z.real, z.imag); - #endif - } - static CYTHON_INLINE __pyx_t_long_double_complex __Pyx_c_pow_long__double(__pyx_t_long_double_complex a, __pyx_t_long_double_complex b) { - __pyx_t_long_double_complex z; - long double r, lnr, theta, z_r, z_theta; - if (b.imag == 0 && b.real == (int)b.real) { - if (b.real < 0) { - long double denom = a.real * a.real + a.imag * a.imag; - a.real = a.real / denom; - a.imag = -a.imag / denom; - b.real = -b.real; - } - switch ((int)b.real) { - case 0: - z.real = 1; - z.imag = 0; - return z; - case 1: - return a; - case 2: - return __Pyx_c_prod_long__double(a, a); - case 3: - z = __Pyx_c_prod_long__double(a, a); - return __Pyx_c_prod_long__double(z, a); - case 4: - z = 
__Pyx_c_prod_long__double(a, a); - return __Pyx_c_prod_long__double(z, z); - } - } - if (a.imag == 0) { - if (a.real == 0) { - return a; - } else if ((b.imag == 0) && (a.real >= 0)) { - z.real = powl(a.real, b.real); - z.imag = 0; - return z; - } else if (a.real > 0) { - r = a.real; - theta = 0; - } else { - r = -a.real; - theta = atan2l(0.0, -1.0); - } - } else { - r = __Pyx_c_abs_long__double(a); - theta = atan2l(a.imag, a.real); - } - lnr = logl(r); - z_r = expl(lnr * b.real - theta * b.imag); - z_theta = theta * b.real + lnr * b.imag; - z.real = z_r * cosl(z_theta); - z.imag = z_r * sinl(z_theta); - return z; - } - #endif -#endif - -/* CIntToPy */ - static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const int neg_one = (int) -1, const_zero = (int) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(int) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(int) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(int) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 - if (is_unsigned) { - return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); - } else { - return PyLong_FromNativeBytes(bytes, sizeof(value), -1); - } -#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 - int one = 1; int little 
= (int)*(unsigned char *)&one; - return _PyLong_FromByteArray(bytes, sizeof(int), - little, !is_unsigned); -#else - int one = 1; int little = (int)*(unsigned char *)&one; - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(int)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? "little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - if (!is_unsigned) { - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; - } - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(kwds); - Py_XDECREF(arg_tuple); - Py_XDECREF(order_str); - Py_XDECREF(py_bytes); - Py_XDECREF(from_bytes); - return result; -#endif - } -} - -/* CIntFromPy */ - static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const int neg_one = (int) -1, const_zero = (int) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(int) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (int) val; - } - } -#endif - if (unlikely(!PyLong_Check(x))) { - int val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (int) -1; - val = __Pyx_PyInt_As_int(tmp); - Py_DECREF(tmp); - return val; - } - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if 
(unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 2 * PyLong_SHIFT)) { - return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 3 * PyLong_SHIFT)) { - return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 4 * PyLong_SHIFT)) { - return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (int) -1; - if 
(unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(int) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(int) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(int) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { 
- if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { - return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(int) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(int) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - int val; - int ret = -1; -#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API - Py_ssize_t bytes_copied = PyLong_AsNativeBytes( - x, &val, sizeof(val), 
Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); - if (unlikely(bytes_copied == -1)) { - } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { - goto raise_overflow; - } else { - ret = 0; - } -#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)x, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *v; - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - int chunk_size = (sizeof(long) < 8) ? 30 : 62; - if (likely(PyLong_CheckExact(x))) { - v = __Pyx_NewRef(x); - } else { - v = PyNumber_Long(x); - if (unlikely(!v)) return (int) -1; - assert(PyLong_CheckExact(v)); - } - { - int result = PyObject_RichCompareBool(v, Py_False, Py_LT); - if (unlikely(result < 0)) { - Py_DECREF(v); - return (int) -1; - } - is_negative = result == 1; - } - if (is_unsigned && unlikely(is_negative)) { - Py_DECREF(v); - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - Py_DECREF(v); - if (unlikely(!stepval)) - return (int) -1; - } else { - stepval = v; - } - v = NULL; - val = (int) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(int) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - long idigit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - val |= ((int) idigit) << bits; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - } - 
Py_DECREF(shift); shift = NULL; - Py_DECREF(mask); mask = NULL; - { - long idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(int) * 8) - bits - (is_unsigned ? 0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((int) idigit) << bits; - } - if (!is_unsigned) { - if (unlikely(val & (((int) 1) << (sizeof(int) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - if (unlikely(ret)) - return (int) -1; - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to int"); - return (int) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to int"); - return (int) -1; -} - -/* CIntToPy */ - static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const long neg_one = (long) -1, const_zero = (long) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(long) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(long) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(long) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 - if (is_unsigned) { - return 
PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); - } else { - return PyLong_FromNativeBytes(bytes, sizeof(value), -1); - } -#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 - int one = 1; int little = (int)*(unsigned char *)&one; - return _PyLong_FromByteArray(bytes, sizeof(long), - little, !is_unsigned); -#else - int one = 1; int little = (int)*(unsigned char *)&one; - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(long)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? "little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - if (!is_unsigned) { - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; - } - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(kwds); - Py_XDECREF(arg_tuple); - Py_XDECREF(order_str); - Py_XDECREF(py_bytes); - Py_XDECREF(from_bytes); - return result; -#endif - } -} - -/* FormatTypeName */ - #if CYTHON_COMPILING_IN_LIMITED_API -static __Pyx_TypeName -__Pyx_PyType_GetName(PyTypeObject* tp) -{ - PyObject *name = __Pyx_PyObject_GetAttrStr((PyObject *)tp, - __pyx_n_s_name); - if (unlikely(name == NULL) || unlikely(!PyUnicode_Check(name))) { - PyErr_Clear(); - Py_XDECREF(name); - name = __Pyx_NewRef(__pyx_n_s__11); - } - return name; -} -#endif - -/* CIntFromPy */ - static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const long neg_one = (long) -1, const_zero = (long) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC 
diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(long) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (long) val; - } - } -#endif - if (unlikely(!PyLong_Check(x))) { - long val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (long) -1; - val = __Pyx_PyInt_As_long(tmp); - Py_DECREF(tmp); - return val; - } - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 2 * PyLong_SHIFT)) { - return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 3 * PyLong_SHIFT)) { - return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned 
long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 4 * PyLong_SHIFT)) { - return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (long) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(long) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(long) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(long) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * 
sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned 
long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { - return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(long) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(long) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - long val; - int ret = -1; -#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API - Py_ssize_t bytes_copied = PyLong_AsNativeBytes( - x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); - if (unlikely(bytes_copied == -1)) { - } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { - goto raise_overflow; - } else { - ret = 0; - } -#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)x, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *v; - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - int chunk_size = (sizeof(long) < 8) ? 
30 : 62; - if (likely(PyLong_CheckExact(x))) { - v = __Pyx_NewRef(x); - } else { - v = PyNumber_Long(x); - if (unlikely(!v)) return (long) -1; - assert(PyLong_CheckExact(v)); - } - { - int result = PyObject_RichCompareBool(v, Py_False, Py_LT); - if (unlikely(result < 0)) { - Py_DECREF(v); - return (long) -1; - } - is_negative = result == 1; - } - if (is_unsigned && unlikely(is_negative)) { - Py_DECREF(v); - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - Py_DECREF(v); - if (unlikely(!stepval)) - return (long) -1; - } else { - stepval = v; - } - v = NULL; - val = (long) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(long) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - long idigit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - val |= ((long) idigit) << bits; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - } - Py_DECREF(shift); shift = NULL; - Py_DECREF(mask); mask = NULL; - { - long idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(long) * 8) - bits - (is_unsigned ? 
0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((long) idigit) << bits; - } - if (!is_unsigned) { - if (unlikely(val & (((long) 1) << (sizeof(long) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - if (unlikely(ret)) - return (long) -1; - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to long"); - return (long) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to long"); - return (long) -1; -} - -/* FastTypeChecks */ - #if CYTHON_COMPILING_IN_CPYTHON -static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { - while (a) { - a = __Pyx_PyType_GetSlot(a, tp_base, PyTypeObject*); - if (a == b) - return 1; - } - return b == &PyBaseObject_Type; -} -static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { - PyObject *mro; - if (a == b) return 1; - mro = a->tp_mro; - if (likely(mro)) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(mro); - for (i = 0; i < n; i++) { - if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) - return 1; - } - return 0; - } - return __Pyx_InBases(a, b); -} -static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b) { - PyObject *mro; - if (cls == a || cls == b) return 1; - mro = cls->tp_mro; - if (likely(mro)) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(mro); - for (i = 0; i < n; i++) { - PyObject *base = PyTuple_GET_ITEM(mro, i); - if (base == (PyObject *)a || base == (PyObject *)b) - return 1; - } - return 0; - } - return __Pyx_InBases(cls, a) || __Pyx_InBases(cls, b); -} -#if PY_MAJOR_VERSION == 2 -static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { - PyObject *exception, *value, *tb; - int res; - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ErrFetch(&exception, 
&value, &tb); - res = exc_type1 ? PyObject_IsSubclass(err, exc_type1) : 0; - if (unlikely(res == -1)) { - PyErr_WriteUnraisable(err); - res = 0; - } - if (!res) { - res = PyObject_IsSubclass(err, exc_type2); - if (unlikely(res == -1)) { - PyErr_WriteUnraisable(err); - res = 0; - } - } - __Pyx_ErrRestore(exception, value, tb); - return res; -} -#else -static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { - if (exc_type1) { - return __Pyx_IsAnySubtype2((PyTypeObject*)err, (PyTypeObject*)exc_type1, (PyTypeObject*)exc_type2); - } else { - return __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); - } -} -#endif -static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { - Py_ssize_t i, n; - assert(PyExceptionClass_Check(exc_type)); - n = PyTuple_GET_SIZE(tuple); -#if PY_MAJOR_VERSION >= 3 - for (i=0; i= 0x030B00A4 - return Py_Version & ~0xFFUL; -#else - const char* rt_version = Py_GetVersion(); - unsigned long version = 0; - unsigned long factor = 0x01000000UL; - unsigned int digit = 0; - int i = 0; - while (factor) { - while ('0' <= rt_version[i] && rt_version[i] <= '9') { - digit = digit * 10 + (unsigned int) (rt_version[i] - '0'); - ++i; - } - version += factor * digit; - if (rt_version[i] != '.') - break; - digit = 0; - factor >>= 8; - ++i; - } - return version; -#endif -} -static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer) { - const unsigned long MAJOR_MINOR = 0xFFFF0000UL; - if ((rt_version & MAJOR_MINOR) == (ct_version & MAJOR_MINOR)) - return 0; - if (likely(allow_newer && (rt_version & MAJOR_MINOR) > (ct_version & MAJOR_MINOR))) - return 1; - { - char message[200]; - PyOS_snprintf(message, sizeof(message), - "compile time Python version %d.%d " - "of module '%.100s' " - "%s " - "runtime version %d.%d", - (int) (ct_version >> 24), (int) ((ct_version >> 16) & 0xFF), - __Pyx_MODULE_NAME, - 
(allow_newer) ? "was newer than" : "does not match", - (int) (rt_version >> 24), (int) ((rt_version >> 16) & 0xFF) - ); - return PyErr_WarnEx(NULL, message, 1); - } -} - -/* InitStrings */ - #if PY_MAJOR_VERSION >= 3 -static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) { - if (t.is_unicode | t.is_str) { - if (t.intern) { - *str = PyUnicode_InternFromString(t.s); - } else if (t.encoding) { - *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL); - } else { - *str = PyUnicode_FromStringAndSize(t.s, t.n - 1); - } - } else { - *str = PyBytes_FromStringAndSize(t.s, t.n - 1); - } - if (!*str) - return -1; - if (PyObject_Hash(*str) == -1) - return -1; - return 0; -} -#endif -static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { - while (t->p) { - #if PY_MAJOR_VERSION >= 3 - __Pyx_InitString(*t, t->p); - #else - if (t->is_unicode) { - *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); - } else if (t->intern) { - *t->p = PyString_InternFromString(t->s); - } else { - *t->p = PyString_FromStringAndSize(t->s, t->n - 1); - } - if (!*t->p) - return -1; - if (PyObject_Hash(*t->p) == -1) - return -1; - #endif - ++t; - } - return 0; -} - -#include -static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) { - size_t len = strlen(s); - if (unlikely(len > (size_t) PY_SSIZE_T_MAX)) { - PyErr_SetString(PyExc_OverflowError, "byte string is too long"); - return -1; - } - return (Py_ssize_t) len; -} -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { - Py_ssize_t len = __Pyx_ssize_strlen(c_str); - if (unlikely(len < 0)) return NULL; - return __Pyx_PyUnicode_FromStringAndSize(c_str, len); -} -static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char* c_str) { - Py_ssize_t len = __Pyx_ssize_strlen(c_str); - if (unlikely(len < 0)) return NULL; - return PyByteArray_FromStringAndSize(c_str, len); -} -static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { - Py_ssize_t ignore; - return 
__Pyx_PyObject_AsStringAndSize(o, &ignore); -} -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT -#if !CYTHON_PEP393_ENABLED -static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { - char* defenc_c; - PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); - if (!defenc) return NULL; - defenc_c = PyBytes_AS_STRING(defenc); -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - { - char* end = defenc_c + PyBytes_GET_SIZE(defenc); - char* c; - for (c = defenc_c; c < end; c++) { - if ((unsigned char) (*c) >= 128) { - PyUnicode_AsASCIIString(o); - return NULL; - } - } - } -#endif - *length = PyBytes_GET_SIZE(defenc); - return defenc_c; -} -#else -static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { - if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - if (likely(PyUnicode_IS_ASCII(o))) { - *length = PyUnicode_GET_LENGTH(o); - return PyUnicode_AsUTF8(o); - } else { - PyUnicode_AsASCIIString(o); - return NULL; - } -#else - return PyUnicode_AsUTF8AndSize(o, length); -#endif -} -#endif -#endif -static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT - if ( -#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - __Pyx_sys_getdefaultencoding_not_ascii && -#endif - PyUnicode_Check(o)) { - return __Pyx_PyUnicode_AsStringAndSize(o, length); - } else -#endif -#if (!CYTHON_COMPILING_IN_PYPY && !CYTHON_COMPILING_IN_LIMITED_API) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) - if (PyByteArray_Check(o)) { - *length = PyByteArray_GET_SIZE(o); - return PyByteArray_AS_STRING(o); - } else -#endif - { - char* result; - int r = PyBytes_AsStringAndSize(o, &result, length); - if (unlikely(r < 0)) { - return NULL; - } else { - return result; - } - } -} -static 
CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { - int is_true = x == Py_True; - if (is_true | (x == Py_False) | (x == Py_None)) return is_true; - else return PyObject_IsTrue(x); -} -static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { - int retval; - if (unlikely(!x)) return -1; - retval = __Pyx_PyObject_IsTrue(x); - Py_DECREF(x); - return retval; -} -static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { - __Pyx_TypeName result_type_name = __Pyx_PyType_GetName(Py_TYPE(result)); -#if PY_MAJOR_VERSION >= 3 - if (PyLong_Check(result)) { - if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "__int__ returned non-int (type " __Pyx_FMT_TYPENAME "). " - "The ability to return an instance of a strict subclass of int is deprecated, " - "and may be removed in a future version of Python.", - result_type_name)) { - __Pyx_DECREF_TypeName(result_type_name); - Py_DECREF(result); - return NULL; - } - __Pyx_DECREF_TypeName(result_type_name); - return result; - } -#endif - PyErr_Format(PyExc_TypeError, - "__%.4s__ returned non-%.4s (type " __Pyx_FMT_TYPENAME ")", - type_name, type_name, result_type_name); - __Pyx_DECREF_TypeName(result_type_name); - Py_DECREF(result); - return NULL; -} -static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { -#if CYTHON_USE_TYPE_SLOTS - PyNumberMethods *m; -#endif - const char *name = NULL; - PyObject *res = NULL; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x) || PyLong_Check(x))) -#else - if (likely(PyLong_Check(x))) -#endif - return __Pyx_NewRef(x); -#if CYTHON_USE_TYPE_SLOTS - m = Py_TYPE(x)->tp_as_number; - #if PY_MAJOR_VERSION < 3 - if (m && m->nb_int) { - name = "int"; - res = m->nb_int(x); - } - else if (m && m->nb_long) { - name = "long"; - res = m->nb_long(x); - } - #else - if (likely(m && m->nb_int)) { - name = "int"; - res = m->nb_int(x); - } - #endif -#else - if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { - res = PyNumber_Int(x); - } 
-#endif - if (likely(res)) { -#if PY_MAJOR_VERSION < 3 - if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { -#else - if (unlikely(!PyLong_CheckExact(res))) { -#endif - return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); - } - } - else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, - "an integer is required"); - } - return res; -} -static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { - Py_ssize_t ival; - PyObject *x; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_CheckExact(b))) { - if (sizeof(Py_ssize_t) >= sizeof(long)) - return PyInt_AS_LONG(b); - else - return PyInt_AsSsize_t(b); - } -#endif - if (likely(PyLong_CheckExact(b))) { - #if CYTHON_USE_PYLONG_INTERNALS - if (likely(__Pyx_PyLong_IsCompact(b))) { - return __Pyx_PyLong_CompactValue(b); - } else { - const digit* digits = __Pyx_PyLong_Digits(b); - const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(b); - switch (size) { - case 2: - if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { - return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -2: - if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case 3: - if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { - return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -3: - if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case 4: - if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { - return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -4: - if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { - return -(Py_ssize_t) 
(((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - } - } - #endif - return PyLong_AsSsize_t(b); - } - x = PyNumber_Index(b); - if (!x) return -1; - ival = PyInt_AsSsize_t(x); - Py_DECREF(x); - return ival; -} -static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { - if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { - return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o); -#if PY_MAJOR_VERSION < 3 - } else if (likely(PyInt_CheckExact(o))) { - return PyInt_AS_LONG(o); -#endif - } else { - Py_ssize_t ival; - PyObject *x; - x = PyNumber_Index(o); - if (!x) return -1; - ival = PyInt_AsLong(x); - Py_DECREF(x); - return ival; - } -} -static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { - return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); -} -static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { - return PyInt_FromSize_t(ival); -} - - -/* #### Code section: utility_code_pragmas_end ### */ -#ifdef _MSC_VER -#pragma warning( pop ) -#endif - - - -/* #### Code section: end ### */ -#endif /* Py_PYTHON_H */ diff --git a/jcvi/assembly/chic.pyx b/jcvi/assembly/chic.pyx deleted file mode 100644 index cd526243..00000000 --- a/jcvi/assembly/chic.pyx +++ /dev/null @@ -1,105 +0,0 @@ -#cython: language_level=2, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True - -""" -Cythonized version of score_evaluate() in hic.py. - -Support three versions with different objective functions: -- score_evaluate_M: distance is defined as the distance between mid-points - between contigs. Maximize Sum(n_links / distance). -- score_evaluate_P: distance is defined as the sizes of interleaving contigs - plus the harmonic mean of all link distances. Maximize Sum(n_links / distance). -- score_evaluate_Q: distance is defined as the sizes of interleaving contigs - plus the actual link distances. Maximize Sum(1 / distance) for all links. 
- For performance consideration, we actually use a histogram to approximate - all link distances. See golden_array() in hic for details. -""" - -from __future__ import division -import numpy as np -cimport numpy as np -cimport cython -from cpython cimport array -import array - - -ctypedef np.int INT -DEF LIMIT = 10000000 -DEF BB = 12 -cdef int *GR = \ - [ 5778, 9349, 15127, 24476, - 39603, 64079, 103682, 167761, - 271443, 439204, 710647, 1149851] - - -def score_evaluate_M(array.array[int] tour, - np.ndarray[INT, ndim=1] tour_sizes=None, - np.ndarray[INT, ndim=2] tour_M=None): - cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - sizes_oo // 2 - - cdef double s = 0.0 - cdef int size = len(tour) - cdef int a, b, ia, ib - cdef int links - cdef double dist - for ia in range(size): - a = tour[ia] - for ib in range(ia + 1, size): - b = tour[ib] - links = tour_M[a, b] - if links == 0: - continue - dist = sizes_cum[ib] - sizes_cum[ia] - if dist > LIMIT: - break - s += links / dist - return s, - - -def score_evaluate_P(array.array[int] tour, - np.ndarray[INT, ndim=1] tour_sizes=None, - np.ndarray[INT, ndim=3] tour_P=None): - cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - - cdef double s = 0.0 - cdef int size = len(tour) - cdef int a, b, c, ia, ib - cdef double dist - for ia in range(size): - a = tour[ia] - for ib in range(ia + 1, size): - b = tour[ib] - dist = sizes_cum[ib - 1] - sizes_cum[ia] - if dist > LIMIT: - break - c = tour_P[a, b, 0] - if c == 0: - continue - s += c / (tour_P[a, b, 1] + dist) - return s, - - -def score_evaluate_Q(array.array[int] tour, - np.ndarray[INT, ndim=1] tour_sizes=None, - np.ndarray[INT, ndim=3] tour_Q=None): - cdef np.ndarray[INT, ndim=1] sizes_oo = tour_sizes[tour] - cdef np.ndarray[INT, ndim=1] sizes_cum = np.cumsum(sizes_oo) - - cdef double s = 0.0 - cdef int size = len(tour) - cdef int a, b, c, 
ia, ib, ic - cdef double dist - for ia in range(size): - a = tour[ia] - for ib in range(ia + 1, size): - b = tour[ib] - if tour_Q[a, b, 0] == -1: - continue - dist = sizes_cum[ib - 1] - sizes_cum[ia] - if dist > LIMIT: - break - for ic in range(BB): - c = tour_Q[a, b, ic] - s += c / (GR[ic] + dist) - return s, diff --git a/jcvi/assembly/coverage.py b/jcvi/assembly/coverage.py deleted file mode 100644 index da5aab99..00000000 --- a/jcvi/assembly/coverage.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Provide coverage QC for assembled sequences: -1. plot paired-end reads as curves -2. plot base coverage and mate coverage -3. plot gaps in the sequence (if any) -""" -from collections import defaultdict - -from ..apps.base import ActionDispatcher, logger, need_update, sh -from ..formats.base import BaseFile, must_open -from ..formats.bed import BedLine, sort -from ..formats.sizes import Sizes - - -class Coverage(BaseFile): - """ - Three-column .coverage file, often generated by `genomeCoverageBed -d` - contigID baseID coverage - """ - - def __init__(self, bedfile, sizesfile): - - bedfile = sort([bedfile]) - coveragefile = bedfile + ".coverage" - if need_update(bedfile, coveragefile): - cmd = "genomeCoverageBed" - cmd += " -bg -i {0} -g {1}".format(bedfile, sizesfile) - sh(cmd, outfile=coveragefile) - - self.sizes = Sizes(sizesfile).mapping - - filename = coveragefile - assert filename.endswith(".coverage") - super().__init__(filename) - - def get_plot_data(self, ctg, bins=None): - import numpy as np - from jcvi.algorithms.matrix import chunk_average - - fp = open(self.filename) - size = self.sizes[ctg] - - data = np.zeros((size,), dtype=np.int) - for row in fp: - seqid, start, end, cov = row.split() - if seqid != ctg: - continue - - start, end = int(start), int(end) - cov = int(cov) - data[start:end] = cov - - bases = np.arange(1, size + 1) - if bins: - window = size / bins - bases = bases[::window] - data = 
chunk_average(data, window) - - return bases, data - - -def main(): - - actions = (("posmap", "QC based on indexed posmap file"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def clone_name(s, ca=False): - """ - >>> clone_name("120038881639") - "0038881639" - >>> clone_name("GW11W6RK01DAJDWa") - "GW11W6RK01DAJDW" - """ - if not ca: - return s[:-1] - - if s[0] == "1": - return s[2:] - return s.rstrip("ab") - - -def bed_to_bedpe( - bedfile, bedpefile, pairsbedfile=None, matesfile=None, ca=False, strand=False -): - """ - This converts the bedfile to bedpefile, assuming the reads are from CA. - """ - fp = must_open(bedfile) - fw = must_open(bedpefile, "w") - if pairsbedfile: - fwpairs = must_open(pairsbedfile, "w") - - clones = defaultdict(list) - for row in fp: - b = BedLine(row) - name = b.accn - clonename = clone_name(name, ca=ca) - clones[clonename].append(b) - - if matesfile: - fp = open(matesfile) - libraryline = next(fp) - # 'library bes 37896 126916' - lib, name, smin, smax = libraryline.split() - assert lib == "library" - smin, smax = int(smin), int(smax) - logger.debug( - "Happy mates for lib {0} fall between {1} - {2}".format(name, smin, smax) - ) - - nbedpe = 0 - nspan = 0 - for clonename, blines in clones.items(): - nlines = len(blines) - if nlines == 2: - a, b = blines - aseqid, astart, aend = a.seqid, a.start, a.end - bseqid, bstart, bend = b.seqid, b.start, b.end - outcols = [aseqid, astart - 1, aend, bseqid, bstart - 1, bend, clonename] - if strand: - outcols.extend([0, a.strand, b.strand]) - print("\t".join(str(x) for x in outcols), file=fw) - nbedpe += 1 - elif nlines == 1: - (a,) = blines - aseqid, astart, aend = a.seqid, a.start, a.end - bseqid, bstart, bend = 0, 0, 0 - else: # More than two lines per pair - pass - - if pairsbedfile: - start = min(astart, bstart) if bstart > 0 else astart - end = max(aend, bend) if bend > 0 else aend - if aseqid != bseqid: - continue - - span = end - start + 1 - if (not matesfile) or (smin <= span 
<= smax): - print( - "\t".join(str(x) for x in (aseqid, start - 1, end, clonename)), - file=fwpairs, - ) - nspan += 1 - - fw.close() - logger.debug("A total of {0} bedpe written to `{1}`.".format(nbedpe, bedpefile)) - if pairsbedfile: - fwpairs.close() - logger.debug( - "A total of {0} spans written to `{1}`.".format(nspan, pairsbedfile) - ) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/gaps.py b/jcvi/assembly/gaps.py deleted file mode 100644 index 57824dbc..00000000 --- a/jcvi/assembly/gaps.py +++ /dev/null @@ -1,294 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Calculates gap statistics and manipulate gaps in assembly. -""" -import os.path as op -import sys - -from itertools import groupby - -from ..apps.base import ActionDispatcher, OptionParser, logger, need_update -from ..formats.bed import Bed, fastaFromBed -from ..formats.blast import BlastSlow -from ..formats.sizes import Sizes - - -def main(): - - actions = ( - ("flanks", "create sequences flanking the gaps"), - ("sizes", "compile gap sizes"), - ("estimate", "estimate gap sizes based on mates"), - ("annotate", "annotate AGP v2 file with linkage info"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def annotate(args): - """ - %prog annotate agpfile gaps.linkage.bed assembly.fasta - - Annotate AGP file with linkage info of `paired-end` or `map`. - File `gaps.linkage.bed` is generated by assembly.gaps.estimate(). 
- """ - from jcvi.formats.agp import AGP, bed, tidy - - p = OptionParser(annotate.__doc__) - p.add_argument("--minsize", default=200, help="Smallest component size") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - agpfile, linkagebed, assemblyfasta = args - linkagebed = Bed(linkagebed) - spannedgaps = set() - for b in linkagebed: - score = int(b.score) - if score == 0: - spannedgaps.add((b.accn, b.start, b.end)) - - agp = AGP(agpfile) - newagpfile = agpfile.rsplit(".", 1)[0] + ".linkage.agp" - newagp = open(newagpfile, "w") - contig_id = 0 - minsize = opts.minsize - for a in agp: - if not a.is_gap: - cs = a.component_span - if cs < minsize: - a.is_gap = True - a.component_type = "N" - a.gap_length = cs - a.gap_type = "scaffold" - a.linkage = "yes" - a.linkage_evidence = [] - else: - contig_id += 1 - a.component_id = "contig{0:04d}".format(contig_id) - a.component_beg = 1 - a.component_end = cs - a.component_type = "W" - - print(a, file=newagp) - continue - - gapinfo = (a.object, a.object_beg, a.object_end) - gaplen = a.gap_length - - if gaplen == 100 and gapinfo not in spannedgaps: - a.component_type = "U" - tag = "map" - else: - tag = "paired-ends" - - a.linkage_evidence.append(tag) - print(a, file=newagp) - - newagp.close() - logger.debug("Annotated AGP written to `%s`.", newagpfile) - - contigbed = assemblyfasta.rsplit(".", 1)[0] + ".contigs.bed" - bedfile = bed([newagpfile, "--nogaps", "--outfile=" + contigbed]) - - contigfasta = fastaFromBed(bedfile, assemblyfasta, name=True, stranded=True) - - tidy([newagpfile, contigfasta]) - - -def blast_to_twobeds(blastfile, rclip=1): - - key1 = lambda x: x.query - key2 = lambda x: x.query[:-rclip] if rclip else key1 - data = BlastSlow(blastfile) - OK = "OK" - - fw = open("after.bed", "w") - fwlabels = open("after.labels", "w") - for pe, lines in groupby(data, key=key2): - label = OK - lines = list(lines) - assert len(lines) in (1, 2) - - if len(lines) != 2: - label = "Singleton" 
- - else: - a, b = lines - - aquery, bquery = a.query, b.query - asubject, bsubject = a.subject, b.subject - if asubject != bsubject: - label = "Different chr {0}|{1}".format(asubject, bsubject) - - else: - astrand, bstrand = a.orientation, b.orientation - assert aquery[-1] == "L" and bquery[-1] == "R", str((aquery, bquery)) - - if astrand == "+" and bstrand == "+": - sstart, sstop = a.sstop + 1, b.sstart - 1 - - elif astrand == "-" and bstrand == "-": - sstart, sstop = b.sstop + 1, a.sstart - 1 - - else: - label = "Strand {0}|{1}".format(astrand, bstrand) - - if label == OK: - strand = "+" - label = sstop - sstart + 1 - - if sstart > sstop: - sstart, sstop = sstop, sstart - strand = "-" - label = -(sstop - sstart + 1) - - print( - "\t".join(str(x) for x in (asubject, sstart - 1, sstop, pe, strand)), - file=fw, - ) - - print("\t".join(str(x) for x in (pe, label)), file=fwlabels) - - fw.close() - fwlabels.close() - - return fwlabels.name - - -def sizes(args): - """ - %prog sizes gaps.bed a.fasta b.fasta - - Take the flanks of gaps within a.fasta, map them onto b.fasta. Compile the - results to the gap size estimates in b. The output is detailed below: - - Columns are: - 1. A scaffold - 2. Start position - 3. End position - 4. Gap identifier - 5. Gap size in A (= End - Start) - 6. Gap size in B (based on BLAST, see below) - - For each gap, I extracted the left and right sequence (mostly 2Kb, but can be shorter - if it runs into another gap) flanking the gap. The flanker names look like gap.00003L - and gap.00003R means the left and right flanker of this particular gap, respectively. - - The BLAST output is used to calculate the gap size. For each flanker sequence, I took - the best hit, and calculate the inner distance between the L match range and R range. - The two flankers must map with at least 98% identity, and in the same orientation. - - NOTE the sixth column in the list file is not always a valid number. 
Other values are: - - na: both flankers are missing in B - - Singleton: one flanker is missing - - Different chr: flankers map to different scaffolds - - Strand +|-: flankers map in different orientations - - Negative value: the R flanker map before L flanker - """ - from jcvi.formats.base import DictFile - from jcvi.apps.align import blast - - p = OptionParser(sizes.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - gapsbed, afasta, bfasta = args - pf = gapsbed.rsplit(".", 1)[0] - extfasta = pf + ".ext.fasta" - - if need_update(gapsbed, extfasta): - extbed, extfasta = flanks([gapsbed, afasta]) - - q = op.basename(extfasta).split(".")[0] - r = op.basename(bfasta).split(".")[0] - blastfile = "{0}.{1}.blast".format(q, r) - - if need_update([extfasta, bfasta], blastfile): - blastfile = blast([bfasta, extfasta, "--wordsize=50", "--pctid=98"]) - - labelsfile = blast_to_twobeds(blastfile) - labels = DictFile(labelsfile, delimiter="\t") - bed = Bed(gapsbed) - for b in bed: - b.score = b.span - accn = b.accn - print( - "\t".join( - ( - str(x) - for x in ( - b.seqid, - b.start - 1, - b.end, - accn, - b.score, - labels.get(accn, "na"), - ) - ) - ) - ) - - -def flanks(args): - """ - %prog flanks gaps.bed fastafile - - Create sequences flanking the gaps. 
- """ - p = OptionParser(flanks.__doc__) - p.add_argument( - "--extend", - default=2000, - type=int, - help="Extend seq flanking the gaps", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - gapsbed, fastafile = args - Ext = opts.extend - sizes = Sizes(fastafile).mapping - - bed = Bed(gapsbed) - pf = gapsbed.rsplit(".", 1)[0] - extbed = pf + ".ext.bed" - fw = open(extbed, "w") - for i, b in enumerate(bed): - seqid = b.seqid - gapname = b.accn - size = sizes[seqid] - - prev_b = bed[i - 1] if i > 0 else None - next_b = bed[i + 1] if i + 1 < len(bed) else None - if prev_b and prev_b.seqid != seqid: - prev_b = None - if next_b and next_b.seqid != seqid: - next_b = None - - start = prev_b.end + 1 if prev_b else 1 - start, end = max(start, b.start - Ext), b.start - 1 - print( - "\t".join(str(x) for x in (b.seqid, start - 1, end, gapname + "L")), file=fw - ) - - end = next_b.start - 1 if next_b else size - start, end = b.end + 1, min(end, b.end + Ext) - print( - "\t".join(str(x) for x in (b.seqid, start - 1, end, gapname + "R")), file=fw - ) - fw.close() - - extfasta = fastaFromBed(extbed, fastafile, name=True) - return extbed, extfasta - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/geneticmap.py b/jcvi/assembly/geneticmap.py deleted file mode 100644 index 2720c3ae..00000000 --- a/jcvi/assembly/geneticmap.py +++ /dev/null @@ -1,714 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Use genetic map to break chimeric scaffolds, order and orient scaffolds onto -chromosomes. 
-""" -import os.path as op -import sys - -from itertools import combinations, groupby -from random import sample -from typing import Tuple - -import numpy as np -import seaborn as sns - -from ..apps.base import ActionDispatcher, OptionParser, logger, need_update -from ..algorithms.formula import calc_ldscore -from ..algorithms.matrix import symmetrize -from ..formats.base import BaseFile, LineFile, must_open, read_block -from ..formats.bed import Bed, fastaFromBed -from ..graphics.base import ( - Rectangle, - draw_cmap, - normalize_axes, - plt, - plot_heatmap, - savefig, -) - - -MSTheader = """population_type {0} -population_name LG -distance_function kosambi -cut_off_p_value 0.000001 -no_map_dist 10.0 -no_map_size 0 -missing_threshold {1} -estimation_before_clustering no -detect_bad_data yes -objective_function ML -number_of_loci {2} -number_of_individual {3} -""" - - -class BinMap(BaseFile, dict): - def __init__(self, filename): - super().__init__(filename) - - fp = open(filename) - for header, seq in read_block(fp, "group "): - lg = header.split()[-1] - self[lg] = [] - for s in seq: - if s.strip() == "" or s[0] == ";": - continue - marker, pos = s.split() - pos = int(float(pos) * 1000) - self[lg].append((marker, pos)) - - def print_to_bed(self, filename="stdout", switch=False, sep="."): - """Print the genetic map in the BED format. - - Args: - filename (str, optional): Output filename. Defaults to "stdout". - switch (bool, optional): Use linkage group as seqid. Defaults to False. - sep (str, optional): Separator that delimits scaffold name and position. Defaults to ".". - """ - fw = must_open(filename, "w") - for lg, markers in sorted(self.items()): - for marker, pos in markers: - if not switch: - line = (lg, pos, pos + 1, marker) - else: - seqid_spos = marker.rsplit(sep, 1) - if len(seqid_spos) != 2: - logger.error( - "Error: `%s` must be in the form e.g. 
`name%sposition`", - marker, - sep, - ) - continue - seqid, spos = seqid_spos - spos = int(spos) - marker = "{0}:{1}".format(lg, pos / 1000.0) - line = (seqid, spos - 1, spos, marker) - print("\t".join(str(x) for x in line), file=fw) - fw.close() - - -class MSTMapLine(object): - def __init__(self, row, startidx=3): - args = row.split() - self.id = args[0] - self.seqid, pos = self.id.split(".") - self.pos = int(pos) - self.genotype = "".join(args[startidx:]) - - def __len__(self): - return len(self.genotype) - - def __str__(self): - return "{0}: {1}".format(self.id, self.genotype) - - @property - def bedline(self): - return "\t".join(str(x) for x in (self.seqid, self.pos - 1, self.pos, self.id)) - - -class MSTMap(LineFile): - def __init__(self, filename): - super().__init__(filename) - fp = open(filename) - startidx = 1 - for row in fp: - if row.startswith("locus_name"): - if row.split()[1] == "seqid": - startidx = 3 - self.header = row.split() - break - - for row in fp: - self.append(MSTMapLine(row, startidx=startidx)) - - self.nmarkers = len(self) - self.nind = len(self[0].genotype) - logger.debug( - "Map contains %d markers in %d individuals", self.nmarkers, self.nind - ) - - -class MSTMatrix(object): - def __init__(self, matrix, markerheader, population_type, missing_threshold): - self.matrix = matrix - self.markerheader = markerheader - self.population_type = population_type - self.missing_threshold = missing_threshold - self.ngenotypes = len(matrix) - self.nind = len(markerheader) - 1 - assert self.nind == len(matrix[0]) - 1 - logger.debug( - "Imported %d markers and %d individuals.", self.ngenotypes, self.nind - ) - - def write(self, filename="stdout", header=True): - fw = must_open(filename, "w") - if header: - print( - MSTheader.format( - self.population_type, - self.missing_threshold, - self.ngenotypes, - self.nind, - ), - file=fw, - ) - print("\t".join(self.markerheader), file=fw) - for m in self.matrix: - print("\t".join(m), file=fw) - - -def main(): - 
actions = ( - ("breakpoint", "find scaffold breakpoints using genetic map"), - ("heatmap", "calculate pairwise linkage disequilibrium"), - ("bed", "convert MSTmap output to bed format"), - ("fasta", "extract markers based on map"), - ("anchor", "anchor scaffolds based on map"), - ("rename", "rename markers according to the new mapping locations"), - ("header", "rename lines in the map header"), - # Plot genetic map - ("blat", "make ALLMAPS input csv based on sequences"), - ("dotplot", "make dotplot between chromosomes and linkage maps"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def blat(args): - """ - %prog blat map1.txt ref.fasta - - Make ALLMAPS input csv based on sequences. The tab-delimited txt file - include: name, LG, position, sequence. - """ - from jcvi.formats.base import is_number - from jcvi.formats.blast import best as blast_best, bed as blast_bed - from jcvi.apps.align import blat as blat_align - - p = OptionParser(blat.__doc__) - _, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - maptxt, ref = args - pf = maptxt.rsplit(".", 1)[0] - register = {} - fastafile = pf + ".fasta" - fp = open(maptxt) - fw = open(fastafile, "w") - for row in fp: - name, lg, pos, seq = row.split() - if not is_number(pos): - continue - register[name] = (pf + "-" + lg, pos) - print(">{0}\n{1}\n".format(name, seq), file=fw) - fw.close() - - blatfile = blat_align([ref, fastafile]) - bestfile = blast_best([blatfile]) - bedfile = blast_bed([bestfile]) - b = Bed(bedfile).order - - pf = ".".join((op.basename(maptxt).split(".")[0], op.basename(ref).split(".")[0])) - csvfile = pf + ".csv" - fp = open(maptxt) - fw = open(csvfile, "w") - for row in fp: - name, lg, pos, seq = row.split() - if name not in b: - continue - bbi, bb = b[name] - scaffold, scaffold_pos = bb.seqid, bb.start - print(",".join(str(x) for x in (scaffold, scaffold_pos, lg, pos)), file=fw) - fw.close() - - -def dotplot(args): - """ - %prog dotplot map.csv 
ref.fasta - - Make dotplot between chromosomes and linkage maps. - The input map is csv formatted, for example: - - ScaffoldID,ScaffoldPosition,LinkageGroup,GeneticPosition - scaffold_2707,11508,1,0 - scaffold_2707,11525,1,1.2 - """ - from natsort import natsorted - from jcvi.assembly.allmaps import CSVMapLine - from jcvi.formats.sizes import Sizes - from jcvi.graphics.base import shorten - from jcvi.graphics.dotplot import ( - plt, - savefig, - markup, - normalize_axes, - downsample, - plot_breaks_and_labels, - thousands, - ) - - p = OptionParser(dotplot.__doc__) - p.set_outfile(outfile=None) - opts, args, iopts = p.set_image_options( - args, figsize="8x8", style="dark", dpi=90, cmap="copper" - ) - - if len(args) != 2: - sys.exit(not p.print_help()) - - csvfile, fastafile = args - sizes = natsorted(Sizes(fastafile).mapping.items()) - seen = set() - raw_data = [] - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) # the whole canvas - ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) # the dot plot - - fp = must_open(csvfile) - for row in fp: - m = CSVMapLine(row) - seen.add(m.seqid) - raw_data.append(m) - - # X-axis is the genome assembly - ctgs, ctg_sizes = zip(*sizes) - xsize = sum(ctg_sizes) - qb = list(np.cumsum(ctg_sizes)) - qbreaks = list(zip(ctgs, [0] + qb, qb)) - qstarts = dict(zip(ctgs, [0] + qb)) - - # Y-axis is the map - key = lambda x: x.lg - raw_data.sort(key=key) - ssizes = {} - for lg, d in groupby(raw_data, key=key): - ssizes[lg] = max([x.cm for x in d]) - ssizes = natsorted(ssizes.items()) - lgs, lg_sizes = zip(*ssizes) - ysize = sum(lg_sizes) - sb = list(np.cumsum(lg_sizes)) - sbreaks = list(zip([("LG" + x) for x in lgs], [0] + sb, sb)) - sstarts = dict(zip(lgs, [0] + sb)) - - # Re-code all the scatter dots - data = [ - (qstarts[x.seqid] + x.pos, sstarts[x.lg] + x.cm, "g") - for x in raw_data - if (x.seqid in qstarts) - ] - npairs = len(data) - data = downsample(data) - - x, y, c = zip(*data) - ax.scatter(x, y, c=c, 
edgecolors="none", s=2, lw=0) - - # Flip X-Y label - gy, gx = op.basename(csvfile).split(".")[:2] - gx, gy = shorten(gx, maxchar=30), shorten(gy, maxchar=30) - xlim, ylim = plot_breaks_and_labels( - fig, - root, - ax, - gx, - gy, - xsize, - ysize, - qbreaks, - sbreaks, - usetex=iopts.usetex, - ) - ax.set_xlim(xlim) - ax.set_ylim(ylim) - - title = "Alignment: {} vs {}".format(gx, gy) - title += " ({} markers)".format(thousands(npairs)) - root.set_title(markup(title), x=0.5, y=0.96, color="k") - logger.debug(title) - normalize_axes(root) - - image_name = opts.outfile or (csvfile.rsplit(".", 1)[0] + "." + iopts.format) - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - fig.clear() - - -def read_subsampled_matrix(mstmap: str, subsample: int) -> Tuple[np.ndarray, str, int]: - """ - Read the subsampled matrix from file if it exists, otherwise calculate it. - """ - data = MSTMap(mstmap) - - # Take random subsample while keeping marker order - if subsample < data.nmarkers: - data = [data[x] for x in sorted(sample(range(len(data)), subsample))] - else: - logger.debug("Use all markers, --subsample ignored") - - nmarkers = len(data) - markerbedfile = mstmap + ".subsample.bed" - ldmatrix = mstmap + ".subsample.matrix" - if need_update(mstmap, (ldmatrix, markerbedfile)): - with open(markerbedfile, "w", encoding="utf-8") as fw: - print("\n".join(x.bedline for x in data), file=fw) - logger.debug( - "Write marker set of size %d to file `%s`.", nmarkers, markerbedfile - ) - - M = np.zeros((nmarkers, nmarkers), dtype=float) - for i, j in combinations(range(nmarkers), 2): - a = data[i] - b = data[j] - M[i, j] = calc_ldscore(a.genotype, b.genotype) - - M = symmetrize(M) - - logger.debug("Write LD matrix to file `%s`.", ldmatrix) - M.tofile(ldmatrix) - else: - nmarkers = len(Bed(markerbedfile)) - M = np.fromfile(ldmatrix, dtype=float).reshape(nmarkers, nmarkers) - logger.debug("LD matrix `%s` exists (%dx%d).", ldmatrix, nmarkers, nmarkers) - - return M, markerbedfile, nmarkers - - 
-def draw_geneticmap_heatmap(root, ax, mstmap: str, subsample: int): - """ - Draw the heatmap of the genetic map. - """ - M, markerbedfile, nmarkers = read_subsampled_matrix(mstmap, subsample) - - # Plot chromosomes breaks - b = Bed(markerbedfile) - xsize = len(b) - extent = (0, nmarkers) - chr_labels = [] - ignore_size = 20 - - breaks = [] - for seqid, beg, end in b.get_breaks(): - ignore = abs(end - beg) < ignore_size - pos = (beg + end) / 2 - chr_labels.append((seqid, pos, ignore)) - if ignore: - continue - breaks.append(end) - - cmap = sns.color_palette("rocket", as_cmap=True) - plot_heatmap(ax, M, breaks, cmap=cmap, plot_breaks=True) - - # Plot chromosome labels - for label, pos, ignore in chr_labels: - if not ignore: - xpos = 0.1 + pos * 0.8 / xsize - root.text( - xpos, 0.91, label, ha="center", va="bottom", rotation=45, color="grey" - ) - ypos = 0.9 - pos * 0.8 / xsize - root.text(0.09, ypos, label, ha="right", va="center", color="grey") - - ax.set_xlim(extent) - ax.set_ylim((nmarkers, 0)) # Invert y-axis - ax.set_axis_off() - - draw_cmap(root, r"Pairwise LD ($r^2$)", 0, 1, cmap=cmap) - - root.add_patch(Rectangle((0.1, 0.1), 0.8, 0.8, fill=False, ec="k", lw=2)) - m = mstmap.split(".")[0] - root.text(0.5, 0.06, f"Linkage Disequilibrium between {m} markers", ha="center") - - normalize_axes(root) - - -def heatmap(args): - """ - %prog heatmap map - - Calculate pairwise linkage disequilibrium given MSTmap. 
- """ - p = OptionParser(heatmap.__doc__) - p.add_argument( - "--subsample", - default=1000, - type=int, - help="Subsample markers to speed up", - ) - opts, args, iopts = p.set_image_options(args, figsize="8x8") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (mstmap,) = args - - plt.rcParams["axes.linewidth"] = 0 - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - ax = fig.add_axes((0.1, 0.1, 0.8, 0.8)) # the heatmap - - draw_geneticmap_heatmap(root, ax, mstmap, opts.subsample) - - pf = mstmap.split(".")[0] - image_name = pf + ".subsample" + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def header(args): - """ - %prog header map conversion_table - - Rename lines in the map header. The mapping of old names to new names are - stored in two-column `conversion_table`. - """ - from jcvi.formats.base import DictFile - - p = OptionParser(header.__doc__) - p.add_argument("--prefix", default="", help="Prepend text to line number") - p.add_argument("--ids", help="Write ids to file") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - mstmap, conversion_table = args - data = MSTMap(mstmap) - hd = data.header - conversion = DictFile(conversion_table) - newhd = [opts.prefix + conversion.get(x, x) for x in hd] - - print("\t".join(hd)) - print("--->") - print("\t".join(newhd)) - - ids = opts.ids - if ids: - fw = open(ids, "w") - print("\n".join(newhd), file=fw) - fw.close() - - -def rename(args): - """ - %prog rename map markers.bed > renamed.map - - Rename markers according to the new mapping locations. 
- """ - p = OptionParser(rename.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - mstmap, bedfile = args - markersbed = Bed(bedfile) - markers = markersbed.order - - data = MSTMap(mstmap) - header = data.header - header = [header[0]] + ["seqid", "start"] + header[1:] - renamed = [] - for b in data: - m, geno = b.id, b.genotype - om = m - if m not in markers: - m = m.rsplit(".", 1)[0] - if m not in markers: - continue - - i, mb = markers[m] - renamed.append([om, mb.seqid, mb.start, "\t".join(list(geno))]) - - renamed.sort(key=lambda x: (x[1], x[2])) - fw = must_open(opts.outfile, "w") - print("\t".join(header), file=fw) - for d in renamed: - print("\t".join(str(x) for x in d), file=fw) - - -def anchor(args): - """ - %prog anchor map.bed markers.blast > anchored.bed - - Anchor scaffolds based on map. - """ - from jcvi.formats.blast import bed - - p = OptionParser(anchor.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - mapbed, blastfile = args - bedfile = bed([blastfile]) - markersbed = Bed(bedfile) - markers = markersbed.order - - mapbed = Bed(mapbed, sorted=False) - for b in mapbed: - m = b.accn - if m not in markers: - continue - - i, mb = markers[m] - new_accn = "{0}:{1}-{2}".format(mb.seqid, mb.start, mb.end) - b.accn = new_accn - print(b) - - -def bed(args): - """ - %prog fasta map.out - - Convert MSTMAP output into bed format. 
- """ - p = OptionParser(bed.__doc__) - p.add_argument( - "--switch", - default=False, - action="store_true", - help="Switch reference and aligned map elements", - ) - p.add_argument( - "--sep", - default=".", - help="Separator that is used to delimit scaffold and position in the marker name", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (mapout,) = args - pf = mapout.split(".")[0] - mapbed = pf + ".bed" - bm = BinMap(mapout) - bm.print_to_bed(mapbed, switch=opts.switch, sep=opts.sep) - - return mapbed - - -def fasta(args): - """ - %prog fasta map.out scaffolds.fasta - - Extract marker sequences based on map. - """ - from jcvi.formats.sizes import Sizes - - p = OptionParser(fasta.__doc__) - p.add_argument( - "--extend", - default=1000, - type=int, - help="Extend seq flanking the gaps", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - mapout, sfasta = args - Flank = opts.extend - pf = mapout.split(".")[0] - mapbed = pf + ".bed" - bm = BinMap(mapout) - bm.print_to_bed(mapbed) - - bed = Bed(mapbed, sorted=False) - markersbed = pf + ".markers.bed" - fw = open(markersbed, "w") - sizes = Sizes(sfasta).mapping - for b in bed: - accn = b.accn - scf, pos = accn.split(".") - pos = int(pos) - start = max(0, pos - Flank) - end = min(pos + Flank, sizes[scf]) - print("\t".join(str(x) for x in (scf, start, end, accn)), file=fw) - - fw.close() - - fastaFromBed(markersbed, sfasta, name=True) - - -def hamming_distance(a, b, ignore=None): - dist = 0 - for x, y in zip(a, b): - if ignore and ignore in (x, y): - continue - if x != y: - dist += 1 - return dist - - -OK, BREAK, END = range(3) - - -def check_markers(a, b, maxdiff): - if a.seqid != b.seqid: - return END, None - diff = hamming_distance(a.genotype, b.genotype, ignore="-") - max_allowed = len(a) * maxdiff - if diff <= max_allowed: - return OK, None - - return BREAK, (a.seqid, a.pos, b.pos) - - -def breakpoint(args): - """ - %prog 
breakpoint mstmap.input > breakpoints.bed - - Find scaffold breakpoints using genetic map. Use variation.vcf.mstmap() to - generate the input for this routine. - """ - from more_itertools import pairwise - - p = OptionParser(breakpoint.__doc__) - p.add_argument( - "--diff", - default=0.1, - type=float, - help="Maximum ratio of differences allowed", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (mstmap,) = args - diff = opts.diff - data = MSTMap(mstmap) - - # Remove singleton markers (avoid double cross-over) - good = [] - nsingletons = 0 - for i in range(1, len(data) - 1): - a = data[i] - left_label, left_rr = check_markers(data[i - 1], a, diff) - right_label, right_rr = check_markers(a, data[i + 1], diff) - - if left_label == BREAK and right_label == BREAK: - nsingletons += 1 - continue - - good.append(a) - - logger.debug("A total of %d singleton markers removed.", nsingletons) - - for a, b in pairwise(good): - label, rr = check_markers(a, b, diff) - if label == BREAK: - print("\t".join(str(x) for x in rr)) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/goldenpath.py b/jcvi/assembly/goldenpath.py deleted file mode 100644 index 5108a5c6..00000000 --- a/jcvi/assembly/goldenpath.py +++ /dev/null @@ -1,1192 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Procedures to validate and update golden path of a genome assembly. This relies -heavily on formats.agp, and further includes several algorithms, e.g. overlap -detection. 
-""" -import os -import os.path as op -import shutil -import sys - -from copy import deepcopy -from functools import lru_cache -from itertools import groupby - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - cleanup, - logger, - mkdir, - need_update, - popen, - sh, -) -from ..apps.fetch import entrez -from ..apps.grid import WriteJobs -from ..formats.agp import AGP, TPF, build, get_phase, reindex, tidy -from ..formats.base import BaseFile, must_open -from ..formats.blast import BlastLine, BlastSlow -from ..formats.coords import Overlap_types -from ..formats.fasta import Fasta, SeqIO - - -GoodPct = 98 -GoodOverlap = 200 -GoodOverhang = 2000 - - -class Cutoff(object): - def __init__(self, pctid=GoodPct, overlap=GoodOverlap, hang=GoodOverhang): - self.pctid = pctid - self.overlap = overlap - self.hang = hang - - def __str__(self): - return "Configuration: PCTID={} OVERLAP={} HANG={}".format( - self.pctid, self.overlap, self.hang - ) - - -class CLR(object): - def __init__(self, id, size, orientation="+"): - self.id = id - self.start = 1 - self.end = size - if orientation == "?": - orientation = "+" - assert orientation in ("+", "-") - self.orientation = orientation - - def __str__(self): - return "{}: {}-{}({})".format(self.id, self.start, self.end, self.orientation) - - @property - def is_valid(self): - return self.start < self.end - - @classmethod - def from_agpline(cls, a): - c = CLR(a.component_id, 0, a.orientation) - c.start = a.component_beg - c.end = a.component_end - return c - - -class Overlap(object): - def __init__(self, blastline, asize, bsize, cutoff, qreverse=False): - - b = blastline - aid = b.query - bid = b.subject - - self.aid = aid.split("|")[3] if aid.count("|") >= 3 else aid - self.bid = bid.split("|")[3] if bid.count("|") >= 3 else bid - self.asize = asize - self.bsize = bsize - - self.qstart = b.qstart - self.qstop = b.qstop - self.sstart = b.sstart - self.sstop = b.sstop - - self.pctid = b.pctid - self.hitlen = b.hitlen - 
self.orientation = b.orientation - - self.cutoff = cutoff - self.qreverse = qreverse - self.blastline = b - - def __str__(self): - ov = Overlap_types[self.otype] - s = "{0} - {1}: {2} ".format(self.aid, self.bid, ov) - s += "Overlap: {0} Identity: {1}% Orientation: {2}".format( - self.hitlen, self.pctid, self.orientation - ) - return s - - @property - def swapped(self): - blastline = self.blastline.swapped - asize = self.asize - bsize = self.bsize - _, bo = self.get_ao_bo() - qreverse = bo == "-" - return Overlap(blastline, bsize, asize, self.cutoff, qreverse=qreverse) - - @property - def certificateline(self): - terminal_tag = "Terminal" if self.isTerminal else "Non-terminal" - return "\t".join( - str(x) - for x in ( - self.bid, - self.asize, - self.qstart, - self.qstop, - self.orientation, - terminal_tag, - ) - ) - - @property - def isTerminal(self): - return self.isGoodQuality and self.otype in (1, 2) - - @property - def isGoodQuality(self): - cutoff = self.cutoff - return self.hitlen >= cutoff.overlap and self.pctid >= cutoff.pctid - - def get_hangs(self): - r""" - Determine the type of overlap given query, ref alignment coordinates - Consider the following alignment between sequence a and b: - - aLhang \ / aRhang - \------------/ - /------------\ - bLhang / \ bRhang - - Terminal overlap: a before b, b before a - Contain overlap: a in b, b in a - """ - aLhang, aRhang = self.qstart - 1, self.asize - self.qstop - bLhang, bRhang = self.sstart - 1, self.bsize - self.sstop - if self.orientation == "-": - bLhang, bRhang = bRhang, bLhang - if self.qreverse: - aLhang, aRhang = aRhang, aLhang - bLhang, bRhang = bRhang, bLhang - - return aLhang, aRhang, bLhang, bRhang - - def update_clr(self, aclr, bclr): - """ - Zip the two sequences together, using "left-greedy" rule - - ============= seqA - |||| - ====(===============) seqB - """ - print(aclr, bclr, file=sys.stderr) - otype = self.otype - - if otype == 1: - if aclr.orientation == "+": - aclr.end = self.qstop - else: - 
aclr.start = self.qstart - if bclr.orientation == "+": - bclr.start = self.sstop + 1 - else: - bclr.end = self.sstart - 1 - - elif otype == 3: - aclr.start = aclr.end - - elif otype == 4: - bclr.start = bclr.end - - print(aclr, bclr, file=sys.stderr) - - def get_ao_bo(self): - ao = "-" if self.qreverse else "+" - bo = ao if self.orientation == "+" else {"+": "-", "-": "+"}[ao] - return ao, bo - - def anneal(self, aclr, bclr): - ao, bo = self.get_ao_bo() - - # Requirement: end-to-end join in correct order and orientation - can_anneal = self.otype in (1, 3, 4) and (ao, bo) == ( - aclr.orientation, - bclr.orientation, - ) - if not can_anneal: - print( - "* Cannot anneal! (otype={0}|{1}{2}|{3}{4})".format( - self.otype, ao, bo, aclr.orientation, bclr.orientation - ), - file=sys.stderr, - ) - return False - - self.update_clr(aclr, bclr) - return True - - def print_graphic(self): - """ - >>>>>>>>>>>>>>>>>>> seqA (alen) - |||||||| - <<<<<<<<<<<<<<<<<<<<< seqB (blen) - """ - aLhang, aRhang, bLhang, bRhang = self.get_hangs() - - achar = ">" - bchar = "<" if self.orientation == "-" else ">" - if self.qreverse: - achar = "<" - bchar = {">": "<", "<": ">"}[bchar] - - print(aLhang, aRhang, bLhang, bRhang, file=sys.stderr) - width = 50 # Canvas - hitlen = self.hitlen - lmax = max(aLhang, bLhang) - rmax = max(aRhang, bRhang) - bpwidth = lmax + hitlen + rmax - ratio = width * 1.0 / bpwidth - - _ = lambda x: int(round(x * ratio, 0)) - a1, a2 = _(aLhang), _(aRhang) - b1, b2 = _(bLhang), _(bRhang) - hit = max(_(hitlen), 1) - - msg = " " * max(b1 - a1, 0) - msg += achar * (a1 + hit + a2) - msg += " " * (width - len(msg) + 2) - msg += "{0} ({1})".format(self.aid, self.asize) - print(msg, file=sys.stderr) - - msg = " " * max(a1, b1) - msg += "|" * hit - print(msg, file=sys.stderr) - - msg = " " * max(a1 - b1, 0) - msg += bchar * (b1 + hit + b2) - msg += " " * (width - len(msg) + 2) - msg += "{0} ({1})".format(self.bid, self.bsize) - print(msg, file=sys.stderr) - print(self, 
file=sys.stderr) - - @property - def otype(self): - if not self.isGoodQuality: - return 0 - - aLhang, aRhang, bLhang, bRhang = self.get_hangs() - - s1 = aRhang + bLhang - s2 = aLhang + bRhang - s3 = aLhang + aRhang - s4 = bLhang + bRhang - ms = min(s1, s2, s3, s4) - if ms > self.cutoff.hang: - type = 0 - elif ms == s1: - type = 1 # a ~ b - elif ms == s2: - type = 2 # b ~ a - elif ms == s3: - type = 3 # a in b - elif ms == s4: - type = 4 # b in a - else: - assert 0 - - return type - - -class CertificateLine(object): - """ - North chr1 2 0 AC229737.8 telomere 58443 - South chr1 2 1 AC229737.8 AC202463.29 58443 37835 58443 + Non-terminal - """ - - def __init__(self, line): - args = line.split() - self.tag = args[0] - self.chr = args[1] - self.aphase = int(args[2]) - self.bphase = int(args[3]) - self.aid = args[4] - self.bid = args[5] - self.asize = int(args[6]) - self.is_no_overlap = False - - if len(args) == 7: - self.is_gap = True - return - - self.is_gap = False - - if len(args) == 8: - assert args[7] == "None" - self.is_no_overlap = True - self.terminal = "Non-terminal" - return - - self.astart = int(args[7]) - self.astop = int(args[8]) - self.orientation = args[9] - self.terminal = args[10] - - @property - def isTerminal(self): - return self.terminal == "Terminal" - - def __str__(self): - ar = [ - self.tag, - self.chr, - self.aphase, - self.bphase, - self.aid, - self.bid, - self.asize, - ] - - if self.is_no_overlap: - ar += ["None"] - elif not self.is_gap: - ar += [self.astart, self.astop, self.orientation, self.terminal] - - return "\t".join(str(x) for x in ar) - - -class Certificate(BaseFile): - - gapsize = 100000 - gaps = dict( - telomere=gapsize, centromere=gapsize, contig=gapsize, clone=50000, fragment=5000 - ) - - def __init__(self, filename): - - super().__init__(filename) - - fp = open(filename) - self.lines = [CertificateLine(x) for x in fp.readlines()] - - def write(self, filename): - fw = must_open(filename, "w") - for b in self.lines: - print(b, 
file=fw) - - def get_agp_gap(self, gap_type="contig"): - gap_length = Certificate.gaps[gap_type] - linkage = "yes" if gap_type in ("fragment", "clone") else "no" - - return ["N", gap_length, gap_type, linkage, ""] - - def write_AGP(self, filename, orientationguide={}): - """ - For each component, we have two overlaps: North and South. - - ======= - |||| South - ====(=================) Current BAC - North |||| - =============== - - For the case that says "Non-terminal", the overlap will not be - considered. North-South would suggest a '+' orientation, South-North - would suggest a '-' orientation. In most cases, unless the overlap - involves phase1 BAC, the selected range will be shown as the brackets - above - exclude North overlap, and include South overlap (aka the - "left-greedy" rule). - """ - fw = must_open(filename, "w") - for aid, bb in groupby(self.lines, key=lambda x: x.aid): - bb = list(bb) - north, south = bb - aid = north.aid - assert aid == south.aid - - aphase = north.aphase - chr = north.chr - size = north.asize - ar = [chr, 0, 0, 0] - - northline = southline = None - northrange = southrange = None - - # Warn if adjacent components do not have valid overlaps - if south.is_no_overlap: - print(south, file=sys.stderr) - - # Most gaps, except telomeres occur twice, so only do the "North" - if north.is_gap: - bar = ar + self.get_agp_gap(north.bid) - northline = "\t".join(str(x) for x in bar) - else: - if north.isTerminal: - northrange = north.astart, north.astop - - if south.is_gap: - if south.bid == "telomere": - bar = ar + self.get_agp_gap(south.bid) - southline = "\t".join(str(x) for x in bar) - else: - if south.isTerminal: - southrange = south.astart, south.astop - else: - bar = ar + self.get_agp_gap("fragment") - southline = "\t".join(str(x) for x in bar) - - # Determine the orientation and clear range for the current BAC - clr = [1, size] - orientation = sorientation = None - if northrange: - start, stop = northrange - Lhang = start - 1 - Rhang = 
size - stop - - orientation = "+" if Lhang < Rhang else "-" - if north.bphase == 1 and north.bphase < aphase: - if Lhang < Rhang: # North overlap at 5` - clr[0] = start - else: - clr[1] = stop - # Override left-greedy (also see below) - else: - if Lhang < Rhang: - clr[0] = stop + 1 - else: - clr[1] = start - 1 - - if southrange: - start, stop = southrange - Lhang = start - 1 - Rhang = size - stop - - sorientation = "+" if Lhang > Rhang else "-" - # Override left-greedy (also see above) - if aphase == 1 and aphase < south.bphase: - if Lhang < Rhang: # South overlap at 5` - clr[0] = stop + 1 - else: - clr[1] = start - 1 - else: - if Lhang < Rhang: - clr[0] = start - else: - clr[1] = stop - - if orientation: - if sorientation: - try: - assert ( - orientation == sorientation - ), "Orientation conflicts:\n{0}\n{1}".format(north, south) - except AssertionError as e: - logger.debug(e) - else: - if sorientation: - orientation = sorientation - else: # Both overlaps fail to define orientation - orientation = orientationguide.get(aid, "+") - - component_type = "D" if aphase in (1, 2) else "F" - bar = ar + [component_type, aid, clr[0], clr[1], orientation] - cline = "\t".join(str(x) for x in bar) - - if northline: - print(northline, file=fw) - print(cline, file=fw) - if southline: - print(southline, file=fw) - - fw.close() - - reindex([filename, "--inplace"]) - - -def main(): - - actions = ( - ("bes", "confirm the BES mapping"), - ("flip", "flip the FASTA sequences according to a set of references"), - ("overlap", "check terminal overlaps between two records"), - ("batchoverlap", "check terminal overlaps for many pairs"), - ("neighbor", "check neighbors of a component in agpfile"), - ("blast", "blast a component to componentpool"), - ("certificate", "make certificates for all overlaps in agpfile"), - ("agp", "make agpfile based on certificates"), - ("anneal", "merge adjacent contigs and make new agpfile"), - ("dedup", "remove redundant contigs with cdhit"), - ) - p = 
ActionDispatcher(actions) - p.dispatch(globals()) - - -def dedup(args): - """ - %prog dedup scaffolds.fasta - - Remove redundant contigs with CD-HIT. This is run prior to - assembly.sspace.embed(). - """ - from jcvi.formats.fasta import gaps - from jcvi.apps.cdhit import deduplicate, ids - - p = OptionParser(dedup.__doc__) - p.set_align(pctid=GoodPct) - p.set_mingap(default=10) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (scaffolds,) = args - mingap = opts.mingap - splitfile, oagpfile, cagpfile = gaps( - [scaffolds, "--split", "--mingap={0}".format(mingap)] - ) - - dd = splitfile + ".cdhit" - clstrfile = dd + ".clstr" - idsfile = dd + ".ids" - if need_update(splitfile, clstrfile): - deduplicate([splitfile, "--pctid={0}".format(opts.pctid)]) - if need_update(clstrfile, idsfile): - ids([clstrfile]) - - agp = AGP(cagpfile) - reps = set(x.split()[-1] for x in open(idsfile)) - pf = scaffolds.rsplit(".", 1)[0] - dedupagp = pf + ".dedup.agp" - fw = open(dedupagp, "w") - - ndropped = ndroppedbases = 0 - for a in agp: - if not a.is_gap and a.component_id not in reps: - span = a.component_span - logger.debug("Drop component {0} ({1})".format(a.component_id, span)) - ndropped += 1 - ndroppedbases += span - continue - print(a, file=fw) - fw.close() - - logger.debug( - "Dropped components: {0}, Dropped bases: {1}".format(ndropped, ndroppedbases) - ) - logger.debug("Deduplicated file written to `{0}`.".format(dedupagp)) - - tidyagp = tidy([dedupagp, splitfile]) - dedupfasta = pf + ".dedup.fasta" - build([tidyagp, dd, dedupfasta]) - - return dedupfasta - - -def get_shred_id(id): - """ - >>> get_shred_id("ca-bacs.5638.frag11.22000-23608") - ("ca-bacs.5638", 11) - """ - try: - parts = id.split(".") - aid = ".".join(parts[:2]) - fid = int(parts[2].replace("frag", "")) - except: - aid, fid = None, None - return aid, fid - - -def is_adjacent_shreds(a, b): - aid, bid = a.component_id, b.component_id - ao, bo = a.orientation, b.orientation 
- if ao != bo: - return False - - ai, af = get_shred_id(aid) - bi, bf = get_shred_id(bid) - if ai is None or bi is None: - return False - - # Same sequence, with fragment id offset by one - return ai == bi and abs(af - bf) == 1 - - -def overlap_blastline_writer(oopts): - o = overlap(oopts) - if not o: - return "" - - return str(o.blastline) - - -def get_overlap_opts(aid, bid, qreverse, outdir, opts): - oopts = [ - aid, - bid, - "--suffix", - "fa", - "--dir", - outdir, - "--pctid={0}".format(opts.pctid), - "--hitlen={0}".format(opts.hitlen), - ] - if qreverse: - oopts += ["--qreverse"] - return oopts - - -def populate_blastfile(blastfile, agp, outdir, opts): - assert not op.exists(blastfile) - all_oopts = [] - for a, b, qreverse in agp.iter_paired_components(): - aid = a.component_id - bid = b.component_id - oopts = get_overlap_opts(aid, bid, qreverse, outdir, opts) - all_oopts.append(oopts) - - pool = WriteJobs(overlap_blastline_writer, all_oopts, blastfile, cpus=opts.cpus) - pool.run() - - -def anneal(args): - """ - %prog anneal agpfile contigs.fasta - - Merge adjacent overlapping contigs and make new AGP file. - - By default it will also anneal lines like these together (unless --nozipshreds): - scaffold4 1 1608 1 W ca-bacs.5638.frag11.22000-23608 1 1608 - - scaffold4 1609 1771 2 N 163 scaffold yes paired-ends - scaffold4 1772 3771 3 W ca-bacs.5638.frag10.20000-22000 1 2000 - - - These are most likely shreds, which we look for based on names. 
- """ - p = OptionParser(anneal.__doc__) - p.set_align(pctid=GoodPct, hitlen=GoodOverlap) - p.add_argument( - "--hang", default=GoodOverhang, type=int, help="Maximum overhang length" - ) - p.set_outdir(outdir="outdir") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - agpfile, contigs = args - outdir = opts.outdir - if not op.exists(outdir): - mkdir(outdir) - cmd = "faSplit byname {0} {1}/".format(contigs, outdir) - sh(cmd) - - cutoff = Cutoff(opts.pctid, opts.hitlen, opts.hang) - logger.debug(str(cutoff)) - - agp = AGP(agpfile) - blastfile = agpfile.replace(".agp", ".blast") - if not op.exists(blastfile): - populate_blastfile(blastfile, agp, outdir, opts) - - assert op.exists(blastfile) - logger.debug("File `{0}` found. Start loading.".format(blastfile)) - blast = BlastSlow(blastfile).to_dict() - - annealedagp = "annealed.agp" - annealedfasta = "annealed.fasta" - - newagp = deepcopy(agp) - clrstore = {} - for a, b, qreverse in agp.iter_paired_components(): - aid = a.component_id - bid = b.component_id - - pair = (aid, bid) - if pair in blast: - bl = blast[pair] - else: - oopts = get_overlap_opts(aid, bid, qreverse, outdir, opts) - o = overlap(oopts) - if not o: - continue - bl = o.blastline - - o = Overlap(bl, a.component_span, b.component_span, cutoff, qreverse=qreverse) - - if aid not in clrstore: - clrstore[aid] = CLR.from_agpline(a) - if bid not in clrstore: - clrstore[bid] = CLR.from_agpline(b) - - aclr, bclr = clrstore[aid], clrstore[bid] - - o.print_graphic() - if o.anneal(aclr, bclr): - newagp.delete_between(aid, bid, verbose=True) - - if o.otype == 2: # b ~ a - o = o.swapped - o.print_graphic() - if o.anneal(bclr, aclr): - newagp.switch_between(bid, aid, verbose=True) - newagp.delete_between(bid, aid, verbose=True) - - logger.debug("A total of {0} components with modified CLR.".format(len(clrstore))) - - for cid, c in clrstore.items(): - if c.is_valid: - continue - print("Remove {0}".format(c), 
file=sys.stderr) - newagp.convert_to_gap(cid, verbose=True) - - # Update all ranges that has modified clr - for a in newagp: - if a.is_gap: - continue - aid = a.component_id - if aid in clrstore: - c = clrstore[aid] - a.component_beg = c.start - a.component_end = c.end - - newagp.print_to_file(annealedagp) - tidyagp = tidy([annealedagp, contigs]) - - build([tidyagp, contigs, annealedfasta]) - return annealedfasta - - -def blast(args): - """ - %prog blast allfasta clonename - - Insert a component into agpfile by aligning to the best hit in pool and see - if they have good overlaps. - """ - from jcvi.apps.align import run_megablast - - p = OptionParser(blast.__doc__) - p.add_argument("-n", type=int, default=2, help="Take best N hits") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - allfasta, clonename = args - fastadir = "fasta" - infile = op.join(fastadir, clonename + ".fasta") - if not op.exists(infile): - entrez([clonename, "--skipcheck", "--outdir=" + fastadir]) - - outfile = "{0}.{1}.blast".format(clonename, allfasta.split(".")[0]) - run_megablast( - infile=infile, outfile=outfile, db=allfasta, pctid=GoodPct, hitlen=GoodOverlap - ) - - blasts = [BlastLine(x) for x in open(outfile)] - besthits = [] - for b in blasts: - if b.query.count("|") >= 3: - b.query = b.query.split("|")[3] - - if b.subject.count("|") >= 3: - b.subject = b.subject.split("|")[3] - - b.query = b.query.rsplit(".", 1)[0] - b.subject = b.subject.rsplit(".", 1)[0] - - if b.query == b.subject: - continue - - if b.subject not in besthits: - besthits.append(b.subject) - if len(besthits) == opts.n: - break - - for b in besthits: - overlap([clonename, b, "--dir=" + fastadir]) - - -def bes(args): - """ - %prog bes bacfasta clonename - - Use the clone name to download BES gss sequences from Genbank, map and then - visualize. 
- """ - from jcvi.apps.align import run_blat - - p = OptionParser(bes.__doc__) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bacfasta, clonename = args - - entrez([clonename, "--database=nucgss", "--skipcheck"]) - besfasta = clonename + ".fasta" - blatfile = clonename + ".bes.blat" - run_blat( - infile=besfasta, - outfile=blatfile, - db=bacfasta, - pctid=95, - hitlen=100, - cpus=opts.cpus, - ) - - aid, asize = next(Fasta(bacfasta).itersizes()) - - width = 50 - msg = "=" * width - msg += " " + aid - print(msg, file=sys.stderr) - - ratio = width * 1.0 / asize - _ = lambda x: int(round(x * ratio, 0)) - blasts = [BlastLine(x) for x in open(blatfile)] - for b in blasts: - if b.orientation == "+": - msg = " " * _(b.sstart) + "->" - else: - msg = " " * (_(b.sstop) - 2) + "<-" - msg += " " * (width - len(msg) + 2) - msg += b.query - if b.orientation == "+": - msg += " (hang={0})".format(b.sstart - 1) - else: - msg += " (hang={0})".format(asize - b.sstop) - - print(msg, file=sys.stderr) - - -def flip(args): - """ - %prog flip fastafile - - Go through each FASTA record, check against Genbank file and determines - whether or not to flip the sequence. This is useful before updates of the - sequences to make sure the same orientation is used. 
- """ - p = OptionParser(flip.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - outfastafile = fastafile.rsplit(".", 1)[0] + ".flipped.fasta" - fo = open(outfastafile, "w") - f = Fasta(fastafile, lazy=True) - for name, rec in f.iteritems_ordered(): - tmpfasta = "a.fasta" - fw = open(tmpfasta, "w") - SeqIO.write([rec], fw, "fasta") - fw.close() - - o = overlap([tmpfasta, name]) - if o.orientation == "-": - rec.seq = rec.seq.reverse_complement() - - SeqIO.write([rec], fo, "fasta") - cleanup(tmpfasta) - - -def batchoverlap(args): - """ - %prog batchoverlap pairs.txt outdir - - Check overlaps between pairs of sequences. - """ - p = OptionParser(batchoverlap.__doc__) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - pairsfile, outdir = args - fp = open(pairsfile) - cmds = [] - mkdir("overlaps") - for row in fp: - a, b = row.split()[:2] - oa = op.join(outdir, a + ".fa") - ob = op.join(outdir, b + ".fa") - cmd = "python -m jcvi.assembly.goldenpath overlap {0} {1}".format(oa, ob) - cmd += " -o overlaps/{0}_{1}.ov".format(a, b) - cmds.append(cmd) - - print("\n".join(cmds)) - - -def overlap(args): - """ - %prog overlap - - Check overlaps between two fasta records. The arguments can be genBank IDs - instead of FASTA files. In case of IDs, the sequences will be downloaded - first. 
- """ - from jcvi.formats.blast import chain_HSPs - - p = OptionParser(overlap.__doc__) - p.add_argument( - "--dir", - default=os.getcwd(), - help="Download sequences to dir", - ) - p.add_argument( - "--suffix", - default="fasta", - help="Suffix of the sequence file in dir", - ) - p.add_argument( - "--qreverse", - default=False, - action="store_true", - help="Reverse seq a", - ) - p.add_argument( - "--nochain", - default=False, - action="store_true", - help="Do not chain adjacent HSPs", - ) - p.set_align(pctid=GoodPct, hitlen=GoodOverlap, evalue=0.01) - p.set_outfile(outfile=None) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - afasta, bfasta = args - dir = opts.dir - chain = not opts.nochain - suffix = opts.suffix - evalue = opts.evalue - pctid = opts.pctid - hitlen = opts.hitlen - cutoff = Cutoff(pctid, hitlen) - - # Check first whether it is file or accession name - if not op.exists(afasta): - af = op.join(dir, ".".join((afasta, suffix))) - if not op.exists(af): # Check to avoid redownload - entrez([afasta, "--skipcheck", "--outdir=" + dir]) - afasta = af - - if not op.exists(bfasta): - bf = op.join(dir, ".".join((bfasta, suffix))) - if not op.exists(bf): - entrez([bfasta, "--skipcheck", "--outdir=" + dir]) - bfasta = bf - - assert op.exists(afasta) and op.exists(bfasta) - - cmd = "blastn -dust no" - cmd += " -query {0} -subject {1}".format(afasta, bfasta) - cmd += " -evalue {0} -outfmt 6 -perc_identity {1}".format(evalue, pctid) - - fp = popen(cmd) - hsps = fp.readlines() - - hsps = [BlastLine(x) for x in hsps] - hsps = [x for x in hsps if x.hitlen >= hitlen] - if chain: - logger.debug("Chain HSPs in the Blast output.") - dist = 2 * hitlen # Distance to chain the HSPs - hsps = chain_HSPs(hsps, xdist=dist, ydist=dist) - - if len(hsps) == 0: - print("No match found.", file=sys.stderr) - return None - - besthsp = hsps[0] - - aid, asize = next(Fasta(afasta).itersizes()) - bid, bsize = next(Fasta(bfasta).itersizes()) - o = 
Overlap(besthsp, asize, bsize, cutoff, qreverse=opts.qreverse) - o.print_graphic() - - if opts.outfile: - fw = must_open(opts.outfile, "w") - print(str(o), file=fw) - fw.close() - - return o - - -@lru_cache(maxsize=None) -def phase(accession): - gbdir = "gb" - gbfile = op.join(gbdir, accession + ".gb") - if not op.exists(gbfile): - entrez([accession, "--skipcheck", "--outdir=" + gbdir, "--format=gb"]) - rec = next(SeqIO.parse(gbfile, "gb")) - ph, keywords = get_phase(rec) - return ph, len(rec) - - -def check_certificate(certificatefile): - data = {} - if op.exists(certificatefile): - # This will make updates resume-able and backed-up - certificatefilebak = certificatefile + ".orig" - shutil.copy2(certificatefile, certificatefilebak) - - fp = open(certificatefile) - for row in fp: - atoms = row.split() - tag, aid, bid = atoms[0], atoms[4], atoms[5] - data[(tag, aid, bid)] = row.strip() - - return data - - -def certificate(args): - """ - %prog certificate tpffile certificatefile - - Generate certificate file for all overlaps in tpffile. tpffile can be - generated by jcvi.formats.agp.tpf(). - - North chr1 2 0 AC229737.8 telomere 58443 - South chr1 2 1 AC229737.8 AC202463.29 58443 37835 58443 + Non-terminal - - Each line describes a relationship between the current BAC and the - north/south BAC. First, "North/South" tag, then the chromosome, phases of - the two BACs, ids of the two BACs, the size and the overlap start-stop of - the CURRENT BAC, and orientation. Each BAC will have two lines in the - certificate file. 
- """ - p = OptionParser(certificate.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - tpffile, certificatefile = args - fastadir = "fasta" - - tpf = TPF(tpffile) - - data = check_certificate(certificatefile) - fw = must_open(certificatefile, "w") - for i, a in enumerate(tpf): - if a.is_gap: - continue - - aid = a.component_id - - af = op.join(fastadir, aid + ".fasta") - if not op.exists(af): # Check to avoid redownload - entrez([aid, "--skipcheck", "--outdir=" + fastadir]) - - north, south = tpf.getNorthSouthClone(i) - aphase, asize = phase(aid) - - for tag, p in (("North", north), ("South", south)): - if not p: # end of the chromosome - ov = "telomere\t{0}".format(asize) - elif p.isCloneGap: - bphase = "0" - ov = "{0}\t{1}".format(p.gap_type, asize) - else: - bid = p.component_id - bphase, bsize = phase(bid) - key = (tag, aid, bid) - if key in data: - print(data[key], file=fw) - continue - - ar = [aid, bid, "--dir=" + fastadir] - o = overlap(ar) - ov = o.certificateline if o else "{0}\t{1}\tNone".format(bid, asize) - - print( - "\t".join(str(x) for x in (tag, a.object, aphase, bphase, aid, ov)), - file=fw, - ) - fw.flush() - - -def neighbor(args): - """ - %prog neighbor agpfile componentID - - Check overlaps of a particular component in agpfile. 
- """ - p = OptionParser(neighbor.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - agpfile, componentID = args - fastadir = "fasta" - - cmd = "grep" - cmd += " --color -C2 {0} {1}".format(componentID, agpfile) - sh(cmd) - - agp = AGP(agpfile) - aorder = agp.order - if componentID not in aorder: - print( - "Record {0} not present in `{1}`.".format(componentID, agpfile), - file=sys.stderr, - ) - return - - i, c = aorder[componentID] - north, south = agp.getNorthSouthClone(i) - - if not north.isCloneGap: - ar = [north.component_id, componentID, "--dir=" + fastadir] - if north.orientation == "-": - ar += ["--qreverse"] - overlap(ar) - - if not south.isCloneGap: - ar = [componentID, south.component_id, "--dir=" + fastadir] - if c.orientation == "-": - ar += ["--qreverse"] - overlap(ar) - - -def agp(args): - """ - %prog agp tpffile certificatefile agpfile - - Build agpfile from overlap certificates. - - Tiling Path File (tpf) is a file that lists the component and the gaps. - It is a three-column file similar to below, also see jcvi.formats.agp.tpf(): - - telomere chr1 na - AC229737.8 chr1 + - AC202463.29 chr1 + - - Note: the orientation of the component is only used as a guide. If the - orientation is derivable from a terminal overlap, it will use it regardless - of what the tpf says. - - See jcvi.assembly.goldenpath.certificate() which generates a list of - certificates based on agpfile. At first, it seems counter-productive to - convert first agp to certificates then certificates back to agp. - - The certificates provide a way to edit the overlap information, so that the - agpfile can be corrected (without changing agpfile directly). 
- """ - from jcvi.formats.base import DictFile - - p = OptionParser(agp.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - tpffile, certificatefile, agpfile = args - orientationguide = DictFile(tpffile, valuepos=2) - cert = Certificate(certificatefile) - cert.write_AGP(agpfile, orientationguide=orientationguide) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/hic.py b/jcvi/assembly/hic.py deleted file mode 100644 index 0547ae15..00000000 --- a/jcvi/assembly/hic.py +++ /dev/null @@ -1,1772 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Process Hi-C output into AGP for chromosomal-scale scaffolding. -""" -import array -import json -import math -import os -import os.path as op -import sys - -from collections import defaultdict -from functools import partial -from multiprocessing import Pool -from typing import List, Optional, Tuple - -import numpy as np - -from natsort import natsorted - -from ..algorithms.ec import GA_run, GA_setup -from ..algorithms.formula import outlier_cutoff -from ..algorithms.matrix import get_signs -from ..apps.base import ( - ActionDispatcher, - OptionParser, - backup, - iglob, - logger, - mkdir, - symlink, -) -from ..apps.grid import Jobs -from ..compara.synteny import check_beds, get_bed_filenames -from ..formats.agp import order_to_agp -from ..formats.base import LineFile, must_open -from ..formats.bed import Bed -from ..formats.blast import Blast -from ..formats.sizes import Sizes -from ..graphics.base import ( - markup, - normalize_axes, - plt, - plot_heatmap, - savefig, -) -from ..graphics.dotplot import dotplot -from ..utils.cbook import gene_name - -from .allmaps import make_movie - - -# Map orientations to ints -FF = {"+": 1, "-": -1, "?": 1} -RR = {"+": -1, "-": 1, "?": -1} -LB = 18 # Lower bound for golden_array() -UB = 29 # Upper bound for golden_array() -BB = UB - LB + 1 # Span for golden_array() -ACCEPT = "[green]ACCEPT" -REJECT = 
"[red]REJECT" -BINSIZE = 50000 - - -class ContigOrderingLine(object): - """Stores one line in the ContigOrdering file""" - - def __init__(self, line, sep="|"): - args = line.split() - self.contig_id = args[0] - self.contig_name = args[1].split(sep)[0] - contig_rc = args[2] - assert contig_rc in ("0", "1") - self.strand = "+" if contig_rc == "0" else "-" - self.orientation_score = args[3] - self.gap_size_after_contig = args[4] - - -class ContigOrdering(LineFile): - """ContigOrdering file as created by LACHESIS, one per chromosome group. - Header contains summary information per group, followed by list of contigs - with given ordering. - """ - - def __init__(self, filename): - super().__init__(filename) - fp = open(filename) - for row in fp: - if row[0] == "#": - continue - orderline = ContigOrderingLine(row) - self.append(orderline) - - def write_agp( - self, obj, sizes, fw=sys.stdout, gapsize=100, gaptype="contig", evidence="map" - ): - """Converts the ContigOrdering file into AGP format""" - contigorder = [(x.contig_name, x.strand) for x in self] - order_to_agp( - obj, - contigorder, - sizes, - fw, - gapsize=gapsize, - gaptype=gaptype, - evidence=evidence, - ) - - -class CLMFile: - """CLM file (modified) has the following format: - - tig00046211+ tig00063795+ 1 53173 - tig00046211+ tig00063795- 1 116050 - tig00046211- tig00063795+ 1 71155 - tig00046211- tig00063795- 1 134032 - tig00030676+ tig00077819+ 5 136407 87625 87625 106905 102218 - tig00030676+ tig00077819- 5 126178 152952 152952 35680 118923 - tig00030676- tig00077819+ 5 118651 91877 91877 209149 125906 - tig00030676- tig00077819- 5 108422 157204 157204 137924 142611 - """ - - def __init__(self, clmfile, skiprecover=False): - self.name = op.basename(clmfile).rsplit(".", 1)[0] - self.clmfile = clmfile - self.idsfile = clmfile.rsplit(".", 1)[0] + ".ids" - self.parse_ids(skiprecover) - self.parse_clm() - self.signs = None - - def parse_ids(self, skiprecover): - """IDS file has a list of contigs that need to 
be ordered. 'recover', - keyword, if available in the third column, is less confident. - - tig00015093 46912 - tig00035238 46779 recover - tig00030900 119291 - """ - idsfile = self.idsfile - logger.debug("Parse idsfile `%s`", idsfile) - fp = open(idsfile) - tigs = [] - for row in fp: - if row[0] == "#": # Header - continue - atoms = row.split() - tig, _, size = atoms - size = int(size) - if skiprecover and len(atoms) == 3 and atoms[2] == "recover": - continue - tigs.append((tig, size)) - - # Arrange contig names and sizes - _tigs, _sizes = zip(*tigs) - self.contigs = set(_tigs) - self.sizes = np.array(_sizes) - self.tig_to_size = dict(tigs) - - # Initially all contigs are considered active - self.active = set(_tigs) - - def parse_clm(self): - clmfile = self.clmfile - logger.debug("Parse clmfile `%s`", clmfile) - fp = open(clmfile) - contacts = {} - contacts_oriented = defaultdict(dict) - orientations = defaultdict(list) - for row in fp: - atoms = row.strip().split("\t") - assert len(atoms) == 3, "Malformed line `{}`".format(atoms) - abtig, links, dists = atoms - atig, btig = abtig.split() - at, ao = atig[:-1], atig[-1] - bt, bo = btig[:-1], btig[-1] - if at not in self.tig_to_size: - continue - if bt not in self.tig_to_size: - continue - dists = [int(x) for x in dists.split()] - contacts[(at, bt)] = len(dists) - gdists = golden_array(dists) - contacts_oriented[(at, bt)][(FF[ao], FF[bo])] = gdists - contacts_oriented[(bt, at)][(RR[bo], RR[ao])] = gdists - strandedness = 1 if ao == bo else -1 - orientations[(at, bt)].append((strandedness, dists)) - - self.contacts = contacts - self.contacts_oriented = contacts_oriented - # Preprocess the orientations dict - for (at, bt), dists in orientations.items(): - dists = [(s, d, hmean_int(d)) for (s, d) in dists] - strandedness, md, mh = min(dists, key=lambda x: x[-1]) - orientations[(at, bt)] = (strandedness, len(md), mh) - self.orientations = orientations - - def calculate_densities(self): - """ - Calculate the density of 
inter-contig links per base. Strong contigs - considered to have high level of inter-contig links in the current - partition. - """ - active = self.active - densities = defaultdict(int) - for (at, bt), links in self.contacts.items(): - if not (at in active and bt in active): - continue - densities[at] += links - densities[bt] += links - - logdensities = {} - for x, d in densities.items(): - s = self.tig_to_size[x] - logd = np.log10(d * 1.0 / min(s, 500000)) - logdensities[x] = logd - - return logdensities - - def report_active(self): - logger.debug("Active contigs: %d (length=%d)", self.N, self.active_sizes.sum()) - - def activate(self, tourfile=None, minsize=10000, backuptour=True): - """ - Select contigs in the current partition. This is the setup phase of the - algorithm, and supports two modes: - - - "de novo": This is useful at the start of a new run where no tours - available. We select the strong contigs that have significant number - of links to other contigs in the partition. We build a histogram of - link density (# links per bp) and remove the contigs that appear as - outliers. The orientations are derived from the matrix decomposition - of the pairwise strandedness matrix O. - - - "hotstart": This is useful when there was a past run, with a given - tourfile. In this case, the active contig list and orientations are - derived from the last tour in the file. 
- """ - if tourfile and (not op.exists(tourfile)): - logger.debug("Tourfile `%s` not found", tourfile) - tourfile = None - - if tourfile: - logger.debug("Importing tourfile `%s`", tourfile) - tour, tour_o = iter_last_tour(tourfile, self) - self.active = set(tour) - tig_to_idx = self.tig_to_idx - tour = [tig_to_idx[x] for x in tour] - signs = sorted([(x, FF[o]) for (x, o) in zip(tour, tour_o)]) - _, signs = zip(*signs) - self.signs = np.array(signs, dtype=int) - if backuptour: - backup(tourfile) - tour = array.array("i", tour) - else: - self.report_active() - while True: - logdensities = self.calculate_densities() - lb, ub = outlier_cutoff(list(logdensities.values())) - logger.debug("Log10(link_densities) ~ [%d, %d]", lb, ub) - remove = set( - x - for x, d in logdensities.items() - if (d < lb and self.tig_to_size[x] < minsize * 10) - ) - if remove: - self.active -= remove - self.report_active() - else: - break - - logger.debug("Remove contigs with size < %d", minsize) - self.active = set(x for x in self.active if self.tig_to_size[x] >= minsize) - tour = range(self.N) # Use starting (random) order otherwise - tour = array.array("i", tour) - - # Determine orientations - self.flip_all(tour) - - self.report_active() - self.tour = tour - - return tour - - def evaluate_tour_M(self, tour): - """Use Cythonized version to evaluate the score of a current tour""" - from .chic import score_evaluate_M - - return score_evaluate_M(tour, self.active_sizes, self.M) - - def evaluate_tour_P(self, tour): - """Use Cythonized version to evaluate the score of a current tour, - with better precision on the distance of the contigs. - """ - from .chic import score_evaluate_P - - return score_evaluate_P(tour, self.active_sizes, self.P) - - def evaluate_tour_Q(self, tour): - """Use Cythonized version to evaluate the score of a current tour, - taking orientation into consideration. This may be the most accurate - evaluation under the right condition. 
- """ - from .chic import score_evaluate_Q - - return score_evaluate_Q(tour, self.active_sizes, self.Q) - - def flip_log(self, method, score, score_flipped, tag): - logger.debug("%s: %d => %d %s", method, score, score_flipped, tag) - - def flip_all(self, tour): - """Initialize the orientations based on pairwise O matrix.""" - if self.signs is None: # First run - score = 0 - else: - old_signs = self.signs[: self.N] - (score,) = self.evaluate_tour_Q(tour) - - # Remember we cannot have ambiguous orientation code (0 or '?') here - self.signs = get_signs(self.O, validate=False, ambiguous=False) - (score_flipped,) = self.evaluate_tour_Q(tour) - if score_flipped >= score: - tag = ACCEPT - else: - self.signs = old_signs[:] - tag = REJECT - self.flip_log("FLIPALL", score, score_flipped, tag) - return tag - - def flip_whole(self, tour): - """Test flipping all contigs at the same time to see if score improves.""" - (score,) = self.evaluate_tour_Q(tour) - self.signs = -self.signs - (score_flipped,) = self.evaluate_tour_Q(tour) - if score_flipped > score: - tag = ACCEPT - else: - self.signs = -self.signs - tag = REJECT - self.flip_log("FLIPWHOLE", score, score_flipped, tag) - return tag - - def flip_one(self, tour): - """Test flipping every single contig sequentially to see if score - improves. 
- """ - n_accepts = n_rejects = 0 - any_tag_ACCEPT = False - for i, t in enumerate(tour): - if i == 0: - (score,) = self.evaluate_tour_Q(tour) - self.signs[t] = -self.signs[t] - (score_flipped,) = self.evaluate_tour_Q(tour) - if score_flipped > score: - n_accepts += 1 - tag = ACCEPT - else: - self.signs[t] = -self.signs[t] - n_rejects += 1 - tag = REJECT - self.flip_log( - "FLIPONE ({}/{})".format(i + 1, len(self.signs)), - score, - score_flipped, - tag, - ) - if tag == ACCEPT: - any_tag_ACCEPT = True - score = score_flipped - logger.debug("FLIPONE: N_accepts=%d N_rejects=%d", n_accepts, n_rejects) - return ACCEPT if any_tag_ACCEPT else REJECT - - def prune_tour(self, tour, cpus): - """Test deleting each contig and check the delta_score; tour here must - be an array of ints. - """ - while True: - (tour_score,) = self.evaluate_tour_M(tour) - logger.debug("Starting score: %d", tour_score) - active_sizes = self.active_sizes - M = self.M - args = [] - for i, t in enumerate(tour): - stour = tour[:i] + tour[i + 1 :] - args.append((t, stour, tour_score, active_sizes, M)) - - # Parallel run - p = Pool(processes=cpus) - results = list(p.imap(prune_tour_worker, args)) - assert len(tour) == len( - results - ), "Array size mismatch, tour({}) != results({})".format( - len(tour), len(results) - ) - - # Identify outliers - active_contigs = self.active_contigs - idx, log10deltas = zip(*results) - lb, ub = outlier_cutoff(log10deltas) - logger.debug("Log10(delta_score) ~ [%d, %d]", lb, ub) - - remove = set(active_contigs[x] for (x, d) in results if d < lb) - self.active -= remove - self.report_active() - - tig_to_idx = self.tig_to_idx - tour = [active_contigs[x] for x in tour] - tour = array.array("i", [tig_to_idx[x] for x in tour if x not in remove]) - if not remove: - break - - self.tour = tour - self.flip_all(tour) - - return tour - - @property - def active_contigs(self): - return list(self.active) - - @property - def active_sizes(self): - return np.array([self.tig_to_size[x] for 
x in self.active]) - - @property - def N(self): - return len(self.active) - - @property - def oo(self): - return range(self.N) - - @property - def tig_to_idx(self): - return dict((x, i) for (i, x) in enumerate(self.active)) - - @property - def M(self): - """ - Contact frequency matrix. Each cell contains how many inter-contig - links between i-th and j-th contigs. - """ - N = self.N - tig_to_idx = self.tig_to_idx - M = np.zeros((N, N), dtype=int) - for (at, bt), links in self.contacts.items(): - if not (at in tig_to_idx and bt in tig_to_idx): - continue - ai = tig_to_idx[at] - bi = tig_to_idx[bt] - M[ai, bi] = M[bi, ai] = links - return M - - @property - def O(self): - """ - Pairwise strandedness matrix. Each cell contains whether i-th and j-th - contig are the same orientation +1, or opposite orientation -1. - """ - N = self.N - tig_to_idx = self.tig_to_idx - O = np.zeros((N, N), dtype=int) - for (at, bt), (strandedness, md, mh) in self.orientations.items(): - if not (at in tig_to_idx and bt in tig_to_idx): - continue - ai = tig_to_idx[at] - bi = tig_to_idx[bt] - score = strandedness * md - O[ai, bi] = O[bi, ai] = score - return O - - @property - def P(self): - """ - Contact frequency matrix with better precision on distance between - contigs. In the matrix M, the distance is assumed to be the distance - between mid-points of two contigs. In matrix Q, however, we compute - harmonic mean of the links for the orientation configuration that is - shortest. This offers better precision for the distance between big - contigs. - """ - N = self.N - tig_to_idx = self.tig_to_idx - P = np.zeros((N, N, 2), dtype=int) - for (at, bt), (strandedness, md, mh) in self.orientations.items(): - if not (at in tig_to_idx and bt in tig_to_idx): - continue - ai = tig_to_idx[at] - bi = tig_to_idx[bt] - P[ai, bi, 0] = P[bi, ai, 0] = md - P[ai, bi, 1] = P[bi, ai, 1] = mh - return P - - @property - def Q(self): - """ - Contact frequency matrix when contigs are already oriented. 
This is s a - similar matrix as M, but rather than having the number of links in the - cell, it points to an array that has the actual distances. - """ - N = self.N - tig_to_idx = self.tig_to_idx - signs = self.signs - Q = np.ones((N, N, BB), dtype=int) * -1 # Use -1 as the sentinel - for (at, bt), k in self.contacts_oriented.items(): - if not (at in tig_to_idx and bt in tig_to_idx): - continue - ai = tig_to_idx[at] - bi = tig_to_idx[bt] - ao = signs[ai] - bo = signs[bi] - Q[ai, bi] = k[(ao, bo)] - return Q - - -def hmean_int(a, a_min=5778, a_max=1149851): - """Harmonic mean of an array, returns the closest int""" - from scipy.stats import hmean - - return int(round(hmean(np.clip(a, a_min, a_max)))) - - -def golden_array(a, phi=1.61803398875, lb=LB, ub=UB): - """Given list of ints, we aggregate similar values so that it becomes an - array of multiples of phi, where phi is the golden ratio. - - phi ^ 14 = 843 - phi ^ 33 = 7881196 - - So the array of counts go between 843 to 788196. One triva is that the - exponents of phi gets closer to integers as N grows. 
See interesting - discussion here: - - """ - counts = np.zeros(BB, dtype=int) - for x in a: - c = int(round(math.log(x, phi))) - if c < lb: - c = lb - if c > ub: - c = ub - counts[c - lb] += 1 - return counts - - -def prune_tour_worker(arg): - """Worker thread for CLMFile.prune_tour()""" - from .chic import score_evaluate_M - - t, stour, tour_score, active_sizes, M = arg - (stour_score,) = score_evaluate_M(stour, active_sizes, M) - delta_score = tour_score - stour_score - log10d = np.log10(delta_score) if delta_score > 1e-9 else -9 - return t, log10d - - -def main(): - - actions = ( - # LACHESIS output processing - ("agp", "generate AGP file based on LACHESIS output"), - ("score", "score the current LACHESIS CLM"), - # Simulation - ("simulate", "simulate CLM data"), - # Scaffolding - ("optimize", "optimize the contig order and orientation"), - ("density", "estimate link density of contigs"), - # Plotting - ("movieframe", "plot heatmap and synteny for a particular tour"), - ("movie", "plot heatmap optimization history in a tourfile"), - # Reference-based analytics - ("bam2mat", "convert bam file to .npy format used in plotting"), - ("mergemat", "combine counts from multiple .npy data files"), - ("heatmap", "plot heatmap based on .npy file"), - ("dist", "plot distance distribution based on .dist.npy file"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def fit_power_law(xs, ys): - """Fit power law distribution. 
- - See reference: - http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html - Assumes the form Y = A * X^B, returns - - Args: - xs ([int]): X vector - ys ([float64]): Y vector - - Returns: - (A, B), the coefficients - """ - import math - - sum_logXlogY, sum_logXlogX, sum_logX, sum_logY = 0, 0, 0, 0 - N = len(xs) - for i in range(N): - if not xs[i] or not ys[i]: - continue - logXs, logYs = math.log(xs[i]), math.log(ys[i]) - sum_logXlogY += logXs * logYs - sum_logXlogX += logXs * logXs - sum_logX += logXs - sum_logY += logYs - - B = (N * sum_logXlogY - sum_logX * sum_logY) / ( - N * sum_logXlogX - sum_logX * sum_logX - ) - A = math.exp((sum_logY - B * sum_logX) / N) - logger.debug("Power law Y = %.1f * X ^ %.4f", A, B) - label = "$Y={:.1f} \\times X^{{ {:.4f} }}$".format(A, B) - return A, B, label - - -def dist(args): - """ - %prog dist input.dist.npy genome.json - - Plot histogram based on .dist.npy data file. The .npy file stores an array - with link counts per dist bin, with the bin starts stored in the genome.json. 
- """ - import seaborn as sns - import pandas as pd - from jcvi.graphics.base import human_base_formatter, markup - - p = OptionParser(dist.__doc__) - p.add_argument("--title", help="Title of the histogram") - p.add_argument("--xmin", default=300, help="Minimum distance") - p.add_argument("--xmax", default=6000000, help="Maximum distance") - opts, args, iopts = p.set_image_options(args, figsize="6x6") - - if len(args) != 2: - sys.exit(not p.print_help()) - - npyfile, jsonfile = args - pf = npyfile.rsplit(".", 1)[0] - header = json.loads(open(jsonfile).read()) - distbin_starts = np.array(header["distbinstarts"], dtype="float64") - distbin_sizes = np.array(header["distbinsizes"], dtype="float64") - a = np.load(npyfile) - - xmin, xmax = opts.xmin, opts.xmax - df = pd.DataFrame() - xstart, xend = ( - np.searchsorted(distbin_starts, xmin), - np.searchsorted(distbin_starts, xmax), - ) - df["BinStart"] = distbin_starts[xstart:xend] - df["LinkDensity"] = a[xstart:xend] / distbin_sizes[xstart:xend] - ax = sns.lineplot( - x="BinStart", y="LinkDensity", data=df, lw=3, color="lightslategray" - ) - tx = df["BinStart"] - A, B, label = fit_power_law(tx, df["LinkDensity"]) - ty = A * tx**B - ax.plot(tx, ty, "r:", lw=3, label=label) - ax.legend() - if opts.title: - ax.set_title(markup(opts.title)) - ax.set_xlabel("Link size (bp)") - ax.set_ylabel(r"Density (\# of links per bp)") - ax.set_xscale("log", nonposx="clip") - ax.set_yscale("log", nonposy="clip") - ax.xaxis.set_major_formatter(human_base_formatter) - - image_name = pf + "." + opts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def generate_groups(groupsfile): - """Parse 'groups' file. 
The 'groups' file has the following format, - for example: - - seq1,seq2 b - seq1 g - seq2 g - - Args: - groupsfile (str): Path to the groups file - """ - data = [] - with open(groupsfile) as fp: - for row in fp: - seqids, color = row.split() - yield seqids, color - - -def read_matrix( - npyfile: str, - header: dict, - contig: Optional[str], - groups: List[Tuple[str, str]], - vmin: int, - vmax: int, - plot_breaks: bool, -): - """ - Read the matrix from the npy file and apply log transformation and thresholding. - """ - # Load the matrix - A = np.load(npyfile) - total_bins = header["total_bins"] - - # Select specific submatrix - if contig: - contig_start = header["starts"][contig] - contig_size = header["sizes"][contig] - contig_end = contig_start + contig_size - A = A[contig_start:contig_end, contig_start:contig_end] - else: - A = A[:total_bins, :total_bins] - - # Convert seqids to positions for each group - new_groups = [] - for seqids, color in groups: - seqids = seqids.split(",") - assert all( - x in header["starts"] for x in seqids - ), f"{seqids} contain ids not found in starts" - assert all( - x in header["sizes"] for x in seqids - ), f"{seqids} contain ids not found in sizes" - start = min(header["starts"][x] for x in seqids) - end = max(header["starts"][x] + header["sizes"][x] for x in seqids) - position_seqids = [] - for seqid in seqids: - seqid_start = header["starts"][seqid] - seqid_size = header["sizes"][seqid] - position_seqids.append((seqid_start + seqid_size / 2, seqid)) - new_groups.append((start, end, position_seqids, color)) - - # Several concerns in practice: - # The diagonal counts may be too strong, this can either be resolved by - # masking them. Or perform a log transform on the entire heatmap. 
- B = A.astype("float64") - B += 1.0 - B = np.log(B) - B[B < vmin] = vmin - B[B > vmax] = vmax - print(B) - logger.debug("Matrix log-transformation and thresholding (%d-%d) done", vmin, vmax) - - breaks = list(header["starts"].values()) - breaks += [total_bins] # This is actually discarded - breaks = sorted(breaks)[1:] - if contig or not plot_breaks: - breaks = [] - - return B, new_groups, breaks - - -def draw_hic_heatmap( - root, - ax, - npyfile: str, - jsonfile: str, - contig: Optional[str], - groups_file: str, - title: str, - vmin: int, - vmax: int, - plot_breaks: bool, -): - """ - Draw heatmap based on .npy file. The .npy file stores a square matrix with - bins of genome, and cells inside the matrix represent number of links - between bin i and bin j. The `genome.json` contains the offsets of each - contig/chr so that we know where to draw boundary lines, or extract per - contig/chromosome heatmap. - """ - groups = list(generate_groups(groups_file)) if groups_file else [] - - # Load contig/chromosome starts and sizes - header = json.loads(open(jsonfile, encoding="utf-8").read()) - resolution = header.get("resolution") - assert resolution is not None, "`resolution` not found in `{}`".format(jsonfile) - logger.debug("Resolution set to %d", resolution) - - B, new_groups, breaks = read_matrix( - npyfile, header, contig, groups, vmin, vmax, plot_breaks - ) - plot_heatmap(ax, B, breaks, groups=new_groups, binsize=resolution) - - # Title - if contig: - title += f"-{contig}" - root.text( - 0.5, - 0.96, - markup(title), - color="darkslategray", - ha="center", - va="center", - ) - - normalize_axes(root) - - -def heatmap(args): - """ - %prog heatmap input.npy genome.json - - Plot heatmap based on .npy data file. The .npy stores a square matrix with - bins of genome, and cells inside the matrix represent number of links - between bin i and bin j. 
The `genome.json` contains the offsets of each - contig/chr so that we know where to draw boundary lines, or extract per - contig/chromosome heatmap. - - If a 'groups' file is given (with --groups), we will draw squares on the - heatmap. The 'groups' file has the following format, for example: - - seq1,seq2 b - seq1 g - seq2 g - - This will first draw a square around seq1+seq2 with blue color, then seq1 - and seq2 individually with green color. - """ - p = OptionParser(heatmap.__doc__) - p.add_argument("--title", help="Title of the heatmap") - p.add_argument("--groups", help="Groups file, see doc") - p.add_argument("--vmin", default=1, type=int, help="Minimum value in the heatmap") - p.add_argument("--vmax", default=6, type=int, help="Maximum value in the heatmap") - p.add_argument("--chr", help="Plot this contig/chr only") - p.add_argument( - "--nobreaks", - default=False, - action="store_true", - help="Do not plot breaks (esp. if contigs are small)", - ) - opts, args, iopts = p.set_image_options( - args, figsize="11x11", style="white", cmap="coolwarm", dpi=120 - ) - - if len(args) != 2: - sys.exit(not p.print_help()) - - npyfile, jsonfile = args - # Canvas - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) # whole canvas - ax = fig.add_axes((0.05, 0.05, 0.9, 0.9)) # just the heatmap - - draw_hic_heatmap( - root, - ax, - npyfile, - jsonfile, - contig=opts.chr, - groups_file=opts.groups, - title=opts.title, - vmin=opts.vmin, - vmax=opts.vmax, - plot_breaks=not opts.nobreaks, - ) - - pf = npyfile.rsplit(".", 1)[0] - image_name = pf + "." + iopts.format - # macOS sometimes has way too verbose output - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def mergemat(args): - """ - %prog mergemat *.npy - - Combine counts from multiple .npy data files. 
- """ - p = OptionParser(mergemat.__doc__) - p.set_outfile(outfile="out") - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - npyfiles = args - A = np.load(npyfiles[0]) - logger.debug("Load `%s`: matrix of shape %s; sum=%d", npyfiles[0], A.shape, A.sum()) - for npyfile in npyfiles[1:]: - B = np.load(npyfile) - A += B - logger.debug("Load `%s`: sum=%d", npyfiles[0], A.sum()) - - pf = opts.outfile - np.save(pf, A) - logger.debug("Combined %d files into `%s.npy`", len(npyfiles), pf) - - -def get_seqstarts(bamfile, N, seqids=None): - """Go through the SQ headers and pull out all sequences with size - greater than the resolution settings, i.e. contains at least a few cells - """ - import pysam - - bamfile = pysam.AlignmentFile(bamfile, "rb") - seqsize = {} - for kv in bamfile.header["SQ"]: - if kv["LN"] < 10 * N: - continue - seqsize[kv["SN"]] = kv["LN"] // N + 1 - - allseqs = seqids or natsorted(seqsize.keys()) - allseqsizes = np.array([seqsize[x] for x in allseqs]) - seqstarts = np.cumsum(allseqsizes) - seqstarts = np.roll(seqstarts, 1) - total_bins = seqstarts[0] - seqstarts[0] = 0 - seqstarts = dict(zip(allseqs, seqstarts)) - seqid_sizes = dict((x, seqsize[x]) for x in allseqs) - - return seqstarts, seqid_sizes, total_bins - - -def get_distbins(start=100, bins=2000, ratio=1.01): - """Get exponentially sized bins for link length""" - b = np.ones(bins, dtype="float64") - b[0] = 100 - for i in range(1, bins): - b[i] = b[i - 1] * ratio - bins = np.around(b).astype(dtype=int) - binsizes = np.diff(bins) - return bins, binsizes - - -def bam2mat(args): - """ - %prog bam2mat input.bam - - Convert bam file to .mat format, which is simply numpy 2D array. Important - parameter is the resolution, which is the cell size. Small cell size lead - to more fine-grained heatmap, but leads to large .mat size and slower - plotting. 
- """ - import pysam - from jcvi.utils.cbook import percentage - - p = OptionParser(bam2mat.__doc__) - p.add_argument( - "--resolution", - default=500000, - type=int, - help="Resolution when counting the links", - ) - p.add_argument( - "--seqids", - help="Use a given seqids file, a single line with seqids joined by comma", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bamfilename,) = args - pf = bamfilename.rsplit(".", 1)[0] - N = opts.resolution - pf += f".resolution_{N}" - bins = 1500 # Distance distribution bins - minsize = 100 # Record distance if it is at least minsize - seqids = opts.seqids - seqids = ( - open(seqids).readline().strip().split(",") - if seqids and op.exists(seqids) - else None - ) - - seqstarts, seqsize, total_bins = get_seqstarts(bamfilename, N, seqids=seqids) - distbinstarts, distbinsizes = get_distbins(start=minsize, bins=bins) - - # Store the starts and sizes into a JSON file - jsonfile = pf + ".json" - fwjson = open(jsonfile, "w") - header = { - "starts": seqstarts, - "sizes": seqsize, - "total_bins": total_bins, - "distbinstarts": list(distbinstarts), - "distbinsizes": list(distbinsizes), - "resolution": N, - } - - # int64 will not be able to deserialize with Python 3 - # Here is a workaround: - # https://stackoverflow.com/questions/11942364/typeerror-integer-is-not-json-serializable-when-serializing-json-in-python - def default(o): - if isinstance(o, np.int64): - return int(o) - raise TypeError - - json.dump(header, fwjson, sort_keys=True, indent=4, default=default) - fwjson.close() - logger.debug("Contig bin starts written to `%s`", jsonfile) - - print(sorted(seqstarts.items(), key=lambda x: x[-1])) - logger.debug("Initialize matrix of size %dx%d", total_bins, total_bins) - A = np.zeros((total_bins, total_bins), dtype=int) - B = np.zeros(bins, dtype=int) - - # Find the bin ID of each read - def bin_number(chr, pos): - return seqstarts[chr] + pos // N - - def distbin_number(dist, 
start=minsize, ratio=1.01): - return int(round(math.log(dist * 1.0 / start, ratio))) - - bamfile = pysam.AlignmentFile(bamfilename, "rb") - # Check all reads, rules borrowed from LACHESIS - # https://github.com/shendurelab/LACHESIS/blob/master/src/GenomeLinkMatrix.cc#L1476 - j = k = 0 - for c in bamfile: - j += 1 - if j % 100000 == 0: - print("{} reads counted".format(j), file=sys.stderr) - - if c.is_qcfail and c.is_duplicate: - continue - if c.is_secondary and c.is_supplementary: - continue - if c.mapping_quality == 0: - continue - if not c.is_paired: - continue - if c.is_read2: # Take only one read - continue - - # pysam v0.8.3 does not support keyword reference_name - achr = bamfile.getrname(c.reference_id) - apos = c.reference_start - bchr = bamfile.getrname(c.next_reference_id) - bpos = c.next_reference_start - if achr not in seqstarts or bchr not in seqstarts: - continue - if achr == bchr: - dist = abs(apos - bpos) - if dist < minsize: - continue - db = distbin_number(dist) - B[db] += 1 - - abin, bbin = bin_number(achr, apos), bin_number(bchr, bpos) - A[abin, bbin] += 1 - if abin != bbin: - A[bbin, abin] += 1 - - k += 1 - - logger.debug("Total reads counted: %s", percentage(2 * k, j)) - bamfile.close() - np.save(pf, A) - logger.debug("Link counts written to `%s.npy`", pf) - np.save(pf + ".dist", B) - logger.debug("Link dists written to `%s.dist.npy`", pf) - - -def simulate(args): - """ - %prog simulate test - - Simulate CLM and IDS files with given names. 
- - The simulator assumes several distributions: - - Links are distributed uniformly across genome - - Log10(link_size) are distributed normally - - Genes are distributed uniformly - """ - p = OptionParser(simulate.__doc__) - p.add_argument("--genomesize", default=10000000, type=int, help="Genome size") - p.add_argument("--genes", default=1000, type=int, help="Number of genes") - p.add_argument("--contigs", default=100, type=int, help="Number of contigs") - p.add_argument("--coverage", default=10, type=int, help="Link coverage") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (pf,) = args - GenomeSize = opts.genomesize - Genes = opts.genes - Contigs = opts.contigs - Coverage = opts.coverage - PE = 500 - Links = int(GenomeSize * Coverage / PE) - - # Simulate the contig sizes that sum to GenomeSize - # See also: - # - (ContigSizes,) = np.random.dirichlet([1] * Contigs, 1) * GenomeSize - ContigSizes = np.array(np.round_(ContigSizes, decimals=0), dtype=int) - ContigStarts = np.zeros(Contigs, dtype=int) - ContigStarts[1:] = np.cumsum(ContigSizes)[:-1] - - # Write IDS file - idsfile = pf + ".ids" - fw = open(idsfile, "w") - print("#Contig\tRECounts\tLength", file=fw) - for i, s in enumerate(ContigSizes): - print("tig{:04d}\t{}\t{}".format(i, s // (4**4), s), file=fw) - fw.close() - - # Simulate the gene positions - GenePositions = np.sort(np.random.randint(0, GenomeSize, size=Genes)) - write_last_and_beds(pf, GenePositions, ContigStarts) - - # Simulate links, uniform start, with link distances following 1/x, where x - # is the distance between the links. 
As an approximation, we have links - # between [1e3, 1e7], so we map from uniform [1e-7, 1e-3] - LinkStarts = np.sort(np.random.randint(1, GenomeSize, size=Links)) - a, b = 1e-7, 1e-3 - LinkSizes = np.array( - np.round_(1 / ((b - a) * np.random.rand(Links) + a), decimals=0), dtype=int - ) - LinkEnds = LinkStarts + LinkSizes - - # Find link to contig membership - LinkStartContigs = np.searchsorted(ContigStarts, LinkStarts) - 1 - LinkEndContigs = np.searchsorted(ContigStarts, LinkEnds) - 1 - - # Extract inter-contig links - InterContigLinks = (LinkStartContigs != LinkEndContigs) & ( - LinkEndContigs != Contigs - ) - ICLinkStartContigs = LinkStartContigs[InterContigLinks] - ICLinkEndContigs = LinkEndContigs[InterContigLinks] - ICLinkStarts = LinkStarts[InterContigLinks] - ICLinkEnds = LinkEnds[InterContigLinks] - - # Write CLM file - write_clm( - pf, - ICLinkStartContigs, - ICLinkEndContigs, - ICLinkStarts, - ICLinkEnds, - ContigStarts, - ContigSizes, - ) - - -def write_last_and_beds(pf, GenePositions, ContigStarts): - """ - Write LAST file, query and subject BED files. 
- """ - qbedfile = pf + "tigs.bed" - sbedfile = pf + "chr.bed" - lastfile = "{}tigs.{}chr.last".format(pf, pf) - qbedfw = open(qbedfile, "w") - sbedfw = open(sbedfile, "w") - lastfw = open(lastfile, "w") - - GeneContigs = np.searchsorted(ContigStarts, GenePositions) - 1 - for i, (c, gstart) in enumerate(zip(GeneContigs, GenePositions)): - gene = "gene{:05d}".format(i) - tig = "tig{:04d}".format(c) - start = ContigStarts[c] - cstart = gstart - start - print("\t".join(str(x) for x in (tig, cstart, cstart + 1, gene)), file=qbedfw) - print( - "\t".join(str(x) for x in ("chr1", gstart, gstart + 1, gene)), file=sbedfw - ) - lastatoms = [gene, gene, 100] + [0] * 8 + [100] - print("\t".join(str(x) for x in lastatoms), file=lastfw) - - qbedfw.close() - sbedfw.close() - lastfw.close() - - -def write_clm( - pf, - ICLinkStartContigs, - ICLinkEndContigs, - ICLinkStarts, - ICLinkEnds, - ContigStarts, - ContigSizes, -): - """ - Write CLM file from simulated data. - """ - clm = defaultdict(list) - for start, end, linkstart, linkend in zip( - ICLinkStartContigs, ICLinkEndContigs, ICLinkStarts, ICLinkEnds - ): - start_a = ContigStarts[start] - start_b = start_a + ContigSizes[start] - end_a = ContigStarts[end] - end_b = end_a + ContigSizes[end] - if linkend >= end_b: - continue - clm[(start, end)].append( - (linkstart - start_a, start_b - linkstart, linkend - end_a, end_b - linkend) - ) - - clmfile = pf + ".clm" - fw = open(clmfile, "w") - - def format_array(a): - return [str(x) for x in sorted(a) if x > 0] - - for (start, end), links in sorted(clm.items()): - start = "tig{:04d}".format(start) - end = "tig{:04d}".format(end) - nlinks = len(links) - if not nlinks: - continue - ff = format_array([(b + c) for a, b, c, d in links]) - fr = format_array([(b + d) for a, b, c, d in links]) - rf = format_array([(a + c) for a, b, c, d in links]) - rr = format_array([(a + d) for a, b, c, d in links]) - print("{}+ {}+\t{}\t{}".format(start, end, nlinks, " ".join(ff)), file=fw) - print("{}+ 
{}-\t{}\t{}".format(start, end, nlinks, " ".join(fr)), file=fw) - print("{}- {}+\t{}\t{}".format(start, end, nlinks, " ".join(rf)), file=fw) - print("{}- {}-\t{}\t{}".format(start, end, nlinks, " ".join(rr)), file=fw) - fw.close() - - -def density(args): - """ - %prog density test.clm - - Estimate link density of contigs. - """ - p = OptionParser(density.__doc__) - p.add_argument( - "--save", - default=False, - action="store_true", - help="Write log densitites of contigs to file", - ) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (clmfile,) = args - clm = CLMFile(clmfile) - pf = clmfile.rsplit(".", 1)[0] - - if opts.save: - logdensities = clm.calculate_densities() - densityfile = pf + ".density" - fw = open(densityfile, "w") - for name, logd in logdensities.items(): - s = clm.tig_to_size[name] - print("\t".join(str(x) for x in (name, s, logd)), file=fw) - fw.close() - logger.debug("Density written to `%s`", densityfile) - - tourfile = clmfile.rsplit(".", 1)[0] + ".tour" - tour = clm.activate(tourfile=tourfile, backuptour=False) - clm.flip_all(tour) - clm.flip_whole(tour) - clm.flip_one(tour) - - -def optimize(args): - """ - %prog optimize test.clm - - Optimize the contig order and orientation, based on CLM file. 
- """ - p = OptionParser(optimize.__doc__) - p.add_argument( - "--skiprecover", - default=False, - action="store_true", - help="Do not import 'recover' contigs", - ) - p.add_argument( - "--startover", - default=False, - action="store_true", - help="Do not resume from existing tour file", - ) - p.add_argument("--skipGA", default=False, action="store_true", help="Skip GA step") - p.set_outfile(outfile=None) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (clmfile,) = args - startover = opts.startover - runGA = not opts.skipGA - cpus = opts.cpus - - # Load contact map - clm = CLMFile(clmfile, skiprecover=opts.skiprecover) - - tourfile = opts.outfile or clmfile.rsplit(".", 1)[0] + ".tour" - if startover: - tourfile = None - tour = clm.activate(tourfile=tourfile) - - fwtour = open(tourfile, "w") - # Store INIT tour - print_tour(fwtour, clm.tour, "INIT", clm.active_contigs, clm.oo, signs=clm.signs) - - if runGA: - for phase in range(1, 3): - tour = optimize_ordering(fwtour, clm, phase, cpus) - tour = clm.prune_tour(tour, cpus) - - # Flip orientations - phase = 1 - while True: - tag1, tag2 = optimize_orientations(fwtour, clm, phase, cpus) - if tag1 == REJECT and tag2 == REJECT: - logger.debug("Terminating ... no more %s", ACCEPT) - break - phase += 1 - - fwtour.close() - - -def optimize_ordering(fwtour, clm, phase, cpus): - """ - Optimize the ordering of contigs by Genetic Algorithm (GA). 
- """ - from .chic import score_evaluate_M - - # Prepare input files - tour_contigs = clm.active_contigs - tour_sizes = clm.active_sizes - tour_M = clm.M - tour = clm.tour - signs = clm.signs - oo = clm.oo - - def callback(tour, gen, phase, oo): - fitness = tour.fitness if hasattr(tour, "fitness") else None - label = "GA{}-{}".format(phase, gen) - if fitness: - fitness = "{0}".format(fitness).split(",")[0].replace("(", "") - label += "-" + fitness - if gen % 20 == 0: - print_tour(fwtour, tour, label, tour_contigs, oo, signs=signs) - return tour - - callbacki = partial(callback, phase=phase, oo=oo) - toolbox = GA_setup(tour) - toolbox.register("evaluate", score_evaluate_M, tour_sizes=tour_sizes, tour_M=tour_M) - tour, tour_fitness = GA_run( - toolbox, ngen=1000, npop=100, cpus=cpus, callback=callbacki - ) - clm.tour = tour - - return tour - - -def optimize_orientations(fwtour, clm, phase, cpus): - """ - Optimize the orientations of contigs by using heuristic flipping. - """ - # Prepare input files - tour_contigs = clm.active_contigs - tour = clm.tour - oo = clm.oo - - print_tour( - fwtour, tour, "FLIPALL{}".format(phase), tour_contigs, oo, signs=clm.signs - ) - tag1 = clm.flip_whole(tour) - print_tour( - fwtour, tour, "FLIPWHOLE{}".format(phase), tour_contigs, oo, signs=clm.signs - ) - tag2 = clm.flip_one(tour) - print_tour( - fwtour, tour, "FLIPONE{}".format(phase), tour_contigs, oo, signs=clm.signs - ) - - return tag1, tag2 - - -def prepare_synteny(tourfile, lastfile, odir, p, opts): - """ - Prepare synteny plots for movie(). 
- """ - qbedfile, sbedfile = get_bed_filenames(lastfile, p, opts) - qbedfile = op.abspath(qbedfile) - sbedfile = op.abspath(sbedfile) - - qbed = Bed(qbedfile, sorted=False) - contig_to_beds = dict(qbed.sub_beds()) - - # Create a separate directory for the subplots and movie - mkdir(odir, overwrite=True) - os.chdir(odir) - logger.debug("Change into subdir `%s`", odir) - - # Make anchorsfile - anchorsfile = ".".join(op.basename(lastfile).split(".", 2)[:2]) + ".anchors" - fw = open(anchorsfile, "w") - for b in Blast(lastfile): - print( - "\t".join((gene_name(b.query), gene_name(b.subject), str(int(b.score)))), - file=fw, - ) - fw.close() - - # Symlink sbed - symlink(sbedfile, op.basename(sbedfile)) - - return anchorsfile, qbedfile, contig_to_beds - - -def separate_tour_and_o(row): - """ - The tour line typically contains contig list like: - tig00044568+ tig00045748- tig00071055- tig00015093- tig00030900- - - This function separates the names from the orientations. - """ - tour = [] - tour_o = [] - for contig in row.split(): - if contig[-1] in ("+", "-", "?"): - tour.append(contig[:-1]) - tour_o.append(contig[-1]) - else: # Unoriented - tour.append(contig) - tour_o.append("?") - return tour, tour_o - - -def iter_last_tour(tourfile, clm): - """ - Extract last tour from tourfile. The clm instance is also passed in to see - if any contig is covered in the clm. - """ - row = open(tourfile).readlines()[-1] - _tour, _tour_o = separate_tour_and_o(row) - tour = [] - tour_o = [] - for tc, to in zip(_tour, _tour_o): - if tc not in clm.contigs: - logger.debug( - "Contig `%s` in file `%s` not found in `%s`", tc, tourfile, clm.idsfile - ) - continue - tour.append(tc) - tour_o.append(to) - return tour, tour_o - - -def iter_tours(tourfile, frames=1): - """ - Extract tours from tourfile. Tourfile contains a set of contig - configurations, generated at each iteration of the genetic algorithm. 
Each - configuration has two rows, first row contains iteration id and score, - second row contains list of contigs, separated by comma. - """ - fp = open(tourfile) - - i = 0 - for row in fp: - if row[0] == ">": - label = row[1:].strip() - if label.startswith("GA"): - pf, j, score = label.split("-", 2) - j = int(j) - else: - j = 0 - i += 1 - else: - if j % frames != 0: - continue - tour, tour_o = separate_tour_and_o(row) - yield i, label, tour, tour_o - - fp.close() - - -def movie(args): - """ - %prog movie test.tour test.clm ref.contigs.last - - Plot optimization history. - """ - p = OptionParser(movie.__doc__) - p.add_argument("--frames", default=500, type=int, help="Only plot every N frames") - p.add_argument( - "--engine", - default="ffmpeg", - choices=("ffmpeg", "gifsicle"), - help="Movie engine, output MP4 or GIF", - ) - p.set_beds() - opts, args, iopts = p.set_image_options( - args, figsize="16x8", style="white", cmap="coolwarm", format="png", dpi=300 - ) - - if len(args) != 3: - sys.exit(not p.print_help()) - - tourfile, clmfile, lastfile = args - tourfile = op.abspath(tourfile) - clmfile = op.abspath(clmfile) - lastfile = op.abspath(lastfile) - cwd = os.getcwd() - odir = op.basename(tourfile).rsplit(".", 1)[0] + "-movie" - anchorsfile, qbedfile, contig_to_beds = prepare_synteny( - tourfile, lastfile, odir, p, opts - ) - - args = [] - for i, label, tour, tour_o in iter_tours(tourfile, frames=opts.frames): - padi = "{:06d}".format(i) - # Make sure the anchorsfile and bedfile has the serial number in, - # otherwise parallelization may fail - a, b = op.basename(anchorsfile).split(".", 1) - ianchorsfile = a + "_" + padi + "." 
+ b - symlink(anchorsfile, ianchorsfile) - - # Make BED file with new order - qb = Bed() - for contig, o in zip(tour, tour_o): - if contig not in contig_to_beds: - continue - bedlines = contig_to_beds[contig][:] - if o == "-": - bedlines.reverse() - for x in bedlines: - qb.append(x) - - a, b = op.basename(qbedfile).split(".", 1) - ibedfile = a + "_" + padi + "." + b - qb.print_to_file(ibedfile) - # Plot dot plot, but do not sort contigs by name (otherwise losing - # order) - image_name = padi + "." + iopts.format - - tour = ",".join(tour) - args.append( - [[tour, clmfile, ianchorsfile, "--outfile", image_name, "--label", label]] - ) - - Jobs(movieframe, args).run() - - os.chdir(cwd) - make_movie(odir, odir, engine=opts.engine, format=iopts.format) - - -def score(args): - """ - %prog score main_results/ cached_data/ contigsfasta - - Score the current LACHESIS CLM. - """ - p = OptionParser(score.__doc__) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - mdir, cdir, contigsfasta = args - orderingfiles = natsorted(iglob(mdir, "*.ordering")) - sizes = Sizes(contigsfasta) - contig_names = list(sizes.iter_names()) - contig_ids = dict((name, i) for (i, name) in enumerate(contig_names)) - - oo = [] - # Load contact matrix - glm = op.join(cdir, "all.GLM") - N = len(contig_ids) - M = np.zeros((N, N), dtype=int) - fp = open(glm) - for row in fp: - if row[0] == "#": - continue - x, y, z = row.split() - if x == "X": - continue - M[int(x), int(y)] = int(z) - - fwtour = open("tour", "w") - - def callback(tour, gen, oo): - fitness = tour.fitness if hasattr(tour, "fitness") else None - label = "GA-{0}".format(gen) - if fitness: - fitness = "{0}".format(fitness).split(",")[0].replace("(", "") - label += "-" + fitness - print_tour(fwtour, tour, label, contig_names, oo) - return tour - - for ofile in orderingfiles: - co = ContigOrdering(ofile) - for x in co: - contig_id = contig_ids[x.contig_name] - oo.append(contig_id) - pf = 
op.basename(ofile).split(".")[0] - print(pf) - print(oo) - - tour, tour_sizes, tour_M = prepare_ec(oo, sizes, M) - # Store INIT tour - print_tour(fwtour, tour, "INIT", contig_names, oo) - - # Faster Cython version for evaluation - from .chic import score_evaluate_M - - callbacki = partial(callback, oo=oo) - toolbox = GA_setup(tour) - toolbox.register( - "evaluate", score_evaluate_M, tour_sizes=tour_sizes, tour_M=tour_M - ) - tour, tour.fitness = GA_run( - toolbox, npop=100, cpus=opts.cpus, callback=callbacki - ) - print(tour, tour.fitness) - break - - fwtour.close() - - -def print_tour(fwtour, tour, label, contig_names, oo, signs=None): - print(">" + label, file=fwtour) - if signs is not None: - contig_o = [] - for x in tour: - idx = oo[x] - sign = {1: "+", 0: "?", -1: "-"}[signs[idx]] - contig_o.append(contig_names[idx] + sign) - print(" ".join(contig_o), file=fwtour) - else: - print(" ".join(contig_names[oo[x]] for x in tour), file=fwtour) - - -def prepare_ec(oo, sizes, M): - """ - This prepares EC and converts from contig_id to an index. - """ - tour = range(len(oo)) - tour_sizes = np.array([sizes.sizes[x] for x in oo]) - tour_M = M[oo, :][:, oo] - return tour, tour_sizes, tour_M - - -def score_evaluate(tour, tour_sizes=None, tour_M=None): - """SLOW python version of the evaluation function. For benchmarking - purposes only. Do not use in production. - """ - sizes_oo = np.array([tour_sizes[x] for x in tour]) - sizes_cum = np.cumsum(sizes_oo) - sizes_oo / 2 - s = 0 - size = len(tour) - for ia in range(size): - a = tour[ia] - for ib in range(ia + 1, size): - b = tour[ib] - links = tour_M[a, b] - dist = sizes_cum[ib] - sizes_cum[ia] - if dist > 1e7: - break - s += links * 1.0 / dist - return (s,) - - -def movieframe(args): - """ - %prog movieframe tour test.clm contigs.ref.anchors - - Draw heatmap and synteny in the same plot. 
- """ - p = OptionParser(movieframe.__doc__) - p.add_argument("--label", help="Figure title") - p.set_beds() - p.set_outfile(outfile=None) - opts, args, iopts = p.set_image_options( - args, figsize="16x8", style="white", cmap="coolwarm", format="png", dpi=120 - ) - - if len(args) != 3: - sys.exit(not p.print_help()) - - tour, clmfile, anchorsfile = args - tour = tour.split(",") - image_name = opts.outfile or ("movieframe." + iopts.format) - label = opts.label or op.basename(image_name).rsplit(".", 1)[0] - - clm = CLMFile(clmfile) - totalbins, bins, breaks = make_bins(tour, clm.tig_to_size) - M = read_clm(clm, totalbins, bins) - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) # whole canvas - ax1 = fig.add_axes((0.05, 0.1, 0.4, 0.8)) # heatmap - ax2 = fig.add_axes((0.55, 0.1, 0.4, 0.8)) # dot plot - ax2_root = fig.add_axes((0.5, 0, 0.5, 1)) # dot plot canvas - - # Left axis: heatmap - plot_heatmap(ax1, M, breaks, binsize=BINSIZE) - - # Right axis: synteny - qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts, sorted=False) - dotplot(anchorsfile, qbed, sbed, fig, ax2_root, ax2, sep=False, title="") - - root.text(0.5, 0.98, clm.name, color="g", ha="center", va="center") - root.text(0.5, 0.95, label, color="darkslategray", ha="center", va="center") - normalize_axes(root) - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def make_bins(tour, sizes): - breaks = [] - start = 0 - bins = {} - for x in tour: - size = sizes[x] - end = start + int(round(size * 1.0 / BINSIZE)) - bins[x] = (start, end) - start = end - breaks.append(start) - - totalbins = start - return totalbins, bins, breaks - - -def read_clm(clm, totalbins, bins): - M = np.zeros((totalbins, totalbins)) - for (x, y), z in clm.contacts.items(): - if x not in bins or y not in bins: - continue - xstart, xend = bins[x] - ystart, yend = bins[y] - M[xstart:xend, ystart:yend] = z - M[ystart:yend, xstart:xend] = z - - M = np.log10(M + 1) - return M - - -def agp(args): 
- """ - %prog agp main_results/ contigs.fasta - - Generate AGP file based on LACHESIS output. - """ - p = OptionParser(agp.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - odir, contigsfasta = args - fwagp = must_open(opts.outfile, "w") - orderingfiles = natsorted(iglob(odir, "*.ordering")) - sizes = Sizes(contigsfasta).mapping - contigs = set(sizes.keys()) - anchored = set() - - for ofile in orderingfiles: - co = ContigOrdering(ofile) - anchored |= set([x.contig_name for x in co]) - obj = op.basename(ofile).split(".")[0] - co.write_agp(obj, sizes, fwagp) - - singletons = contigs - anchored - logger.debug("Anchored: %d, Singletons: %d", len(anchored), len(singletons)) - - for s in natsorted(singletons): - order_to_agp(s, [(s, "?")], sizes, fwagp) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/kmer.py b/jcvi/assembly/kmer.py deleted file mode 100644 index f50d69ca..00000000 --- a/jcvi/assembly/kmer.py +++ /dev/null @@ -1,1410 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Deals with K-mers and K-mer distribution from reads or genome -""" -import os.path as op -import sys -import math - -from collections import defaultdict -from typing import List - -import numpy as np -from more_itertools import chunked - -from ..apps.grid import MakeManager -from ..apps.base import ( - ActionDispatcher, - OptionParser, - PIPE, - Popen, - logger, - need_update, - sh, -) -from ..formats.fasta import Fasta -from ..formats.base import BaseFile, must_open, get_number -from ..graphics.base import ( - adjust_spines, - asciiplot, - markup, - normalize_axes, - panel_labels, - plt, - savefig, - set_human_axis, - set_ticklabels_helvetica, - write_messages, -) -from ..utils.cbook import thousands, percentage - -from .automaton import iter_project - - -KMERYL, KSOAP, KALLPATHS = range(3) - - -class KmerSpectrum(BaseFile): - def __init__(self, histfile): - super().__init__(histfile) - 
self.load_data(histfile) - - def load_data(self, histfile): - self.data = [] - self.totalKmers = 0 - self.hist = {} - kformat = self.guess_format(histfile) - kformats = ("Meryl", "Soap", "AllPaths") - logger.debug("Guessed format: %s", kformats[kformat]) - - fp = open(histfile) - for rowno, row in enumerate(fp): - if row[0] == "#": - continue - if kformat == KSOAP: - K = rowno + 1 - counts = int(row.strip()) - else: # meryl histogram - K, counts = row.split()[:2] - K, counts = int(K), int(counts) - - Kcounts = K * counts - self.totalKmers += Kcounts - self.hist[K] = Kcounts - self.data.append((K, counts)) - - def guess_format(self, histfile): - # Guess the format of the Kmer histogram - fp = open(histfile) - for row in fp: - if row.startswith("# 1:"): - return KALLPATHS - if len(row.split()) == 1: - return KSOAP - return KMERYL - - def get_xy(self, vmin=1, vmax=100): - self.counts = sorted((a, b) for a, b in self.hist.items() if vmin <= a <= vmax) - return zip(*self.counts) - - def analyze(self, K=23, maxiter=100, method="nbinom"): - """Analyze K-mer histogram. - - Args: - K (int, optional): K-mer size. Defaults to 23. - maxiter (int): Iterations to run. Defaults to 100. - method (str, optional): Method to use, either 'nbinom' or - 'allpaths'. Defaults to "nbinom". - - Returns: - A dictionary containing info for annotating the plot. analyze() also - sets the following properties: - - lambda_: Main peak - - repetitive: Repeats message - - snprate: SNP rate message - """ - if method == "nbinom": - return self.analyze_nbinom(K=K, maxiter=maxiter) - return self.analyze_allpaths(K=K) - - def analyze_nbinom(self, K=23, maxiter=100): - """Analyze the K-mer histogram using negative binomial distribution. - - Args: - K (int, optional): K-mer size used when generating the histogram. Defaults to 23. 
- """ - from scipy.stats import nbinom - from scipy.optimize import minimize_scalar - from functools import lru_cache - - method, xopt = "bounded", "xatol" - MAX_1CN_SIZE = 1e10 - MAX_OPTIMIZED_SIZE = 9.9e9 - - # Generate bins for the decomposed negative binomial distributions - bins = [ - (i, i) for i in range(1, 9) - ] # The first 8 CN are critical often determines ploidy - for i in (8, 16, 32, 64, 128, 256, 512): # 14 geometricly sized bins - a, b = i + 1, int(round(i * 2**0.5)) - bins.append((a, b)) - a, b = b + 1, i * 2 - bins.append((a, b)) - - # Convert histogram to np array so we can index by CN - kf_ceil = max([cov for cov, _ in self.data]) - N = kf_ceil + 1 - hist = np.zeros(N, dtype=int) - for cov, count in self.data: - hist[cov] = count - - # min1: find first minimum - _kf_min1 = 5 - while ( - _kf_min1 - 1 >= 2 - and hist[_kf_min1 - 1] * (_kf_min1 - 1) < hist[_kf_min1] * _kf_min1 - ): - _kf_min1 -= 1 - while ( - _kf_min1 <= kf_ceil - and hist[_kf_min1 + 1] * (_kf_min1 + 1) < hist[_kf_min1] * _kf_min1 - ): - _kf_min1 += 1 - - # max2: find absolute maximum mx2 above first minimum min1 - _kf_max2 = _kf_min1 - for kf in range(_kf_min1 + 1, int(0.8 * kf_ceil)): - if hist[kf] * kf > hist[_kf_max2] * _kf_max2: - _kf_max2 = kf - - # Discard the last entry as that is usually an inflated number - hist = hist[:-1] - kf_range = np.arange(_kf_min1, len(hist), dtype=int) - P = hist[kf_range] * kf_range # Target distribution - print("==> Start nbinom method on range ({}, {})".format(_kf_min1, len(hist))) - - # Below is the optimization schemes, we optimize one variable at a time - @lru_cache(maxsize=None) - def nbinom_pmf_range(lambda_: int, rho: int, bin_id: int): - stacked = np.zeros(len(kf_range), dtype=np.float64) - lambda_ /= 100 # 2-digit precision - rho /= 100 # 2-digit precision - n = lambda_ / (rho - 1) - p = 1 / rho - start, end = bins[bin_id] - for i in range(start, end + 1): - stacked += nbinom.pmf(kf_range, n * i, p) - return stacked - - def 
generative_model(G, lambda_, rho): - stacked = np.zeros(len(kf_range), dtype=np.float64) - lambda_ = int(round(lambda_ * 100)) - rho = int(round(rho * 100)) - for bin_id, g in enumerate(G): - stacked += g * nbinom_pmf_range(lambda_, rho, bin_id) - stacked *= kf_range - return stacked - - def func(lambda_, rho, G): - stacked = generative_model(G, lambda_, rho) - return np.sum((P - stacked) ** 2) # L2 norm - - def optimize_func(lambda_, rho, G): - # Iterate over all G - for i, g in enumerate(G): - G_i = optimize_func_Gi(lambda_, rho, G, i) - if ( - not 1 < G_i < MAX_OPTIMIZED_SIZE - ): # Optimizer did not optimize this G_i - break - # Also remove the last bin since it is subject to marginal effect - G[i - 1] = 0 - lambda_ = optimize_func_lambda_(lambda_, rho, G) - rho = optimize_func_rho(lambda_, rho, G) - score = func(lambda_, rho, G) - return lambda_, rho, G, score - - def optimize_func_lambda_(lambda_, rho, G): - def f(arg): - return func(arg, rho, G) - - res = minimize_scalar( - f, bounds=(_kf_min1, 100), method=method, options={xopt: 0.01} - ) - return res.x - - def optimize_func_rho(lambda_, rho, G): - def f(arg): - return func(lambda_, arg, G) - - res = minimize_scalar( - f, bounds=(1.001, 5), method=method, options={xopt: 0.01} - ) - return res.x - - def optimize_func_Gi(lambda_, rho, G, i): - # Iterate a single G_i - def f(arg): - G[i] = arg - return func(lambda_, rho, G) - - res = minimize_scalar( - f, bounds=(0, MAX_1CN_SIZE), method=method, options={xopt: 100} - ) - return res.x - - def run_optimization(termination=0.999, maxiter=100): - ll, rr, GG = l0, r0, G0 - prev_score = np.inf - for i in range(maxiter): - print("Iteration", i + 1, file=sys.stderr) - ll, rr, GG, score = optimize_func(ll, rr, GG) - if score / prev_score > termination: - break - prev_score = score - if i % 10 == 0: - print(ll, rr, GG, score, file=sys.stderr) - print("Success!", file=sys.stderr) - # Remove bogus values that are close to the bounds - final_GG = [g for g in GG if 1 < g < 
MAX_OPTIMIZED_SIZE] - return ll, rr, final_GG - - # Optimization - very slow - G0 = np.zeros(len(bins)) - l0 = _kf_max2 - r0 = 1.5 - print(l0, r0, G0, file=sys.stderr) - ll, rr, GG = run_optimization(maxiter=maxiter) - print(ll, rr, GG, file=sys.stderr) - - # Ready for genome summary - m = f"\n==> Kmer (K={K}) Spectrum Analysis\n" - - genome_size = int(round(self.totalKmers / ll)) - inferred_genome_size = 0 - for i, g in enumerate(GG): - start, end = bins[i] - mid = (start + end) / 2 - inferred_genome_size += g * mid * (end - start + 1) - inferred_genome_size = int(round(inferred_genome_size)) - genome_size = max(genome_size, inferred_genome_size) - m += f"Genome size estimate = {thousands(genome_size)}\n" - copy_series = [] - copy_messages = [] - for i, g in enumerate(GG): - start, end = bins[i] - mid = (start + end) / 2 - copy_num = start if start == end else "{}-{}".format(start, end) - g_copies = int(round(g * mid * (end - start + 1))) - copy_series.append((mid, copy_num, g_copies, g)) - copy_message = f"CN {copy_num}: {g_copies / 1e6:.1f} Mb ({ g_copies * 100 / genome_size:.1f} %)" - copy_messages.append(copy_message) - m += copy_message + "\n" - - if genome_size > inferred_genome_size: - g_copies = genome_size - inferred_genome_size - copy_num = "{}+".format(end + 1) - copy_series.append((end + 1, copy_num, g_copies, g_copies / (end + 1))) - m += f"CN {copy_num}: {g_copies / 1e6:.1f} Mb ({ g_copies * 100 / genome_size:.1f} %)\n" - - # Determine ploidy - def determine_ploidy(copy_series, threshold=0.15): - counts_so_far = 1 - ploidy_so_far = 0 - for mid, _, g_copies, _ in copy_series: - if g_copies / counts_so_far < threshold: - break - counts_so_far += g_copies - ploidy_so_far = mid - return int(ploidy_so_far) - - ploidy = determine_ploidy(copy_series) - self.ploidy = ploidy - self.ploidy_message = f"Ploidy: {ploidy}" - m += self.ploidy_message + "\n" - self.copy_messages = copy_messages[:ploidy] - - # Repeat content - def calc_repeats(copy_series, ploidy, 
genome_size): - unique = 0 - for mid, _, g_copies, _ in copy_series: - if mid <= ploidy: - unique += g_copies - else: - break - return 1 - unique / genome_size - - repeats = calc_repeats(copy_series, ploidy, genome_size) - self.repetitive = f"Repeats: {repeats * 100:.1f} %" - m += self.repetitive + "\n" - - # SNP rate - def calc_snp_rate(copy_series, ploidy, genome_size, K): - # We can calculate the SNP rate s, assuming K-mer of length K: - # s = 1-(1-L/G)^(1/K) - # L: # of unique K-mers under 'het' peak - # G: genome size - # K: K-mer length - L = 0 - for mid, copy_num, g_copies, g in copy_series: - if mid < ploidy: - L += g - else: - break - return 1 - (1 - L / genome_size) ** (1 / K) - - snp_rate = calc_snp_rate(copy_series, ploidy, genome_size, K) - self.snprate = f"SNP rate: {snp_rate * 100:.2f} %" - m += self.snprate + "\n" - print(m, file=sys.stderr) - - self.lambda_ = ll - return { - "generative_model": generative_model, - "Gbins": GG, - "lambda": ll, - "rho": rr, - "kf_range": kf_range, - } - - def analyze_allpaths(self, ploidy=2, K=23, covmax=1000000): - """ - Analyze Kmer spectrum, calculations derived from - allpathslg/src/kmers/KmerSpectra.cc - """ - from math import sqrt - - data = self.data - kf_ceil = max(K for (K, c) in data) - if kf_ceil > covmax: - exceeds = sum(1 for (K, c) in data if K > covmax) - logger.debug( - "A total of %d distinct K-mers appear > %d times. Ignored ...", - exceeds, - covmax, - ) - kf_ceil = covmax - - nkf = kf_ceil + 1 - a = [0] * nkf - for kf, c in data: - if kf > kf_ceil: - continue - a[kf] = c - - ndk = a # number of distinct kmers - nk = [k * c for k, c in enumerate(a)] # number of kmers - cndk = [0] * nkf # cumulative number of distinct kmers - cnk = [0] * nkf # cumulative number of kmers - for kf in range(1, nkf): - cndk[kf] = cndk[kf - 1] + 0.5 * (ndk[kf - 1] + ndk[kf]) - cnk[kf] = cnk[kf - 1] + 0.5 * (nk[kf - 1] + nk[kf]) - - # Separate kmer spectrum in 5 regions based on the kf - # 1 ... 
kf_min1 : bad kmers with low frequency - # kf_min1 ... kf_min2 : good kmers CN = 1/2 (SNPs) - # kf_min2 ... kf_min3 : good kmers CN = 1 - # kf_min3 ... kf_hi : good kmers CN > 1 (repetitive) - # kf_hi ... inf : bad kmers with high frequency - - # min1: find first minimum - _kf_min1 = 10 - while _kf_min1 - 1 >= 2 and nk[_kf_min1 - 1] < nk[_kf_min1]: - _kf_min1 -= 1 - while _kf_min1 <= kf_ceil and nk[_kf_min1 + 1] < nk[_kf_min1]: - _kf_min1 += 1 - - # max2: find absolute maximum mx2 above first minimum min1 - _kf_max2 = _kf_min1 - for kf in range(_kf_min1 + 1, int(0.8 * kf_ceil)): - if nk[kf] > nk[_kf_max2]: - _kf_max2 = kf - - # max2: resetting max2 for cases of very high polymorphism - if ploidy == 2: - ndk_half = ndk[_kf_max2 // 2] - ndk_double = ndk[_kf_max2 * 2] - if ndk_double > ndk_half: - _kf_max2 *= 2 - - # max1: SNPs local maximum max1 as half global maximum max2 - _kf_max1 = _kf_max2 // 2 - - # min2: SNPs local minimum min2 between max1 and max2 - _kf_min2 = ( - _kf_max1 - * (2 * ndk[_kf_max1] + ndk[_kf_max2]) - // (ndk[_kf_max1] + ndk[_kf_max2]) - ) - - # min1: refine between min1 and max2/2 - for kf in range(_kf_min1 + 1, _kf_max1): - if nk[kf] < nk[_kf_min1]: - _kf_min1 = kf - - # min3: not a minimum, really. 
upper edge of main peak - _kf_min3 = _kf_max2 * 3 // 2 - - print("kfs:", _kf_min1, _kf_max1, _kf_min2, _kf_max2, _kf_min3, file=sys.stderr) - self.min1 = _kf_min1 - self.max1 = _kf_max1 - self.min2 = _kf_min2 - self.max2 = _kf_max2 - self.min3 = _kf_min3 - self.lambda_ = self.max2 # Main peak - - # Define maximum kf above which we neglect data - _kf_hi = ( - _kf_max2 * sqrt(4 * ndk[2 * _kf_max2] * _kf_max2) - if 2 * _kf_max2 < len(ndk) - else _kf_max2 * sqrt(4 * ndk[len(ndk) - 1] * _kf_max2) - ) - _kf_hi = int(_kf_hi) - - if _kf_hi > kf_ceil: - _kf_hi = kf_ceil - - _nk_total = cnk[len(cnk) - 1] - _nk_bad_low_kf = cnk[_kf_min1] - _nk_good_uniq = cnk[_kf_min3] - cnk[_kf_min2] - _nk_bad_high_kf = _nk_total - cnk[_kf_hi] - _ndk_good_snp = cndk[_kf_min2] - cndk[_kf_min1] - _ndk_good_uniq = cndk[_kf_min3] - cndk[_kf_min2] - - # kmer coverage C_k - _kf_ave_uniq = _nk_good_uniq * 1.0 / _ndk_good_uniq - _genome_size = (_nk_total - _nk_bad_low_kf - _nk_bad_high_kf) / _kf_ave_uniq - _genome_size_unique = _ndk_good_uniq + _ndk_good_snp / 2 - _genome_size_repetitive = _genome_size - _genome_size_unique - _coverage = _nk_total / _genome_size if _genome_size else 0 - - # SNP rate estimation, assumes uniform distribution of SNPs over the - # genome and accounts for the reduction in SNP kmer counts when - # polymorphism is very high - if ploidy == 2: - _d_SNP = ( - 1.0 / (1.0 - (1.0 - 0.5 * _ndk_good_snp / _genome_size) ** (1.0 / K)) - if _ndk_good_snp > 0 - else 1000000 - ) - - G = int(_genome_size) - G1 = int(_genome_size_unique) - GR = int(_genome_size_repetitive) - coverage = int(_coverage) - - m = f"Kmer (K={K}) Spectrum Analysis\n" - m += f"Genome size estimate = {thousands(G)}\n" - m += f"Genome size estimate CN = 1 = {thousands(G1)} ({percentage(G1, G)})\n" - m += f"Genome size estimate CN > 1 = {thousands(GR)} ({percentage(GR, G)})\n" - m += f"Coverage estimate: {coverage} x\n" - self.repetitive = f"Repeats: {GR * 100 // G} %" - - if ploidy == 2: - d_SNP = int(_d_SNP) - 
self.snprate = f"SNP rate ~= 1/{d_SNP}" - else: - self.snprate = f"SNP rate not computed (Ploidy = {ploidy})" - m += self.snprate + "\n" - - self.genomesize = int(round(self.totalKmers * 1.0 / self.max2)) - - print(m, file=sys.stderr) - return {} - - -class KMCComplex(object): - def __init__(self, indices): - self.indices = indices - - def write( - self, - outfile: str, - action: str = "union", - ci_in: int = 0, - ci_out: int = 0, - batch: int = 0, - ): - assert action in ("union", "intersect") - op = " + sum " if action == "union" else " * " - mm = MakeManager() - if batch > 1: - filename = outfile + ".{}.def" - # Divide indices into batches - batches = [] - batchsize = (len(self.indices) + batch - 1) // batch - logger.debug("Use batchsize of %d", batchsize) - for i, indices in enumerate(chunked(self.indices, batchsize)): - filename_i = filename.format(i + 1) - outfile_i = outfile + ".{}".format(i + 1) - self.write_definitions( - filename_i, indices, outfile_i, op, ci_in=ci_in, ci_out=0 - ) - cmd = "kmc_tools complex {}".format(filename_i) - outfile_suf = outfile_i + ".kmc_suf" - mm.add(indices, outfile_suf, cmd) - batches.append(outfile_suf) - else: - batches = self.indices - - # Merge batches into one - filename = outfile + ".def" - self.write_definitions( - filename, batches, outfile, op, ci_in=ci_in, ci_out=ci_out - ) - outfile_suf = outfile + ".kmc_suf" - mm.add(batches, outfile_suf, "kmc_tools complex {}".format(filename)) - - # Write makefile - mm.write() - - def write_definitions( - self, - filename: str, - indices: List[str], - outfile: str, - op: str, - ci_in: int, - ci_out: int, - ): - fw = must_open(filename, "w") - print("INPUT:", file=fw) - ss = [] - pad = len(str(len(indices))) - for i, e in enumerate(indices): - s = "s{0:0{1}d}".format(i + 1, pad) - ss.append(s) - msg = "{} = {}".format(s, e.rsplit(".", 1)[0]) - if ci_in: - msg += f" -ci{ci_in}" - print(msg, file=fw) - print("OUTPUT:", file=fw) - print("{} = {}".format(outfile, op.join(ss)), 
file=fw) - if ci_out: - print("OUTPUT_PARAMS:", file=fw) - print(f"-ci{ci_out}", file=fw) - fw.close() - - -def main(): - - actions = ( - # K-mer counting - ("jellyfish", "count kmers using `jellyfish`"), - ("meryl", "count kmers using `meryl`"), - ("kmc", "count kmers using `kmc`"), - ("kmcop", "intersect or union kmc indices"), - ("entropy", "calculate entropy for kmers from kmc dump"), - ("bed", "map kmers on FASTA"), - # K-mer histogram - ("histogram", "plot the histogram based on meryl K-mer distribution"), - ("multihistogram", "plot histogram across a set of K-mer sizes"), - # These forms a pipeline to count K-mers for given FASTA seq - ("dump", "convert FASTA sequences to list of K-mers"), - ("bin", "serialize counts to bitarrays"), - ("bincount", "count K-mers in the bin"), - ("count", "run dump - jellyfish - bin - bincount in serial"), - ("logodds", "compute log likelihood between two db"), - ("model", "model kmer distribution given error rate"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def entropy_score(kmer): - """ - Schmieder and Edwards. Quality control and preprocessing of metagenomic datasets. 
(2011) Bioinformatics - https://academic.oup.com/bioinformatics/article/27/6/863/236283/Quality-control-and-preprocessing-of-metagenomic - """ - l = len(kmer) - 2 - k = l if l < 64 else 64 - counts = defaultdict(int) - for i in range(l): - trinuc = kmer[i : i + 3] - counts[trinuc] += 1 - - logk = math.log(k) - res = 0 - for k, v in counts.items(): - f = v * 1.0 / l - res += f * math.log(f) / logk - return res * -100 - - -def entropy(args): - """ - %prog entropy kmc_dump.out - - kmc_dump.out contains two columns: - AAAAAAAAAAAGAAGAAAGAAA 34 - """ - p = OptionParser(entropy.__doc__) - p.add_argument( - "--threshold", default=0, type=int, help="Complexity needs to be above" - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (kmc_out,) = args - fp = open(kmc_out) - for row in fp: - kmer, count = row.split() - score = entropy_score(kmer) - if score >= opts.threshold: - print(" ".join((kmer, count, "{:.2f}".format(score)))) - - -def bed(args): - """ - %prog bed fastafile kmer.dump.txt - - Map kmers on FASTA. - """ - from jcvi.formats.fasta import rc, parse_fasta - - p = OptionParser(bed.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, dumpfile = args - fp = open(dumpfile) - KMERS = set() - for row in fp: - kmer = row.split()[0] - kmer_rc = rc(kmer) - KMERS.add(kmer) - KMERS.add(kmer_rc) - - K = len(kmer) - logger.debug("Imported %d %d-mers", len(KMERS), K) - - for name, seq in parse_fasta(fastafile): - name = name.split()[0] - for i in range(len(seq) - K): - if i % 5000000 == 0: - print("{}:{}".format(name, i), file=sys.stderr) - kmer = seq[i : i + K] - if kmer in KMERS: - print("\t".join(str(x) for x in (name, i, i + K, kmer))) - - -def kmcop(args): - """ - %prog kmcop *.kmc_suf - - Intersect or union kmc indices. 
- """ - p = OptionParser(kmcop.__doc__) - p.add_argument( - "--action", - choices=("union", "intersect", "reduce"), - default="union", - help="Action", - ) - p.add_argument( - "--ci_in", - default=0, - type=int, - help="Exclude input kmers with less than ci_in counts", - ) - p.add_argument( - "--cs", - default=0, - type=int, - help="Maximal value of a counter, only used when action is reduce", - ) - p.add_argument( - "--ci_out", - default=0, - type=int, - help="Exclude output kmers with less than ci_out counts", - ) - p.add_argument( - "--batch", - default=1, - type=int, - help="Number of batch, useful to reduce memory usage", - ) - p.add_argument("--exclude", help="Exclude accessions from this list") - p.add_argument("-o", default="results", help="Output name") - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - indices = args - if opts.exclude: - before = set(indices) - exclude_ids = set(x.strip() for x in open(opts.exclude)) - indices = [x for x in indices if x.rsplit(".", 2)[0] not in exclude_ids] - after = set(indices) - if before > after: - logger.debug( - "Excluded accessions %d → %d (%s)", - len(before), - len(after), - ",".join(before - after), - ) - if opts.action == "reduce": - mm = MakeManager() - ci = opts.ci_in - cs = opts.cs - suf = "" - if ci: - suf += f"_ci{ci}" - if cs: - suf += f"_cs{cs}" - for index in indices: - idx = index.rsplit(".", 1)[0] - reduced_idx = idx + suf - cmd = f"kmc_tools transform {idx} reduce {reduced_idx}" - if ci: - cmd += f" -ci{ci}" - if cs: - cmd += f" -cs{cs}" - reduced_index = reduced_idx + ".kmc_suf" - mm.add(index, reduced_index, cmd) - mm.write() - else: - ku = KMCComplex(indices) - ku.write( - opts.o, - action=opts.action, - ci_in=opts.ci_in, - ci_out=opts.ci_out, - batch=opts.batch, - ) - - -def kmc(args): - """ - %prog kmc folder - - Run kmc3 on Illumina reads. 
- """ - p = OptionParser(kmc.__doc__) - p.add_argument("-k", default=27, type=int, help="Kmer size") - p.add_argument( - "--ci", default=2, type=int, help="Exclude kmers with less than ci counts" - ) - p.add_argument("--cs", default=0, type=int, help="Maximal value of a counter") - p.add_argument("--cx", type=int, help="Exclude kmers with more than cx counts") - p.add_argument( - "--single", - default=False, - action="store_true", - help="Input is single-end data, only one FASTQ/FASTA", - ) - p.add_argument( - "--fasta", - default=False, - action="store_true", - help="Input is FASTA instead of FASTQ", - ) - p.add_argument( - "--mem", default=48, type=int, help="Max amount of RAM in GB (`kmc -m`)" - ) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (folder,) = args - K = opts.k - n = 1 if opts.single else 2 - pattern = ( - "*.fa,*.fa.gz,*.fasta,*.fasta.gz" - if opts.fasta - else "*.fq,*.fq.gz,*.fastq,*.fastq.gz" - ) - - mm = MakeManager() - for p, pf in iter_project(folder, pattern=pattern, n=n, commonprefix=False): - pf = pf.split("_")[0] + ".ms{}".format(K) - infiles = pf + ".infiles" - fw = open(infiles, "w") - print("\n".join(p), file=fw) - fw.close() - - cmd = "kmc -k{} -m{} -t{}".format(K, opts.mem, opts.cpus) - cmd += " -ci{}".format(opts.ci) - if opts.cs: - cmd += " -cs{}".format(opts.cs) - if opts.cx: - cmd += " -cx{}".format(opts.cx) - if opts.fasta: - cmd += " -fm" - cmd += " @{} {} .".format(infiles, pf) - outfile = pf + ".kmc_suf" - mm.add(p, outfile, cmd) - - mm.write() - - -def meryl(args): - """ - %prog meryl folder - - Run meryl on Illumina reads. 
- """ - p = OptionParser(meryl.__doc__) - p.add_argument("-k", default=19, type=int, help="Kmer size") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (folder,) = args - K = opts.k - cpus = opts.cpus - mm = MakeManager() - for p, pf in iter_project(folder): - cmds = [] - mss = [] - for i, ip in enumerate(p): - ms = "{}{}.ms{}".format(pf, i + 1, K) - mss.append(ms) - cmd = "meryl -B -C -m {} -threads {}".format(K, cpus) - cmd += " -s {} -o {}".format(ip, ms) - cmds.append(cmd) - ams, bms = mss - pms = "{}.ms{}".format(pf, K) - cmd = "meryl -M add -s {} -s {} -o {}".format(ams, bms, pms) - cmds.append(cmd) - cmd = "rm -f {}.mcdat {}.mcidx {}.mcdat {}.mcidx".format(ams, ams, bms, bms) - cmds.append(cmd) - mm.add(p, pms + ".mcdat", cmds) - - mm.write() - - -def model(args): - """ - %prog model erate - - Model kmer distribution given error rate. See derivation in FIONA paper: - - """ - from scipy.stats import binom, poisson - - p = OptionParser(model.__doc__) - p.add_argument("-k", default=23, type=int, help="Kmer size") - p.add_argument("--cov", default=50, type=int, help="Expected coverage") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (erate,) = args - erate = float(erate) - cov = opts.cov - k = opts.k - - xy = [] - # Range include c although it is unclear what it means to have c=0 - for c in range(0, cov * 2 + 1): - Prob_Yk = 0 - for i in range(k + 1): - # Probability of having exactly i errors - pi_i = binom.pmf(i, k, erate) - # Expected coverage of kmer with exactly i errors - mu_i = cov * (erate / 3) ** i * (1 - erate) ** (k - i) - # Probability of seeing coverage of c - Prob_Yk_i = poisson.pmf(c, mu_i) - # Sum i over 0, 1, ... up to k errors - Prob_Yk += pi_i * Prob_Yk_i - xy.append((c, Prob_Yk)) - - x, y = zip(*xy) - asciiplot(x, y, title="Model") - - -def logodds(args): - """ - %prog logodds cnt1 cnt2 - - Compute log likelihood between two db. 
- """ - from math import log - from jcvi.formats.base import DictFile - - p = OptionParser(logodds.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - cnt1, cnt2 = args - d = DictFile(cnt2) - fp = open(cnt1) - for row in fp: - scf, c1 = row.split() - c2 = d[scf] - c1, c2 = float(c1), float(c2) - c1 += 1 - c2 += 1 - score = int(100 * (log(c1) - log(c2))) - print("{0}\t{1}".format(scf, score)) - - -def get_K(jfdb): - """ - Infer K from jellyfish db. - """ - j = jfdb.rsplit("_", 1)[0].rsplit("-", 1)[-1] - assert j[0] == "K" - return int(j[1:]) - - -def count(args): - """ - %prog count fastafile jf.db - - Run dump - jellyfish - bin - bincount in serial. - """ - from bitarray import bitarray - - p = OptionParser(count.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, jfdb = args - K = get_K(jfdb) - cmd = "jellyfish query {0} -C | cut -d' ' -f 2".format(jfdb) - t = must_open("tmp", "w") - proc = Popen(cmd, stdin=PIPE, stdout=t) - t.flush() - - f = Fasta(fastafile, lazy=True) - for name, rec in f.iteritems_ordered(): - kmers = list(make_kmers(rec.seq, K)) - print("\n".join(kmers), file=proc.stdin) - proc.stdin.close() - logger.debug(cmd) - proc.wait() - - a = bitarray() - binfile = ".".join((fastafile, jfdb, "bin")) - fw = open(binfile, "w") - t.seek(0) - for row in t: - c = row.strip() - a.append(int(c)) - a.tofile(fw) - logger.debug("Serialize %d bits to `%s`.", len(a), binfile) - fw.close() - sh("rm {0}".format(t.name)) - - logger.debug( - "Shared K-mers (K=%d) between `%s` and `%s` written to `%s`.", - K, - fastafile, - jfdb, - binfile, - ) - cntfile = ".".join((fastafile, jfdb, "cnt")) - bincount([fastafile, binfile, "-o", cntfile, "-K {0}".format(K)]) - logger.debug("Shared K-mer counts written to `%s`.", cntfile) - - -def bincount(args): - """ - %prog bincount fastafile binfile - - Count K-mers in the bin. 
- """ - from bitarray import bitarray - from jcvi.formats.sizes import Sizes - - p = OptionParser(bincount.__doc__) - p.add_argument("-K", default=23, type=int, help="K-mer size") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, binfile = args - K = opts.K - - fp = open(binfile) - a = bitarray() - a.fromfile(fp) - f = Sizes(fastafile) - tsize = 0 - fw = must_open(opts.outfile, "w") - for name, seqlen in f.iter_sizes(): - ksize = seqlen - K + 1 - b = a[tsize : tsize + ksize] - bcount = b.count() - print("\t".join(str(x) for x in (name, bcount)), file=fw) - tsize += ksize - - -def bin(args): - """ - %prog bin filename filename.bin - - Serialize counts to bitarrays. - """ - from bitarray import bitarray - - p = OptionParser(bin.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - inp, outp = args - fp = must_open(inp) - fw = must_open(outp, "w") - a = bitarray() - for row in fp: - c = row.split()[-1] - a.append(int(c)) - a.tofile(fw) - fw.close() - - -def make_kmers(seq, K): - seq = str(seq).upper().replace("N", "A") - seqlen = len(seq) - for i in range(seqlen - K + 1): - yield seq[i : i + K] - - -def dump(args): - """ - %prog dump fastafile - - Convert FASTA sequences to list of K-mers. - """ - p = OptionParser(dump.__doc__) - p.add_argument("-K", default=23, type=int, help="K-mer size") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - K = opts.K - fw = must_open(opts.outfile, "w") - f = Fasta(fastafile, lazy=True) - for name, rec in f.iteritems_ordered(): - kmers = list(make_kmers(rec.seq, K)) - print("\n".join(kmers), file=fw) - fw.close() - - -def jellyfish(args): - """ - %prog jellyfish [*.fastq|*.fasta] - - Run jellyfish to dump histogram to be used in kmer.histogram(). 
- """ - from jcvi.apps.base import getfilesize - from jcvi.utils.cbook import human_size - - p = OptionParser(jellyfish.__doc__) - p.add_argument("-K", default=23, type=int, help="K-mer size") - p.add_argument( - "--coverage", - default=40, - type=int, - help="Expected sequence coverage", - ) - p.add_argument("--prefix", default="jf", help="Database prefix") - p.add_argument( - "--nohist", - default=False, - action="store_true", - help="Do not print histogram", - ) - p.set_home("jellyfish") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - fastqfiles = args - K = opts.K - coverage = opts.coverage - - totalfilesize = sum(getfilesize(x) for x in fastqfiles) - fq = fastqfiles[0] - pf = opts.prefix - gzip = fq.endswith(".gz") - - hashsize = totalfilesize / coverage - logger.debug( - "Total file size: %s, hashsize (-s): %d", - human_size(totalfilesize, a_kilobyte_is_1024_bytes=True), - hashsize, - ) - - jfpf = "{0}-K{1}".format(pf, K) - jfdb = jfpf - fastqfiles = " ".join(fastqfiles) - - jfcmd = op.join(opts.jellyfish_home, "jellyfish") - cmd = jfcmd - cmd += " count -t {0} -C -o {1}".format(opts.cpus, jfpf) - cmd += " -s {0} -m {1}".format(hashsize, K) - if gzip: - cmd = "gzip -dc {0} | ".format(fastqfiles) + cmd + " /dev/fd/0" - else: - cmd += " " + fastqfiles - - if need_update(fastqfiles, jfdb): - sh(cmd) - - if opts.nohist: - return - - jfhisto = jfpf + ".histogram" - cmd = jfcmd + " histo -t 64 {0} -o {1}".format(jfdb, jfhisto) - - if need_update(jfdb, jfhisto): - sh(cmd) - - -def multihistogram(args): - """ - %prog multihistogram *.histogram species - - Plot the histogram based on a set of K-mer hisotograms. The method is based - on Star et al.'s method (Atlantic Cod genome paper). 
- """ - p = OptionParser(multihistogram.__doc__) - p.add_argument("--kmin", default=15, type=int, help="Minimum K-mer size, inclusive") - p.add_argument("--kmax", default=30, type=int, help="Maximum K-mer size, inclusive") - p.add_argument("--vmin", default=2, type=int, help="Minimum value, inclusive") - p.add_argument("--vmax", default=100, type=int, help="Maximum value, inclusive") - opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300) - - if len(args) < 1: - sys.exit(not p.print_help()) - - histfiles = args[:-1] - species = args[-1] - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - A = fig.add_axes((0.08, 0.12, 0.38, 0.76)) - B = fig.add_axes((0.58, 0.12, 0.38, 0.76)) - - lines = [] - legends = [] - genomesizes = [] - for histfile in histfiles: - ks = KmerSpectrum(histfile) - x, y = ks.get_xy(opts.vmin, opts.vmax) - K = get_number(op.basename(histfile).split(".")[0].split("-")[-1]) - if not opts.kmin <= K <= opts.kmax: - continue - - (line,) = A.plot(x, y, "-", lw=1) - lines.append(line) - legends.append("K = {0}".format(K)) - ks.analyze(K=K, method="allpaths") - genomesizes.append((K, ks.genomesize / 1e6)) - - leg = A.legend(lines, legends, shadow=True, fancybox=True) - leg.get_frame().set_alpha(0.5) - - title = "{0} genome K-mer histogram".format(species) - A.set_title(markup(title)) - xlabel, ylabel = "Coverage (X)", "Counts" - A.set_xlabel(xlabel) - A.set_ylabel(ylabel) - set_human_axis(A) - - title = "{0} genome size estimate".format(species) - B.set_title(markup(title)) - x, y = zip(*genomesizes) - B.plot(x, y, "ko", mfc="w") - t = np.linspace(opts.kmin - 0.5, opts.kmax + 0.5, 100) - p = np.poly1d(np.polyfit(x, y, 2)) - B.plot(t, p(t), "r:") - - xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)" - B.set_xlabel(xlabel) - B.set_ylabel(ylabel) - set_ticklabels_helvetica(B) - - labels = ((0.04, 0.96, "A"), (0.54, 0.96, "B")) - panel_labels(root, labels) - - normalize_axes(root) - imagename = species + 
".multiK.pdf" - savefig(imagename, dpi=iopts.dpi, iopts=iopts) - - -def plot_nbinom_fit(ax, ks: KmerSpectrum, ymax: float, method_info: dict): - """ - Plot the negative binomial fit. - """ - generative_model = method_info["generative_model"] - GG = method_info["Gbins"] - ll = method_info["lambda"] - rr = method_info["rho"] - kf_range = method_info["kf_range"] - stacked = generative_model(GG, ll, rr) - ax.plot( - kf_range, - stacked, - ":", - color="#6a3d9a", - lw=2, - ) - # Plot multiple CN locations, CN1, CN2, ... up to ploidy - cn_color = "#a6cee3" - for i in range(1, ks.ploidy + 1): - x = i * ks.lambda_ - ax.plot((x, x), (0, ymax), "-.", color=cn_color) - ax.text( - x, - ymax * 0.95, - f"CN{i}", - ha="right", - va="center", - color=cn_color, - rotation=90, - ) - - -def draw_ks_histogram( - ax, - histfile: str, - method: str, - coverage: int, - vmin: int, - vmax: int, - species: str, - K: int, - maxiter: int, - peaks: bool, -) -> int: - """ - Draw the K-mer histogram. - """ - ks = KmerSpectrum(histfile) - method_info = ks.analyze(K=K, maxiter=maxiter, method=method) - - Total_Kmers = int(ks.totalKmers) - Kmer_coverage = ks.lambda_ if not coverage else coverage - Genome_size = int(round(Total_Kmers * 1.0 / Kmer_coverage)) - - Total_Kmers_msg = f"Total {K}-mers: {thousands(Total_Kmers)}" - Kmer_coverage_msg = f"{K}-mer coverage: {Kmer_coverage:.1f}x" - Genome_size_msg = f"Estimated genome size: {Genome_size / 1e6:.1f} Mb" - Repetitive_msg = ks.repetitive - SNPrate_msg = ks.snprate - - messages = [ - Total_Kmers_msg, - Kmer_coverage_msg, - Genome_size_msg, - Repetitive_msg, - SNPrate_msg, - ] - for msg in messages: - print(msg, file=sys.stderr) - - x, y = ks.get_xy(vmin, vmax) - title = f"{species} {K}-mer histogram" - - ax.bar(x, y, fc="#b2df8a", lw=0) - - if peaks: # Only works for method 'allpaths' - t = (ks.min1, ks.max1, ks.min2, ks.max2, ks.min3) - tcounts = [(x, y) for x, y in ks.counts if x in t] - if tcounts: - x, y = zip(*tcounts) - tcounts = dict(tcounts) 
- ax.plot(x, y, "ko", lw=3, mec="k", mfc="w") - ax.text(ks.max1, tcounts[ks.max1], "SNP peak") - ax.text(ks.max2, tcounts[ks.max2], "Main peak") - - _, ymax = ax.get_ylim() - ymax *= 7 / 6 - # Plot the negative binomial fit - if method == "nbinom": - plot_nbinom_fit(ax, ks, ymax, method_info) - messages += [ks.ploidy_message] + ks.copy_messages - - write_messages(ax, messages) - - ax.set_title(markup(title)) - ax.set_xlim((0, vmax)) - ax.set_ylim((0, ymax)) - adjust_spines(ax, ["left", "bottom"], outward=True) - xlabel, ylabel = "Coverage (X)", "Counts" - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - set_human_axis(ax) - - return Genome_size - - -def histogram(args): - """ - %prog histogram meryl.histogram species K - - Plot the histogram based on Jellyfish or meryl K-mer distribution, species and N are - only used to annotate the graphic. - """ - p = OptionParser(histogram.__doc__) - p.add_argument( - "--vmin", - dest="vmin", - default=2, - type=int, - help="minimum value, inclusive", - ) - p.add_argument( - "--vmax", - dest="vmax", - default=200, - type=int, - help="maximum value, inclusive", - ) - p.add_argument( - "--method", - choices=("nbinom", "allpaths"), - default="nbinom", - help="'nbinom' - slow but more accurate for het or polyploid genome; " - + "'allpaths' - fast and works for homozygous enomes", - ) - p.add_argument( - "--maxiter", - default=100, - type=int, - help="Max iterations for optimization. 
Only used with --method nbinom", - ) - p.add_argument( - "--coverage", default=0, type=int, help="Kmer coverage [default: auto]" - ) - p.add_argument( - "--nopeaks", - default=False, - action="store_true", - help="Do not annotate K-mer peaks", - ) - opts, args, iopts = p.set_image_options(args, figsize="7x7") - - if len(args) != 3: - sys.exit(not p.print_help()) - - histfile, species, N = args - method = opts.method - vmin, vmax = opts.vmin, opts.vmax - peaks = not opts.nopeaks and method == "allpaths" - N = int(N) - - fig = plt.figure(1, (iopts.w, iopts.h)) - ax = fig.add_axes((0.1, 0.1, 0.8, 0.8)) - - Genome_size = draw_ks_histogram( - ax, histfile, method, opts.coverage, vmin, vmax, species, N, opts.maxiter, peaks - ) - - imagename = histfile.split(".")[0] + "." + iopts.format - savefig(imagename, dpi=100) - - return Genome_size - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/opticalmap.py b/jcvi/assembly/opticalmap.py deleted file mode 100644 index 88dabb55..00000000 --- a/jcvi/assembly/opticalmap.py +++ /dev/null @@ -1,427 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Optical map alignment parser. 
-""" -import sys - -from collections import defaultdict -from xml.etree.ElementTree import ElementTree - -import numpy as np -from more_itertools import pairwise - -from ..apps.base import ActionDispatcher, OptionParser, logger -from ..formats.base import must_open -from ..formats.bed import Bed -from ..utils.range import range_chain, range_parse, Range - - -class OpticalMap(object): - def __init__(self, xmlfile): - tree = ElementTree() - self.root = tree.parse(xmlfile) - self.maps = dict(self.iter_maps()) - self.alignments = [] - - for ref, aligned, e in self.iter_alignments(): - aligned_map = self.maps[aligned] - nfrags = aligned_map.num_frags - if e.orientation == "-": - e.alignment = [(nfrags - i - 1, l, r) for (i, l, r) in e.alignment] - self.alignments.append(e) - - def iter_maps(self): - for e in self.root.findall("restriction_map"): - e = RestrictionMap(e) - yield e.name, e - - def iter_alignments(self): - for e in self.root.findall("map_alignment"): - e = MapAlignment(e) - yield e.reference_map_name, e.aligned_map_name, e - - def write_bed( - self, bedfile="stdout", point=False, scale=None, blockonly=False, switch=False - ): - fw = must_open(bedfile, "w") - # when switching ref_map and aligned_map elements, disable `blockOnly` - if switch: - blockonly = False - for a in self.alignments: - reference_map_name = a.reference_map_name - aligned_map_name = a.aligned_map_name - - ref_map = self.maps[reference_map_name] - aligned_map = self.maps[aligned_map_name] - - ref_blocks = ref_map.cumsizes - aligned_blocks = aligned_map.cumsizes - - score = a.soma_score - score = "{0:.1f}".format(score) - orientation = a.orientation - - endpoints = [] - ref_endpoints = [] - for i, l, r in a.alignment: - start = 0 if i == 0 else (aligned_blocks[i - 1] - 1) - end = aligned_blocks[i] - 1 - endpoints.extend([start, end]) - - ref_start = ref_blocks[l - 1] - 1 - ref_end = ref_blocks[r] - 1 - ref_endpoints.extend([ref_start, ref_end]) - - if switch: - if scale: - ref_start /= 
scale - ref_end /= scale - accn = "{0}:{1}-{2}".format(reference_map_name, ref_start, ref_end) - else: - if scale: - start /= scale - end /= scale - accn = "{0}:{1}-{2}".format(aligned_map_name, start, end) - - if point: - accn = accn.rsplit("-")[0] - - if not blockonly: - bed_elems = ( - [ - reference_map_name, - ref_start, - ref_end, - accn, - score, - orientation, - ] - if not switch - else [aligned_map_name, start, end, accn, score, orientation] - ) - print("\t".join(str(x) for x in bed_elems), file=fw) - - if blockonly: - start, end = min(endpoints), max(endpoints) - accn = "{0}:{1}-{2}".format(aligned_map_name, start, end) - - start, end = min(ref_endpoints), max(ref_endpoints) - print( - "\t".join( - str(x) - for x in ( - reference_map_name, - start, - end, - accn, - score, - orientation, - ) - ), - file=fw, - ) - - -class RestrictionMap(object): - def __init__(self, node): - num_frags = node.find("num_frags").text - map_blocks = node.find("map_block").text - - num_frags = int(num_frags) - - self.name = node.find("name").text - self.num_frags = num_frags - self.map_blocks = [int(round(float(x) * 1000)) for x in map_blocks.split()] - - assert len(self.map_blocks) == self.num_frags - - @property - def cumsizes(self): - return np.cumsum(self.map_blocks) - - -class MapAlignment(object): - def __init__(self, node): - reference_map = node.find("reference_map") - reference_map_name = reference_map.find("name").text - - aligned_map = node.find("aligned_map") - aligned_map_name = aligned_map.find("name").text - aligned_map_orientation = aligned_map.find("orientation").text - - assert aligned_map_orientation in ("N", "R") - self.orientation = "-" if aligned_map_orientation == "R" else "+" - - soma_score = node.find("soma_score").text - count = node.find("count").text - - soma_score = float(soma_score) - count = int(count) - - self.reference_map_name = reference_map_name - self.aligned_map_name = aligned_map_name - self.aligned_map_orientation = aligned_map_orientation 
- - self.soma_score = soma_score - self.alignment = [] - - for f in node.findall("f"): - i = f.find("i").text - l = f.find("l").text - r = f.find("r").text - i, l, r = [int(x) for x in (i, l, r)] - self.alignment.append((i, l, r)) - - -def main(): - - actions = ( - ("bed", "convert xml format into bed format"), - ("condense", "condense split alignments in om bed"), - ("fasta", "use the OM bed to scaffold and create pseudomolecules"), - ("chimera", "scan the bed file to break scaffolds that multi-maps"), - ("silicosoma", "convert .silico to .soma"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def silicosoma(args): - """ - %prog silicosoma in.silico > out.soma - - Convert .silico to .soma file. - - Format of .silico - A text file containing in-silico digested contigs. This file contains pairs - of lines. The first line in each pair constains an identifier, this contig - length in bp, and the number of restriction sites, separated by white space. - The second line contains a white space delimited list of the restriction - site positions. - - Format of .soma - Each line of the text file contains two decimal numbers: The size of the - fragment and the standard deviation (both in kb), separated by white space. - The standard deviation is ignored. - """ - p = OptionParser(silicosoma.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (silicofile,) = args - fp = must_open(silicofile) - fw = must_open(opts.outfile, "w") - next(fp) - positions = [int(x) for x in next(fp).split()] - for a, b in pairwise(positions): - assert a <= b - fragsize = int(round((b - a) / 1000.0)) # kb - if fragsize: - print(fragsize, 0, file=fw) - - -def condense(args): - """ - %prog condense OM.bed - - Merge split alignments in OM bed. 
- """ - from itertools import groupby - from jcvi.assembly.patch import merge_ranges - - p = OptionParser(condense.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - bed = Bed(bedfile, sorted=False) - key = lambda x: (x.seqid, x.start, x.end) - for k, sb in groupby(bed, key=key): - sb = list(sb) - b = sb[0] - chr, start, end, strand = merge_ranges(sb) - - id = "{0}:{1}-{2}".format(chr, start, end) - b.accn = id - print(b) - - -def chimera(args): - """ - %prog chimera bedfile - - Scan the bed file to break scaffolds that multi-maps. - """ - p = OptionParser(chimera.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - bed = Bed(bedfile) - selected = select_bed(bed) - mapped = defaultdict(set) # scaffold => chr - chimerabed = "chimera.bed" - fw = open(chimerabed, "w") - for b in selected: - scf = range_parse(b.accn).seqid - chr = b.seqid - mapped[scf].add(chr) - - nchimera = 0 - for s, chrs in sorted(mapped.items()): - if len(chrs) == 1: - continue - - print("=" * 80, file=sys.stderr) - print( - "{0} mapped to multiple locations: {1}".format(s, ",".join(sorted(chrs))), - file=sys.stderr, - ) - ranges = [] - for b in selected: - rr = range_parse(b.accn) - scf = rr.seqid - if scf == s: - print(b, file=sys.stderr) - ranges.append(rr) - - # Identify breakpoints - ranges.sort(key=lambda x: (x.seqid, x.start, x.end)) - for a, b in pairwise(ranges): - seqid = a.seqid - if seqid != b.seqid: - continue - - start, end = a.end, b.start - if start > end: - start, end = end, start - - chimeraline = "\t".join(str(x) for x in (seqid, start, end)) - print(chimeraline, file=fw) - print(chimeraline, file=sys.stderr) - nchimera += 1 - - fw.close() - logger.debug("A total of %d junctions written to `%s`.", nchimera, chimerabed) - - -def select_bed(bed): - """ - Return non-overlapping set of ranges, choosing high scoring blocks over low - scoring 
alignments when there are conflicts. - """ - ranges = [ - Range(x.seqid, x.start, x.end, float(x.score), i) for i, x in enumerate(bed) - ] - selected, score = range_chain(ranges) - selected = [bed[x.id] for x in selected] - - return selected - - -def fasta(args): - """ - %prog fasta bedfile scf.fasta pseudomolecules.fasta - - Use OM bed to scaffold and create pseudomolecules. bedfile can be generated - by running jcvi.assembly.opticalmap bed --blockonly - """ - from jcvi.formats.sizes import Sizes - from jcvi.formats.agp import OO, build - - p = OptionParser(fasta.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - bedfile, scffasta, pmolfasta = args - pf = bedfile.rsplit(".", 1)[0] - bed = Bed(bedfile) - selected = select_bed(bed) - oo = OO() - seen = set() - sizes = Sizes(scffasta).mapping - agpfile = pf + ".agp" - agp = open(agpfile, "w") - for b in selected: - scf = range_parse(b.accn).seqid - chr = b.seqid - cs = (chr, scf) - if cs not in seen: - oo.add(chr, scf, sizes[scf], b.strand) - seen.add(cs) - else: - logger.debug("Seen %s, ignored.", cs) - - oo.write_AGP(agp, gaptype="contig") - agp.close() - build([agpfile, scffasta, pmolfasta]) - - -def bed(args): - """ - %prog bed xmlfile - - Print summary of optical map alignment in BED format. 
- """ - from jcvi.formats.bed import sort - - p = OptionParser(bed.__doc__) - p.add_argument( - "--blockonly", - default=False, - action="store_true", - help="Only print out large blocks, not fragments", - ) - p.add_argument( - "--point", - default=False, - action="store_true", - help="Print accesssion as single point instead of interval", - ) - p.add_argument("--scale", type=float, help="Scale the OM distance by factor") - p.add_argument( - "--switch", - default=False, - action="store_true", - help="Switch reference and aligned map elements", - ) - p.add_argument( - "--nosort", - default=False, - action="store_true", - help="Do not sort bed", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (xmlfile,) = args - bedfile = xmlfile.rsplit(".", 1)[0] + ".bed" - - om = OpticalMap(xmlfile) - om.write_bed( - bedfile, - point=opts.point, - scale=opts.scale, - blockonly=opts.blockonly, - switch=opts.switch, - ) - - if not opts.nosort: - sort([bedfile, "--inplace"]) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/patch.py b/jcvi/assembly/patch.py deleted file mode 100644 index e3102f52..00000000 --- a/jcvi/assembly/patch.py +++ /dev/null @@ -1,968 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Patch the sequences of one assembly using sequences from another assembly. This -is tested on merging the medicago WGS assembly with the clone-by-clone assembly. - -There are a few techniques, used in curating medicago assembly. - -1. Split chimeric scaffolds based on genetic map and then refine breakpoints -2. Create patchers by mix-and-max guided by optical map -3. Find gaps and fill N's using alternative assembly -4. Add telomeric sequences -5. Find gaps in optical map -6. 
Insert unplaced scaffolds using mates -""" -import os.path as op -import sys -import math - -from collections import defaultdict -from itertools import groupby -from more_itertools import pairwise, roundrobin - -from ..apps.base import ActionDispatcher, OptionParser, cleanup, logger, sh -from ..formats.base import FileMerger -from ..formats.bed import ( - Bed, - BedLine, - complementBed, - fastaFromBed, - mergeBed, - summary, -) -from ..formats.blast import BlastSlow -from ..formats.sizes import Sizes -from ..utils.range import ( - range_closest, - range_distance, - range_interleave, - range_merge, - range_minmax, - range_parse, -) - - -def main(): - - actions = ( - # OM guided approach - ("refine", "find gaps within or near breakpoint regions"), - ("patcher", "given om alignment, prepare the patchers"), - # Gap filling through sequence matching - ("fill", "perform gap filling using one assembly vs the other"), - ("install", "install patches into backbone"), - # Placement through mates and manual insertions and deletions - ("bambus", "find candidate scaffolds to insert based on mates"), - ("insert", "insert scaffolds into assembly"), - ("eject", "eject scaffolds from assembly"), - ("closest", "find the nearest gaps flanking suggested regions"), - # Misc - ("tips", "append telomeric sequences based on patchers and complements"), - ("gaps", "create patches around OM gaps"), - # Touch-up - ("pasteprepare", "prepare sequences for paste"), - ("paste", "paste in good sequences in the final assembly"), - ("pastegenes", "paste in zero or low coverage genes"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def pastegenes(args): - """ - %prog pastegenes coverage.list old.genes.bed new.genes.bed old.assembly - - Paste in zero or low coverage genes. For a set of neighboring genes - missing, add the whole cassette as unplaced scaffolds. For singletons the - program will try to make a patch. 
- """ - from jcvi.formats.base import DictFile - from jcvi.utils.cbook import gene_name - - p = OptionParser(pastegenes.__doc__) - p.add_argument( - "--cutoff", - default=90, - type=int, - help="Coverage cutoff to call gene missing", - ) - p.add_argument( - "--flank", - default=2000, - type=int, - help="Get the seq of size on two ends", - ) - p.add_argument( - "--maxsize", - default=50000, - type=int, - help="Maximum size of patchers to be replaced", - ) - opts, args = p.parse_args(args) - - if len(args) != 4: - sys.exit(not p.print_help()) - - coveragefile, oldbed, newbed, oldassembly = args - cutoff = opts.cutoff - flank = opts.flank - maxsize = opts.maxsize - - coverage = DictFile(coveragefile, valuepos=2, cast=float) - - obed = Bed(oldbed) - order = obed.order - bed = [x for x in obed if x.accn in coverage] - key = lambda x: coverage[x.accn] >= cutoff - - extrabed = "extra.bed" - extendbed = "extend.bed" - pastebed = "paste.bed" - - fw = open(extrabed, "w") - fwe = open(extendbed, "w") - fwp = open(pastebed, "w") - fw_ids = open(extendbed + ".ids", "w") - - singletons, large, large_genes = 0, 0, 0 - for chr, chrbed in groupby(bed, key=lambda x: x.seqid): - chrbed = list(chrbed) - for good, beds in groupby(chrbed, key=key): - if good: - continue - - beds = list(beds) - blocksize = len(set([gene_name(x.accn) for x in beds])) - if blocksize == 1: - singletons += 1 - accn = beds[0].accn - gi, gb = order[accn] - leftb = obed[gi - 1] - rightb = obed[gi + 1] - leftr = leftb.range - rightr = rightb.range - cur = gb.range - distance_to_left, oo = range_distance(leftr, cur) - distance_to_right, oo = range_distance(cur, rightr) - span, oo = range_distance(leftr, rightr) - - label = "LEFT" if 0 < distance_to_left <= distance_to_right else "RIGHT" - - if 0 < span <= maxsize: - print( - "\t".join( - str(x) for x in (chr, leftb.start, rightb.end, gb.accn) - ), - file=fwp, - ) - - print(leftb, file=fwe) - print(gb, file=fwe) - print(rightb, file=fwe) - print( - "L:{0} R:{1} 
[{2}]".format( - distance_to_left, distance_to_right, label - ), - file=fwe, - ) - print(gb.accn, file=fw_ids) - continue - - large += 1 - large_genes += blocksize - - ranges = [(x.start, x.end) for x in beds] - rmin, rmax = range_minmax(ranges) - rmin -= flank - rmax += flank - - name = "-".join((beds[0].accn, beds[-1].accn)) - print("\t".join(str(x) for x in (chr, rmin - 1, rmax, name)), file=fw) - - fw.close() - fwe.close() - - extrabed = mergeBed(extrabed, d=flank, nms=True) - fastaFromBed(extrabed, oldassembly, name=True) - summary([extrabed]) - - logger.debug("Singleton blocks : {0}".format(singletons)) - logger.debug("Large blocks : {0} ({1} genes)".format(large, large_genes)) - - -def pasteprepare(args): - """ - %prog pasteprepare bacs.fasta - - Prepare sequences for paste. - """ - p = OptionParser(pasteprepare.__doc__) - p.add_argument( - "--flank", - default=5000, - type=int, - help="Get the seq of size on two ends", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (goodfasta,) = args - flank = opts.flank - pf = goodfasta.rsplit(".", 1)[0] - extbed = pf + ".ext.bed" - - sizes = Sizes(goodfasta) - fw = open(extbed, "w") - for bac, size in sizes.iter_sizes(): - print("\t".join(str(x) for x in (bac, 0, min(flank, size), bac + "L")), file=fw) - print( - "\t".join(str(x) for x in (bac, max(size - flank, 0), size, bac + "R")), - file=fw, - ) - fw.close() - - fastaFromBed(extbed, goodfasta, name=True) - - -def paste(args): - """ - %prog paste flanks.bed flanks_vs_assembly.blast backbone.fasta - - Paste in good sequences in the final assembly. 
- """ - from jcvi.formats.bed import uniq - - p = OptionParser(paste.__doc__) - p.add_argument( - "--maxsize", - default=300000, - type=int, - help="Maximum size of patchers to be replaced", - ) - p.add_argument("--prefix", help="Prefix of the new object") - p.set_rclip(rclip=1) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - pbed, blastfile, bbfasta = args - maxsize = opts.maxsize # Max DNA size to replace gap - order = Bed(pbed).order - - beforebed, afterbed = blast_to_twobeds( - blastfile, order, log=True, rclip=opts.rclip, maxsize=maxsize, flipbeds=True - ) - beforebed = uniq([beforebed]) - - afbed = Bed(beforebed) - bfbed = Bed(afterbed) - - shuffle_twobeds(afbed, bfbed, bbfasta, prefix=opts.prefix) - - -def eject(args): - """ - %prog eject candidates.bed chr.fasta - - Eject scaffolds from assembly, using the range identified by closest(). - """ - p = OptionParser(eject.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - candidates, chrfasta = args - sizesfile = Sizes(chrfasta).filename - cbedfile = complementBed(candidates, sizesfile) - - cbed = Bed(cbedfile) - for b in cbed: - b.accn = b.seqid - b.score = 1000 - b.strand = "+" - - cbed.print_to_file() - - -def closest(args): - """ - %prog closest candidates.bed gaps.bed fastafile - - Identify the nearest gaps flanking suggested regions. 
- """ - p = OptionParser(closest.__doc__) - p.add_argument( - "--om", - default=False, - action="store_true", - help="The bedfile is OM blocks", - ) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - candidates, gapsbed, fastafile = args - sizes = Sizes(fastafile).mapping - bed = Bed(candidates) - ranges = [] - for b in bed: - r = range_parse(b.accn) if opts.om else b - ranges.append([r.seqid, r.start, r.end]) - - gapsbed = Bed(gapsbed) - granges = [(x.seqid, x.start, x.end) for x in gapsbed] - - ranges = range_merge(ranges) - for r in ranges: - a = range_closest(granges, r) - b = range_closest(granges, r, left=False) - seqid = r[0] - - if a is not None and a[0] != seqid: - a = None - if b is not None and b[0] != seqid: - b = None - - mmin = 1 if a is None else a[1] - mmax = sizes[seqid] if b is None else b[2] - - print("\t".join(str(x) for x in (seqid, mmin - 1, mmax))) - - -def insert(args): - """ - %prog insert candidates.bed gaps.bed chrs.fasta unplaced.fasta - - Insert scaffolds into assembly. 
- """ - from jcvi.formats.agp import mask, bed - from jcvi.formats.sizes import agp - - p = OptionParser(insert.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 4: - sys.exit(not p.print_help()) - - candidates, gapsbed, chrfasta, unplacedfasta = args - refinedbed = refine([candidates, gapsbed]) - sizes = Sizes(unplacedfasta).mapping - cbed = Bed(candidates) - corder = cbed.order - gbed = Bed(gapsbed) - gorder = gbed.order - - gpbed = Bed() - gappositions = {} # (chr, start, end) => gapid - - fp = open(refinedbed) - gap_to_scf = defaultdict(list) - seen = set() - for row in fp: - atoms = row.split() - if len(atoms) <= 6: - continue - unplaced = atoms[3] - strand = atoms[5] - gapid = atoms[9] - if gapid not in seen: - seen.add(gapid) - gi, gb = gorder[gapid] - gpbed.append(gb) - gappositions[(gb.seqid, gb.start, gb.end)] = gapid - gap_to_scf[gapid].append((unplaced, strand)) - - gpbedfile = "candidate.gaps.bed" - gpbed.print_to_file(gpbedfile, sorted=True) - - agpfile = agp([chrfasta]) - maskedagpfile = mask([agpfile, gpbedfile]) - maskedbedfile = maskedagpfile.rsplit(".", 1)[0] + ".bed" - bed([maskedagpfile, "--outfile={0}".format(maskedbedfile)]) - - mbed = Bed(maskedbedfile) - finalbed = Bed() - for b in mbed: - sid = b.seqid - key = (sid, b.start, b.end) - if key not in gappositions: - finalbed.add("{0}\n".format(b)) - continue - - gapid = gappositions[key] - scfs = gap_to_scf[gapid] - - # For scaffolds placed in the same gap, sort according to positions - scfs.sort(key=lambda x: corder[x[0]][1].start + corder[x[0]][1].end) - for scf, strand in scfs: - size = sizes[scf] - finalbed.add("\t".join(str(x) for x in (scf, 0, size, sid, 1000, strand))) - - finalbedfile = "final.bed" - finalbed.print_to_file(finalbedfile) - - # Clean-up - toclean = [gpbedfile, agpfile, maskedagpfile, maskedbedfile] - cleanup(toclean) - - -def gaps(args): - """ - %prog gaps OM.bed fastafile - - Create patches around OM gaps. 
- """ - from jcvi.formats.bed import uniq - - p = OptionParser(gaps.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - ombed, fastafile = args - ombed = uniq([ombed]) - bed = Bed(ombed) - - for a, b in pairwise(bed): - om_a = (a.seqid, a.start, a.end, "+") - om_b = (b.seqid, b.start, b.end, "+") - ch_a = range_parse(a.accn) - ch_b = range_parse(b.accn) - ch_a = (ch_a.seqid, ch_a.start, ch_a.end, "+") - ch_b = (ch_b.seqid, ch_b.start, ch_b.end, "+") - - om_dist, x = range_distance(om_a, om_b, distmode="ee") - ch_dist, x = range_distance(ch_a, ch_b, distmode="ee") - - if om_dist <= 0 and ch_dist <= 0: - continue - - print(a) - print(b) - print(om_dist, ch_dist) - - -def tips(args): - """ - %prog tips patchers.bed complements.bed original.fasta backbone.fasta - - Append telomeric sequences based on patchers and complements. - """ - p = OptionParser(tips.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 4: - sys.exit(not p.print_help()) - - pbedfile, cbedfile, sizesfile, bbfasta = args - - pbed = Bed(pbedfile, sorted=False) - cbed = Bed(cbedfile, sorted=False) - - complements = dict() - for object, beds in groupby(cbed, key=lambda x: x.seqid): - beds = list(beds) - complements[object] = beds - - sizes = Sizes(sizesfile).mapping - bbsizes = Sizes(bbfasta).mapping - tbeds = [] - - for object, beds in groupby(pbed, key=lambda x: x.accn): - beds = list(beds) - startbed, endbed = beds[0], beds[-1] - start_id, end_id = startbed.seqid, endbed.seqid - if startbed.start == 1: - start_id = None - if endbed.end == sizes[end_id]: - end_id = None - print(object, start_id, end_id, file=sys.stderr) - if start_id: - b = complements[start_id][0] - b.accn = object - tbeds.append(b) - tbeds.append( - BedLine( - "\t".join( - str(x) for x in (object, 0, bbsizes[object], object, 1000, "+") - ) - ) - ) - if end_id: - b = complements[end_id][-1] - b.accn = object - tbeds.append(b) - - tbed = Bed() - tbed.extend(tbeds) - - tbedfile 
= "tips.bed" - tbed.print_to_file(tbedfile) - - -def fill(args): - """ - %prog fill gaps.bed bad.fasta - - Perform gap filling of one assembly (bad) using sequences from another. - """ - p = OptionParser(fill.__doc__) - p.add_argument( - "--extend", - default=2000, - type=int, - help="Extend seq flanking the gaps", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - gapsbed, badfasta = args - Ext = opts.extend - - gapdist = 2 * Ext + 1 # This is to prevent to replacement ranges intersect - gapsbed = mergeBed(gapsbed, d=gapdist, nms=True) - - bed = Bed(gapsbed) - sizes = Sizes(badfasta).mapping - pf = gapsbed.rsplit(".", 1)[0] - extbed = pf + ".ext.bed" - fw = open(extbed, "w") - for b in bed: - gapname = b.accn - start, end = max(0, b.start - Ext - 1), b.start - 1 - print("\t".join(str(x) for x in (b.seqid, start, end, gapname + "L")), file=fw) - start, end = b.end, min(sizes[b.seqid], b.end + Ext) - print("\t".join(str(x) for x in (b.seqid, start, end, gapname + "R")), file=fw) - fw.close() - - fastaFromBed(extbed, badfasta, name=True) - - -def blast_to_twobeds( - blastfile, order, log=False, rclip=1, maxsize=300000, flipbeds=False -): - - abed, bbed = "before.bed", "after.bed" - beforebed, afterbed = abed, bbed - if flipbeds: - beforebed, afterbed = afterbed, beforebed - - fwa = open(beforebed, "w") - fwb = open(afterbed, "w") - if log: - logfile = "problems.log" - log = open(logfile, "w") - - key1 = lambda x: x.query - key2 = lambda x: x.query[:-rclip] if rclip else key1 - data = BlastSlow(blastfile) - OK = "OK" - - seen = set() - for pe, lines in groupby(data, key=key2): - label = OK - lines = list(lines) - if len(lines) != 2: - label = "Singleton" - - else: - a, b = lines - - aquery, bquery = a.query, b.query - asubject, bsubject = a.subject, b.subject - if asubject != bsubject: - label = "Different chr {0}|{1}".format(asubject, bsubject) - - else: - astrand, bstrand = a.orientation, b.orientation - assert aquery[-1] 
== "L" and bquery[-1] == "R", str((aquery, bquery)) - - ai, ax = order[aquery] - bi, bx = order[bquery] - qstart, qstop = ax.start + a.qstart - 1, bx.start + b.qstop - 1 - - if astrand == "+" and bstrand == "+": - sstart, sstop = a.sstart, b.sstop - - elif astrand == "-" and bstrand == "-": - sstart, sstop = b.sstart, a.sstop - - else: - label = "Strand {0}|{1}".format(astrand, bstrand) - - if sstart > sstop: - label = "Start beyond stop" - - if sstop > sstart + maxsize: - label = "Stop beyond start plus {0}".format(maxsize) - - aquery = lines[0].query - bac_name = aquery[:-1] - seen.add(bac_name) - name = bac_name + "LR" - - if label != OK: - if log: - print("\t".join((name, label)), file=log) - continue - - print( - "\t".join(str(x) for x in (ax.seqid, qstart - 1, qstop, name, 1000, "+")), - file=fwa, - ) - print( - "\t".join( - str(x) for x in (asubject, sstart - 1, sstop, name, 1000, astrand) - ), - file=fwb, - ) - - # Missing - if log: - label = "Missing" - for k in order.keys(): - k = k[:-1] - if k not in seen: - seen.add(k) - k += "LR" - print("\t".join((k, label)), file=log) - log.close() - - fwa.close() - fwb.close() - - return abed, bbed - - -def shuffle_twobeds(afbed, bfbed, bbfasta, prefix=None): - # Shuffle the two bedfiles together - sz = Sizes(bbfasta) - sizes = sz.mapping - shuffled = "shuffled.bed" - border = bfbed.order - - all = [] - afbed.sort(key=afbed.nullkey) - totalids = len(sizes) - pad = int(math.log10(totalids)) + 1 - cj = 0 - seen = set() - accn = lambda x: "{0}{1:0{2}d}".format(prefix, x, pad) - - for seqid, aa in afbed.sub_beds(): - cj += 1 - abeds, bbeds, beds = [], [], [] - size = sizes[seqid] - ranges = [(x.seqid, x.start, x.end) for x in aa] - cranges = range_interleave(ranges, sizes={seqid: size}, empty=True) - for crange in cranges: - if crange: - seqid, start, end = crange - bedline = "\t".join(str(x) for x in (seqid, start - 1, end)) - abeds.append(BedLine(bedline)) - else: - abeds.append(None) - - for a in aa: - gapid = a.accn 
- bi, b = border[gapid] - if a.strand == "-": - b.extra[1] = b.strand = "-" if b.strand == "+" else "+" - - bbeds.append(b) - - n_abeds = len(abeds) - n_bbeds = len(bbeds) - assert n_abeds - n_bbeds == 1, "abeds: {0}, bbeds: {1}".format(n_abeds, n_bbeds) - - beds = [x for x in roundrobin(abeds, bbeds) if x] - if prefix: - for b in beds: - b.accn = accn(cj) - - all.extend(beds) - seen.add(seqid) - - # Singletons - for seqid, size in sz.iter_sizes(): - if seqid in seen: - continue - - bedline = "\t".join(str(x) for x in (seqid, 0, size, accn(cj))) - b = BedLine(bedline) - - cj += 1 - if prefix: - b.accn = accn(cj) - - all.append(b) - - shuffledbed = Bed() - shuffledbed.extend(all) - shuffledbed.print_to_file(shuffled) - - return shuffledbed - - -def install(args): - """ - %prog install patchers.bed patchers.fasta backbone.fasta alt.fasta - - Install patches into backbone, using sequences from alternative assembly. - The patches sequences are generated via jcvi.assembly.patch.fill(). - - The output is a bedfile that can be converted to AGP using - jcvi.formats.agp.frombed(). 
- """ - from jcvi.apps.align import blast - from jcvi.formats.fasta import SeqIO - - p = OptionParser(install.__doc__) - p.set_rclip(rclip=1) - p.add_argument( - "--maxsize", - default=300000, - type=int, - help="Maximum size of patchers to be replaced", - ) - p.add_argument("--prefix", help="Prefix of the new object") - p.add_argument( - "--strict", - default=False, - action="store_true", - help="Only update if replacement has no gaps", - ) - opts, args = p.parse_args(args) - - if len(args) != 4: - sys.exit(not p.print_help()) - - pbed, pfasta, bbfasta, altfasta = args - maxsize = opts.maxsize # Max DNA size to replace gap - rclip = opts.rclip - - blastfile = blast([altfasta, pfasta, "--wordsize=100", "--pctid=99"]) - order = Bed(pbed).order - beforebed, afterbed = blast_to_twobeds( - blastfile, order, rclip=rclip, maxsize=maxsize - ) - - beforefasta = fastaFromBed(beforebed, bbfasta, name=True, stranded=True) - afterfasta = fastaFromBed(afterbed, altfasta, name=True, stranded=True) - - # Exclude the replacements that contain more Ns than before - ah = SeqIO.parse(beforefasta, "fasta") - bh = SeqIO.parse(afterfasta, "fasta") - count_Ns = lambda x: x.seq.count("n") + x.seq.count("N") - exclude = set() - for arec, brec in zip(ah, bh): - an = count_Ns(arec) - bn = count_Ns(brec) - if opts.strict: - if bn == 0: - continue - - elif bn < an: - continue - - id = arec.id - exclude.add(id) - - logger.debug( - "Ignore {0} updates because of decreasing quality.".format(len(exclude)) - ) - - abed = Bed(beforebed, sorted=False) - bbed = Bed(afterbed, sorted=False) - abed = [x for x in abed if x.accn not in exclude] - bbed = [x for x in bbed if x.accn not in exclude] - - abedfile = "before.filtered.bed" - bbedfile = "after.filtered.bed" - afbed = Bed() - afbed.extend(abed) - bfbed = Bed() - bfbed.extend(bbed) - - afbed.print_to_file(abedfile) - bfbed.print_to_file(bbedfile) - - shuffle_twobeds(afbed, bfbed, bbfasta, prefix=opts.prefix) - - -def refine(args): - """ - %prog 
refine breakpoints.bed gaps.bed - - Find gaps within or near breakpoint region. - - For breakpoint regions with no gaps, there are two options: - - Break in the middle of the region - - Break at the closest gap (--closest) - """ - from pybedtools import BedTool - - p = OptionParser(refine.__doc__) - p.add_argument( - "--closest", - default=False, - action="store_true", - help="In case of no gaps, use closest", - ) - p.set_outfile("auto") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - breakpointsbed, gapsbed = args - ncols = len(next(open(breakpointsbed)).split()) - logger.debug("File %s contains %d columns.", breakpointsbed, ncols) - a = BedTool(breakpointsbed) - b = BedTool(gapsbed) - o = a.intersect(b, wao=True) - - pf = "{0}.{1}".format( - op.basename(breakpointsbed).split(".")[0], op.basename(gapsbed).split(".")[0] - ) - nogapsbed = pf + ".nogaps.bed" - largestgapsbed = pf + ".largestgaps.bed" - nogapsfw = open(nogapsbed, "w") - largestgapsfw = open(largestgapsbed, "w") - for b, gaps in groupby(o, key=lambda x: x[:ncols]): - gaps = list(gaps) - gap = gaps[0] - if len(gaps) == 1 and gap[-1] == "0": - assert gap[-3] == "." 
- print("\t".join(b), file=nogapsfw) - continue - - gaps = [(int(x[-1]), x) for x in gaps] - maxgap = max(gaps)[1] - # Write the gap interval that's intersected (often from column 4 and on) - print("\t".join(maxgap[ncols:]), file=largestgapsfw) - - nogapsfw.close() - largestgapsfw.close() - beds = [largestgapsbed] - toclean = [nogapsbed, largestgapsbed] - - if opts.closest: - closestgapsbed = pf + ".closestgaps.bed" - cmd = "closestBed -a {0} -b {1} -d".format(nogapsbed, gapsbed) - sh(cmd, outfile=closestgapsbed) - beds += [closestgapsbed] - toclean += [closestgapsbed] - else: - pointbed = pf + ".point.bed" - pbed = Bed() - bed = Bed(nogapsbed) - for b in bed: - pos = (b.start + b.end) // 2 - b.start, b.end = pos, pos - pbed.append(b) - pbed.print_to_file(pointbed) - beds += [pointbed] - toclean += [pointbed] - - refinedbed = pf + ".refined.bed" if opts.outfile == "auto" else opts.outfile - FileMerger(beds, outfile=refinedbed).merge() - - # Clean-up - cleanup(toclean) - - return refinedbed - - -def merge_ranges(beds): - - m = [x.accn for x in beds] - - mr = [range_parse(x) for x in m] - mc = set(x.seqid for x in mr) - if len(mc) != 1: - logger.error("Multiple seqid found in pocket. Aborted.") - return - - mc = list(mc)[0] - ms = min(x.start for x in mr) - me = max(x.end for x in mr) - - neg_strands = sum(1 for x in beds if x.strand == "-") - pos_strands = len(beds) - neg_strands - strand = "-" if neg_strands > pos_strands else "+" - - return mc, ms, me, strand - - -def patcher(args): - """ - %prog patcher backbone.bed other.bed - - Given optical map alignment, prepare the patchers. Use --backbone to suggest - which assembly is the major one, and the patchers will be extracted from - another assembly. 
- """ - from jcvi.formats.bed import uniq - - p = OptionParser(patcher.__doc__) - p.add_argument( - "--backbone", - default="OM", - help="Prefix of the backbone assembly", - ) - p.add_argument("--object", default="object", help="New object name") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - backbonebed, otherbed = args - backbonebed = uniq([backbonebed]) - otherbed = uniq([otherbed]) - - pf = backbonebed.split(".")[0] - - # Make a uniq bed keeping backbone at redundant intervals - cmd = "intersectBed -v -wa" - cmd += " -a {0} -b {1}".format(otherbed, backbonebed) - outfile = otherbed.rsplit(".", 1)[0] + ".not." + backbonebed - sh(cmd, outfile=outfile) - - uniqbed = Bed() - uniqbedfile = pf + ".merged.bed" - uniqbed.extend(Bed(backbonebed)) - uniqbed.extend(Bed(outfile)) - uniqbed.print_to_file(uniqbedfile, sorted=True) - - # Condense adjacent intervals, allow some chaining - bed = uniqbed - key = lambda x: range_parse(x.accn).seqid - - bed_fn = pf + ".patchers.bed" - bed_fw = open(bed_fn, "w") - - for k, sb in groupby(bed, key=key): - sb = list(sb) - chr, start, end, strand = merge_ranges(sb) - - print( - "\t".join(str(x) for x in (chr, start, end, opts.object, 1000, strand)), - file=bed_fw, - ) - - bed_fw.close() - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/postprocess.py b/jcvi/assembly/postprocess.py deleted file mode 100644 index d6b899d2..00000000 --- a/jcvi/assembly/postprocess.py +++ /dev/null @@ -1,537 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Finishing pipeline, starting with a phase1/2 BAC. 
The pipeline ideally should -include the following components - -+ BLAST against the Illumina contigs to fish out additional seqs -+ Use minimus2 to combine the contigs through overlaps -+ Map the mates to the contigs and perform scaffolding -""" -import os -import os.path as op -import sys - -from collections import defaultdict -from itertools import groupby - -from ..apps.align import run_megablast -from ..apps.base import ( - ActionDispatcher, - OptionParser, - cleanup, - logger, - mkdir, - need_update, - sh, -) -from ..formats.base import must_open -from ..formats.contig import ContigFile -from ..formats.fasta import ( - Fasta, - Seq, - SeqIO, - SeqRecord, - format, - gaps, - parse_fasta, - tidy, -) -from ..formats.sizes import Sizes -from ..utils.cbook import depends - -from .base import n50 - - -def main(): - - actions = ( - ("screen", "screen sequences against library"), - ("circular", "make circular genome"), - ("dedup", "remove duplicate contigs within assembly"), - ("dust", "remove low-complexity contigs within assembly"), - ("dust2bed", "extract low-complexity regions as bed file"), - ("build", "build assembly files after a set of clean-ups"), - ("overlap", "build larger contig set by fishing additional seqs"), - ("overlapbatch", "call overlap on a set of sequences"), - ("scaffold", "build scaffolds based on the ordering in the AGP file"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def dust2bed(args): - """ - %prog dust2bed fastafile - - Use dustmasker to find low-complexity regions (LCRs) in the genome. 
- """ - from jcvi.formats.base import read_block - - p = OptionParser(dust2bed.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - interval = fastafile + ".iv" - if need_update(fastafile, interval): - cmd = "dustmasker -in {0}".format(fastafile) - sh(cmd, outfile=interval) - - fp = open(interval) - bedfile = fastafile.rsplit(".", 1)[0] + ".dust.bed" - fw = must_open(bedfile, "w") - nlines = 0 - nbases = 0 - for header, block in read_block(fp, ">"): - header = header.strip(">") - for b in block: - start, end = b.split(" - ") - start, end = int(start), int(end) - print("\t".join(str(x) for x in (header, start, end)), file=fw) - nlines += 1 - nbases += end - start - logger.debug( - "A total of {0} DUST intervals ({1} bp) exported to `{2}`".format( - nlines, nbases, bedfile - ) - ) - - -def fasta2bed(fastafile): - """ - Alternative BED generation from FASTA file. Used for sanity check. - """ - dustfasta = fastafile.rsplit(".", 1)[0] + ".dust.fasta" - for name, seq in parse_fasta(dustfasta): - for islower, ss in groupby(enumerate(seq), key=lambda x: x[-1].islower()): - if not islower: - continue - ss = list(ss) - ms, mn = min(ss) - xs, xn = max(ss) - print("\t".join(str(x) for x in (name, ms, xs))) - - -def circular(args): - """ - %prog circular fastafile startpos - - Make circular genome, startpos is the place to start the sequence. This can - be determined by mapping to a reference. Self overlaps are then resolved. - Startpos is 1-based. 
- """ - from jcvi.assembly.goldenpath import overlap - - p = OptionParser(circular.__doc__) - p.add_argument( - "--flip", - default=False, - action="store_true", - help="Reverse complement the sequence", - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, startpos = args - startpos = int(startpos) - key, seq = next(parse_fasta(fastafile)) - aseq = seq[startpos:] - bseq = seq[:startpos] - aseqfile, bseqfile = "a.seq", "b.seq" - - for f, s in zip((aseqfile, bseqfile), (aseq, bseq)): - fw = must_open(f, "w") - print(">{0}\n{1}".format(f, s), file=fw) - fw.close() - - o = overlap([aseqfile, bseqfile]) - seq = aseq[: o.qstop] + bseq[o.sstop :] - seq = Seq(seq) - - if opts.flip: - seq = seq.reverse_complement() - - cleanup(aseqfile, bseqfile) - - fw = must_open(opts.outfile, "w") - rec = SeqRecord(seq, id=key, description="") - SeqIO.write([rec], fw, "fasta") - fw.close() - - -def dust(args): - """ - %prog dust assembly.fasta - - Remove low-complexity contigs within assembly. - """ - p = OptionParser(dust.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - dustfastafile = fastafile.rsplit(".", 1)[0] + ".dust.fasta" - if need_update(fastafile, dustfastafile): - cmd = "dustmasker -in {0}".format(fastafile) - cmd += " -out {0} -outfmt fasta".format(dustfastafile) - sh(cmd) - - for name, seq in parse_fasta(dustfastafile): - nlow = sum(1 for x in seq if x in "acgtnN") - pctlow = nlow * 100.0 / len(seq) - if pctlow < 98: - continue - # print "{0}\t{1:.1f}".format(name, pctlow) - print(name) - - -def dedup(args): - """ - %prog dedup assembly.assembly.blast assembly.fasta - - Remove duplicate contigs within assembly. 
- """ - from jcvi.formats.blast import BlastLine - - p = OptionParser(dedup.__doc__) - p.set_align(pctid=0, pctcov=98) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - blastfile, fastafile = args - cov = opts.pctcov / 100.0 - sizes = Sizes(fastafile).mapping - fp = open(blastfile) - removed = set() - for row in fp: - b = BlastLine(row) - query, subject = b.query, b.subject - if query == subject: - continue - qsize, ssize = sizes[query], sizes[subject] - qspan = abs(b.qstop - b.qstart) - if qspan < qsize * cov: - continue - if (qsize, query) < (ssize, subject): - removed.add(query) - - print("\n".join(sorted(removed))) - - -def build(args): - """ - %prog build current.fasta Bacteria_Virus.fasta prefix - - Build assembly files after a set of clean-ups: - 1. Use cdhit (100%) to remove duplicate scaffolds - 2. Screen against the bacteria and virus database (remove scaffolds 95% id, 50% cov) - 3. Mask matches to UniVec_Core - 4. Sort by decreasing scaffold sizes - 5. Rename the scaffolds sequentially - 6. Build the contigs by splitting scaffolds at gaps - 7. 
Rename the contigs sequentially - """ - from jcvi.apps.cdhit import deduplicate - from jcvi.apps.vecscreen import mask - from jcvi.formats.fasta import sort - - p = OptionParser(build.__doc__) - p.add_argument( - "--nodedup", - default=False, - action="store_true", - help="Do not deduplicate [default: deduplicate]", - ) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - fastafile, bacteria, pf = args - dd = deduplicate([fastafile, "--pctid=100"]) if not opts.nodedup else fastafile - screenfasta = screen([dd, bacteria]) - tidyfasta = mask([screenfasta]) - sortedfasta = sort([tidyfasta, "--sizes"]) - scaffoldfasta = pf + ".assembly.fasta" - format([sortedfasta, scaffoldfasta, "--prefix=scaffold_", "--sequential"]) - gapsplitfasta = pf + ".gapSplit.fasta" - cmd = "gapSplit -minGap=10 {0} {1}".format(scaffoldfasta, gapsplitfasta) - sh(cmd) - contigsfasta = pf + ".contigs.fasta" - format([gapsplitfasta, contigsfasta, "--prefix=contig_", "--sequential"]) - - -def screen(args): - """ - %prog screen scaffolds.fasta library.fasta - - Screen sequences against FASTA library. Sequences that have 95% id and 50% - cov will be removed by default. 
- """ - from jcvi.apps.align import blast - from jcvi.formats.blast import covfilter - - p = OptionParser(screen.__doc__) - p.set_align(pctid=95, pctcov=50) - p.add_argument("--best", default=1, type=int, help="Get the best N hit") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - scaffolds, library = args - pctidflag = "--pctid={0}".format(opts.pctid) - blastfile = blast([library, scaffolds, pctidflag, "--best={0}".format(opts.best)]) - - idsfile = blastfile.rsplit(".", 1)[0] + ".ids" - covfilter( - [ - blastfile, - scaffolds, - "--ids=" + idsfile, - pctidflag, - "--pctcov={0}".format(opts.pctcov), - ] - ) - - pf = scaffolds.rsplit(".", 1)[0] - nf = pf + ".screen.fasta" - cmd = "faSomeRecords {0} -exclude {1} {2}".format(scaffolds, idsfile, nf) - sh(cmd) - - logger.debug("Screened FASTA written to `{0}`.".format(nf)) - - return nf - - -def scaffold(args): - """ - %prog scaffold ctgfasta agpfile - - Build scaffolds based on ordering in the AGP file. - """ - from jcvi.formats.agp import bed, order_to_agp, build - from jcvi.formats.bed import Bed - - p = OptionParser(scaffold.__doc__) - p.add_argument( - "--prefix", - default=False, - action="store_true", - help="Keep IDs with same prefix together", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - ctgfasta, agpfile = args - sizes = Sizes(ctgfasta).mapping - - pf = ctgfasta.rsplit(".", 1)[0] - phasefile = pf + ".phases" - fwphase = open(phasefile, "w") - newagpfile = pf + ".new.agp" - fwagp = open(newagpfile, "w") - - scaffoldbuckets = defaultdict(list) - - bedfile = bed([agpfile, "--nogaps", "--outfile=tmp"]) - bb = Bed(bedfile) - for s, partialorder in bb.sub_beds(): - name = partialorder[0].accn - bname = name.rsplit("_", 1)[0] if opts.prefix else s - scaffoldbuckets[bname].append([(b.accn, b.strand) for b in partialorder]) - - # Now the buckets contain a mixture of singletons and partially resolved - # scaffolds. 
Print the scaffolds first then remaining singletons. - for bname, scaffolds in sorted(scaffoldbuckets.items()): - ctgorder = [] - singletons = set() - for scaf in sorted(scaffolds): - for node, orientation in scaf: - ctgorder.append((node, orientation)) - if len(scaf) == 1: - singletons.add(node) - nscaffolds = len(scaffolds) - nsingletons = len(singletons) - if nsingletons == 1 and nscaffolds == 0: - phase = 3 - elif nsingletons == 0 and nscaffolds == 1: - phase = 2 - else: - phase = 1 - - msg = "{0}: Scaffolds={1} Singletons={2} Phase={3}".format( - bname, nscaffolds, nsingletons, phase - ) - print(msg, file=sys.stderr) - print("\t".join((bname, str(phase))), file=fwphase) - - order_to_agp(bname, ctgorder, sizes, fwagp) - - fwagp.close() - cleanup(bedfile) - - fastafile = "final.fasta" - build([newagpfile, ctgfasta, fastafile]) - tidy([fastafile]) - - -@depends -def run_gapsplit(infile=None, outfile=None): - gaps([infile, "--split"]) - return outfile - - -def overlapbatch(args): - """ - %prog overlapbatch ctgfasta poolfasta - - Fish out the sequences in `poolfasta` that overlap with `ctgfasta`. - Mix and combine using `minimus2`. - """ - p = OptionParser(overlap.__doc__) - opts, args = p.parse_args(args) - if len(args) != 2: - sys.exit(not p.print_help()) - - ctgfasta, poolfasta = args - f = Fasta(ctgfasta) - for k, rec in f.iteritems_ordered(): - fastafile = k + ".fasta" - fw = open(fastafile, "w") - SeqIO.write([rec], fw, "fasta") - fw.close() - - overlap([fastafile, poolfasta]) - - -def overlap(args): - """ - %prog overlap ctgfasta poolfasta - - Fish out the sequences in `poolfasta` that overlap with `ctgfasta`. - Mix and combine using `minimus2`. 
- """ - p = OptionParser(overlap.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - ctgfasta, poolfasta = args - prefix = ctgfasta.split(".")[0] - rid = list(Fasta(ctgfasta).iterkeys()) - assert len(rid) == 1, "Use overlapbatch() to improve multi-FASTA file" - - rid = rid[0] - splitctgfasta = ctgfasta.rsplit(".", 1)[0] + ".split.fasta" - ctgfasta = run_gapsplit(infile=ctgfasta, outfile=splitctgfasta) - - # Run BLAST - blastfile = ctgfasta + ".blast" - run_megablast(infile=ctgfasta, outfile=blastfile, db=poolfasta) - - # Extract contigs and merge using minimus2 - closuredir = prefix + ".closure" - closure = False - if need_update(blastfile, closuredir): - mkdir(closuredir, overwrite=True) - closure = True - - if closure: - idsfile = op.join(closuredir, prefix + ".ids") - cmd = "cut -f2 {0} | sort -u".format(blastfile) - sh(cmd, outfile=idsfile) - - idsfastafile = op.join(closuredir, prefix + ".ids.fasta") - cmd = "faSomeRecords {0} {1} {2}".format(poolfasta, idsfile, idsfastafile) - sh(cmd) - - # This step is a hack to weight the bases from original sequences more - # than the pulled sequences, by literally adding another copy to be used - # in consensus calls. 
- redundantfastafile = op.join(closuredir, prefix + ".redundant.fasta") - format([ctgfasta, redundantfastafile, "--prefix=RED."]) - - mergedfastafile = op.join(closuredir, prefix + ".merged.fasta") - cmd = "cat {0} {1} {2}".format(ctgfasta, redundantfastafile, idsfastafile) - sh(cmd, outfile=mergedfastafile) - - afgfile = op.join(closuredir, prefix + ".afg") - cmd = "toAmos -s {0} -o {1}".format(mergedfastafile, afgfile) - sh(cmd) - - cwd = os.getcwd() - os.chdir(closuredir) - cmd = "minimus2 {0} -D REFCOUNT=0".format(prefix) - cmd += " -D OVERLAP=100 -D MINID=98" - sh(cmd) - os.chdir(cwd) - - # Analyze output, make sure that: - # + Get the singletons of the original set back - # + Drop any contig that is comprised entirely of pulled set - originalIDs = set(Fasta(ctgfasta).iterkeys()) - minimuscontig = op.join(closuredir, prefix + ".contig") - c = ContigFile(minimuscontig) - excludecontigs = set() - for rec in c.iter_records(): - reads = set(x.id for x in rec.reads) - if reads.isdisjoint(originalIDs): - excludecontigs.add(rec.id) - - logger.debug("Exclude contigs: {0}".format(", ".join(sorted(excludecontigs)))) - - finalfasta = prefix + ".improved.fasta_" - fw = open(finalfasta, "w") - minimusfasta = op.join(closuredir, prefix + ".fasta") - f = Fasta(minimusfasta) - for id, rec in f.iteritems_ordered(): - if id in excludecontigs: - continue - SeqIO.write([rec], fw, "fasta") - - singletonfile = op.join(closuredir, prefix + ".singletons") - singletons = set(x.strip() for x in open(singletonfile)) - leftovers = singletons & originalIDs - - logger.debug("Pull leftover singletons: {0}".format(", ".join(sorted(leftovers)))) - - f = Fasta(ctgfasta) - for id, rec in f.iteritems_ordered(): - if id not in leftovers: - continue - SeqIO.write([rec], fw, "fasta") - - fw.close() - - fastafile = finalfasta - finalfasta = fastafile.rstrip("_") - format( - [fastafile, finalfasta, "--sequential", "--pad0=3", "--prefix={0}_".format(rid)] - ) - - logger.debug("Improved FASTA written 
to `{0}`.".format(finalfasta)) - - n50([ctgfasta]) - n50([finalfasta]) - - errlog = "error.log" - cleanup(fastafile, blastfile, errlog) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/preprocess.py b/jcvi/assembly/preprocess.py deleted file mode 100644 index 54b7659f..00000000 --- a/jcvi/assembly/preprocess.py +++ /dev/null @@ -1,735 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Wrapper to trim and correct sequence data. -""" -import os -import os.path as op -import sys - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - cleanup, - datadir, - download, - logger, - mkdir, - need_update, - sh, -) -from ..formats.base import BaseFile, must_open, write_file -from ..formats.fastq import guessoffset -from ..utils.cbook import depends, human_size - - -class FastQCdata(BaseFile, dict): - def __init__(self, filename, human=False): - super().__init__(filename) - if not op.exists(filename): - logger.debug("File `%s` not found.", filename) - # Sample_RF37-1/RF37-1_GATCAG_L008_R2_fastqc => - # RF37-1_GATCAG_L008_R2 - self["Filename"] = op.basename(op.split(filename)[0]).rsplit("_", 1)[0] - self["Total Sequences"] = self["Sequence length"] = self["Total Bases"] = ( - "na" - ) - return - - fp = open(filename) - for row in fp: - atoms = row.rstrip().split("\t") - if atoms[0] in ("#", ">"): - continue - if len(atoms) != 2: - continue - - a, b = atoms - self[a] = b - - ts = self["Total Sequences"] - sl = self["Sequence length"] - if "-" in sl: - a, b = sl.split("-") - sl = (int(a) + int(b)) / 2 - if a == "30": - sl = int(b) - - ts, sl = int(ts), int(sl) - tb = ts * sl - - self["Total Sequences"] = human_size(ts).rstrip("b") if human else ts - self["Total Bases"] = human_size(tb).rstrip("b") if human else tb - - -def main(): - - actions = ( - ("contamination", "check reads contamination against Ecoli"), - ("correct", "correct reads using ALLPATHS-LG"), - ("count", "count reads based on FASTQC results"), - ("diginorm", "run K-mer 
based normalization"), - ("expand", "expand sequences using short reads"), - ("hetsmooth", "reduce K-mer diversity using het-smooth"), - ("trim", "trim reads using TRIMMOMATIC"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def diginorm(args): - """ - %prog diginorm fastqfile - - Run K-mer based normalization. Based on tutorial: - - - Assume input is either an interleaved pairs file, or two separate files. - - To set up khmer: - $ git clone git://github.com/ged-lab/screed.git - $ git clone git://github.com/ged-lab/khmer.git - $ cd screed - $ python setup.py install - $ cd ../khmer - $ make test - $ export PYTHONPATH=~/export/khmer - """ - from jcvi.formats.fastq import shuffle, pairinplace, split - from jcvi.apps.base import getfilesize - - p = OptionParser(diginorm.__doc__) - p.add_argument( - "--single", default=False, action="store_true", help="Single end reads" - ) - p.add_argument("--tablesize", help="Memory size") - p.add_argument( - "--npass", - default="1", - choices=("1", "2"), - help="How many passes of normalization", - ) - p.set_depth(depth=50) - p.set_home("khmer", default="/usr/local/bin/") - opts, args = p.parse_args(args) - - if len(args) not in (1, 2): - sys.exit(not p.print_help()) - - if len(args) == 2: - fastq = shuffle(args + ["--tag"]) - else: - (fastq,) = args - - kh = opts.khmer_home - depth = opts.depth - PE = not opts.single - sys.path.insert(0, op.join(kh, "python")) - - pf = fastq.rsplit(".", 1)[0] - keepfile = fastq + ".keep" - hashfile = pf + ".kh" - mints = 10000000 - ts = opts.tablesize or ((getfilesize(fastq) / 16 / mints + 1) * mints) - - norm_cmd = op.join(kh, "normalize-by-median.py") - filt_cmd = op.join(kh, "filter-abund.py") - if need_update(fastq, (hashfile, keepfile)): - cmd = norm_cmd - cmd += " -C {0} -k 20 -N 4 -x {1}".format(depth, ts) - if PE: - cmd += " -p" - cmd += " -s {0} {1}".format(hashfile, fastq) - sh(cmd) - - abundfiltfile = keepfile + ".abundfilt" - if need_update((hashfile, keepfile), 
abundfiltfile): - cmd = filt_cmd - cmd += " {0} {1}".format(hashfile, keepfile) - sh(cmd) - - if opts.npass == "1": - seckeepfile = abundfiltfile - else: - seckeepfile = abundfiltfile + ".keep" - if need_update(abundfiltfile, seckeepfile): - cmd = norm_cmd - cmd += " -C {0} -k 20 -N 4 -x {1}".format(depth - 10, ts / 2) - cmd += " {0}".format(abundfiltfile) - sh(cmd) - - if PE: - pairsfile = pairinplace( - [seckeepfile, "--base={0}".format(pf + "_norm"), "--rclip=2"] - ) - split([pairsfile]) - - -def expand(args): - """ - %prog expand bes.fasta reads.fastq - - Expand sequences using short reads. Useful, for example for getting BAC-end - sequences. The template to use, in `bes.fasta` may just contain the junction - sequences, then align the reads to get the 'flanks' for such sequences. - """ - import math - - from jcvi.formats.fasta import Fasta, SeqIO - from jcvi.formats.fastq import readlen, first, fasta - from jcvi.formats.blast import Blast - from jcvi.apps.base import cleanup - from jcvi.apps.bowtie import align, get_samfile - from jcvi.apps.align import blast - - p = OptionParser(expand.__doc__) - p.set_depth(depth=200) - p.set_firstN() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bes, reads = args - size = Fasta(bes).totalsize - rl = readlen([reads]) - expected_size = size + 2 * rl - nreads = expected_size * opts.depth / rl - nreads = int(math.ceil(nreads / 1000.0)) * 1000 - - # Attract reads - samfile, logfile = align( - [bes, reads, "--reorder", "--mapped", "--firstN={0}".format(opts.firstN)] - ) - - samfile, mapped, _ = get_samfile(reads, bes, bowtie=True, mapped=True) - logger.debug("Extract first %d reads from `%s`.", nreads, mapped) - - pf = mapped.split(".")[0] - pf = pf.split("-")[0] - bespf = bes.split(".")[0] - reads = pf + ".expand.fastq" - first([str(nreads), mapped, "-o", reads]) - - # Perform mini-assembly - fastafile = reads.rsplit(".", 1)[0] + ".fasta" - qualfile = "" - if need_update(reads, 
fastafile): - fastafile, qualfile = fasta([reads]) - - contigs = op.join(pf, "454LargeContigs.fna") - if need_update(fastafile, contigs): - cmd = "runAssembly -o {0} -cpu 8 {1}".format(pf, fastafile) - sh(cmd) - assert op.exists(contigs) - - # Annotate contigs - blastfile = blast([bes, contigs]) - mapping = {} - for query, b in Blast(blastfile).iter_best_hit(): - mapping[query] = b - - f = Fasta(contigs, lazy=True) - annotatedfasta = ".".join((pf, bespf, "fasta")) - fw = open(annotatedfasta, "w") - keys = list(Fasta(bes).iterkeys_ordered()) # keep an ordered list - recs = [] - for key, v in f.iteritems_ordered(): - vid = v.id - if vid not in mapping: - continue - b = mapping[vid] - subject = b.subject - rec = v.reverse_complement() if b.orientation == "-" else v - rec.id = rid = "_".join((pf, vid, subject)) - rec.description = "" - recs.append((keys.index(subject), rid, rec)) - - recs = [x[-1] for x in sorted(recs)] - SeqIO.write(recs, fw, "fasta") - fw.close() - - cleanup(samfile, logfile, mapped, reads, fastafile, qualfile, blastfile, pf) - logger.debug("Annotated seqs (n=%d) written to `%s`.", len(recs), annotatedfasta) - - return annotatedfasta - - -def contamination(args): - """ - %prog contamination Ecoli.fasta genome.fasta read.fastq - - Check read contamination on a folder of paired reads. Use bowtie2 to compare - the reads against: - 1. Ecoli.fsata - this will tell us the lower bound of contamination - 2. 
genome.fasta - this will tell us the upper bound of contamination - """ - from jcvi.apps.bowtie import BowtieLogFile, align - - p = OptionParser(contamination.__doc__) - p.set_firstN() - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - ecoli, genome, fq = args - firstN_opt = "--firstN={0}".format(opts.firstN) - samfile, logfile = align([ecoli, fq, firstN_opt]) - bl = BowtieLogFile(logfile) - lowerbound = bl.rate - samfile, logfile = align([genome, fq, firstN_opt]) - bl = BowtieLogFile(logfile) - upperbound = 100 - bl.rate - - median = (lowerbound + upperbound) / 2 - - clogfile = fq + ".Ecoli" - fw = open(clogfile, "w") - lowerbound = "{0:.1f}".format(lowerbound) - upperbound = "{0:.1f}".format(upperbound) - median = "{0:.1f}".format(median) - - print("\t".join((fq, lowerbound, median, upperbound)), file=fw) - print( - "{0}: Ecoli contamination rate {1}-{2}".format(fq, lowerbound, upperbound), - file=sys.stderr, - ) - fw.close() - - -def count(args): - """ - %prog count *.gz - - Count reads based on FASTQC results. FASTQC needs to be run on all the input - data given before running this command. 
- """ - from jcvi.utils.table import loadtable, write_csv - - p = OptionParser(count.__doc__) - p.add_argument("--dir", help="Sub-directory where FASTQC was run") - p.add_argument( - "--human", - default=False, - action="store_true", - help="Human friendly numbers", - ) - p.set_table() - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - filenames = args - subdir = opts.dir - header = "Filename|Total Sequences|Sequence length|Total Bases".split("|") - rows = [] - human = opts.human - for f in filenames: - folder = f.replace(".gz", "").rsplit(".", 1)[0] + "_fastqc" - if subdir: - folder = op.join(subdir, folder) - summaryfile = op.join(folder, "fastqc_data.txt") - - fqcdata = FastQCdata(summaryfile, human=human) - row = [fqcdata[x] for x in header] - rows.append(row) - - print(loadtable(header, rows), file=sys.stderr) - write_csv(header, rows, sep=opts.sep, filename=opts.outfile, align=opts.align) - - -def hetsmooth(args): - """ - %prog hetsmooth reads_1.fq reads_2.fq jf-23_0 - - Wrapper against het-smooth. Below is the command used in het-smooth manual. 
- - $ het-smooth --kmer-len=23 --bottom-threshold=38 --top-threshold=220 - --no-multibase-replacements --jellyfish-hash-file=23-mers.jf - reads_1.fq reads_2.fq - """ - p = OptionParser(hetsmooth.__doc__) - p.add_argument("-K", default=23, type=int, help="K-mer size") - p.add_argument("-L", type=int, help="Bottom threshold, first min") - p.add_argument("-U", type=int, help="Top threshold, second min") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - reads1fq, reads2fq, jfdb = args - K = opts.K - L = opts.L - U = opts.U - - assert L is not None and U is not None, "Please specify -L and -U" - - cmd = "het-smooth --kmer-len={0}".format(K) - cmd += " --bottom-threshold={0} --top-threshold={1}".format(L, U) - cmd += " --no-multibase-replacements --jellyfish-hash-file={0}".format(jfdb) - cmd += " --no-reads-log" - cmd += " " + " ".join((reads1fq, reads2fq)) - - sh(cmd) - - -def trim(args): - """ - %prog trim fastqfiles - - Trim reads using TRIMMOMATIC. If two fastqfiles are given, then it invokes - the paired reads mode. 
See manual: - - - """ - tv = "0.32" - TrimJar = "trimmomatic-{0}.jar".format(tv) - p = OptionParser(trim.__doc__) - p.add_argument( - "--path", - default=op.join("~/bin", TrimJar), - help="Path to trimmomatic jar file", - ) - p.set_phred() - p.add_argument( - "--nofrags", - default=False, - action="store_true", - help="Discard frags file in PE mode", - ) - p.add_argument( - "--minqv", - default=15, - type=int, - help="Average qv after trimming", - ) - p.add_argument( - "--minlen", - default=36, - type=int, - help="Minimum length after trimming", - ) - p.add_argument( - "--adapteronly", - default=False, - action="store_true", - help="Only trim adapters with no qv trimming", - ) - p.add_argument( - "--nogz", - default=False, - action="store_true", - help="Do not write to gzipped files", - ) - p.add_argument( - "--log", - default=None, - dest="trimlog", - help="Specify a `trimlog` file", - ) - p.set_cpus(cpus=4) - opts, args = p.parse_args(args) - - if len(args) not in (1, 2): - sys.exit(not p.print_help()) - - path = op.expanduser(opts.path) - url = "http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-{0}.zip".format( - tv - ) - - if not op.exists(path): - path = download(url) - TrimUnzipped = "Trimmomatic-" + tv - if not op.exists(TrimUnzipped): - sh("unzip " + path) - cleanup(path) - path = op.join(TrimUnzipped, TrimJar) - - assert op.exists(path), "Couldn't find Trimmomatic jar file at `{0}`".format(path) - - adaptersfile = "adapters.fasta" - Adapters = must_open(op.join(datadir, adaptersfile)).read() - write_file(adaptersfile, Adapters, skipcheck=True) - - assert op.exists( - adaptersfile - ), "Please place the illumina adapter sequence in `{0}`".format(adaptersfile) - - if opts.phred is None: - offset = guessoffset([args[0]]) - else: - offset = int(opts.phred) - - phredflag = " -phred{0}".format(offset) - threadsflag = " -threads {0}".format(opts.cpus) - if opts.trimlog: - trimlog = " -trimlog {0}".format(opts.trimlog) - - cmd = "java 
-Xmx4g -jar {0}".format(path) - frags = ".frags.fastq" - pairs = ".pairs.fastq" - if not opts.nogz: - frags += ".gz" - pairs += ".gz" - - get_prefix = lambda x: op.basename(x).replace(".gz", "").rsplit(".", 1)[0] - get_dirname = lambda x: "{0}/".format(op.dirname(x)) if op.dirname(x) else "" - if len(args) == 1: - cmd += " SE" - cmd += phredflag - cmd += threadsflag - if opts.trimlog: - cmd += trimlog - (fastqfile,) = args - prefix = get_prefix(fastqfile) - dirname = get_dirname(fastqfile) - frags1 = dirname + prefix + frags - cmd += " {0}".format(" ".join((fastqfile, frags1))) - else: - cmd += " PE" - cmd += phredflag - cmd += threadsflag - if opts.trimlog: - cmd += trimlog - fastqfile1, fastqfile2 = args - prefix1 = get_prefix(fastqfile1) - dirname1 = get_dirname(fastqfile1) - prefix2 = get_prefix(fastqfile2) - dirname2 = get_dirname(fastqfile2) - pairs1 = dirname1 + prefix1 + pairs - pairs2 = dirname2 + prefix2 + pairs - frags1 = dirname1 + prefix1 + frags - frags2 = dirname2 + prefix2 + frags - if opts.nofrags: - frags1 = "/dev/null" - frags2 = "/dev/null" - cmd += " {0}".format( - " ".join((fastqfile1, fastqfile2, pairs1, frags1, pairs2, frags2)) - ) - - cmd += " ILLUMINACLIP:{0}:2:30:10".format(adaptersfile) - - if not opts.adapteronly: - cmd += " LEADING:3 TRAILING:3" - cmd += " SLIDINGWINDOW:4:{0}".format(opts.minqv) - - cmd += " MINLEN:{0}".format(opts.minlen) - - if offset != 33: - cmd += " TOPHRED33" - sh(cmd) - - -@depends -def run_RemoveDodgyReads( - infile=None, - outfile=None, - removeDuplicates=True, - rc=False, - nthreads=32, -): - # orig.fastb => filt.fastb - assert op.exists(infile) - orig = infile.rsplit(".", 1)[0] - filt = outfile.rsplit(".", 1)[0] - - cmd = "RemoveDodgyReads IN_HEAD={0} OUT_HEAD={1}".format(orig, filt) - if not removeDuplicates: - cmd += " REMOVE_DUPLICATES=False" - if rc: - cmd += " RC=True" - cmd += nthreads - sh(cmd) - - -@depends -def run_FastbAndQualb2Fastq(infile=None, outfile=None, rc=False): - corr = 
op.basename(infile).rsplit(".", 1)[0] - cmd = "FastbQualbToFastq HEAD_IN={0} HEAD_OUT={0}".format(corr) - cmd += " PAIRED=False PHRED_OFFSET=33" - if rc: - cmd += " FLIP=True" - sh(cmd) - - -@depends -def run_pairs(infile=None, outfile=None, suffix=False): - from jcvi.assembly.allpaths import pairs - - args = infile - if suffix: - args.append("--suffix") - pairs(args) - - -def correct(args): - """ - %prog correct *.fastq - - Correct the fastqfile and generated corrected fastqfiles. This calls - assembly.allpaths.prepare() to generate input files for ALLPATHS-LG. The - naming convention for your fastqfiles are important, and are listed below. - - By default, this will correct all PE reads, and remove duplicates of all MP - reads, and results will be placed in `frag_reads.corr.{pairs,frags}.fastq` - and `jump_reads.corr.{pairs,frags}.fastq`. - """ - from jcvi.assembly.allpaths import prepare - from jcvi.assembly.base import FastqNamings - - p = OptionParser(correct.__doc__ + FastqNamings) - p.add_argument("--dir", default="data", help="Working directory") - p.add_argument( - "--fragsdedup", - default=False, - action="store_true", - help="Don't deduplicate the fragment reads", - ) - p.add_argument("--ploidy", default="2", choices=("1", "2"), help="Ploidy") - p.add_argument( - "--haploidify", - default=False, - action="store_true", - help="Set HAPLOIDIFY=True", - ) - p.add_argument( - "--suffix", - default=False, - action="store_true", - help="Add suffix /1, /2 to read names", - ) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - fastq = args - tag, tagj, taglj = "frag_reads", "jump_reads", "long_jump_reads" - - ploidy = opts.ploidy - haploidify = opts.haploidify - suffix = opts.suffix - assert (not haploidify) or (haploidify and ploidy == "2") - - prepare(["Unknown"] + fastq + ["--norun"]) - - datadir = opts.dir - mkdir(datadir) - fullpath = op.join(os.getcwd(), datadir) - nthreads = " 
NUM_THREADS={0}".format(opts.cpus) - phred64 = guessoffset([args[0]]) == 64 - - orig = datadir + "/{0}_orig".format(tag) - origfastb = orig + ".fastb" - if need_update(fastq, origfastb): - cmd = "PrepareAllPathsInputs.pl DATA_DIR={0} HOSTS='{1}' PLOIDY={2}".format( - fullpath, opts.cpus, ploidy - ) - if phred64: - cmd += " PHRED_64=True" - sh(cmd) - - if op.exists(origfastb): - correct_frag( - datadir, - tag, - origfastb, - nthreads, - dedup=opts.fragsdedup, - haploidify=haploidify, - suffix=suffix, - ) - - origj = datadir + "/{0}_orig".format(tagj) - origjfastb = origj + ".fastb" - if op.exists(origjfastb): - correct_jump(datadir, tagj, origjfastb, nthreads, suffix=suffix) - - origlj = datadir + "/{0}_orig".format(taglj) - origljfastb = origlj + ".fastb" - if op.exists(origljfastb): - correct_jump(datadir, taglj, origljfastb, nthreads, suffix=suffix) - - -def export_fastq(datadir, corrfastb, rc=False, suffix=False): - pf = op.basename(corrfastb.rsplit(".", 1)[0]) - - cwd = os.getcwd() - os.chdir(datadir) - corrfastq = pf + ".fastq" - run_FastbAndQualb2Fastq(infile=op.basename(corrfastb), outfile=corrfastq, rc=rc) - os.chdir(cwd) - - pairsfile = pf + ".pairs" - fragsfastq = pf + ".corr.fastq" - run_pairs( - infile=[op.join(datadir, pairsfile), op.join(datadir, corrfastq)], - outfile=fragsfastq, - suffix=suffix, - ) - - -def correct_frag( - datadir, tag, origfastb, nthreads, dedup=False, haploidify=False, suffix=False -): - filt = datadir + "/{0}_filt".format(tag) - filtfastb = filt + ".fastb" - run_RemoveDodgyReads( - infile=origfastb, - outfile=filtfastb, - removeDuplicates=dedup, - rc=False, - nthreads=nthreads, - ) - - filtpairs = filt + ".pairs" - edit = datadir + "/{0}_edit".format(tag) - editpairs = edit + ".pairs" - if need_update(filtpairs, editpairs): - cmd = "ln -sf {0} {1}.pairs".format(op.basename(filtpairs), edit) - sh(cmd) - - editfastb = edit + ".fastb" - if need_update(filtfastb, editfastb): - cmd = "FindErrors HEAD_IN={0} HEAD_OUT={1}".format(filt, 
edit) - cmd += " PLOIDY_FILE=data/ploidy" - cmd += nthreads - sh(cmd) - - corr = datadir + "/{0}_corr".format(tag) - corrfastb = corr + ".fastb" - if need_update(editfastb, corrfastb): - cmd = "CleanCorrectedReads DELETE=True" - cmd += " HEAD_IN={0} HEAD_OUT={1}".format(edit, corr) - cmd += " PLOIDY_FILE={0}/ploidy".format(datadir) - if haploidify: - cmd += " HAPLOIDIFY=True" - cmd += nthreads - sh(cmd) - - export_fastq(datadir, corrfastb, suffix=suffix) - - -def correct_jump(datadir, tagj, origjfastb, nthreads, suffix=False): - # Pipeline for jump reads does not involve correction - filt = datadir + "/{0}_filt".format(tagj) - filtfastb = filt + ".fastb" - run_RemoveDodgyReads( - infile=origjfastb, - outfile=filtfastb, - removeDuplicates=True, - rc=True, - nthreads=nthreads, - ) - - export_fastq(datadir, filtfastb, rc=True, suffix=suffix) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/sim.py b/jcvi/assembly/sim.py deleted file mode 100644 index 124dd5e7..00000000 --- a/jcvi/assembly/sim.py +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Simulate Illumina sequencing reads. -""" -import math -import os -import os.path as op -import random -import sys - -from ..apps.base import ActionDispatcher, OptionParser, cleanup, logger, sh -from ..formats.fasta import Fasta - - -def main(): - - actions = ( - ("wgsim", "sample paired end reads using dwgsim"), - ("eagle", "simulate Illumina reads using EAGLE"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def add_sim_options(p): - """ - Add options shared by eagle or wgsim. 
- """ - p.add_argument( - "--distance", - default=500, - type=int, - help="Outer distance between the two ends", - ) - p.add_argument("--readlen", default=150, type=int, help="Length of the read") - p.set_depth(depth=10) - p.set_outfile(outfile=None) - - -def eagle(args): - """ - %prog eagle fastafile - - """ - p = OptionParser(eagle.__doc__) - p.add_argument( - "--share", default="/usr/local/share/EAGLE/", help="Default EAGLE share path" - ) - add_sim_options(p) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - share = opts.share - depth = opts.depth - readlen = opts.readlen - distance = opts.distance - pf = op.basename(fastafile).split(".")[0] - - # Since EAGLE does not natively support read length other than 100bp and - # 250bp - for an arbitrary read length we need to generate a bunch of - # support files - - # First file is the Runinfo - runinfo_readlen = "RunInfo_PairedReads2x{}Cycles1x1Tiles.xml".format(readlen) - if not op.exists(runinfo_readlen): - runinfo = op.join(share, "RunInfo/RunInfo_PairedReads2x251Cycles1x1Tiles.xml") - runinfo_xml = open(runinfo).read() - runinfo_xml = ( - runinfo_xml.replace("251", str(readlen)) - .replace("252", str(readlen + 1)) - .replace("502", str(2 * readlen)) - ) - fw = open(runinfo_readlen, "w") - print(runinfo_xml.strip(), file=fw) - fw.close() - - # Generate quality profiles - quality_file1 = "QualityTable.read1.length{}.qval".format(readlen) - quality_file2 = "QualityTable.read2.length{}.qval".format(readlen) - if not (op.exists(quality_file1) and op.exists(quality_file2)): - for i, qq in enumerate([quality_file1, quality_file2]): - cmd = "/usr/local/libexec/EAGLE/scaleQualityTable.pl" - cmd += " --input {}".format( - op.join( - share, - "QualityTables/DefaultQualityTable.read{}.length101.qval".format( - i + 1 - ), - ) - ) - cmd += " --cycles {}".format(readlen) - cmd += " --output {}".format(qq) - sh(cmd, silent=True) - - # Since distance is different 
from the default distribution which is - # centered around 319, we shift our peak to the new peak - template_lengths = op.join( - share, "TemplateLengthTables/DefaultTemplateLengthTable.tsv" - ) - template_distance = "TemplateLengthTable{}.tsv".format(distance) - shift = distance - 319 - if not op.exists(template_distance): - fp = open(template_lengths) - fw = open(template_distance, "w") - for row in fp: - size, counts = row.split() - size = int(size) - counts = int(counts) - size += shift - if size < readlen: - continue - print("\t".join(str(x) for x in (size, counts)), file=fw) - fw.close() - - # All done, let's simulate! - cmd = "configureEAGLE.pl" - cmd += " --reference-genome {}".format(fastafile) - cmd += " --coverage-depth {}".format(depth) - cmd += " --gc-coverage-fit-table {}".format( - op.join(share, "GcCoverageFitTables/Homo_sapiens.example1.tsv") - ) - cmd += " --run-info {}".format(runinfo_readlen) - cmd += " --quality-table {}".format(quality_file1) - cmd += " --quality-table {}".format(quality_file2) - cmd += " --template-length-table {}".format(template_distance) - cmd += " --random-seed {}".format(random.randint(1, 65535)) - sh(cmd, silent=True) - - # Retrieve results - outpf = opts.outfile or "{0}.{1}bp.{2}x".format(pf, distance, depth) - outpf += ".bwa" - cwd = os.getcwd() - eagle_dir = "EAGLE" - os.chdir(eagle_dir) - sh("make bam", silent=True) - - # Convert BAM to FASTQ - from jcvi.formats.sam import fastq - - a, b = fastq(["eagle.bam", outpf]) - sh("mv {} {} ../".format(a, b)) - os.chdir(cwd) - - # Clean-up - cleanup(eagle_dir) - - -def wgsim(args): - """ - %prog wgsim fastafile - - Run dwgsim on fastafile. 
- """ - p = OptionParser(wgsim.__doc__) - p.add_argument( - "--erate", - default=0.01, - type=float, - help="Base error rate of the read", - ) - p.add_argument( - "--noerrors", - default=False, - action="store_true", - help="Simulate reads with no errors", - ) - p.add_argument( - "--genomesize", - type=int, - help="Genome size in Mb [default: estimate from data]", - ) - add_sim_options(p) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - pf = op.basename(fastafile).split(".")[0] - - genomesize = opts.genomesize - size = genomesize * 1000000 if genomesize else Fasta(fastafile).totalsize - depth = opts.depth - readlen = opts.readlen - readnum = int(math.ceil(size * depth / (2 * readlen))) - - distance = opts.distance - stdev = distance / 10 - - outpf = opts.outfile or "{0}.{1}bp.{2}x".format(pf, distance, depth) - - logger.debug("Total genome size: {0} bp".format(size)) - logger.debug("Target depth: {0}x".format(depth)) - logger.debug("Number of read pairs (2x{0}): {1}".format(readlen, readnum)) - - if opts.noerrors: - opts.erate = 0 - - cmd = "dwgsim -e {0} -E {0}".format(opts.erate) - if opts.noerrors: - cmd += " -r 0 -R 0 -X 0 -y 0" - - cmd += " -d {0} -s {1}".format(distance, stdev) - cmd += " -N {0} -1 {1} -2 {1}".format(readnum, readlen) - cmd += " {0} {1}".format(fastafile, outpf) - sh(cmd) - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/soap.py b/jcvi/assembly/soap.py deleted file mode 100644 index 273370f8..00000000 --- a/jcvi/assembly/soap.py +++ /dev/null @@ -1,331 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Script to write and assist SOAPdenovo assembly. 
-""" -import os.path as op -import sys - -from jcvi.formats.fastq import guessoffset, readlen, is_fastq -from jcvi.assembly.base import FastqNamings, Library, get_libs -from jcvi.apps.base import OptionParser, ActionDispatcher, need_update, sh - - -class FillLine(object): - def __init__(self, row): - args = row.split() - self.start = int(args[0]) - self.end = int(args[1]) - self.leftextend = int(args[2]) - self.rightextend = int(args[3]) - self.closed = int(args[4]) == 1 - self.extendlength = int(args[5]) - self.before = int(args[6]) - self.after = int(args[7]) - # Convert from unsigned to signed - # - if self.after > 0 and (self.after & 0x80000000): - self.after += -0x100000000 - - @property - def delta(self): - return self.after - self.before - - -def main(): - - actions = ( - ("clean", "clean and dedup paired FASTQ files"), - ("correct", "correct reads using ErrorCorrection"), - ("prepare", "prepare SOAP config files and run script"), - ("fillstats", "build stats on .fill file from GapCloser"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -SOAPHEADER = """ -P={0} -K={1} -S=soap.config -G=soap.gc.config -C={2} -A=asm$K -""" - -GCRUN = ( - "GapCloser_v1.12 -a ${A}.scafSeq -b $G -l 155 -o ${A}.closed.scafSeq -p 31 -t $P" -) -GCRUNG = "GapCloser_v1.12 -a {0} -b $G -l 155 -o {1} -p 31 -t $P" - -SOAPRUN = ( - """ -$C pregraph -s $S -d 1 -K $K -o $A -R -p $P -$C contig -s $S -g $A -M 1 -R -p $P -$C map -s $S -g $A -p $P -$C scaff -g $A -F -p $P -""" - + GCRUN -) - -SCFRUN = ( - """ -prepare -K $K -c %s -g $A -$C map -s $S -g $A -p $P -$C scaff -z -g $A -F -p $P -""" - + GCRUN -) - - -def get_size(filename): - - library_name = lambda x: "-".join(op.basename(x).split(".")[0].split("-")[:2]) - - lib = Library(library_name(filename)) - return lib.size - - -def correct(args): - """ - %prog correct *.fastq - - Correct reads using ErrorCorrection. Only PE will be used to build the K-mer - table. 
- """ - p = OptionParser(correct.__doc__) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - lstfile = "reads2cor.lst" - fw = open(lstfile, "w") - print("\n".join(x for x in args if x[:2] == "PE"), file=fw) - fw.close() - - p1 = args[0] - offset = guessoffset([p1]) - cpus = opts.cpus - - freq = "output.freq.cz" - freqlen = freq + ".len" - if need_update(args, (freq, freqlen)): - cmd = "KmerFreq_AR_v2.0 -k 17 -c -1 -q {0}".format(offset) - cmd += " -m 1 -t {0}".format(cpus) - cmd += " -p output {0}".format(lstfile) - sh(cmd) - - fw = open(lstfile, "w") - print("\n".join(args), file=fw) - fw.close() - - cmd = "Corrector_AR_v2.0 -k 17 -l 3 -m 5 -c 5 -a 0 -e 1 -w 0 -r 45" - cmd += " -Q {0} -q 30 -x 8 -t {1} -o 1 ".format(offset, cpus) - cmd += " {0} {1} {2}".format(freq, freqlen, lstfile) - sh(cmd) - - -def clean(args): - """ - %prog clean 1.fastq 2.fastq [insertsize] - - Clean and dedup paired FASTQ files. - """ - p = OptionParser(clean.__doc__) - p.add_argument("-a", default=0, type=int, help="Trim length at 5' end") - p.add_argument("-b", default=50, type=int, help="Trim length at 3' end") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) == 2: - p1, p2 = args - size = get_size(p1) - elif len(args) == 3: - p1, p2, size = args - size = int(size) - else: - sys.exit(not p.print_help()) - - pf = p1.split(".")[0] - cpus = opts.cpus - - offset = guessoffset([p1]) - a, b = opts.a, opts.b - - p1_clean = p1 + ".clean" - p1_cleangz = p1_clean + ".gz" - p2_clean = p2 + ".clean" - p2_cleangz = p2_clean + ".gz" - if need_update([p1, p2], [p1_cleangz, p2_cleangz]): - cmd = "SOAPfilter_v2.0 -t {0} -m 2000000 -p -y -z -g".format(cpus) - cmd += " -q {0} -w 10 -B 50 -f 0".format(offset) - cmd += " -l {0} -a {1} -b {2} -c {1} -d {2}".format(size, a, b, a, b) - cmd += " {0} {1} {2}.clean.stat {3} {4}".format(p1, p2, pf, p1_clean, p2_clean) - sh(cmd) - - -def fillstats(args): - """ - %prog fillstats genome.fill - - 
Build stats on .fill file from GapCloser. - """ - from jcvi.utils.cbook import SummaryStats, percentage, thousands - - p = OptionParser(fillstats.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fillfile,) = args - fp = open(fillfile) - scaffolds = 0 - gaps = [] - for row in fp: - if row[0] == ">": - scaffolds += 1 - continue - fl = FillLine(row) - gaps.append(fl) - - print("{0} scaffolds in total".format(scaffolds), file=sys.stderr) - - closed = [x for x in gaps if x.closed] - closedbp = sum(x.before for x in closed) - notClosed = [x for x in gaps if not x.closed] - notClosedbp = sum(x.before for x in notClosed) - - totalgaps = len(closed) + len(notClosed) - - print( - "Closed gaps: {0} size: {1} bp".format( - percentage(len(closed), totalgaps), thousands(closedbp) - ), - file=sys.stderr, - ) - ss = SummaryStats([x.after for x in closed]) - print(ss, file=sys.stderr) - - ss = SummaryStats([x.delta for x in closed]) - print("Delta:", ss, file=sys.stderr) - - print( - "Remaining gaps: {0} size: {1} bp".format( - percentage(len(notClosed), totalgaps), thousands(notClosedbp) - ), - file=sys.stderr, - ) - ss = SummaryStats([x.after for x in notClosed]) - print(ss, file=sys.stderr) - - -def prepare(args): - """ - %prog prepare *.fastq - - Scan input fastq files (see below) and write SOAP config files based - on inputfiles. Use "--scaffold contigs.fasta" to perform scaffolding. 
- """ - from jcvi.formats.base import write_file - - p = OptionParser(prepare.__doc__ + FastqNamings) - p.add_argument("-K", default=45, type=int, help="K-mer size") - p.add_argument( - "--assemble_1st_rank_only", - default=False, - action="store_true", - help="Assemble the first rank only, other libs asm_flags=2", - ) - p.add_argument("--scaffold", help="Only perform scaffolding") - p.add_argument("--gapclose", help="Only perform gap closure") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - fnames = args - K = opts.K - for x in fnames: - assert op.exists(x), "File `{0}` not found.".format(x) - - a1st = opts.assemble_1st_rank_only - - cfgfile = "soap.config" - gc_cfgfile = "soap.gc.config" - fw = open(cfgfile, "w") - fw_gc = open(gc_cfgfile, "w") - - libs = get_libs(fnames) - rank = 0 - max_rd_len = max(readlen([f]) for f in fnames) - - block = "max_rd_len={0}\n".format(max_rd_len) - for stream in (sys.stderr, fw, fw_gc): - print(block, file=stream) - - # Collect singletons first - singletons = [] - for lib, fs in libs: - if lib.size == 0: - singletons += fs - continue - - for lib, fs in libs: - size = lib.size - if size == 0: - continue - - rank += 1 - block = "[LIB]\n" - block += "avg_ins={0}\n".format(size) - block += "reverse_seq={0}\n".format(lib.reverse_seq) - asm_flags = 2 if (rank > 1 and a1st) else lib.asm_flags - block += "asm_flags={0}\n".format(asm_flags) - block += "rank={0}\n".format(rank) - if lib.reverse_seq: - pair_num_cutoff = 3 - block += "pair_num_cutoff={0}\n".format(pair_num_cutoff) - block += "map_len=35\n" - - for f in fs: - if ".1." in f: - tag = "q1" - elif ".2." 
in f: - tag = "q2" - block += "{0}={1}\n".format(tag, f) - - if rank == 1: - for s in singletons: - tag = "q" if is_fastq(s) else "f" - block += tag + "={0}\n".format(s) - - print(block, file=sys.stderr) - print(block, file=fw) - - if asm_flags > 2: - print(block, file=fw_gc) - - runfile = "run.sh" - scaffold = opts.scaffold - bb = 63 if K <= 63 else 127 - binary = "SOAPdenovo-{0}mer".format(bb) - header = SOAPHEADER.format(opts.cpus, K, binary) - if opts.gapclose: - gapclose = opts.gapclose - outfile = gapclose.rsplit(".", 1)[0] + ".closed.fasta" - template = header + GCRUNG.format(gapclose, outfile) - else: - template = header + (SCFRUN % scaffold if scaffold else SOAPRUN) - - write_file(runfile, template) - fw.close() - fw_gc.close() - - -if __name__ == "__main__": - main() diff --git a/jcvi/assembly/syntenypath.py b/jcvi/assembly/syntenypath.py deleted file mode 100644 index 82fa9a84..00000000 --- a/jcvi/assembly/syntenypath.py +++ /dev/null @@ -1,553 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Syntenic path assembly. 
-""" -import sys - -from collections import defaultdict -from itertools import groupby, combinations -from more_itertools import pairwise - -from ..algorithms.graph import BiGraph -from ..apps.base import ActionDispatcher, OptionParser, logger -from ..formats.base import LineFile, must_open -from ..formats.blast import Blast, BlastSlow -from ..formats.sizes import Sizes -from ..utils.range import range_intersect - - -class OVLLine: - def __init__(self, row): - # tig00000004 tig00042923 I -64039 -18713 16592 99.84 - # See also: assembly.goldenpath.Overlap for another implementation - args = row.split() - self.a = args[0] - self.b = args[1] - self.bstrand = "+" if args[2] == "N" else "-" - self.ahang = int(args[3]) - self.bhang = int(args[4]) - self.overlap = int(args[5]) - self.pctid = float(args[6]) - self.score = int(self.overlap * self.pctid / 100) - self.best = None - - @property - def tag(self): - if self.ahang >= 0: - t = "a->b" if self.bhang > 0 else "b in a" - elif self.ahang < 0: - t = "b->a" if self.bhang < 0 else "a in b" - return t - - -class OVL(LineFile): - def __init__(self, filename): - super().__init__(filename) - fp = must_open(filename) - contained = set() - alledges = defaultdict(list) - for row in fp: - o = OVLLine(row) - self.append(o) - if o.tag == "a in b": - contained.add(o.a) - elif o.tag == "b in a": - contained.add(o.b) - if o.tag == "a->b": - alledges[o.a + "-3`"].append(o) - elif o.tag == "b->a": - alledges[o.a + "-5`"].append(o) - logger.debug( - "Imported {} links. 
Contained tigs: {}".format(len(self), len(contained)) - ) - self.contained = contained - - logger.debug("Pruning edges to keep the mutual best") - for k, v in alledges.items(): - bo = max(v, key=lambda x: x.score) - bo.best = True - - self.graph = BiGraph() - for o in self: - if not o.best: - continue - if o.tag == "a->b": - a, b = o.a, o.b - elif o.tag == "b->a": - a, b = o.b, o.a - if a in contained or b in contained: - continue - bstrand = "<" if o.bstrand == "-" else ">" - self.graph.add_edge(a, b, ">", bstrand, length=o.score) - - -def main(): - - actions = ( - ("bed", "convert ANCHORS file to BED format"), - ("fromblast", "Generate path from BLAST file"), - ("fromovl", "build overlap graph from AMOS overlaps"), - ("happy", "Make graph from happy mapping data"), - ("partition", "Make individual graphs partitioned by happy mapping"), - ("merge", "Merge multiple graphs together and visualize"), - ("connect", "connect contigs using long reads"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def fromovl(args): - """ - %prog graph nucmer2ovl.ovl fastafile - - Build overlap graph from ovl file which is converted using NUCMER2OVL. - """ - p = OptionParser(fromovl.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - ovlfile, fastafile = args - ovl = OVL(ovlfile) - g = ovl.graph - - fw = open("contained.ids", "w") - print("\n".join(sorted(ovl.contained)), file=fw) - - graph_to_agp(g, ovlfile, fastafile, exclude=ovl.contained, verbose=False) - - -def bed(args): - """ - %prog bed anchorsfile - - Convert ANCHORS file to BED format. 
- """ - from collections import defaultdict - from jcvi.compara.synteny import check_beds - from jcvi.formats.bed import Bed - from jcvi.formats.base import get_number - from ..compara.base import AnchorFile - - p = OptionParser(bed.__doc__) - p.add_argument( - "--switch", - default=False, - action="store_true", - help="Switch reference and aligned map elements", - ) - p.add_argument( - "--scale", type=float, help="Scale the aligned map distance by factor" - ) - p.set_beds() - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (anchorsfile,) = args - switch = opts.switch - scale = opts.scale - ac = AnchorFile(anchorsfile) - pairs = defaultdict(list) - for a, b, block_id in ac.iter_pairs(): - pairs[a].append(b) - - qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts) - bd = Bed() - for q in qbed: - qseqid, qstart, qend, qaccn = q.seqid, q.start, q.end, q.accn - if qaccn not in pairs: - continue - for s in pairs[qaccn]: - si, s = sorder[s] - sseqid, sstart, send, saccn = s.seqid, s.start, s.end, s.accn - if switch: - qseqid, sseqid = sseqid, qseqid - qstart, sstart = sstart, qstart - qend, send = send, qend - qaccn, saccn = saccn, qaccn - if scale: - sstart /= scale - try: - newsseqid = get_number(sseqid) - except ValueError: - raise ValueError( - "`{0}` is on `{1}` with no number to extract".format(saccn, sseqid) - ) - bedline = "\t".join( - str(x) - for x in (qseqid, qstart - 1, qend, "{0}:{1}".format(newsseqid, sstart)) - ) - bd.add(bedline) - - bd.print_to_file(filename=opts.outfile, sorted=True) - - -def happy_nodes(row, prefix=None): - row = row.translate(None, "[](){}+-") - scfs = [x.strip() for x in row.split(":")] - if prefix: - scfs = [prefix + x for x in scfs] - return scfs - - -def happy_edges(row, prefix=None): - """ - Convert a row in HAPPY file and yield edges. 
- """ - trans = str.maketrans("[](){}", " ") - row = row.strip().strip("+") - row = row.translate(trans) - scfs = [x.strip("+") for x in row.split(":")] - for a, b in pairwise(scfs): - oa = "<" if a.strip()[0] == "-" else ">" - ob = "<" if b.strip()[0] == "-" else ">" - - is_uncertain = a[-1] == " " or b[0] == " " - - a = a.strip().strip("-") - b = b.strip().strip("-") - - if prefix: - a = prefix + a - b = prefix + b - - yield (a, b, oa, ob), is_uncertain - - -def partition(args): - """ - %prog partition happy.txt synteny.graph - - Select edges from another graph and merge it with the certain edges built - from the HAPPY mapping data. - """ - allowed_format = ("png", "ps") - p = OptionParser(partition.__doc__) - p.add_argument("--prefix", help="Add prefix to the name") - p.add_argument( - "--namestart", - default=0, - type=int, - help="Use a shorter name, starting index", - ) - p.add_argument( - "--format", - default="png", - choices=allowed_format, - help="Generate image of format", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - happyfile, graphfile = args - bg = BiGraph() - bg.read(graphfile, color="red") - prefix = opts.prefix - fp = open(happyfile) - for i, row in enumerate(fp): - nns = happy_nodes(row, prefix=prefix) - nodes = set(nns) - edges = happy_edges(row, prefix=prefix) - - small_graph = BiGraph() - for (a, b, oa, ob), is_uncertain in edges: - color = "gray" if is_uncertain else "black" - small_graph.add_edge(a, b, oa, ob, color=color) - - for (u, v), e in bg.edges.items(): - # Grab edge if both vertices are on the same line - if u in nodes and v in nodes: - uv = (str(u), str(v)) - if uv in small_graph.edges: - e = small_graph.edges[uv] - e.color = "blue" # supported by both evidences - else: - small_graph.add_edge(e) - - print(small_graph, file=sys.stderr) - - pngfile = "A{0:02d}.{1}".format(i + 1, opts.format) - telomeres = (nns[0], nns[-1]) - small_graph.draw( - pngfile, namestart=opts.namestart, 
nodehighlight=telomeres, dpi=72 - ) - - legend = [ - "Edge colors:", - "[BLUE] Experimental + Synteny", - "[BLACK] Experimental certain", - "[GRAY] Experimental uncertain", - "[RED] Synteny only", - "Rectangle nodes are telomeres.", - ] - print("\n".join(legend), file=sys.stderr) - - -def merge(args): - """ - %prog merge graphs - - Merge multiple graphs together and visualize. - """ - p = OptionParser(merge.__doc__) - p.add_argument( - "--colorlist", - default="black,red,pink,blue,green", - help="The color palette", - ) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - colorlist = opts.colorlist.split(",") - assert len(colorlist) >= len(args), "Need more colors in --colorlist" - - g = BiGraph() - for a, c in zip(args, colorlist): - g.read(a, color=c) - - g.draw("merged.png") - - -def happy(args): - """ - %prog happy happy.txt - - Make bi-directed graph from HAPPY mapping data. JCVI encodes uncertainties - in the order of the contigs / scaffolds. - - : separates scaffolds - + means telomere (though the telomere repeats may not show because the - telomere-adjacent sequence is missing) - - means that the scaffold is in reverse orientation to that shown in the 2003 - TIGR scaffolds. - - Ambiguities are represented as follows, using Paul Dear.s description: - [ ] means undetermined orientation. error quite possible (70% confidence?) - ( ) means uncertain orientation. small chance of error (90% confidence?) - { } means uncertain order. 
- - Example: - +-8254707:8254647:-8254690:{[8254694]:[8254713]:[8254531]:[8254797]}:8254802:8254788+ - """ - p = OptionParser(happy.__doc__) - p.add_argument("--prefix", help="Add prefix to the name") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (happyfile,) = args - - certain = "certain.graph" - uncertain = "uncertain.graph" - fw1 = open(certain, "w") - fw2 = open(uncertain, "w") - - fp = open(happyfile) - for row in fp: - for e, is_uncertain in happy_edges(row, prefix=opts.prefix): - fw = fw2 if is_uncertain else fw1 - print(e, file=fw) - - logger.debug("Edges written to `{0}`".format(",".join((certain, uncertain)))) - - -def fromblast(args): - """ - %prog fromblast blastfile subject.fasta - - Generate path from BLAST file. If multiple subjects map to the same query, - an edge is constructed between them (with the link provided by the query). - - The BLAST file MUST be filtered, chained, supermapped. - """ - from jcvi.formats.blast import sort - from jcvi.utils.range import range_distance - - p = OptionParser(fromblast.__doc__) - p.add_argument( - "--clique", - default=False, - action="store_true", - help="Populate clique instead of linear path", - ) - p.add_argument( - "--maxdist", - default=100000, - type=int, - help="Create edge within certain distance", - ) - p.set_verbose(help="Print verbose reports to stdout") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - blastfile, subjectfasta = args - clique = opts.clique - maxdist = opts.maxdist - sort([blastfile, "--query"]) - blast = BlastSlow(blastfile, sorted=True) - g = BiGraph() - for query, blines in groupby(blast, key=lambda x: x.query): - blines = list(blines) - iterator = combinations(blines, 2) if clique else pairwise(blines) - for a, b in iterator: - asub, bsub = a.subject, b.subject - if asub == bsub: - continue - - arange = (a.query, a.qstart, a.qstop, "+") - brange = (b.query, b.qstart, b.qstop, "+") - dist, oo = 
range_distance(arange, brange, distmode="ee") - if dist > maxdist: - continue - - atag = ">" if a.orientation == "+" else "<" - btag = ">" if b.orientation == "+" else "<" - g.add_edge(asub, bsub, atag, btag) - - graph_to_agp(g, blastfile, subjectfasta, verbose=opts.verbose) - - -def graph_to_agp(g, blastfile, subjectfasta, exclude=[], verbose=False): - - from jcvi.formats.agp import order_to_agp - - logger.debug(str(g)) - g.write("graph.txt") - # g.draw("graph.pdf") - - paths = [] - for path in g.iter_paths(): - m, oo = g.path(path) - if len(oo) == 1: # Singleton path - continue - paths.append(oo) - if verbose: - print(m) - print(oo) - - npaths = len(paths) - ntigs = sum(len(x) for x in paths) - logger.debug( - "Graph decomposed to {0} paths with {1} components.".format(npaths, ntigs) - ) - - agpfile = blastfile + ".agp" - sizes = Sizes(subjectfasta) - fwagp = open(agpfile, "w") - scaffolded = set() - for i, oo in enumerate(paths): - ctgorder = [(str(ctg), ("+" if strand else "-")) for ctg, strand in oo] - scaffolded |= set(ctg for ctg, strand in ctgorder) - object = "pmol_{0:04d}".format(i) - order_to_agp(object, ctgorder, sizes.mapping, fwagp) - - # Get the singletons as well - nsingletons = nscaffolded = nexcluded = 0 - for ctg, size in sizes.iter_sizes(): - if ctg in scaffolded: - nscaffolded += 1 - continue - if ctg in exclude: - nexcluded += 1 - continue - - ctgorder = [(ctg, "+")] - object = ctg - order_to_agp(object, ctgorder, sizes.mapping, fwagp) - nsingletons += 1 - logger.debug( - "scaffolded={} excluded={} singletons={}".format( - nscaffolded, nexcluded, nsingletons - ) - ) - - fwagp.close() - logger.debug("AGP file written to `{0}`.".format(agpfile)) - - -def connect(args): - """ - %prog connect assembly.fasta read_mapping.blast - - Connect contigs using long reads. 
- """ - p = OptionParser(connect.__doc__) - p.add_argument( - "--clip", - default=2000, - type=int, - help="Only consider end of contigs", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, blastfile = args - clip = opts.clip - - sizes = Sizes(fastafile).mapping - blast = Blast(blastfile) - blasts = [] - for b in blast: - seqid = b.subject - size = sizes[seqid] - start, end = b.sstart, b.sstop - cstart, cend = min(size, clip), max(0, size - clip) - if start > cstart and end < cend: - continue - blasts.append(b) - - key = lambda x: x.query - blasts.sort(key=key) - g = BiGraph() - for query, bb in groupby(blasts, key=key): - bb = sorted(bb, key=lambda x: x.qstart) - nsubjects = len(set(x.subject for x in bb)) - if nsubjects == 1: - continue - print("\n".join(str(x) for x in bb)) - for a, b in pairwise(bb): - astart, astop = a.qstart, a.qstop - bstart, bstop = b.qstart, b.qstop - if a.subject == b.subject: - continue - - arange = astart, astop - brange = bstart, bstop - ov = range_intersect(arange, brange) - alen = astop - astart + 1 - blen = bstop - bstart + 1 - if ov: - ostart, ostop = ov - ov = ostop - ostart + 1 - - print(ov, alen, blen) - if ov and (ov > alen / 2 or ov > blen / 2): - print("Too much overlap ({0})".format(ov)) - continue - - asub = a.subject - bsub = b.subject - atag = ">" if a.orientation == "+" else "<" - btag = ">" if b.orientation == "+" else "<" - g.add_edge(asub, bsub, atag, btag) - - graph_to_agp(g, blastfile, fastafile, verbose=False) - - -if __name__ == "__main__": - main() diff --git a/jcvi/compara/__init__.py b/jcvi/compara/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/jcvi/compara/__main__.py b/jcvi/compara/__main__.py deleted file mode 100644 index ef7ff4e8..00000000 --- a/jcvi/compara/__main__.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Cluster of comparative genomics analysis methods: SynFind and 
QUOTA-ALIGN algorithms, synteny analysis, QC, etc. -""" - -from ..apps.base import dmain - - -if __name__ == "__main__": - dmain(__file__) diff --git a/jcvi/compara/base.py b/jcvi/compara/base.py deleted file mode 100644 index 972a6ad3..00000000 --- a/jcvi/compara/base.py +++ /dev/null @@ -1,164 +0,0 @@ -from collections import defaultdict -from typing import Dict, Tuple - -from ..apps.base import logger -from ..formats.base import BaseFile, read_block, must_open -from ..utils.range import Range - - -class AnchorFile(BaseFile): - def __init__(self, filename, minsize=0): - super().__init__(filename) - self.blocks = list(self.iter_blocks(minsize=minsize)) - - def iter_blocks(self, minsize=0): - fp = open(self.filename) - for _, lines in read_block(fp, "#"): - lines = [x.split() for x in lines] - if len(lines) >= minsize: - yield lines - - def iter_pairs(self, minsize=0): - block_id = -1 - for rows in self.iter_blocks(minsize=minsize): - block_id += 1 - for row in rows: - a, b = row[:2] - yield a, b, block_id - - def make_ranges(self, order, clip=10): - """Prepare anchors information into a set of ranges for chaining""" - ranges = [] - block_pairs = defaultdict(dict) - blocks = self.blocks - for i, ib in enumerate(blocks): - q, s, t = zip(*ib) - if q[0] not in order: - q, s = s, q - - r = make_range(q, s, t, i, order, block_pairs, clip=clip) - ranges.append(r) - - assert q[0] in order - if s[0] not in order: - continue - - # is_self comparison - q, s = s, q - r = make_range(q, s, t, i, order, block_pairs, clip=clip) - ranges.append(r) - return ranges, block_pairs - - def filter_blocks(self, accepted: Dict[Tuple[str, str], str]): - """ - Filter the blocks based on the accepted pairs. This is used to update - the anchors so that they match the info in the LAST file. 
- """ - new_blocks = [] - nremoved = 0 - ncorrected = 0 - nblocks_removed = 0 - for block in self.blocks: - new_block = [] - for line in block: - a, b, score = line - pair = (a, b) - if pair not in accepted: - nremoved += 1 - continue - av = accepted[pair] - if score != av and score != av + "L": - score = av - ncorrected += 1 - new_block.append((a, b, score)) - if new_block: - new_blocks.append(new_block) - else: - nblocks_removed += 1 - - logger.debug("Removed %d existing anchors", nremoved) - if nblocks_removed: - logger.debug("Removed %d empty blocks", nblocks_removed) - logger.debug("Corrected scores for %d anchors", ncorrected) - self.blocks = new_blocks - - def print_to_file(self, filename="stdout"): - """ - Print the anchors to a file, optionally filtering them based on the - accepted pairs. - """ - fw = must_open(filename, "w") - for block in self.blocks: - print("###", file=fw) - for line in block: - a, b, score = line - print("\t".join((a, b, score)), file=fw) - fw.close() - - logger.debug("Anchors written to `%s`", filename) - - def blast(self, blastfile=None, outfile=None): - """ - convert anchor file to 12 col blast file - """ - from ..formats.blast import BlastSlow, BlastLineByConversion - - if not outfile: - outfile = self.filename + ".blast" - - if blastfile is not None: - blasts = BlastSlow(blastfile).to_dict() - else: - blasts = None - - fw = must_open(outfile, "w", checkexists=True) - nlines = 0 - for a, b, _ in self.iter_pairs(): - if (a, b) in blasts: - bline = blasts[(a, b)] - elif (b, a) in blasts: - bline = blasts[(b, a)] - else: - line = "\t".join((a, b)) - bline = BlastLineByConversion(line, mode="110000000000") - - print(bline, file=fw) - nlines += 1 - fw.close() - - logger.debug("A total of %d BLAST lines written to `%s`", nlines, outfile) - - return outfile - - @property - def is_empty(self): - blocks = self.blocks - return not blocks or not blocks[0] - - -def get_best_pair(qs, ss, ts): - pairs = {} - for q, s, t in zip(qs, ss, ts): - t 
= int(t[:-1]) if t[-1] == "L" else int(t) - if q not in pairs or pairs[q][1] < t: - pairs[q] = (s, t) - - # Discard score - spairs = dict((q, s) for q, (s, t) in pairs.items()) - return spairs - - -def make_range(q, s, t, i, order, block_pairs, clip=10): - pairs = get_best_pair(q, s, t) - score = len(pairs) - block_pairs[i].update(pairs) - - q = [order[x][0] for x in q] - q.sort() - qmin = q[0] - qmax = q[-1] - if qmax - qmin >= 2 * clip: - qmin += clip / 2 - qmax -= clip / 2 - - return Range("0", qmin, qmax, score=score, id=i) diff --git a/jcvi/compara/blastfilter.py b/jcvi/compara/blastfilter.py deleted file mode 100755 index 6b53324e..00000000 --- a/jcvi/compara/blastfilter.py +++ /dev/null @@ -1,325 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog blast_file --qbed query.bed --sbed subject.bed - -Accepts bed format and blast file, and run several BLAST filters below:: - -* Local dup filter: -if the input is query.bed and subject.bed, the script files query.localdups -and subject.localdups are created containing the parent|offspring dups, as -inferred by subjects hitting same query or queries hitting same subject. - -* C-score filter: -see supplementary info for sea anemone genome paper, formula:: - - cscore(A,B) = score(A,B) / - max(best score for A, best score for B) - -Finally a blast.filtered file is created. 
-""" -import sys -import os.path as op - -from collections import defaultdict -from itertools import groupby - -from ..apps.base import OptionParser, logger -from ..compara.synteny import check_beds -from ..formats.blast import Blast -from ..utils.cbook import gene_name -from ..utils.grouper import Grouper - - -def blastfilter_main(blast_file, p, opts): - - qbed, sbed, qorder, sorder, is_self = check_beds(blast_file, p, opts) - - tandem_Nmax = opts.tandem_Nmax - cscore = opts.cscore - exclude = opts.exclude - - fp = open(blast_file) - total_lines = sum(1 for line in fp if line[0] != "#") - logger.debug( - "Load BLAST file `{}` (total {} lines)".format(blast_file, total_lines) - ) - bl = Blast(blast_file) - blasts = sorted(list(bl), key=lambda b: b.score, reverse=True) - - filtered_blasts = [] - seen = set() - ostrip = opts.strip_names - nwarnings = 0 - for b in blasts: - query, subject = b.query, b.subject - if query == subject: - continue - - if ostrip: - query, subject = gene_name(query), gene_name(subject) - if query not in qorder: - if nwarnings < 100: - logger.warning("{} not in {}".format(query, qbed.filename)) - elif nwarnings == 100: - logger.warning("too many warnings.. suppressed") - nwarnings += 1 - continue - if subject not in sorder: - if nwarnings < 100: - logger.warning("{} not in {}".format(subject, sbed.filename)) - elif nwarnings == 100: - logger.warning("too many warnings.. 
suppressed") - nwarnings += 1 - continue - - qi, q = qorder[query] - si, s = sorder[subject] - - if is_self and qi > si: - # move all hits to same side when doing self-self BLAST - query, subject = subject, query - qi, si = si, qi - q, s = s, q - - key = query, subject - if key in seen: - continue - seen.add(key) - b.query, b.subject = [str(k) for k in key] - - b.qi, b.si = qi, si - b.qseqid, b.sseqid = q.seqid, s.seqid - - filtered_blasts.append(b) - - if exclude: - before_filter = len(filtered_blasts) - logger.debug("running excluded pairs (--exclude `{}`) ..".format(exclude)) - filtered_blasts = list(filter_exclude(filtered_blasts, exclude=exclude)) - logger.debug( - "after filter ({}->{}) ..".format(before_filter, len(filtered_blasts)) - ) - - if cscore: - before_filter = len(filtered_blasts) - logger.debug("running the cscore filter (cscore>=%.2f) .." % cscore) - filtered_blasts = list(filter_cscore(filtered_blasts, cscore=cscore)) - logger.debug( - "after filter ({}->{}) ..".format(before_filter, len(filtered_blasts)) - ) - - if tandem_Nmax: - logger.debug( - "running the local dups filter (tandem_Nmax={}) ..".format(tandem_Nmax) - ) - - qtandems = tandem_grouper(filtered_blasts, flip=True, tandem_Nmax=tandem_Nmax) - standems = tandem_grouper(filtered_blasts, flip=False, tandem_Nmax=tandem_Nmax) - - qdups_fh = ( - open(op.splitext(opts.qbed)[0] + ".localdups", "w") - if opts.tandems_only - else None - ) - - if is_self: - for s in standems: - qtandems.join(*s) - qdups_to_mother = write_localdups(qtandems, qbed, qdups_fh) - sdups_to_mother = qdups_to_mother - else: - qdups_to_mother = write_localdups(qtandems, qbed, qdups_fh) - sdups_fh = ( - open(op.splitext(opts.sbed)[0] + ".localdups", "w") - if opts.tandems_only - else None - ) - sdups_to_mother = write_localdups(standems, sbed, sdups_fh) - - if opts.tandems_only: - # write out new .bed after tandem removal - write_new_bed(qbed, qdups_to_mother) - if not is_self: - write_new_bed(sbed, sdups_to_mother) - - # 
just want to use this script as a tandem finder. - # sys.exit() - - before_filter = len(filtered_blasts) - filtered_blasts = list( - filter_tandem(filtered_blasts, qdups_to_mother, sdups_to_mother) - ) - logger.debug( - "after filter ({}->{}) ..".format(before_filter, len(filtered_blasts)) - ) - - blastfilteredfile = blast_file + ".filtered" - fw = open(blastfilteredfile, "w") - write_new_blast(filtered_blasts, fh=fw) - fw.close() - - -def write_localdups(tandems, bed, dups_fh=None): - - tandem_groups = [] - for group in tandems: - rows = [bed[i] for i in group] - # within the tandem groups, genes are sorted with decreasing size - rows.sort(key=lambda a: (-abs(a.end - a.start), a.accn)) - tandem_groups.append([x.accn for x in rows]) - - dups_to_mother = {} - n = 1 - for accns in sorted(tandem_groups): - if dups_fh: - print("\t".join(accns), file=dups_fh) - if n: - n -= 1 - logger.debug("write local dups to file {}".format(dups_fh.name)) - - for dup in accns[1:]: - dups_to_mother[dup] = accns[0] - - return dups_to_mother - - -def write_new_bed(bed, children): - # generate local dup removed annotation files - out_name = "%s.nolocaldups%s" % op.splitext(bed.filename) - logger.debug("write tandem-filtered bed file %s" % out_name) - fh = open(out_name, "w") - for i, row in enumerate(bed): - if row["accn"] in children: - continue - print(row, file=fh) - fh.close() - - -def write_new_blast(filtered_blasts, fh=sys.stdout): - for b in filtered_blasts: - print(b, file=fh) - - -def filter_exclude(blast_list, exclude=None): - """Filter gene pairs from an excluded list - - Args: - blast_list (List[BlastLine]): List of BlastLines - exclude (str, optional): Path to the excluded anchors file. Defaults to None. 
- """ - from .base import AnchorFile - - excluded_pairs = set() - ac = AnchorFile(exclude) - for a, b, block in ac.iter_pairs(): - excluded_pairs.add((a, b)) - excluded_pairs.add((b, a)) - for b in blast_list: - if (b.query, b.subject) in excluded_pairs: - continue - yield b - - -def filter_cscore(blast_list, cscore=0.5): - - best_score = defaultdict(float) - for b in blast_list: - if b.score > best_score[b.query]: - best_score[b.query] = b.score - if b.score > best_score[b.subject]: - best_score[b.subject] = b.score - - for b in blast_list: - cur_cscore = b.score / max(best_score[b.query], best_score[b.subject]) - if cur_cscore > cscore: - yield b - - -def filter_tandem(blast_list, qdups_to_mother, sdups_to_mother): - - mother_blast = [] - for b in blast_list: - if b.query in qdups_to_mother: - b.query = qdups_to_mother[b.query] - if b.subject in sdups_to_mother: - b.subject = sdups_to_mother[b.subject] - mother_blast.append(b) - - mother_blast.sort(key=lambda b: b.score, reverse=True) - seen = {} - for b in mother_blast: - if b.query == b.subject: - continue - key = b.query, b.subject - if key in seen: - continue - seen[key] = None - yield b - - -def tandem_grouper(blast_list, tandem_Nmax=10, flip=True): - if not flip: - simple_blast = [ - (b.query, (b.sseqid, b.si)) for b in blast_list if b.evalue < 1e-10 - ] - else: - simple_blast = [ - (b.subject, (b.qseqid, b.qi)) for b in blast_list if b.evalue < 1e-10 - ] - - simple_blast.sort() - - standems = Grouper() - for name, hits in groupby(simple_blast, key=lambda x: x[0]): - # these are already sorted. 
- hits = [x[1] for x in hits] - for ia, a in enumerate(hits[:-1]): - b = hits[ia + 1] - # on the same chr and rank difference no larger than tandem_Nmax - if b[1] - a[1] <= tandem_Nmax and b[0] == a[0]: - standems.join(a[1], b[1]) - - return standems - - -def main(args): - - p = OptionParser(__doc__) - p.set_beds() - p.set_stripnames() - p.add_argument( - "--tandems_only", - dest="tandems_only", - action="store_true", - default=False, - help="only calculate tandems, write .localdup file and exit.", - ) - p.add_argument( - "--tandem_Nmax", - type=int, - default=10, - help="merge tandem genes within distance", - ) - p.add_argument( - "--cscore", - type=float, - default=0.7, - help="retain hits that have good bitscore. a value of 0.5 means " - "keep all values that are 50% or greater of the best hit. " - "higher is more stringent", - ) - p.add_argument("--exclude", help="Remove anchors from a previous run") - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - blastfilter_main(blastfile, p, opts) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/jcvi/compara/catalog.py b/jcvi/compara/catalog.py deleted file mode 100644 index fe2d4247..00000000 --- a/jcvi/compara/catalog.py +++ /dev/null @@ -1,982 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -import os.path as op -import sys -import string - -from collections import defaultdict -from itertools import product, combinations - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - glob, - logger, - mkdir, - need_update, - sh, -) -from ..apps.align import last as last_main, diamond_blastp_main, blast_main -from ..compara.blastfilter import main as blastfilter_main -from ..compara.quota import main as quota_main -from ..compara.synteny import scan, mcscan, liftover -from ..formats.base import BaseFile, DictFile, must_open -from ..formats.bed import Bed -from ..formats.blast import ( - BlastLine, - cscore, - filter as 
blast_filter, - filtered_blastfile_name, -) -from ..formats.fasta import Fasta -from ..utils.cbook import gene_name -from ..utils.grouper import Grouper - -from .base import AnchorFile -from .synteny import check_beds - - -class OMGFile(BaseFile): - def __init__(self, filename): - super().__init__(filename) - fp = open(filename) - inblock = False - components = [] - component = [] - for row in fp: - if inblock: - atoms = row.split() - natoms = len(atoms) - assert natoms in (0, 7) - if natoms: - gene, taxa = atoms[0], atoms[5] - component.append((gene, taxa)) - else: - inblock = False - components.append(tuple(component)) - - if row.strip().startswith("---"): - inblock = True - component = [] - - if inblock: - components.append(tuple(component)) - self.components = components - - def best(self): - bb = set() - for component in self.components: - size = len(component) - if size > 1: - bb.add(component) - return bb - - -def main(): - actions = ( - ("tandem", "identify tandem gene groups within certain distance"), - ("ortholog", "run a combined synteny and RBH pipeline to call orthologs"), - ("group", "cluster the anchors into ortho-groups"), - ("omgprepare", "prepare weights file to run Sankoff OMG algorithm"), - ("omg", "generate a series of Sankoff OMG algorithm inputs"), - ("omgparse", "parse the OMG outputs to get gene lists"), - ("enrich", "enrich OMG output by pulling genes missed by OMG"), - ("layout", "layout the gene lists"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def get_weights(weightsfiles=None): - if weightsfiles is None: - weightsfiles = glob("*.weights") - - weights = defaultdict(list) - for row in must_open(weightsfiles): - a, b, c = row.split() - weights[a].append((a, b, c)) - return weights - - -def get_edges(weightsfiles=None): - if weightsfiles is None: - weightsfiles = glob("*.weights") - - edges = {} - for row in must_open(weightsfiles): - a, b, c = row.split() - c = int(c) - edges[(a, b)] = c - edges[(b, a)] = c - 
return edges - - -def get_info(): - infofiles = glob("*.info") - info = {} - for row in must_open(infofiles): - a = row.split()[0] - info[a] = row.rstrip() - return info - - -def enrich(args): - """ - %prog enrich omgfile groups ntaxa > enriched.omg - - Enrich OMG output by pulling genes misses by OMG. - """ - p = OptionParser(enrich.__doc__) - p.add_argument( - "--ghost", - default=False, - action="store_true", - help="Add ghost homologs already used", - ) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - omgfile, groupsfile, ntaxa = args - ntaxa = int(ntaxa) - ghost = opts.ghost - - # Get gene pair => weight mapping - weights = get_edges() - info = get_info() - # Get gene => taxon mapping - info = dict((k, v.split()[5]) for k, v in info.items()) - - groups = Grouper() - - fp = open(groupsfile) - for row in fp: - members = row.strip().split(",") - groups.join(*members) - - logger.debug( - "Imported %d families with %d members.", len(groups), groups.num_members - ) - - seen = set() - omggroups = Grouper() - fp = open(omgfile) - for row in fp: - genes, idxs = row.split() - genes = genes.split(",") - seen.update(genes) - omggroups.join(*genes) - - nmembers = omggroups.num_members - logger.debug("Imported %d OMG families with %d members.", len(omggroups), nmembers) - assert nmembers == len(seen) - - alltaxa = set(str(x) for x in range(ntaxa)) - recruited = [] - fp = open(omgfile) - for row in fp: - genes, idxs = row.split() - genes = genes.split(",") - a = genes[0] - - idxs = set(idxs.split(",")) - missing_taxa = alltaxa - idxs - if not missing_taxa: - print(row.rstrip()) - continue - - leftover = groups[a] - if not ghost: - leftover = set(leftover) - seen - - if not leftover: - print(row.rstrip()) - continue - - leftover_sorted_by_taxa = dict( - (k, [x for x in leftover if info[x] == k]) for k in missing_taxa - ) - - # print genes, leftover - # print leftover_sorted_by_taxa - solutions = [] - for solution in 
product(*leftover_sorted_by_taxa.values()): - score = sum(weights.get((a, b), 0) for a in solution for b in genes) - if score == 0: - continue - score += sum(weights.get((a, b), 0) for a, b in combinations(solution, 2)) - solutions.append((score, solution)) - # print solution, score - - best_solution = max(solutions) if solutions else None - if best_solution is None: - print(row.rstrip()) - continue - - # print "best ==>", best_solution - best_score, best_addition = best_solution - genes.extend(best_addition) - recruited.extend(best_addition) - - genes = sorted([(info[x], x) for x in genes]) - idxs, genes = zip(*genes) - - if ghost: # decorate additions so it's clear that they were added - pgenes = [] - for g in genes: - if g in recruited and g in seen: - pgenes.append("|{0}|".format(g)) - else: - pgenes.append(g) - genes = pgenes - - print("\t".join((",".join(genes), ",".join(idxs)))) - if not ghost: - seen.update(best_addition) - - logger.debug("Recruited %d new genes.", len(recruited)) - - -def pairwise_distance(a, b, threadorder): - d = 0 - for x, y in zip(a, b)[:-1]: # Last column not used - x, y = x.strip("|"), y.strip("|") - if "." in (x, y): - dd = 50 - else: - xi, x = threadorder[x] - yi, y = threadorder[y] - dd = min(abs(xi - yi), 50) - d += dd - return d - - -def insert_into_threaded(atoms, threaded, threadorder): - min_idx, min_d = 0, 1000 - for i, t in enumerate(threaded): - # calculate distance - d = pairwise_distance(atoms, t, threadorder) - if d < min_d: - min_idx = i - min_d = d - - i = min_idx - t = threaded[i] - threaded.insert(i, atoms) - logger.debug("Insert %s before %s (d=%d)", atoms, t, min_d) - - -def sort_layout(thread, listfile, column=0): - """ - Sort the syntelog table according to chromomomal positions. First orient the - contents against threadbed, then for contents not in threadbed, insert to - the nearest neighbor. 
- """ - outfile = listfile.rsplit(".", 1)[0] + ".sorted.list" - threadorder = thread.order - fw = open(outfile, "w") - lt = DictFile(listfile, keypos=column, valuepos=None) - threaded = [] - imported = set() - for t in thread: - accn = t.accn - if accn not in lt: - continue - - imported.add(accn) - atoms = lt[accn] - threaded.append(atoms) - - assert len(threaded) == len(imported) - - total = sum(1 for x in open(listfile)) - logger.debug("Total: %d, currently threaded: %d", total, len(threaded)) - fp = open(listfile) - for row in fp: - atoms = row.split() - accn = atoms[0] - if accn in imported: - continue - insert_into_threaded(atoms, threaded, threadorder) - - for atoms in threaded: - print("\t".join(atoms), file=fw) - - fw.close() - logger.debug("File `%s` sorted to `%s`.", outfile, thread.filename) - - -def layout(args): - """ - %prog layout omgfile taxa - - Build column formatted gene lists after omgparse(). Use species list - separated by comma in place of taxa, e.g. "BR,BO,AN,CN" - """ - p = OptionParser(layout.__doc__) - p.add_argument("--sort", help="Sort layout file based on bedfile") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - omgfile, taxa = args - listfile = omgfile.rsplit(".", 1)[0] + ".list" - taxa = taxa.split(",") - ntaxa = len(taxa) - fw = open(listfile, "w") - - data = [] - fp = open(omgfile) - for row in fp: - genes, idxs = row.split() - row = ["."] * ntaxa - genes = genes.split(",") - ixs = [int(x) for x in idxs.split(",")] - for gene, idx in zip(genes, ixs): - row[idx] = gene - txs = ",".join(taxa[x] for x in ixs) - print("\t".join(("\t".join(row), txs)), file=fw) - data.append(row) - - coldata = zip(*data) - ngenes = [] - for i, tx in enumerate(taxa): - genes = [x for x in coldata[i] if x != "."] - genes = set(x.strip("|") for x in genes) - ngenes.append((len(genes), tx)) - - details = ", ".join("{0} {1}".format(a, b) for a, b in ngenes) - total = sum(a for a, b in ngenes) - s = "A list of {0} 
orthologous families that collectively".format(len(data)) - s += " contain a total of {0} genes ({1})".format(total, details) - print(s, file=sys.stderr) - - fw.close() - lastcolumn = ntaxa + 1 - cmd = "sort -k{0},{0} {1} -o {1}".format(lastcolumn, listfile) - sh(cmd) - - logger.debug("List file written to `%s`.", listfile) - sort = opts.sort - if sort: - thread = Bed(sort) - sort_layout(thread, listfile) - - -def omgparse(args): - """ - %prog omgparse work - - Parse the OMG outputs to get gene lists. - """ - p = OptionParser(omgparse.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (work,) = args - omgfiles = glob(op.join(work, "gf*.out")) - for omgfile in omgfiles: - omg = OMGFile(omgfile) - best = omg.best() - for bb in best: - genes, taxa = zip(*bb) - print("\t".join((",".join(genes), ",".join(taxa)))) - - -def group(args): - """ - %prog group anchorfiles - - Group the anchors into ortho-groups. Can input multiple anchor files. - """ - p = OptionParser(group.__doc__) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - anchorfiles = args - groups = Grouper() - for anchorfile in anchorfiles: - ac = AnchorFile(anchorfile) - for a, b, idx in ac.iter_pairs(): - groups.join(a, b) - - logger.debug("Created %d groups with %d members.", len(groups), groups.num_members) - - outfile = opts.outfile - fw = must_open(outfile, "w") - for g in groups: - print(",".join(sorted(g)), file=fw) - fw.close() - - return outfile - - -def omg(args): - """ - %prog omg weightsfile - - Run Sankoff's OMG algorithm to get orthologs. Download OMG code at: - - - This script only writes the partitions, but not launch OMGMec. You may need to: - - $ parallel "java -cp ~/code/OMGMec TestOMGMec {} 4 > {}.out" ::: work/gf????? - - Then followed by omgparse() to get the gene lists. 
- """ - p = OptionParser(omg.__doc__) - - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - weightsfiles = args - groupfile = group(weightsfiles + ["--outfile=groups"]) - - weights = get_weights(weightsfiles) - info = get_info() - - fp = open(groupfile) - - work = "work" - mkdir(work) - for i, row in enumerate(fp): - gf = op.join(work, "gf{0:05d}".format(i)) - genes = row.rstrip().split(",") - - fw = open(gf, "w") - contents = "" - npairs = 0 - for gene in genes: - gene_pairs = weights[gene] - for a, b, c in gene_pairs: - if b not in genes: - continue - - contents += "weight {0}".format(c) + "\n" - contents += info[a] + "\n" - contents += info[b] + "\n\n" - npairs += 1 - - header = "a group of genes :length ={0}".format(npairs) - print(header, file=fw) - print(contents, file=fw) - - fw.close() - - -def geneinfo(bed, genomeidx, ploidy): - bedfile = bed.filename - p = bedfile.split(".")[0] - idx = genomeidx[p] - pd = ploidy[p] - infofile = p + ".info" - - if not need_update(bedfile, infofile): - return infofile - - fwinfo = open(infofile, "w") - - for s in bed: - chr = "".join(x for x in s.seqid if x in string.digits) - try: - chr = int(chr) - except ValueError: - chr = "0" - - print( - "\t".join(str(x) for x in (s.accn, chr, s.start, s.end, s.strand, idx, pd)), - file=fwinfo, - ) - fwinfo.close() - - logger.debug("Update info file `%s`.", infofile) - - return infofile - - -def omgprepare(args): - """ - %prog omgprepare ploidy anchorsfile blastfile - - Prepare to run Sankoff's OMG algorithm to get orthologs. 
- """ - p = OptionParser(omgprepare.__doc__) - p.add_argument("--norbh", action="store_true", help="Disable RBH hits") - p.add_argument( - "--pctid", default=0, type=int, help="Percent id cutoff for RBH hits" - ) - p.add_argument("--cscore", default=90, type=int, help="C-score cutoff for RBH hits") - p.set_stripnames() - p.set_beds() - - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - ploidy, anchorfile, blastfile = args - norbh = opts.norbh - pctid = opts.pctid - cs = opts.cscore - qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) - - fp = open(ploidy) - genomeidx = dict((x.split()[0], i) for i, x in enumerate(fp)) - fp.close() - - ploidy = DictFile(ploidy) - - geneinfo(qbed, genomeidx, ploidy) - geneinfo(sbed, genomeidx, ploidy) - - pf = blastfile.rsplit(".", 1)[0] - cscorefile = pf + ".cscore" - cscore([blastfile, "-o", cscorefile, "--cutoff=0", "--pct"]) - ac = AnchorFile(anchorfile) - pairs = set((a, b) for a, b, i in ac.iter_pairs()) - logger.debug("Imported %d pairs from `%s`.", len(pairs), anchorfile) - - weightsfile = pf + ".weights" - fp = open(cscorefile) - fw = open(weightsfile, "w") - npairs = 0 - for row in fp: - a, b, c, pct = row.split() - c, pct = float(c), float(pct) - c = int(c * 100) - if (a, b) not in pairs: - if norbh: - continue - if c < cs: - continue - if pct < pctid: - continue - c /= 10 # This severely penalizes RBH against synteny - - print("\t".join((a, b, str(c))), file=fw) - npairs += 1 - fw.close() - - logger.debug("Write %d pairs to `%s`.", npairs, weightsfile) - - -def make_ortholog(blocksfile, rbhfile, orthofile): - # Generate mapping both ways - adict = DictFile(rbhfile) - bdict = DictFile(rbhfile, keypos=1, valuepos=0) - adict.update(bdict) - - fp = open(blocksfile) - fw = open(orthofile, "w") - nrecruited = 0 - for row in fp: - a, b = row.split() - if b == ".": - if a in adict: - b = adict[a] - nrecruited += 1 - b += "'" - print("\t".join((a, b)), file=fw) - - 
logger.debug("Recruited %d pairs from RBH.", nrecruited) - fp.close() - fw.close() - - -def ortholog(args): - """ - %prog ortholog species_a species_b - - Run a sensitive pipeline to find orthologs between two species a and b. - The pipeline runs LAST and generate .lifted.anchors. - - `--full` mode would assume 1-to-1 quota synteny blocks as the backbone of - such predictions. Extra orthologs will be recruited from reciprocal best - match (RBH). - """ - p = OptionParser(ortholog.__doc__) - p.add_argument( - "--dbtype", - default="nucl", - choices=("nucl", "prot"), - help="Molecule type of subject database", - ) - - p.add_argument( - "--full", - default=False, - action="store_true", - help="Run in full 1x1 mode, including blocks and RBH", - ) - p.add_argument("--cscore", default=0.7, type=float, help="C-score cutoff") - p.add_argument( - "--dist", default=20, type=int, help="Extent of flanking regions to search" - ) - p.add_argument( - "-n", - "--min_size", - dest="n", - type=int, - default=4, - help="minimum number of anchors in a cluster", - ) - p.add_argument("--quota", help="Quota align parameter") - p.add_argument("--exclude", help="Remove anchors from a previous run") - p.add_argument( - "--self_remove", - default=98, - type=float, - help="Remove self hits that are above this percent identity", - ) - p.add_argument( - "--no_strip_names", - default=False, - action="store_true", - help="Do not strip alternative splicing (e.g. At5g06540.1 -> At5g06540)", - ) - p.add_argument( - "--liftover_dist", - type=int, - help="Distance to extend from liftover. 
Defaults to half of --dist", - ) - p.set_cpus() - dotplot_group = p.set_dotplot_opts() - dotplot_group.add_argument( - "--notex", default=False, action="store_true", help="Do not use tex" - ) - dotplot_group.add_argument( - "--no_dotplot", default=False, action="store_true", help="Do not make dotplot" - ) - p.add_argument( - "--ignore_zero_anchor", - default=False, - action="store_true", - help="Ignore this pair of ortholog identification instead of throwing an error when performing many pairs of cataloging.", - ) - p.add_argument( - "--align_soft", - default="last", - choices=("last", "blast", "diamond_blastp"), - help="Sequence alignment software. Default for both and . Users could also use for both and , or for .", - ) - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - a, b = args - dbtype = opts.dbtype - ignore_zero_anchor = opts.ignore_zero_anchor - suffix = ".cds" if dbtype == "nucl" else ".pep" - abed, afasta = a + ".bed", a + suffix - bbed, bfasta = b + ".bed", b + suffix - ccscore = opts.cscore - quota = opts.quota - exclude = opts.exclude - dist = "--dist={0}".format(opts.dist) - minsize_flag = "--min_size={}".format(opts.n) - cpus_flag = "--cpus={}".format(opts.cpus) - align_soft = opts.align_soft - - aprefix = op.basename(a) - bprefix = op.basename(b) - pprefix = ".".join((aprefix, bprefix)) - qprefix = ".".join((bprefix, aprefix)) - last = pprefix + ".last" - if need_update((afasta, bfasta), last, warn=True): - if align_soft == "blast": - blast_main([bfasta, afasta, cpus_flag], dbtype) - elif dbtype == "prot" and align_soft == "diamond_blastp": - diamond_blastp_main([bfasta, afasta, cpus_flag], dbtype) - else: - last_main([bfasta, afasta, cpus_flag], dbtype) - - self_remove = opts.self_remove - if a == b: - lastself = filtered_blastfile_name(last, self_remove, 0, inverse=True) - if need_update(last, lastself, warn=True): - blast_filter( - [last, "--hitlen=0", f"--pctid={self_remove}", "--inverse", "--noself"] 
- ) - last = lastself - - filtered_last = last + ".filtered" - if need_update(last, filtered_last, warn=True): - # If we are doing filtering based on another file then we don't run cscore anymore - dargs = [last, "--cscore={}".format(ccscore)] - if exclude: - dargs += ["--exclude={}".format(exclude)] - if opts.no_strip_names: - dargs += ["--no_strip_names"] - blastfilter_main(dargs) - - anchors = pprefix + ".anchors" - lifted_anchors = pprefix + ".lifted.anchors" - pdf = pprefix + ".pdf" - if not opts.full: - if need_update(filtered_last, lifted_anchors, warn=True): - dargs = [ - filtered_last, - anchors, - minsize_flag, - dist, - "--liftover={0}".format(last), - ] - if opts.no_strip_names: - dargs += ["--no_strip_names"] - if opts.liftover_dist: - dargs += ["--liftover_dist={}".format(opts.liftover_dist)] - try: - scan(dargs) - except ValueError as e: - if ignore_zero_anchor: - logger.debug(str(e)) - logger.debug("Ignoring this error and continuing...") - return - else: - raise ValueError(e) from e - if quota: - quota_main([lifted_anchors, "--quota={0}".format(quota), "--screen"]) - if need_update(anchors, pdf, warn=True) and not opts.no_dotplot: - from jcvi.graphics.dotplot import dotplot_main - - dargs = [anchors] - if opts.nostdpf: - dargs += ["--nostdpf"] - if opts.nochpf: - dargs += ["--nochpf"] - if opts.skipempty: - dargs += ["--skipempty"] - if opts.genomenames: - dargs += ["--genomenames", opts.genomenames] - if opts.theme: - dargs += ["--theme", opts.theme] - if opts.notex: - dargs += ["--notex"] - dotplot_main(dargs) - return - - if need_update(filtered_last, anchors, warn=True): - if opts.no_strip_names: - scan([filtered_last, anchors, dist, "--no_strip_names"]) - else: - scan([filtered_last, anchors, dist]) - - ooanchors = pprefix + ".1x1.anchors" - if need_update(anchors, ooanchors, warn=True): - quota_main([anchors, "--quota=1:1", "--screen"]) - - lifted_anchors = pprefix + ".1x1.lifted.anchors" - if need_update((last, ooanchors), lifted_anchors, 
warn=True): - if opts.no_strip_names: - liftover([last, ooanchors, dist, "--no_strip_names"]) - else: - liftover([last, ooanchors, dist]) - - pblocks = pprefix + ".1x1.blocks" - qblocks = qprefix + ".1x1.blocks" - if need_update(lifted_anchors, [pblocks, qblocks], warn=True): - mcscan([abed, lifted_anchors, "--iter=1", "-o", pblocks]) - mcscan([bbed, lifted_anchors, "--iter=1", "-o", qblocks]) - - rbh = pprefix + ".rbh" - if need_update(last, rbh, warn=True): - cscore([last, "-o", rbh]) - - portho = pprefix + ".ortholog" - qortho = qprefix + ".ortholog" - if need_update([pblocks, qblocks, rbh], [portho, qortho], warn=True): - make_ortholog(pblocks, rbh, portho) - make_ortholog(qblocks, rbh, qortho) - - -def tandem_main( - blast_file, - cds_file, - bed_file, - N=3, - P=50, - is_self=True, - evalue=0.01, - strip_name=".", - ofile=sys.stderr, - genefam=False, -): - if genefam: - N = 1e5 - - # get the sizes for the CDS first - f = Fasta(cds_file) - sizes = dict(f.itersizes()) - - # retrieve the locations - bed = Bed(bed_file) - order = bed.order - - if is_self: - # filter the blast file - g = Grouper() - fp = open(blast_file) - for row in fp: - b = BlastLine(row) - query_len = sizes[b.query] - subject_len = sizes[b.subject] - if b.hitlen < min(query_len, subject_len) * P / 100.0: - continue - - query = gene_name(b.query, sep=strip_name) - subject = gene_name(b.subject, sep=strip_name) - qi, q = order[query] - si, s = order[subject] - - if abs(qi - si) <= N and b.evalue <= evalue: - if genefam: - g.join(query, subject) - elif q.seqid == s.seqid: - g.join(query, subject) - - else: - homologs = Grouper() - fp = open(blast_file) - for row in fp: - b = BlastLine(row) - query_len = sizes[b.query] - subject_len = sizes[b.subject] - if b.hitlen < min(query_len, subject_len) * P / 100.0: - continue - if b.evalue > evalue: - continue - - query = gene_name(b.query, sep=strip_name) - subject = gene_name(b.subject, sep=strip_name) - homologs.join(query, subject) - - if genefam: - g 
= homologs - else: - g = Grouper() - for i, atom in enumerate(bed): - for x in range(1, N + 1): - if all( - [ - i - x >= 0, - bed[i - x].seqid == atom.seqid, - homologs.joined(bed[i - x].accn, atom.accn), - ] - ): - leni = sizes[bed[i].accn] - lenx = sizes[bed[i - x].accn] - if abs(leni - lenx) > max(leni, lenx) * (1 - P / 100.0): - continue - g.join(bed[i - x].accn, atom.accn) - - # dump the grouper - fw = must_open(ofile, "w") - ngenes, nfamilies = 0, 0 - families = [] - for group in sorted(g): - if len(group) >= 2: - print(",".join(sorted(group)), file=fw) - ngenes += len(group) - nfamilies += 1 - families.append(sorted(group)) - - longest_family = max(families, key=lambda x: len(x)) - - # generate reports - print("Proximal paralogues (dist=%d):" % N, file=sys.stderr) - print("Total %d genes in %d families" % (ngenes, nfamilies), file=sys.stderr) - print( - "Longest families (%d): %s" % (len(longest_family), ",".join(longest_family)), - file=sys.stderr, - ) - - return families - - -def tandem(args): - """ - %prog tandem blast_file cds_file bed_file [options] - - Find tandem gene clusters that are separated by N genes, based on filtered - blast_file by enforcing alignments between any two genes at least 50% - (or user specified value) of either gene. - - pep_file can also be used in same manner. - """ - p = OptionParser(tandem.__doc__) - p.add_argument( - "--tandem_Nmax", - dest="tandem_Nmax", - type=int, - default=3, - help="merge tandem genes within distance", - ) - p.add_argument( - "--percent_overlap", - type=int, - default=50, - help="tandem genes have >=x% aligned sequence, x=0-100", - ) - p.set_align(evalue=0.01) - p.add_argument( - "--not_self", - default=False, - action="store_true", - help="provided is not self blast file", - ) - p.add_argument( - "--strip_gene_name", - dest="sep", - type=str, - default=".", - help="strip alternative splicing. 
Use None for no stripping.", - ) - p.add_argument( - "--genefamily", - dest="genefam", - action="store_true", - help="compile gene families based on similarity", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - blast_file, cds_file, bed_file = args - N = opts.tandem_Nmax - P = opts.percent_overlap - is_self = not opts.not_self - sep = opts.sep - ofile = opts.outfile - - tandem_main( - blast_file, - cds_file, - bed_file, - N=N, - P=P, - is_self=is_self, - evalue=opts.evalue, - strip_name=sep, - ofile=ofile, - genefam=opts.genefam, - ) - - -if __name__ == "__main__": - main() diff --git a/jcvi/compara/fractionation.py b/jcvi/compara/fractionation.py deleted file mode 100644 index 61e59413..00000000 --- a/jcvi/compara/fractionation.py +++ /dev/null @@ -1,854 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Catalog gene losses, and bites within genes. -""" -import sys - -from itertools import groupby - -from ..apps.base import ActionDispatcher, OptionParser, logger, sh -from ..formats.bed import Bed -from ..formats.blast import Blast -from ..utils.cbook import gene_name -from ..utils.grouper import Grouper -from ..utils.range import range_minmax, range_overlap, range_distance - -from .synteny import check_beds - - -def main(): - - actions = ( - # Identify true gene loss - ("loss", "extract likely gene loss candidates"), - ("validate", "confirm synteny loss against CDS bed overlaps"), - ("summary", "provide summary of fractionation"), - ("gaps", "check gene locations against gaps"), - # Gene specific status - ("gffselect", "dump gff for the missing genes"), - ("genestatus", "tag genes based on translation from GMAP models"), - # Specific study for napus (requires specific datasets) - ("napus", "extract gene loss vs diploid ancestors (napus)"), - ("merge", "merge protein quartets table with registry (napus)"), - ("segment", "merge adjacent gene loss into segmental loss (napus)"), - 
("offdiag", "find gene pairs that are off diagonal"), - ("diff", "calculate diff of size of syntenic regions"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def offdiag(args): - """ - %prog offdiag diploid.napus.1x1.lifted.anchors - - Find gene pairs that are off diagnoal. "Off diagonal" are the pairs that are - not on the orthologous chromosomes. For example, napus chrA01 and brapa A01. - """ - p = OptionParser(offdiag.__doc__) - p.set_beds() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (anchorsfile,) = args - qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts) - - fp = open(anchorsfile) - pf = "-".join(anchorsfile.split(".")[:2]) - header = "Block-id|Napus|Diploid|Napus-chr|Diploid-chr|RBH?".split("|") - print("\t".join(header)) - i = -1 - for row in fp: - if row[0] == "#": - i += 1 - continue - q, s, score = row.split() - rbh = "no" if score[-1] == "L" else "yes" - qi, qq = qorder[q] - si, ss = sorder[s] - oqseqid = qseqid = qq.seqid - osseqid = sseqid = ss.seqid - sseqid = sseqid.split("_")[0][-3:] - if qseqid[0] == "A": - qseqid = qseqid[-3:] # A09 => A09 - elif qseqid[0] == "C": - qseqid = "C0" + qseqid[-1] # C9 => C09 - else: - continue - if qseqid == sseqid or sseqid[-2:] == "nn": - continue - block_id = pf + "-block-{0}".format(i) - print("\t".join((block_id, q, s, oqseqid, osseqid, rbh))) - - -def diff(args): - """ - %prog diff simplefile - - Calculate difference of pairwise syntenic regions. 
- """ - from jcvi.utils.cbook import SummaryStats - - p = OptionParser(diff.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (simplefile,) = args - fp = open(simplefile) - data = [x.split() for x in fp] - spans = [] - for block_id, ab in groupby(data[1:], key=lambda x: x[0]): - a, b = list(ab) - aspan, bspan = a[4], b[4] - aspan, bspan = int(aspan), int(bspan) - spans.append((aspan, bspan)) - aspans, bspans = zip(*spans) - dspans = [b - a for a, b, in spans] - s = SummaryStats(dspans) - print("For a total of {0} blocks:".format(len(dspans)), file=sys.stderr) - print("Sum of A: {0}".format(sum(aspans)), file=sys.stderr) - print("Sum of B: {0}".format(sum(bspans)), file=sys.stderr) - print("Sum of Delta: {0} ({1})".format(sum(dspans), s), file=sys.stderr) - - -def estimate_size(accns, bed, order, conservative=True): - """ - Estimate the bp length for the deletion tracks, indicated by the gene accns. - True different levels of estimates vary on conservativeness. - """ - accns = [order[x] for x in accns] - ii, bb = zip(*accns) - mini, maxi = min(ii), max(ii) - if not conservative: # extend one gene - mini -= 1 - maxi += 1 - minb = bed[mini] - maxb = bed[maxi] - assert minb.seqid == maxb.seqid - distmode = "ss" if conservative else "ee" - ra = (minb.seqid, minb.start, minb.end, "+") - rb = (maxb.seqid, maxb.start, maxb.end, "+") - - dist, orientation = range_distance(ra, rb, distmode=distmode) - assert dist != -1 - return dist - - -def segment(args): - """ - %prog segment loss.ids bedfile - - Merge adjacent gene loss into segmental loss. - - Then based on the segmental loss, estimate amount of DNA loss in base pairs. - Two estimates can be given: - - conservative: just within the start and end of a single gene - - aggressive: extend the deletion track to the next gene - - The real deletion size is within these estimates. 
- """ - from jcvi.formats.base import SetFile - - p = OptionParser(segment.__doc__) - p.add_argument( - "--chain", - default=1, - type=int, - help="Allow next N genes to be chained", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - idsfile, bedfile = args - bed = Bed(bedfile) - order = bed.order - ids = SetFile(idsfile) - losses = Grouper() - skip = opts.chain - for i, a in enumerate(bed): - a = a.accn - for j in range(i + 1, i + 1 + skip): - if j >= len(bed): - break - b = bed[j].accn - if a in ids: - losses.join(a, a) - if a in ids and b in ids: - losses.join(a, b) - - losses = list(losses) - singletons = [x for x in losses if len(x) == 1] - segments = [x for x in losses if len(x) > 1] - ns, nm, nt = len(singletons), len(segments), len(losses) - assert ns + nm == nt - - # Summary for all segments - for x in sorted(singletons) + sorted(segments): - print( - "\t".join( - str(x) - for x in ("|".join(sorted(x)), len(x), estimate_size(x, bed, order)) - ) - ) - - # Find longest segment stretch - if segments: - mx, maxsegment = max([(len(x), x) for x in segments]) - print("Longest stretch: run of {0} genes".format(mx), file=sys.stderr) - print(" {0}".format("|".join(sorted(maxsegment))), file=sys.stderr) - seg_asize = sum(estimate_size(x, bed, order) for x in segments) - seg_bsize = sum( - estimate_size(x, bed, order, conservative=False) for x in segments - ) - else: - seg_asize = seg_bsize = 0 - - sing_asize = sum(estimate_size(x, bed, order) for x in singletons) - sing_bsize = sum( - estimate_size(x, bed, order, conservative=False) for x in singletons - ) - total_asize = sing_asize + seg_asize - total_bsize = sing_bsize + seg_bsize - print( - "Singleton ({0}): {1} - {2} bp".format(ns, sing_asize, sing_bsize), - file=sys.stderr, - ) - print( - "Segment ({0}): {1} - {2} bp".format(nm, seg_asize, seg_bsize), file=sys.stderr - ) - print( - "Total ({0}): {1} - {2} bp".format(nt, total_asize, total_bsize), - file=sys.stderr, - 
) - print( - "Average ({0}): {1} bp".format(nt, (total_asize + total_bsize) / 2), - file=sys.stderr, - ) - - -def merge(args): - """ - %prog merge protein-quartets registry LOST - - Merge protein quartets table with dna quartets registry. This is specific - to the napus project. - """ - from jcvi.formats.base import DictFile - - p = OptionParser(merge.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - quartets, registry, lost = args - qq = DictFile(registry, keypos=1, valuepos=3) - lost = DictFile(lost, keypos=1, valuepos=0, delimiter="|") - qq.update(lost) - fp = open(quartets) - cases = { - "AN,CN": 4, - "BO,AN,CN": 8, - "BO,CN": 2, - "BR,AN": 1, - "BR,AN,CN": 6, - "BR,BO": 3, - "BR,BO,AN": 5, - "BR,BO,AN,CN": 9, - "BR,BO,CN": 7, - } - ip = { - "syntenic_model": "Syntenic_model_excluded_by_OMG", - "complete": "Predictable", - "partial": "Truncated", - "pseudogene": "Pseudogene", - "random": "Match_random", - "real_ns": "Transposed", - "gmap_fail": "GMAP_fail", - "AN LOST": "AN_LOST", - "CN LOST": "CN_LOST", - "BR LOST": "BR_LOST", - "BO LOST": "BO_LOST", - "outside": "Outside_synteny_blocks", - "[NF]": "Not_found", - } - for row in fp: - atoms = row.strip().split("\t") - genes = atoms[:4] - tag = atoms[4] - a, b, c, d = [qq.get(x, ".").rsplit("-", 1)[-1] for x in genes] - qqs = [c, d, a, b] - for i, q in enumerate(qqs): - if atoms[i] != ".": - qqs[i] = "syntenic_model" - # Make comment - comment = "Case{0}".format(cases[tag]) - dots = sum([1 for x in genes if x == "."]) - if dots == 1: - idx = genes.index(".") - status = qqs[idx] - status = ip[status] - comment += "-" + status - print(row.strip() + "\t" + "\t".join(qqs + [comment])) - - -def gffselect(args): - """ - %prog gffselect gmaplocation.bed expectedlocation.bed translated.ids tag - - Try to match up the expected location and gmap locations for particular - genes. translated.ids was generated by fasta.translate --ids. 
tag must be - one of "complete|pseudogene|partial". - """ - from jcvi.formats.bed import intersectBed_wao - - p = OptionParser(gffselect.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 4: - sys.exit(not p.print_help()) - - gmapped, expected, idsfile, tag = args - data = get_tags(idsfile) - completeness = dict((a.replace("mrna", "path"), c) for (a, b, c) in data) - - seen = set() - idsfile = expected.rsplit(".", 1)[0] + ".ids" - fw = open(idsfile, "w") - cnt = 0 - for a, b in intersectBed_wao(expected, gmapped): - if b is None: - continue - aname, bbname = a.accn, b.accn - bname = bbname.split(".")[0] - if completeness[bbname] != tag: - continue - if aname == bname: - if bname in seen: - continue - seen.add(bname) - print(bbname, file=fw) - cnt += 1 - fw.close() - - logger.debug("Total {0} records written to `{1}`.".format(cnt, idsfile)) - - -def gaps(args): - """ - %prog gaps idsfile fractionationfile gapsbed - - Check gene locations against gaps. `idsfile` contains a list of IDs to query - into `fractionationfile` in order to get expected locations. 
- """ - from jcvi.formats.base import DictFile - from jcvi.apps.base import popen - from jcvi.utils.cbook import percentage - - p = OptionParser(gaps.__doc__) - p.add_argument("--bdist", default=0, type=int, help="Base pair distance") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - idsfile, frfile, gapsbed = args - bdist = opts.bdist - d = DictFile(frfile, keypos=1, valuepos=2) - bedfile = idsfile + ".bed" - fw = open(bedfile, "w") - fp = open(idsfile) - total = 0 - for row in fp: - id = row.strip() - hit = d[id] - tag, pos = get_tag(hit, None) - seqid, start, end = pos - start, end = max(start - bdist, 1), end + bdist - print("\t".join(str(x) for x in (seqid, start - 1, end, id)), file=fw) - total += 1 - fw.close() - - cmd = "intersectBed -a {0} -b {1} -v | wc -l".format(bedfile, gapsbed) - not_in_gaps = popen(cmd).read() - not_in_gaps = int(not_in_gaps) - in_gaps = total - not_in_gaps - print("Ids in gaps: {1}".format(total, percentage(in_gaps, total)), file=sys.stderr) - - -def get_tags(idsfile): - fp = open(idsfile) - data = [] - for row in fp: - mRNA, label = row.split() - labelatoms = label.split(",") - if label == "complete" or label == "contain_ns,complete": - tag = "complete" - if "cannot_translate" in labelatoms: - tag = "pseudogene" - elif "five_prime_missing" in labelatoms or "three_prime_missing" in labelatoms: - tag = "partial" - data.append((mRNA, label, tag)) - return data - - -def genestatus(args): - """ - %prog genestatus diploid.gff3.exon.ids - - Tag genes based on translation from GMAP models, using fasta.translate() - --ids. 
- """ - p = OptionParser(genestatus.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (idsfile,) = args - data = get_tags(idsfile) - key = lambda x: x[0].split(".")[0] - for gene, cc in groupby(data, key=key): - cc = list(cc) - tags = [x[-1] for x in cc] - if "complete" in tags: - tag = "complete" - elif "partial" in tags: - tag = "partial" - else: - tag = "pseudogene" - print("\t".join((gene, tag))) - - -def summary(args): - """ - %prog summary diploid.napus.fractionation gmap.status - - Provide summary of fractionation. `fractionation` file is generated with - loss(). `gmap.status` is generated with genestatus(). - """ - from jcvi.formats.base import DictFile - from jcvi.utils.cbook import percentage, Registry - - p = OptionParser(summary.__doc__) - p.add_argument("--extra", help="Cross with extra tsv file") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - frfile, statusfile = args - status = DictFile(statusfile) - fp = open(frfile) - registry = Registry() # keeps all the tags for any given gene - for row in fp: - seqid, gene, tag = row.split() - if tag == ".": - registry[gene].append("outside") - else: - registry[gene].append("inside") - if tag[0] == "[": - registry[gene].append("no_syntenic_model") - if tag.startswith("[S]"): - registry[gene].append("[S]") - gstatus = status.get(gene, None) - if gstatus == "complete": - registry[gene].append("complete") - elif gstatus == "pseudogene": - registry[gene].append("pseudogene") - elif gstatus == "partial": - registry[gene].append("partial") - else: - registry[gene].append("gmap_fail") - elif tag.startswith("[NS]"): - registry[gene].append("[NS]") - if "random" in tag or "Scaffold" in tag: - registry[gene].append("random") - else: - registry[gene].append("real_ns") - elif tag.startswith("[NF]"): - registry[gene].append("[NF]") - else: - registry[gene].append("syntenic_model") - - inside = registry.count("inside") - outside = 
registry.count("outside") - syntenic = registry.count("syntenic_model") - non_syntenic = registry.count("no_syntenic_model") - s = registry.count("[S]") - ns = registry.count("[NS]") - nf = registry.count("[NF]") - complete = registry.count("complete") - pseudogene = registry.count("pseudogene") - partial = registry.count("partial") - gmap_fail = registry.count("gmap_fail") - random = registry.count("random") - real_ns = registry.count("real_ns") - - complete_models = registry.get_tag("complete") - pseudogenes = registry.get_tag("pseudogene") - partial_deletions = registry.get_tag("partial") - - m = "{0} inside synteny blocks\n".format(inside) - m += "{0} outside synteny blocks\n".format(outside) - m += "{0} has syntenic gene\n".format(syntenic) - m += "{0} lack syntenic gene\n".format(non_syntenic) - m += "{0} has sequence match in syntenic location\n".format(s) - m += "{0} has sequence match in non-syntenic location\n".format(ns) - m += "{0} has sequence match in un-ordered scaffolds\n".format(random) - m += "{0} has sequence match in real non-syntenic location\n".format(real_ns) - m += "{0} has no sequence match\n".format(nf) - m += "{0} syntenic sequence - complete model\n".format(percentage(complete, s)) - m += "{0} syntenic sequence - partial model\n".format(percentage(partial, s)) - m += "{0} syntenic sequence - pseudogene\n".format(percentage(pseudogene, s)) - m += "{0} syntenic sequence - gmap fail\n".format(percentage(gmap_fail, s)) - print(m, file=sys.stderr) - - aa = ["complete_models", "partial_deletions", "pseudogenes"] - bb = [complete_models, partial_deletions, pseudogenes] - for a, b in zip(aa, bb): - fw = open(a, "w") - print("\n".join(b), file=fw) - fw.close() - - extra = opts.extra - if extra: - registry.update_from(extra) - - fp.seek(0) - fw = open("registry", "w") - for row in fp: - seqid, gene, tag = row.split() - ts = registry[gene] - print("\t".join((seqid, gene, tag, "-".join(ts))), file=fw) - fw.close() - - logger.debug("Registry 
written.") - - -def get_tag(name, order): - if name[0] == "[": - tag, tname = name[1:].split("]") - seqid, se = tname.split(":") - start, end = se.split("-") - start, end = int(start), int(end) - else: - tag = None - xi, x = order[name] - seqid, start, end = x.seqid, x.start, x.end - return tag, (seqid, start, end) - - -def napus(args): - """ - %prog napus napus.bed brapa.boleracea.i1.blocks diploid.napus.fractionation - - Extract napus gene loss vs diploid ancestors. We are looking specifically - for anything that has the pattern: - - BR - BO or BR - BO - | | - AN CN - - Step 1: extract BR - BO syntenic pairs - Step 2: get diploid gene retention patterns from BR or BO as query - Step 3: look for if AN or CN is NS(non-syntenic) or NF(not found) and - specifically with NS, the NS location is actually the homeologous site. - Step 4: categorize gene losses into singleton, or segmental (defined as - consecutive losses with a maximum skip of 1 - """ - from jcvi.utils.cbook import SummaryStats - - p = OptionParser(napus.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - napusbed, brbo, dpnp = args - retention = {} - fp = open(dpnp) - for row in fp: - seqid, query, hit = row.split() - retention[query] = hit - - order = Bed(napusbed).order - - quartetsfile = "quartets" - fp = open(brbo) - fw = open(quartetsfile, "w") - AL = "AN LOST" - CL = "CN LOST" - for row in fp: - br, bo = row.split() - if "." in (br, bo): - continue - an, cn = retention[br], retention[bo] - row = "\t".join((br, bo, an, cn)) - if "." 
in (an, cn): - # print row - continue - - # label loss candidates - antag, anrange = get_tag(an, order) - cntag, cnrange = get_tag(cn, order) - - if range_overlap(anrange, cnrange): - if (antag, cntag) == ("NS", None): - row = row + "\t{0}|{1}".format(AL, br) - if (antag, cntag) == (None, "NS"): - row = row + "\t{0}|{1}".format(CL, bo) - - print(row, file=fw) - fw.close() - - logger.debug("Quartets and gene losses written to `{0}`.".format(quartetsfile)) - - # Parse the quartets file to extract singletons vs.segmental losses - fp = open(quartetsfile) - fw = open(quartetsfile + ".summary", "w") - data = [x.rstrip().split("\t") for x in fp] - skip = 1 # max distance between losses - - g = Grouper() - losses = [(len(x) == 5) for x in data] - for i, d in enumerate(losses): - if not d: - continue - g.join(i, i) - itag = data[i][-1].split("|")[0] - for j in range(i + 1, i + skip + 1): - jtag = data[j][-1].split("|")[0] - if j < len(losses) and losses[j] and itag == jtag: - g.join(i, j) - - losses = list(g) - singletons = [x for x in losses if len(x) == 1] - segments = [x for x in losses if len(x) > 1] - ns, nm = len(singletons), len(segments) - assert len(losses) == ns + nm - - grab_tag = lambda pool, tag: [ - x for x in pool if all(data[z][-1].startswith(tag) for z in x) - ] - - an_loss_singletons = grab_tag(singletons, AL) - cn_loss_singletons = grab_tag(singletons, CL) - als, cls = len(an_loss_singletons), len(cn_loss_singletons) - - an_loss_segments = grab_tag(segments, AL) - cn_loss_segments = grab_tag(segments, CL) - alm, clm = len(an_loss_segments), len(cn_loss_segments) - mixed = len(segments) - alm - clm - assert mixed == 0 - - logger.debug("Singletons: {0} (AN LOSS: {1}, CN LOSS: {2})".format(ns, als, cls)) - logger.debug("Segments: {0} (AN LOSS: {1}, CN LOSS: {2})".format(nm, alm, clm)) - print(SummaryStats([len(x) for x in losses]), file=sys.stderr) - - for x in singletons + segments: - print("### LENGTH =", len(x), file=fw) - for i in x: - 
print("\t".join(data[i]), file=fw) - fw.close() - - -def region_str(region): - return "{0}:{1}-{2}".format(*region) - - -def loss(args): - """ - %prog loss a.b.i1.blocks [a.b-genomic.blast] - - Extract likely gene loss candidates between genome a and b. - """ - p = OptionParser(loss.__doc__) - p.add_argument( - "--bed", - default=False, - action="store_true", - help="Genomic BLAST is in bed format", - ) - p.add_argument("--gdist", default=20, type=int, help="Gene distance") - p.add_argument( - "--bdist", - default=20000, - type=int, - help="Base pair distance", - ) - p.set_beds() - opts, args = p.parse_args(args) - - if len(args) not in (1, 2): - sys.exit(not p.print_help()) - - blocksfile = args[0] - emptyblast = len(args) == 1 - if emptyblast: - genomicblast = "empty.blast" - sh("touch {0}".format(genomicblast)) - else: - genomicblast = args[1] - - gdist, bdist = opts.gdist, opts.bdist - qbed, sbed, qorder, sorder, is_self = check_beds(blocksfile, p, opts) - blocks = [] - fp = open(blocksfile) - genetrack = {} - proxytrack = {} - for row in fp: - a, b = row.split() - genetrack[a] = b - blocks.append((a, b)) - - data = [] - for key, rows in groupby(blocks, key=lambda x: x[-1]): - rows = list(rows) - data.append((key, rows)) - - imax = len(data) - 1 - for i, (key, rows) in enumerate(data): - if i == 0 or i == imax: - continue - if key != ".": - continue - - before, br = data[i - 1] - after, ar = data[i + 1] - bi, bx = sorder[before] - ai, ax = sorder[after] - dist = abs(bi - ai) - if bx.seqid != ax.seqid or dist > gdist: - continue - - start, end = range_minmax(((bx.start, bx.end), (ax.start, ax.end))) - start, end = max(start - bdist, 1), end + bdist - proxy = (bx.seqid, start, end) - for a, b in rows: - proxytrack[a] = proxy - - tags = {} - if opts.bed: - bed = Bed(genomicblast, sorted=False) - key = lambda x: gene_name(x.accn.rsplit(".", 1)[0]) - for query, bb in groupby(bed, key=key): - bb = list(bb) - if query not in proxytrack: - continue - - proxy = 
proxytrack[query] - tag = "NS" - best_b = bb[0] - for b in bb: - hsp = (b.seqid, b.start, b.end) - if range_overlap(proxy, hsp): - tag = "S" - best_b = b - break - - hsp = (best_b.seqid, best_b.start, best_b.end) - proxytrack[query] = hsp - tags[query] = tag - - else: - blast = Blast(genomicblast) - for query, bb in blast.iter_hits(): - bb = list(bb) - query = gene_name(query) - if query not in proxytrack: - continue - - proxy = proxytrack[query] - tag = "NS" - best_b = bb[0] - for b in bb: - hsp = (b.subject, b.sstart, b.sstop) - if range_overlap(proxy, hsp): - tag = "S" - best_b = b - break - - hsp = (best_b.subject, best_b.sstart, best_b.sstop) - proxytrack[query] = hsp - tags[query] = tag - - for b in qbed: - accn = b.accn - target_region = genetrack[accn] - if accn in proxytrack: - target_region = region_str(proxytrack[accn]) - if accn in tags: - ptag = "[{0}]".format(tags[accn]) - else: - ptag = "[NF]" - target_region = ptag + target_region - - print("\t".join((b.seqid, accn, target_region))) - - if emptyblast: - sh("rm -f {0}".format(genomicblast)) - - -def validate(args): - """ - %prog validate diploid.napus.fractionation cds.bed - - Check whether [S] intervals overlap with CDS. 
- """ - from jcvi.formats.bed import intersectBed_wao - - p = OptionParser(validate.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fractionation, cdsbed = args - fp = open(fractionation) - - sbed = "S.bed" - fw = open(sbed, "w") - for row in fp: - a, b, c = row.split() - if not c.startswith("[S]"): - continue - - tag, (seqid, start, end) = get_tag(c, None) - print("\t".join(str(x) for x in (seqid, start - 1, end, b)), file=fw) - - fw.close() - - pairs = {} - for a, b in intersectBed_wao(sbed, cdsbed): - if b is None: - continue - pairs[a.accn] = b.accn - - validated = fractionation + ".validated" - fw = open(validated, "w") - fp.seek(0) - fixed = 0 - for row in fp: - a, b, c = row.split() - if b in pairs: - assert c.startswith("[S]") - c = pairs[b] - fixed += 1 - - print("\t".join((a, b, c)), file=fw) - - logger.debug("Fixed {0} [S] cases in `{1}`.".format(fixed, validated)) - fw.close() - - -if __name__ == "__main__": - main() diff --git a/jcvi/compara/ks.py b/jcvi/compara/ks.py deleted file mode 100644 index 2dfcf3e3..00000000 --- a/jcvi/compara/ks.py +++ /dev/null @@ -1,1176 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Calculation of synonymous substitutions (Ks). 
-""" -import csv -import os -import os.path as op -import sys - -from functools import partial -from itertools import combinations, product -from math import exp, log, pi, sqrt -from typing import Optional - -import numpy as np - -from Bio import AlignIO, SeqIO -from Bio.Align.Applications import ClustalwCommandline, MuscleCommandline - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - Popen, - cleanup, - getpath, - iglob, - logger, - mkdir, - sh, -) -from ..formats.base import LineFile, must_open -from ..graphics.base import AbstractLayout, adjust_spines, markup, plt, savefig -from ..utils.cbook import gene_name -from ..utils.table import write_csv - -CLUSTALW_BIN = partial(getpath, name="CLUSTALW2", warn="warn") -MUSCLE_BIN = partial(getpath, name="MUSCLE", warn="warn") -PAL2NAL_BIN = partial(getpath, name="PAL2NAL", warn="warn") -PAML_BIN = partial(getpath, name="PAML", warn="warn") - - -class AbstractCommandline: - def run(self): - r = Popen(str(self)) - return r.communicate() - - -class YnCommandline(AbstractCommandline): - """Little commandline for yn00.""" - - def __init__(self, ctl_file, command=PAML_BIN("yn00")): - self.ctl_file = ctl_file - self.parameters = [] - self.command = command - - def __str__(self): - return self.command + " %s >/dev/null" % self.ctl_file - - -class MrTransCommandline(AbstractCommandline): - """Simple commandline faker.""" - - def __init__( - self, - prot_align_file, - nuc_file, - output_file, - outfmt="paml", - command=PAL2NAL_BIN("pal2nal.pl"), - ): - self.prot_align_file = prot_align_file - self.nuc_file = nuc_file - self.output_file = output_file - self.outfmt = outfmt - self.command = command - - self.parameters = [] - - def __str__(self): - return self.command + " %s %s -output %s > %s" % ( - self.prot_align_file, - self.nuc_file, - self.outfmt, - self.output_file, - ) - - -def main(): - - actions = ( - ("batch", "compute ks for a set of anchors file"), - ("fromgroups", "flatten the gene families into 
pairs"), - ("prepare", "prepare pairs of sequences"), - ("calc", "calculate Ks between pairs of sequences"), - ("subset", "subset pre-calculated Ks according to pairs file"), - ("gc3", "filter the Ks results to remove high GC3 genes"), - ("report", "generate a distribution of Ks values"), - ("multireport", "generate several Ks value distributions in same figure"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def batch(args): - """ - %prog batch all.cds *.anchors - - Compute Ks values for a set of anchors file. This will generate a bunch of - work directories for each comparisons. The anchorsfile should be in the form - of specie1.species2.anchors. - """ - from jcvi.apps.grid import MakeManager - - p = OptionParser(batch.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - cdsfile = args[0] - anchors = args[1:] - workdirs = [".".join(op.basename(x).split(".")[:2]) for x in anchors] - for wd in workdirs: - mkdir(wd) - - mm = MakeManager() - for wd, ac in zip(workdirs, anchors): - pairscdsfile = wd + ".cds.fasta" - cmd = "python -m jcvi.apps.ks prepare {} {} -o {}".format( - ac, cdsfile, pairscdsfile - ) - mm.add((ac, cdsfile), pairscdsfile, cmd) - ksfile = wd + ".ks" - cmd = "python -m jcvi.apps.ks calc {} -o {} --workdir {}".format( - pairscdsfile, ksfile, wd - ) - mm.add(pairscdsfile, ksfile, cmd) - mm.write() - - -class LayoutLine(object): - def __init__(self, row, delimiter=","): - args = row.rstrip().split(delimiter) - args = [x.strip() for x in args] - self.ksfile = args[0] - self.components = int(args[1]) - self.label = args[2] - self.color = args[3] - self.marker = args[4] - - def __str__(self): - return ", ".join( - str(x) - for x in (self.ksfile, self.components, self.label, self.color, self.marker) - ) - - -class Layout(AbstractLayout): - def __init__(self, filename, delimiter=",", seed: Optional[int] = None): - super().__init__(filename) - if not op.exists(filename): - ksfiles = iglob(".", 
"*.ks") - header = "Ks file|ncomponents|label|color|marker".split("|") - contents = [] - for ksfile in ksfiles: - leg = op.basename(ksfile).rsplit(".", 1)[0] - if leg.count(".") == 1: - leg = leg.replace(".", " *vs.* ") - contents.append((ksfile, "1", leg, "", "")) - write_csv(header, contents, comment=True, filename=filename) - - fp = open(filename) - for row in fp: - if row[0] == "#": - continue - self.append(LayoutLine(row, delimiter=delimiter)) - - self.assign_colors(seed=seed) - self.assign_markers(seed=seed) - - -class KsPlot(object): - def __init__(self, ax, ks_max, bins, legendp="upper left"): - - self.ax = ax - self.ks_max = ks_max - self.interval = ks_max / bins - self.legendp = legendp - self.lines = [] - self.labels = [] - - def add_data( - self, - data, - components=1, - label="Ks", - color="r", - marker=".", - fill=False, - fitted=True, - kde=False, - ): - - ax = self.ax - ks_max = self.ks_max - interval = self.interval - if kde: - marker = None - - line, line_mixture = plot_ks_dist( - ax, - data, - interval, - components, - ks_max, - color=color, - marker=marker, - fill=fill, - fitted=fitted, - kde=kde, - ) - self.lines.append(line) - self.labels.append(label) - - if fitted: - self.lines.append(line_mixture) - self.labels.append(label + " (fitted)") - - def draw(self, title="*Ks* distribution", filename="Ks_plot.pdf"): - - ax = self.ax - ks_max = self.ks_max - lines = self.lines - labels = [markup(x) for x in self.labels] - legendp = self.legendp - if len(lines) > 1: - leg = ax.legend( - lines, - labels, - loc=legendp, - shadow=True, - fancybox=True, - prop={"size": 10}, - ) - leg.get_frame().set_alpha(0.5) - - ax.set_xlim((0, ks_max - self.interval)) - ylim = ax.get_ylim()[-1] - ax.set_ylim(0, ylim) - ax.set_title(markup(title), fontweight="bold") - ax.set_xlabel(markup("Synonymous substitutions per site (*Ks*)")) - ax.set_ylabel("Percentage of gene pairs (bin={})".format(self.interval)) - - ax.set_xticklabels(ax.get_xticks(), family="Helvetica") - 
ax.set_yticklabels(ax.get_yticks(), family="Helvetica") - - adjust_spines(ax, ["left", "bottom"], outward=True) - - if filename: - savefig(filename, dpi=300) - - -def multireport(args): - """ - %prog multireport layoutfile - - Generate several Ks value distributions in the same figure. If the layout - file is missing then a template file listing all ks files will be written. - - The layout file contains the Ks file, number of components, colors, and labels: - - # Ks file, ncomponents, label, color, marker - LAP.sorghum.ks, 1, LAP-sorghum, r, o - SES.sorghum.ks, 1, SES-sorghum, g, + - MOL.sorghum.ks, 1, MOL-sorghum, m, ^ - - If color or marker is missing, then a random one will be assigned. - """ - p = OptionParser(multireport.__doc__) - p.set_outfile(outfile="Ks_plot.pdf") - add_plot_options(p) - opts, args, iopts = p.set_image_options(args, figsize="8x6") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (layoutfile,) = args - ks_min = opts.vmin - ks_max = opts.vmax - bins = opts.bins - fill = opts.fill - layout = Layout(layoutfile, seed=iopts.seed) - print(layout, file=sys.stderr) - - fig = plt.figure(1, (iopts.w, iopts.h)) - ax = fig.add_axes([0.12, 0.13, 0.8, 0.8]) - - kp = KsPlot(ax, ks_max, bins, legendp=opts.legendp) - for lo in layout: - data = KsFile(lo.ksfile) - data = [x.ng_ks for x in data] - data = [x for x in data if ks_min <= x <= ks_max] - kp.add_data( - data, - lo.components, - label=lo.label, - color=lo.color, - marker=lo.marker, - fill=fill, - fitted=opts.fit, - kde=opts.kde, - ) - - kp.draw(title=opts.title, filename=opts.outfile) - - -def get_GC3(cdsfile): - from jcvi.formats.fasta import Fasta - - f = Fasta(cdsfile, lazy=True) - GC3 = {} - for name, rec in f.iteritems_ordered(): - positions = rec.seq[2::3].upper() - gc_counts = sum(1 for x in positions if x in "GC") - gc_ratio = gc_counts * 1.0 / len(positions) - GC3[name] = gc_ratio - - return GC3 - - -def plot_GC3(GC3, cdsfile, fill="white"): - from jcvi.graphics.histogram import 
histogram - - numberfile = "{0}.gc3".format(cdsfile) - fw = must_open(numberfile, "w") - fw.write("\n".join(map(str, GC3.values()))) - fw.close() - histogram( - numberfile, - vmin=0, - vmax=1, - xlabel="GC3", - title=cdsfile, - bins=50, - skip=0, - ascii=False, - fill=fill, - ) - - logger.debug("{0} GC3 values plotted to {1}.pdf".format(len(GC3), numberfile)) - - -def gc3(args): - """ - %prog gc3 ksfile cdsfile [cdsfile2] -o newksfile - - Filter the Ks results to remove high GC3 genes. High GC3 genes are - problematic in Ks calculation - see Tang et al. 2010 PNAS. Specifically, the - two calculation methods produce drastically different results for these - pairs. Therefore we advise to remoeve these high GC3 genes. This is often - the case for studying cereal genes. - - If 2 genomes are involved, the cdsfile of the 2nd genome can be provided - concatenated or separated. - """ - p = OptionParser(gc3.__doc__) - p.add_argument( - "--plot", default=False, action="store_true", help="Also plot the GC3 histogram" - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - outfile = opts.outfile - plot = opts.plot - - if not 1 < len(args) < 4: - sys.exit(not p.print_help()) - - ks_file, cdsfile = args[:2] - GC3 = get_GC3(cdsfile) - if plot: - plot_GC3(GC3, cdsfile, fill="green") - - if len(args) == 3: - cdsfile2 = args[2] - GC3_2 = get_GC3(cdsfile2) - GC3.update(GC3_2) - if plot: - plot_GC3(GC3_2, cdsfile2, fill="lightgreen") - - data = KsFile(ks_file) - noriginals = len(data) - - fw = must_open(outfile, "w") - writer = csv.writer(fw) - writer.writerow(fields.split(",")) - nlines = 0 - cutoff = 0.75 - for d in data: - a, b = d.name.split(";") - aratio, bratio = GC3[a], GC3[b] - if (aratio + bratio) / 2 > cutoff: - continue - writer.writerow(d) - nlines += 1 - logger.debug("{0} records written (from {1}).".format(nlines, noriginals)) - - -def extract_pairs(abed, bbed, groups): - """ - Called by fromgroups(), extract pairs specific to a pair of species. 
- """ - agenome = op.basename(abed.filename).split(".")[0] - bgenome = op.basename(bbed.filename).split(".")[0] - aorder = abed.order - border = bbed.order - pairsfile = "{0}.{1}.pairs".format(agenome, bgenome) - fw = open(pairsfile, "w") - - is_self = abed.filename == bbed.filename - npairs = 0 - for group in groups: - iter = combinations(group, 2) if is_self else product(group, repeat=2) - - for a, b in iter: - if a not in aorder or b not in border: - continue - - print("\t".join((a, b)), file=fw) - npairs += 1 - - logger.debug("File `{0}` written with {1} pairs.".format(pairsfile, npairs)) - - -def fromgroups(args): - """ - %prog fromgroups groupsfile a.bed b.bed ... - - Flatten the gene familes into pairs, the groupsfile is a file with each line - containing the members, separated by comma. The commands also require - several bed files in order to sort the pairs into different piles (e.g. - pairs of species in comparison. - """ - from jcvi.formats.bed import Bed - - p = OptionParser(fromgroups.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - groupsfile = args[0] - bedfiles = args[1:] - beds = [Bed(x) for x in bedfiles] - fp = open(groupsfile) - groups = [row.strip().split(",") for row in fp] - for b1, b2 in product(beds, repeat=2): - extract_pairs(b1, b2, groups) - - -def find_first_isoform(a, f): - if a in f: - return a - for i in range(100): - ia = ".".join((a, str(i))) - if ia in f: - return ia - return a - - -def prepare(args): - """ - %prog prepare pairsfile cdsfile [pepfile] -o paired.cds.fasta - - Pick sequences from cdsfile to form pairs, ready to be calculated. The - pairsfile can be generated from formats.blast.cscore(). The first two - columns contain the pair. 
- """ - from jcvi.formats.fasta import Fasta - - p = OptionParser(prepare.__doc__) - p.set_outfile() - - opts, args = p.parse_args(args) - outfile = opts.outfile - - if len(args) == 2: - pairsfile, cdsfile = args - pepfile = None - elif len(args) == 3: - pairsfile, cdsfile, pepfile = args - else: - sys.exit(not p.print_help()) - - f = Fasta(cdsfile) - fp = open(pairsfile) - fw = must_open(outfile, "w") - if pepfile: - assert outfile != "stdout", "Please specify outfile name." - f2 = Fasta(pepfile) - fw2 = must_open(outfile + ".pep", "w") - for row in fp: - if row[0] == "#": - continue - a, b = row.split()[:2] - if a == b: - logger.debug("Self pairs found: {0} - {1}. Ignored".format(a, b)) - continue - - if a not in f: - a = find_first_isoform(a, f) - assert a, a - if b not in f: - b = find_first_isoform(b, f) - assert b, b - - acds = f[a] - bcds = f[b] - SeqIO.write((acds, bcds), fw, "fasta") - if pepfile: - apep = f2[a] - bpep = f2[b] - SeqIO.write((apep, bpep), fw2, "fasta") - fw.close() - if pepfile: - fw2.close() - - -def calc(args): - """ - %prog calc [prot.fasta] cds.fasta > out.ks - - Protein file is optional. If only one file is given, it is assumed to - be CDS sequences with correct frame (frame 0). Results will be written to - stdout. Both protein file and nucleotide file are assumed to be Fasta format, - with adjacent records as the pairs to compare. - - Author: Haibao Tang , Brad Chapman, Jingping Li - Calculate synonymous mutation rates for gene pairs - - This does the following: - 1. Fetches a protein pair. - 2. Aligns the protein pair with clustalw (default) or muscle. - 3. Convert the output to Fasta format. - 4. Use this alignment info to align gene sequences using PAL2NAL - 5. Run PAML yn00 to calculate synonymous mutation rates. - """ - from jcvi.formats.fasta import translate - - p = OptionParser(calc.__doc__) - p.add_argument( - "--longest", - action="store_true", - help="Get longest ORF, only works if no pep file, e.g. 
ESTs", - ) - p.add_argument( - "--msa", - default="clustalw", - choices=("clustalw", "muscle"), - help="software used to align the proteins", - ) - p.add_argument("--workdir", default=os.getcwd(), help="Work directory") - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) == 1: - protein_file, dna_file = None, args[0] - elif len(args) == 2: - protein_file, dna_file = args - else: - print("Incorrect arguments", file=sys.stderr) - sys.exit(not p.print_help()) - - output_h = must_open(opts.outfile, "w") - print(fields, file=output_h) - work_dir = op.join(opts.workdir, "syn_analysis") - mkdir(work_dir) - - if not protein_file: - protein_file = dna_file + ".pep" - translate_args = [dna_file, "--outfile=" + protein_file] - if opts.longest: - translate_args += ["--longest"] - dna_file, protein_file = translate(translate_args) - - prot_iterator = SeqIO.parse(open(protein_file), "fasta") - dna_iterator = SeqIO.parse(open(dna_file), "fasta") - for p_rec_1, p_rec_2, n_rec_1, n_rec_2 in zip( - prot_iterator, prot_iterator, dna_iterator, dna_iterator - ): - - print("--------", p_rec_1.name, p_rec_2.name, file=sys.stderr) - if opts.msa == "clustalw": - align_fasta = clustal_align_protein((p_rec_1, p_rec_2), work_dir) - elif opts.msa == "muscle": - align_fasta = muscle_align_protein((p_rec_1, p_rec_2), work_dir) - mrtrans_fasta = run_mrtrans(align_fasta, (n_rec_1, n_rec_2), work_dir) - if mrtrans_fasta: - ds_subs_yn, dn_subs_yn, ds_subs_ng, dn_subs_ng = find_synonymous( - mrtrans_fasta, work_dir - ) - if ds_subs_yn is not None: - pair_name = "%s;%s" % (p_rec_1.name, p_rec_2.name) - output_h.write( - "%s\n" - % ( - ",".join( - str(x) - for x in ( - pair_name, - ds_subs_yn, - dn_subs_yn, - ds_subs_ng, - dn_subs_ng, - ) - ) - ) - ) - output_h.flush() - - # Clean-up - sh("rm -rf 2YN.t 2YN.dN 2YN.dS rst rub rst1 syn_analysis") - - -def find_synonymous(input_file, work_dir): - """Run yn00 to find the synonymous subsitution rate for the alignment.""" - cwd = 
os.getcwd() - os.chdir(work_dir) - # create the .ctl file - ctl_file = "yn-input.ctl" - output_file = "nuc-subs.yn" - ctl_h = open(ctl_file, "w") - ctl_h.write( - "seqfile = %s\noutfile = %s\nverbose = 0\n" - % (op.basename(input_file), output_file) - ) - ctl_h.write("icode = 0\nweighting = 0\ncommonf3x4 = 0\n") - ctl_h.close() - - cl = YnCommandline(ctl_file) - print("\tyn00:", cl, file=sys.stderr) - r, e = cl.run() - ds_value_yn = None - ds_value_ng = None - dn_value_yn = None - dn_value_ng = None - - # Nei-Gojobori - output_h = open(output_file) - row = output_h.readline() - while row: - if row.find("Nei & Gojobori") >= 0: - for x in range(5): - row = next(output_h) - dn_value_ng, ds_value_ng = row.split("(")[1].split(")")[0].split() - break - row = output_h.readline() - output_h.close() - - # Yang - output_h = open(output_file) - for line in output_h: - if line.find("+-") >= 0 and line.find("dS") == -1: - parts = line.split(" +-") - ds_value_yn = extract_subs_value(parts[1]) - dn_value_yn = extract_subs_value(parts[0]) - - if ds_value_yn is None or ds_value_ng is None: - h = open(output_file) - print("yn00 didn't work: \n%s" % h.read(), file=sys.stderr) - - os.chdir(cwd) - return ds_value_yn, dn_value_yn, ds_value_ng, dn_value_ng - - -def extract_subs_value(text): - """Extract a subsitution value from a line of text. - - This is just a friendly function to grab a float value for Ks and Kn - values from the junk I get from the last line of the yn00 file. - - Line: - 2 1 52.7 193.3 2.0452 0.8979 0.0193 0.0573 +- 0.0177 - 2.9732 +- 3.2002 - - Parts: - [' 2 1 52.7 193.3 2.0452 0.8979 0.0193 0.0573', - ' 0.0177 2.9732', ' 3.2002\n'] - - So we want 0.0573 for Kn and 2.9732 for Ks. 
- """ - parts = text.split() - value = float(parts[-1]) - - return value - - -def run_mrtrans(align_fasta, recs, work_dir, outfmt="paml"): - """Align nucleotide sequences with mrtrans and the protein alignment.""" - align_file = op.join(work_dir, "prot-align.fasta") - nuc_file = op.join(work_dir, "nuc.fasta") - output_file = op.join(work_dir, "nuc-align.mrtrans") - - # make the prot_align file and nucleotide file - align_h0 = open(align_file + "0", "w") - align_h0.write(str(align_fasta)) - align_h0.close() - prot_seqs = {} - i = 0 - for rec in SeqIO.parse(align_h0.name, "fasta"): - prot_seqs[i] = rec.seq - i += 1 - align_h = open(align_file, "w") - for i, rec in enumerate(recs): - if len(rec.id) > 30: - rec.id = rec.id[:28] + "_" + str(i) - rec.description = "" - print(">{0}\n{1}".format(rec.id, prot_seqs[i]), file=align_h) - align_h.close() - SeqIO.write(recs, open(nuc_file, "w"), "fasta") - - # run the program - cl = MrTransCommandline(align_file, nuc_file, output_file, outfmt=outfmt) - r, e = cl.run() - if e is None: - print("\tpal2nal:", cl, file=sys.stderr) - return output_file - elif e.read().find("could not translate") >= 0: - print("***pal2nal could not translate", file=sys.stderr) - return None - - -def clustal_align_protein(recs, work_dir, outfmt="fasta"): - """ - Align given proteins with clustalw. 
- recs are iterable of Biopython SeqIO objects - """ - fasta_file = op.join(work_dir, "prot-start.fasta") - align_file = op.join(work_dir, "prot.aln") - SeqIO.write(recs, open(fasta_file, "w"), "fasta") - - clustal_cl = ClustalwCommandline( - cmd=CLUSTALW_BIN("clustalw2"), - infile=fasta_file, - outfile=align_file, - outorder="INPUT", - type="PROTEIN", - ) - stdout, stderr = clustal_cl() - - aln_file = open(clustal_cl.outfile) - alignment = AlignIO.read(aln_file, "clustal") - print("\tDoing clustalw alignment: %s" % clustal_cl, file=sys.stderr) - if outfmt == "fasta": - return alignment.format("fasta") - if outfmt == "clustal": - return alignment - - -def muscle_align_protein(recs, work_dir, outfmt="fasta", inputorder=True): - """ - Align given proteins with muscle. - recs are iterable of Biopython SeqIO objects - """ - fasta_file = op.join(work_dir, "prot-start.fasta") - align_file = op.join(work_dir, "prot.aln") - SeqIO.write(recs, open(fasta_file, "w"), "fasta") - - muscle_cl = MuscleCommandline( - cmd=MUSCLE_BIN("muscle"), - input=fasta_file, - out=align_file, - seqtype="protein", - clwstrict=True, - ) - stdout, stderr = muscle_cl() - alignment = AlignIO.read(muscle_cl.out, "clustal") - - if inputorder: - try: - muscle_inputorder(muscle_cl.input, muscle_cl.out) - except ValueError: - return "" - alignment = AlignIO.read(muscle_cl.out, "fasta") - - print("\tDoing muscle alignment: %s" % muscle_cl, file=sys.stderr) - if outfmt == "fasta": - return alignment.format("fasta") - if outfmt == "clustal": - return alignment.format("clustal") - - -def muscle_inputorder(inputfastafile, alnfile, trunc_name=True): - """ - Fix for muscle -stable option according to here: - http://drive5.com/muscle/stable.html - """ - sh("cp {0} {0}.old".format(alnfile), log=False) - maxi = 30 if trunc_name else 1000 - - aa = AlignIO.read(alnfile, "clustal") - alignment = dict((a.id[:maxi], a) for a in aa) - if trunc_name and len(alignment) < len(aa): - raise ValueError("ERROR: The first 30 
chars of your seq names are not unique") - - fw = must_open(alnfile, "w") - for rec in SeqIO.parse(inputfastafile, "fasta"): - a = alignment[rec.id[:maxi]] - fw.write(">{0}\n{1}\n".format(a.id[:maxi], a.seq)) - - fw.close() - sh("rm {0}.old".format(alnfile), log=False) - - -def subset(args): - """ - %prog subset pairsfile ksfile1 ksfile2 ... -o pairs.ks - - Subset some pre-calculated ks ka values (in ksfile) according to pairs - in tab delimited pairsfile/anchorfile. - """ - p = OptionParser(subset.__doc__) - p.add_argument( - "--noheader", action="store_true", help="don't write ksfile header line" - ) - p.add_argument( - "--block", action="store_true", help="preserve block structure in input" - ) - p.set_stripnames() - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - pairsfile, ksfiles = args[0], args[1:] - noheader = opts.noheader - block = opts.block - if block: - noheader = True - outfile = opts.outfile - - ksvals = {} - for ksfile in ksfiles: - ksvals.update( - dict( - (line.name, line) - for line in KsFile(ksfile, strip_names=opts.strip_names) - ) - ) - - fp = open(pairsfile) - fw = must_open(outfile, "w") - - if not noheader: - print(fields, file=fw) - - i = j = 0 - for row in fp: - if row[0] == "#": - if block: - print(row.strip(), file=fw) - continue - a, b = row.split()[:2] - name = ";".join((a, b)) - if name not in ksvals: - name = ";".join((b, a)) - if name not in ksvals: - j += 1 - print("\t".join((a, b, ".", ".")), file=fw) - continue - ksline = ksvals[name] - if block: - print("\t".join(str(x) for x in (a, b, ksline.ks)), file=fw) - else: - ksline.name = ";".join((a, b)) - print(ksline, file=fw) - i += 1 - fw.close() - - logger.debug("{0} pairs not found in ksfiles".format(j)) - logger.debug("{0} ks records written to `{1}`".format(i, outfile)) - return outfile - - -fields = "name,yn_ks,yn_ka,ng_ks,ng_ka" -descriptions = { - "name": "Gene pair", - "yn_ks": "Yang-Nielson Ks estimate", - 
"yn_ka": "Yang-Nielson Ka estimate", - "ng_ks": "Nei-Gojobori Ks estimate", - "ng_ka": "Nei-Gojobori Ka estimate", -} - - -class KsLine: - def __init__(self, row, strip_names=False): - args = row.strip().split(",") - self.name = args[0] - self.yn_ks = self.get_float(args[1]) - self.yn_ka = self.get_float(args[2]) - self.ng_ks = self.get_float(args[3]) - self.ng_ka = self.get_float(args[4]) - self.ks = self.ng_ks - if ";" in self.name: - self.gene_a, self.gene_b = self.name.split(";") - if strip_names: - self.gene_a = gene_name(self.gene_a) - self.gene_b = gene_name(self.gene_b) - - def get_float(self, x): - try: - x = float(x) - except: - x = -1 - return x - - def __str__(self): - return ",".join( - str(x) for x in (self.name, self.yn_ks, self.yn_ka, self.ng_ks, self.ng_ka) - ) - - @property - def anchorline(self): - return "\t".join( - (gene_name(self.gene_a), gene_name(self.gene_b), "{:.3f}".format(self.ks)) - ) - - -class KsFile(LineFile): - def __init__(self, filename, strip_names=False): - super().__init__(filename) - - fp = open(filename) - for row in fp: - ksline = KsLine(row, strip_names=strip_names) - if ksline.name == "name": # header - continue - self.append(ksline) - - logger.debug( - "File `{0}` contains a total of {1} gene pairs".format(filename, len(self)) - ) - - def print_to_anchors(self, outfile): - fw = must_open(outfile, "w") - for row in self: - print(row.anchorline, file=fw) - fw.close() - - -def my_hist(ax, l, interval, max_r, color="g", marker=".", fill=False, kde=False): - if not l: - return - - n, p = [], [] - total_len = len(l) - for i in np.arange(0, max_r, interval): - xmin, xmax = i - 0.5 * interval, i + 0.5 * interval - nx = [x for x in l if xmin <= x < xmax] - n.append(i) - p.append(len(nx) * 100.0 / total_len) - - if kde: - from scipy import stats - - kernel = stats.gaussian_kde(l) - n = np.arange(0, max_r, interval) - kn = kernel(n) - p = kn / sum(kn) * 100 - - if fill: - from pylab import poly_between - - xs, ys = poly_between(n, 
0, p) - line = ax.fill(xs, ys, fc=color, alpha=0.5) - - else: - line = ax.plot( - n, p, color=color, lw=2, ms=3, marker=marker, mfc="w", mec=color, mew=2 - ) - - return line - - -def lognormpdf(bins, mu, sigma): - return np.exp(-((np.log(bins) - mu) ** 2) / (2 * sigma**2)) / ( - bins * sigma * sqrt(2 * pi) - ) - - -def lognormpdf_mix(bins, probs, mus, sigmas, interval=0.1): - y = 0 - for prob, mu, sigma in zip(probs, mus, sigmas): - y += prob * lognormpdf(bins, mu, sigma) - y *= 100 * interval # Percentage - - return y - - -def get_mixture(data, components): - """ - probs = [.476, .509] - mus = [.69069, -.15038] - variances = [.468982e-1, .959052e-1] - """ - from jcvi.apps.base import popen - - probs, mus, sigmas = [], [], [] - fw = must_open("tmp", "w") - log_data = [log(x) for x in data if x > 0.05] - data = "\n".join(["%.4f" % x for x in log_data]).replace("inf\n", "") - fw.write(data) - fw.close() - - cmd = "gmm-bic {0} {1} {2}".format(components, len(log_data), fw.name) - pipe = popen(cmd) - - for row in pipe: - if row[0] != "#": - continue - - atoms = row.split(",") - a, b, c = atoms[1:4] - a = float(a) - b = float(b) - c = float(c) - - mus.append(a) - sigmas.append(b) - probs.append(c) - - cleanup(fw.name) - return probs, mus, sigmas - - -def plot_ks_dist( - ax, - data, - interval, - components, - ks_max, - color="r", - marker=".", - fill=False, - fitted=True, - kde=False, -): - - (line,) = my_hist( - ax, data, interval, ks_max, color=color, marker=marker, fill=fill, kde=kde - ) - logger.debug("Total {0} pairs after filtering.".format(len(data))) - - line_mixture = None - if fitted: - probs, mus, variances = get_mixture(data, components) - - iv = 0.001 - bins = np.arange(iv, ks_max, iv) - y = lognormpdf_mix(bins, probs, mus, variances, interval) - - (line_mixture,) = ax.plot(bins, y, ":", color=color, lw=3) - - for i in range(components): - peak_val = exp(mus[i]) - mixline = lognormpdf_mix(peak_val, probs, mus, variances, interval) - ax.text( - peak_val, - 
mixline, - "Ks=%.2f" % peak_val, - color="w", - size=10, - bbox=dict(ec="w", fc=color, alpha=0.6, boxstyle="round"), - ) - - return line, line_mixture - - -def add_plot_options(p): - p.add_argument( - "--fit", default=False, action="store_true", help="Plot fitted lines" - ) - p.add_argument( - "--kde", default=False, action="store_true", help="Use KDE smoothing" - ) - p.add_argument("--vmin", default=0.0, type=float, help="Minimum value, inclusive") - p.add_argument("--vmax", default=3.0, type=float, help="Maximum value, inclusive") - p.add_argument( - "--bins", default=60, type=int, help="Number of bins to plot in the histogram" - ) - p.add_argument("--legendp", default="upper right", help="Place of the legend") - p.add_argument( - "--fill", - default=False, - action="store_true", - help="Do not fill the histogram area", - ) - p.add_argument("--title", default="*Ks* distribution", help="Title of the plot") - - -def report(args): - """ - %prog report ksfile - - generate a report given a Ks result file (as produced by synonymous_calc.py). 
- describe the median Ks, Ka values, as well as the distribution in stem-leaf plot - """ - from jcvi.utils.cbook import SummaryStats - from jcvi.graphics.histogram import stem_leaf_plot - - p = OptionParser(report.__doc__) - p.add_argument( - "--pdf", - default=False, - action="store_true", - help="Generate graphic output for the histogram", - ) - p.add_argument( - "--components", - default=1, - type=int, - help="Number of components to decompose peaks", - ) - add_plot_options(p) - opts, args, iopts = p.set_image_options(args, figsize="5x5") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (ks_file,) = args - data = KsFile(ks_file) - ks_min = opts.vmin - ks_max = opts.vmax - bins = opts.bins - - for f in fields.split(",")[1:]: - columndata = [getattr(x, f) for x in data] - ks = "ks" in f - if not ks: - continue - - columndata = [x for x in columndata if ks_min <= x <= ks_max] - - st = SummaryStats(columndata) - title = "{0} ({1}): ".format(descriptions[f], ks_file) - title += "Median:{0:.3f} (1Q:{1:.3f}|3Q:{2:.3f}||".format( - st.median, st.firstq, st.thirdq - ) - title += "Mean:{0:.3f}|Std:{1:.3f}||N:{2})".format(st.mean, st.sd, st.size) - - tbins = (0, ks_max, bins) if ks else (0, 0.6, 10) - digit = 2 if (ks_max * 1.0 / bins) < 0.1 else 1 - stem_leaf_plot(columndata, *tbins, digit=digit, title=title) - - if not opts.pdf: - return - - components = opts.components - data = [x.ng_ks for x in data] - data = [x for x in data if ks_min <= x <= ks_max] - - fig = plt.figure(1, (iopts.w, iopts.h)) - ax = fig.add_axes([0.12, 0.1, 0.8, 0.8]) - kp = KsPlot(ax, ks_max, opts.bins, legendp=opts.legendp) - kp.add_data(data, components, fill=opts.fill, fitted=opts.fit, kde=opts.kde) - kp.draw(title=opts.title) - - -if __name__ == "__main__": - main() diff --git a/jcvi/compara/pad.py b/jcvi/compara/pad.py deleted file mode 100644 index dad5fda5..00000000 --- a/jcvi/compara/pad.py +++ /dev/null @@ -1,314 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -This 
implements the method described in Tang et al. 2010 PNAS paper, - - -Angiosperm genome comparisons reveal early polyploidy in the monocot lineage - -The main pipeline assumes starting with defined synteny blocks in .anchors -format (use compara.synteny.scan()), then segment the chromosomes and cluster -segments according to the matching patterns. Finally the putative ancestral -regions (PAR) are identified and visualized. -""" -import os.path as op -import sys - -from math import log - -import numpy as np - -from more_itertools import pairwise - -from ..apps.base import ActionDispatcher, OptionParser, logger, need_update, sh -from ..formats.bed import Bed -from ..formats.blast import BlastLine - -from .base import AnchorFile -from .synteny import check_beds - - -def main(): - - actions = ( - ("cluster", "cluster the segments"), - ("pad", "test and reconstruct candidate PADs"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def make_arrays(blastfile, qpadbed, spadbed, qpadnames, spadnames): - """ - This function makes three matrices: observed, expected and logmp. The logmp - contains the statistical significance for each comparison. 
- """ - m, n = len(qpadnames), len(spadnames) - qpadorder, spadorder = qpadbed.order, spadbed.order - qpadid = dict((a, i) for i, a in enumerate(qpadnames)) - spadid = dict((a, i) for i, a in enumerate(spadnames)) - qpadlen = dict((a, len(b)) for a, b in qpadbed.sub_beds()) - spadlen = dict((a, len(b)) for a, b in spadbed.sub_beds()) - - qsize, ssize = len(qpadbed), len(spadbed) - - assert sum(qpadlen.values()) == qsize - assert sum(spadlen.values()) == ssize - - # Populate arrays of observed counts and expected counts - logger.debug("Initialize array of size ({0} x {1})".format(m, n)) - observed = np.zeros((m, n)) - fp = open(blastfile) - all_dots = 0 - for row in fp: - b = BlastLine(row) - qi, q = qpadorder[b.query] - si, s = spadorder[b.subject] - qseqid, sseqid = q.seqid, s.seqid - qsi, ssi = qpadid[qseqid], spadid[sseqid] - observed[qsi, ssi] += 1 - all_dots += 1 - - assert int(round(observed.sum())) == all_dots - - logger.debug("Total area: {0} x {1}".format(qsize, ssize)) - S = qsize * ssize - expected = np.zeros((m, n)) - qsum = 0 - for i, a in enumerate(qpadnames): - alen = qpadlen[a] - qsum += alen - for j, b in enumerate(spadnames): - blen = spadlen[b] - expected[i, j] = all_dots * alen * blen * 1.0 / S - - assert int(round(expected.sum())) == all_dots - - # Calculate the statistical significance for each cell - from scipy.stats.distributions import poisson - - logmp = np.zeros((m, n)) - for i in range(m): - for j in range(n): - obs, exp = observed[i, j], expected[i, j] - pois = max(poisson.pmf(obs, exp), 1e-250) # Underflow - logmp[i, j] = max(-log(pois), 0) - - return logmp - - -def pad(args): - """ - %prog pad blastfile cdtfile --qbed q.pad.bed --sbed s.pad.bed - - Test and reconstruct candidate PADs. 
- """ - from jcvi.formats.cdt import CDT - - p = OptionParser(pad.__doc__) - p.set_beds() - p.add_argument( - "--cutoff", - default=0.3, - type=float, - help="The clustering cutoff to call similar", - ) - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - cutoff = opts.cutoff - blastfile, cdtfile = args - qbed, sbed, qorder, sorder, is_self = check_beds(blastfile, p, opts) - - cdt = CDT(cdtfile) - qparts = list(cdt.iter_partitions(cutoff=cutoff)) - sparts = list(cdt.iter_partitions(cutoff=cutoff, gtr=False)) - - qid, sid = {}, {} - for i, part in enumerate(qparts): - qid.update(dict((x, i) for x in part)) - for i, part in enumerate(sparts): - sid.update(dict((x, i) for x in part)) - - # Without writing files, conversion from PAD to merged PAD is done in memory - for q in qbed: - q.seqid = qid[q.seqid] - for s in sbed: - s.seqid = sid[s.seqid] - - qnames = range(len(qparts)) - snames = range(len(sparts)) - - logmp = make_arrays(blastfile, qbed, sbed, qnames, snames) - m, n = logmp.shape - pvalue_cutoff = 1e-30 - cutoff = -log(pvalue_cutoff) - - significant = [] - for i in range(m): - for j in range(n): - score = logmp[i, j] - if score < cutoff: - continue - significant.append((qparts[i], sparts[j], score)) - - for a, b, score in significant: - print("|".join(a), "|".join(b), score) - - logger.debug( - "Collected {0} PAR comparisons significant at (P < {1}).".format( - len(significant), pvalue_cutoff - ) - ) - - return significant - - -def get_segments(ranges, extra, minsegment=40): - """ - Given a list of Range, perform chaining on the ranges and select a highest - scoring subset and cut based on their boundaries. Let's say the projection - of the synteny blocks onto one axis look like the following. - - 1=====10......20====30....35====~~ - - Then the segmentation will yield a block [1, 20), [20, 35), using an - arbitrary right extension rule. Extra are additional end breaks for - chromosomes. 
- """ - from jcvi.utils.range import range_chain, LEFT, RIGHT - - NUL = 2 - selected, score = range_chain(ranges) - - endpoints = [(x.start, NUL) for x in selected] - endpoints += [(x[0], LEFT) for x in extra] - endpoints += [(x[1], RIGHT) for x in extra] - endpoints.sort() - - current_left = 0 - for a, ai in endpoints: - - if ai == LEFT: - current_left = a - if ai == RIGHT: - yield current_left, a - elif ai == NUL: - if a - current_left < minsegment: - continue - yield current_left, a - 1 - current_left = a - - -def write_PAD_bed(bedfile, prefix, pads, bed): - - fw = open(bedfile, "w") - padnames = ["{0}:{1:05d}-{2:05d}".format(prefix, a, b) for a, b in pads] - for a, b in pairwise(padnames): - assert a != b, a - - j = 0 - # Assign all genes to new partitions - for i, x in enumerate(bed): - a, b = pads[j] - if i > b: - j += 1 - a, b = pads[j] - print("\t".join((padnames[j], str(i), str(i + 1), x.accn)), file=fw) - - fw.close() - - npads = len(pads) - logger.debug("{0} partition written in `{1}`.".format(npads, bedfile)) - return npads, padnames - - -def cluster(args): - """ - %prog cluster blastfile anchorfile --qbed qbedfile --sbed sbedfile - - Cluster the segments and form PAD. This is the method described in Tang et - al. (2010) PNAS paper. The anchorfile defines a list of synteny blocks, - based on which the genome on one or both axis can be chopped up into pieces - and clustered. 
- """ - from jcvi.utils.range import Range - - p = OptionParser(cluster.__doc__) - p.set_beds() - p.add_argument( - "--minsize", default=10, type=int, help="Only segment using blocks >= size" - ) - p.add_argument( - "--path", default="~/scratch/bin", help="Path to the CLUSTER 3.0 binary" - ) - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - blastfile, anchorfile = args - qbed, sbed, qorder, sorder, is_self = check_beds(blastfile, p, opts) - - minsize = opts.minsize - ac = AnchorFile(anchorfile) - qranges, sranges = [], [] - qextra = [x[1:] for x in qbed.get_breaks()] - sextra = [x[1:] for x in sbed.get_breaks()] - - id = 0 - for block in ac.iter_blocks(minsize=minsize): - q, s = list(zip(*block))[:2] - q = [qorder[x][0] for x in q] - s = [sorder[x][0] for x in s] - minq, maxq = min(q), max(q) - mins, maxs = min(s), max(s) - id += 1 - - qr = Range("0", minq, maxq, maxq - minq, id) - sr = Range("0", mins, maxs, maxs - mins, id) - qranges.append(qr) - sranges.append(sr) - - qpads = list(get_segments(qranges, qextra)) - spads = list(get_segments(sranges, sextra)) - - suffix = ".pad.bed" - qpf = opts.qbed.split(".")[0] - spf = opts.sbed.split(".")[0] - qpadfile = qpf + suffix - spadfile = spf + suffix - qnpads, qpadnames = write_PAD_bed(qpadfile, qpf, qpads, qbed) - snpads, spadnames = write_PAD_bed(spadfile, spf, spads, sbed) - - qpadbed, spadbed = Bed(qpadfile), Bed(spadfile) - - logmp = make_arrays(blastfile, qpadbed, spadbed, qpadnames, spadnames) - m, n = logmp.shape - - matrixfile = ".".join((qpf, spf, "logmp.txt")) - fw = open(matrixfile, "w") - header = ["o"] + spadnames - print("\t".join(header), file=fw) - for i in range(m): - row = [qpadnames[i]] + ["{0:.1f}".format(x) for x in logmp[i, :]] - print("\t".join(row), file=fw) - - fw.close() - - # Run CLUSTER 3.0 (Pearson correlation, average linkage) - cmd = op.join(opts.path, "cluster") - cmd += " -g 2 -e 2 -m a -f {0}".format(matrixfile) - pf = matrixfile.rsplit(".", 
1)[0] - cdtfile = pf + ".cdt" - if need_update(matrixfile, cdtfile): - sh(cmd) - - -if __name__ == "__main__": - main() diff --git a/jcvi/compara/pedigree.py b/jcvi/compara/pedigree.py deleted file mode 100644 index 94536fca..00000000 --- a/jcvi/compara/pedigree.py +++ /dev/null @@ -1,270 +0,0 @@ -""" -Pedigree file manipulation. -""" - -import sys - -from collections import Counter -from dataclasses import dataclass -from random import sample -from typing import Dict, Optional - -import networkx as nx -import numpy as np - -from ..apps.base import OptionParser, ActionDispatcher, logger -from ..formats.base import BaseFile -from ..graphics.base import set3_n - - -@dataclass -class Sample: - """ - A sample in the pedigree file. - """ - - name: str - dad: Optional[str] - mom: Optional[str] - - @property - def is_terminal(self) -> bool: - """ - Return True if the sample is terminal. - """ - return self.dad is None and self.mom is None - - -@dataclass -class SampleInbreeding: - """ - Store inbreeding information for a sample. - """ - - name: str - mean_inbreeding: float - std_inbreeding: float - dosage: Dict[str, float] - - def __str__(self): - return f"{self.name}\t{self.mean_inbreeding:.4f}\t{self.std_inbreeding:.4f}" - - -class Pedigree(BaseFile, dict): - """ - Read a pedigree file and store the information. 
- """ - - def __init__(self, pedfile: str): - super().__init__(pedfile) - with open(self.filename, encoding="utf-8") as fp: - for row in fp: - row = row.strip() - if row[0] == "#": # header - continue - if not row: - continue - atoms = row.split() - _, name, dad, mom = atoms[:4] - dad = dad if dad != "0" else None - mom = mom if mom != "0" else None - s = Sample(name, dad, mom) - self[s.name] = s - self._check() - - def _check(self): - """ - # Check if all nodes are assigned, including the roots - """ - terminal_nodes = set() - for s in self: - dad, mom = self[s].dad, self[s].mom - if dad and dad not in self: - terminal_nodes.add(dad) - if mom and mom not in self: - terminal_nodes.add(mom) - for s in terminal_nodes: - logger.info("Adding %s to pedigree", s) - self[s] = Sample(s, None, None) - self.terminal_nodes = terminal_nodes - - def to_graph( - self, inbreeding_dict: Dict[str, SampleInbreeding], title: str = "" - ) -> nx.DiGraph: - """ - Convert the pedigree to a graph. - """ - graph_styles = {"labelloc": "b", "label": title, "splines": "curved"} - edge_styles = {"arrowhead": "none", "color": "lightslategray"} - G = nx.DiGraph(**graph_styles) - for s in self: - dad, mom = self[s].dad, self[s].mom - if dad: - G.add_edge(dad, s, **edge_styles) - if mom: - G.add_edge(mom, s, **edge_styles) - # Map colors to terminal nodes - terminal_nodes = [s for s in self if self[s].is_terminal] - colors = dict(zip(terminal_nodes, set3_n(len(terminal_nodes)))) - for s in self: - inb = inbreeding_dict[s] - label = s - if inb.mean_inbreeding > 0.01: - label += f"\n(F={inb.mean_inbreeding:.2f})" - dosage = inb.dosage - fillcolor = [f"{colors[k]};{v:.2f}" for k, v in dosage.items()] - fillcolor = ":".join(fillcolor) - # Hack to make the color appear on the wedge - if fillcolor.count(";") == 1: - fillcolor += ":white" - else: - fillcolor = fillcolor.rsplit(";", 1)[0] - node_styles = { - "color": "none", - "fillcolor": fillcolor, - "fixedsize": "true", - "fontname": "Helvetica", - 
"fontsize": "10", - "height": "0.6", - "label": label, - "shape": "circle", - "style": "wedged", - "width": "0.6", - } - for k, v in node_styles.items(): - G._node[s][k] = v - return G - - -class GenotypeCollection(dict): - """ - Store genotypes for each sample. - """ - - def add(self, s: str, ploidy: int): - """ - Add genotypes for a fixed sample (usually terminal). - """ - self[s] = [f"{s}_{i:02d}" for i in range(ploidy)] - - def cross(self, s: str, dad: str, mom: str, ploidy: int): - """ - Cross two samples to generate genotype for a new sample. - """ - dad_genotype = self[dad] - mom_genotype = self[mom] - gamete_ploidy = ploidy // 2 - dad_gamete = sample(dad_genotype, gamete_ploidy) - mom_gamete = sample(mom_genotype, gamete_ploidy) - sample_genotype = sorted(dad_gamete + mom_gamete) - self[s] = sample_genotype - - def inbreeding_coef(self, s: str) -> float: - """ - Calculate inbreeding coefficient for a sample. - - Traditional inbreeding coefficient (F) is a measure of the probability - that two alleles at a locus are identical by descent. This definition is - not applicable for polyploids. - - Here we use a simpler measure of inbreeding coefficient, which is the - proportion of alleles that are non-unique in a genotype. Or we should - really call it "Proportion inbred". - """ - genotype = self[s] - ploidy = len(genotype) - unique = len(set(genotype)) - return 1 - unique / ploidy - - def dosage(self, s: str) -> Counter: - """ - Calculate dosage for a sample. - """ - genotype = self[s] - return Counter(allele.rsplit("_", 1)[0] for allele in genotype) - - -def simulate_one_iteration(ped: Pedigree, ploidy: int) -> GenotypeCollection: - """ - Simulate one iteration of genotypes. 
- """ - genotypes = GenotypeCollection() - while len(genotypes) < len(ped): - for s in ped: - if ped[s].is_terminal: - genotypes.add(s, ploidy=ploidy) - else: - dad, mom = ped[s].dad, ped[s].mom - if dad not in genotypes or mom not in genotypes: - continue - genotypes.cross(s, dad, mom, ploidy=ploidy) - return genotypes - - -def calculate_inbreeding( - ped: Pedigree, - ploidy: int, - N: int, -) -> Dict[str, SampleInbreeding]: - """ - Wrapper to calculate inbreeding coefficients for a sample. - """ - logger.info("Simulating %d samples with ploidy=%d", N, ploidy) - all_collections = [] - for _ in range(N): - genotypes = simulate_one_iteration(ped, ploidy) - all_collections.append(genotypes) - - results = {} - for s in ped: - inbreeding_coefs = [ - genotypes.inbreeding_coef(s) for genotypes in all_collections - ] - dosages = [genotypes.dosage(s) for genotypes in all_collections] - dosage = sum(dosages, Counter()) - # normalize - dosage = {k: round(v / (ploidy * N), 3) for k, v in dosage.items()} - mean_inbreeding = float(np.mean(inbreeding_coefs)) - std_inbreeding = float(np.std(inbreeding_coefs)) - sample_inbreeding = SampleInbreeding(s, mean_inbreeding, std_inbreeding, dosage) - results[s] = sample_inbreeding - return results - - -def pedigree(args): - """ - %prog pedigree pedfile - - Plot pedigree and calculate pedigree coefficients from a pedigree file. 
- """ - p = OptionParser(pedigree.__doc__) - p.add_argument("--ploidy", default=2, type=int, help="Ploidy") - p.add_argument("--N", default=10000, type=int, help="Number of samples") - p.add_argument("--title", default="", help="Title of the graph") - opts, args, iopts = p.set_image_options(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (pedfile,) = args - ped = Pedigree(pedfile) - inb = calculate_inbreeding(ped, opts.ploidy, opts.N) - print("Sample\tProportion Inbreeding\tStd dev.") - for _, v in inb.items(): - print(v) - - G = ped.to_graph(inb, title=opts.title) - A = nx.nx_agraph.to_agraph(G) - image_file = f"{pedfile}.{iopts.format}" - A.draw(image_file, prog="dot") - logger.info("Pedigree graph written to `%s`", image_file) - - -def main(): - actions = (("pedigree", "Plot pedigree and calculate inbreeding coefficients"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -if __name__ == "__main__": - main() diff --git a/jcvi/compara/phylogeny.py b/jcvi/compara/phylogeny.py deleted file mode 100644 index 5233595f..00000000 --- a/jcvi/compara/phylogeny.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- -# -# phylogeny.py -# compara -# -# Created by Haibao Tang on 05/21/20 -# Copyright © 2020 Haibao Tang. All rights reserved. 
-# -import csv -import sys -import os.path as op - -from ..apps.base import ActionDispatcher, OptionParser, logger, mkdir -from ..formats.fasta import Fasta, SeqIO - - -def lcn(args): - """ - %prog lcn Orthogroups/Orthogroups.tsv Orthogroup_Sequences/ lcn/ - """ - p = OptionParser(lcn.__doc__) - p.add_argument( - "--min-single-ratio", default=0.9, help="Single copy ratio must be > " - ) - p.add_argument("--max-zero-ratio", default=0, help="Zero copy ratio must be < ") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - (groups_tsv, sequence_dir, lcn_dir) = args - selected = [] - # Read in the orthogroup definition and selected based on counts - with open(groups_tsv) as fp: - reader = csv.reader(fp, delimiter="\t") - header = next(reader, None) - species_names = header[1:] - for row in reader: - counts = [len(x.split(", ")) if x.strip() != "" else 0 for x in row[1:]] - single_ratio = sum([x == 1 for x in counts]) / len(counts) - zero_ratio = sum([x == 0 for x in counts]) / len(counts) - if single_ratio < opts.min_single_ratio: - continue - if zero_ratio > opts.max_zero_ratio: - continue - print(row[0], single_ratio, zero_ratio, counts, file=sys.stderr) - selected.append(row) - - logger.debug("A total of %d orthogroups selected", len(selected)) - - # Collect the FASTA sequences now - mkdir(lcn_dir) - for row in selected: - orthogroup = row[0] - orthogroup_fasta = "{}.fa".format(orthogroup) - input_fasta = op.join(sequence_dir, orthogroup_fasta) - fasta = Fasta(input_fasta) - selected_seqs = [] - for gene_names, species_name in zip(row[1:], species_names): - gene_names = gene_names.split(", ") - if len(gene_names) == 1: - (selected,) = gene_names - else: - max_length, selected = max((len(fasta[x]), x) for x in gene_names) - selected_seq = fasta[selected] - # Set gene name to species name so we can later combine them in supermatrix - selected_seq.id = species_name - selected_seq.name = species_name - selected_seq.description = "" 
- selected_seqs.append(selected_seq) - - output_fasta = op.join(lcn_dir, orthogroup_fasta) - with open(output_fasta, "w") as fw: - SeqIO.write(selected_seqs, fw, "fasta") - print( - "{}: {} => {} ({})".format( - orthogroup, len(fasta), len(selected_seqs), output_fasta - ), - file=sys.stderr, - ) - - -def main(): - actions = (("lcn", "collect low copy ortholog groups from OrthoFinder results"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -if __name__ == "__main__": - main() diff --git a/jcvi/compara/quota.py b/jcvi/compara/quota.py deleted file mode 100755 index 9e6356c3..00000000 --- a/jcvi/compara/quota.py +++ /dev/null @@ -1,288 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Quota synteny alignment (QUOTA-ALIGN) - -%prog [options] anchorsfile --qbed=qbedfile --sbed=sbedfile - -This python program does the following: -1. merge 2D-overlapping blocks (now skipped, but existed in original version) -2. build constraints that represent 1D-overlap among blocks -3. feed the data into the linear programming solver - -The algorithm is described in Tang et al. BMC Bioinformatics 2011. -"Screening synteny blocks in pairwise genome comparisons through integer -programming." 
-""" - -import os.path as op -import sys - -from ..algorithms.lpsolve import MIPDataModel -from ..apps.base import OptionParser, logger -from ..compara.synteny import _score, check_beds -from ..formats.base import must_open - -from .base import AnchorFile - - -def get_1D_overlap(eclusters, depth=1): - """ - Find blocks that are 1D overlapping, - returns cliques of block ids that are in conflict - """ - overlap_set = set() - active = set() - - ends = [] - for i, (chr, left, right) in enumerate(eclusters): - ends.append((chr, left, 0, i)) # 0/1 for left/right-ness - ends.append((chr, right, 1, i)) - ends.sort() - - chr_last = "" - for chr, _, left_right, i in ends: - if chr != chr_last: - active.clear() - if left_right == 0: - active.add(i) - else: - active.remove(i) - - if len(active) > depth: - overlap_set.add(tuple(sorted(active))) - - chr_last = chr - - return overlap_set - - -def make_range(clusters, extend=0): - """ - Convert to interval ends from a list of anchors - extend modifies the xmax, ymax boundary of the box, - which can be positive or negative - very useful when we want to make the range as fuzzy as we specify - """ - eclusters = [] - for cluster in clusters: - xlist, ylist, _ = zip(*cluster) - score = _score(cluster) - - xchr, xmin = min(xlist) - xchr, xmax = max(xlist) - ychr, ymin = min(ylist) - ychr, ymax = max(ylist) - - # allow fuzziness to the boundary - xmax += extend - ymax += extend - # because extend can be negative values, we don't want it to be less than min - if xmax < xmin: - xmin, xmax = xmax, xmin - if ymax < ymin: - ymin, ymax = ymax, ymin - - eclusters.append(((xchr, xmin, xmax), (ychr, ymin, ymax), score)) - - return eclusters - - -def get_constraints(clusters, quota=(1, 1), Nmax=0): - """ - Check pairwise cluster comparison, if they overlap then mark edge as conflict - """ - qa, qb = quota - eclusters = make_range(clusters, extend=-Nmax) - nodes = [c[-1] for c in eclusters] - - eclusters_x, eclusters_y, _ = zip(*eclusters) - - # 
represents the contraints over x-axis and y-axis - constraints_x = get_1D_overlap(eclusters_x, qa) - constraints_y = get_1D_overlap(eclusters_y, qb) - - return nodes, constraints_x, constraints_y - - -def create_data_model(nodes, constraints_x, qa, constraints_y, qb): - """ - Maximize - 4 x1 + 2 x2 + 3 x3 + x4 - Subject To - x1 + x2 <= 1 - End - """ - num_vars = len(nodes) - obj_coeffs = nodes[:] - constraint_coeffs = [] - bounds = [] - for c in constraints_x: - constraint_coeffs.append({x: 1 for x in c}) - bounds.append(qa) - num_constraints = len(constraints_x) - - # non-self - if not (constraints_x is constraints_y): - for c in constraints_y: - constraint_coeffs.append({x: 1 for x in c}) - bounds.append(qb) - num_constraints += len(constraints_y) - - return MIPDataModel( - constraint_coeffs, bounds, obj_coeffs, num_vars, num_constraints - ) - - -def solve_lp( - clusters, - quota, - work_dir="work", - Nmax=0, - self_match=False, - verbose=False, -): - """ - Solve the formatted LP instance - """ - qb, qa = quota # flip it - nodes, constraints_x, constraints_y = get_constraints(clusters, (qa, qb), Nmax=Nmax) - - if self_match: - constraints_x = constraints_y = constraints_x | constraints_y - - data = create_data_model(nodes, constraints_x, qa, constraints_y, qb) - return data.solve(work_dir=work_dir, verbose=verbose) - - -def read_clusters(qa_file, qorder, sorder): - """Read in the clusters from anchors file - - Args: - qa_file (str): Path to input file - qorder (dict): Dictionary to find position of feature in query - sorder (dict): Dictionary to find position of feature in subject - - Returns: - List: List of matches and scores - """ - af = AnchorFile(qa_file) - blocks = af.blocks - clusters = [] - for block in blocks: - cluster = [] - for a, b, score in block: - ia, oa = qorder[a] - ib, ob = sorder[b] - ca, cb = oa.seqid, ob.seqid - cluster.append(((ca, ia), (cb, ib), score)) - clusters.append(cluster) - - return clusters - - -def main(args): - p = 
OptionParser(__doc__) - - p.set_beds() - p.add_argument( - "--quota", - default="1:1", - help="`quota mapping` procedure -- screen blocks to constrain mapping" - " (useful for orthology), " - "put in the format like (#subgenomes expected for genome X):" - "(#subgenomes expected for genome Y)", - ) - p.add_argument( - "--Nm", - dest="Nmax", - type=int, - default=10, - help="distance cutoff to tolerate two blocks that are " - "slightly overlapping (cutoff for `quota mapping`) " - "[default: %default units (gene or bp dist)]", - ) - - p.add_argument( - "--self", - dest="self_match", - action="store_true", - default=False, - help="you might turn this on when screening paralogous blocks, " - "esp. if you have reduced mirrored blocks into non-redundant set", - ) - p.set_verbose(help="Show verbose solver output") - - p.add_argument( - "--screen", - default=False, - action="store_true", - help="generate new anchors file", - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (qa_file,) = args - _, _, qorder, sorder, _ = check_beds(qa_file, p, opts) - - # sanity check for the quota - if opts.quota: - try: - qa, qb = opts.quota.split(":") - qa, qb = int(qa), int(qb) - except ValueError: - logger.error("quota string should be the form x:x (2:4, 1:3, etc.)") - sys.exit(1) - - if opts.self_match and qa != qb: - raise Exception( - "when comparing genome to itself, " - "quota must be the same number " - "(like 1:1, 2:2) you have %s" % opts.quota - ) - quota = (qa, qb) - - self_match = opts.self_match - - clusters = read_clusters(qa_file, qorder, sorder) - for cluster in clusters: - assert len(cluster) > 0 - - # below runs `quota mapping` - work_dir = op.join(op.dirname(op.abspath(qa_file)), "work") - - selected_ids = solve_lp( - clusters, - quota, - work_dir=work_dir, - Nmax=opts.Nmax, - self_match=self_match, - verbose=opts.verbose, - ) - - logger.debug("Selected %d blocks", len(selected_ids)) - prefix = qa_file.rsplit(".", 1)[0] - 
suffix = "{}x{}".format(qa, qb) - outfile = ".".join((prefix, suffix)) - fw = must_open(outfile, "w") - print(",".join(str(x) for x in selected_ids), file=fw) - fw.close() - logger.debug("Screened blocks ids written to `%s`", outfile) - - if opts.screen: - from jcvi.compara.synteny import screen - - new_qa_file = ".".join((prefix, suffix, "anchors")) - largs = [qa_file, new_qa_file, "--ids", outfile] - if opts.qbed and opts.sbed: - largs += ["--qbed={0}".format(opts.qbed)] - largs += ["--sbed={0}".format(opts.sbed)] - screen(largs) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/jcvi/compara/reconstruct.py b/jcvi/compara/reconstruct.py deleted file mode 100644 index 83a9c377..00000000 --- a/jcvi/compara/reconstruct.py +++ /dev/null @@ -1,379 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -From synteny blocks, reconstruct ancestral order by interleaving the genes in -between the anchors. This is the bottom-up method used first in Bowers (2003), -and in Tang (2010), to reconstruct pre-alpha and pre-rho order, respectively. 
-""" -import sys - -from itertools import zip_longest -from math import sqrt -from more_itertools import pairwise - -from ..apps.base import ActionDispatcher, OptionParser, logger -from ..formats.base import get_number -from ..formats.bed import Bed -from ..utils.grouper import Grouper - -from .base import AnchorFile -from .synteny import check_beds - - -def main(): - - actions = ( - ("collinear", "reduce synteny blocks to strictly collinear"), - ("zipbed", "build ancestral contig from collinear blocks"), - ("pairs", "convert anchorsfile to pairsfile"), - # Sankoff-Zheng reconstruction - ("adjgraph", "construct adjacency graph"), - # Experimental gene order graph for ancestral reconstruction - ("fuse", "fuse gene orders based on anchorsfile"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def add_bed_to_graph(G, bed, families): - for seqid, bs in bed.sub_beds(): - prev_node, prev_strand = None, "+" - for b in bs: - accn = b.accn - strand = b.strand - node = "=".join(families[accn]) - if prev_node: - G.add_edge(prev_node, node, prev_strand, strand) - prev_node, prev_strand = node, strand - - return G - - -def print_edges(bed, families): - """ - Instead of going through the graph construction, just print the edges. - """ - symbols = {"+": ">", "-": "<"} - for seqid, bs in bed.sub_beds(): - prev_node, prev_strand = None, "+" - for b in bs: - accn = b.accn - strand = b.strand - node = "=".join(families[accn]) - if prev_node: - print( - "{}{}--{}{}".format( - prev_node, symbols[prev_strand], symbols[strand], node - ) - ) - prev_node, prev_strand = node, strand - - -def fuse(args): - """ - %prog fuse *.bed *.anchors - - Fuse gene orders based on anchors file. 
- """ - from jcvi.algorithms.graph import BiGraph - - p = OptionParser(fuse.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - bedfiles = [x for x in args if x.endswith(".bed")] - anchorfiles = [x for x in args if x.endswith(".anchors")] - - # TODO: Use Markov clustering to sparsify the edges - families = Grouper() - for anchorfile in anchorfiles: - af = AnchorFile(anchorfile) - for a, b, block_id in af.iter_pairs(): - families.join(a, b) - - allowed = set(families.keys()) - logger.debug( - "Total families: {}, Gene members: {}".format(len(families), len(allowed)) - ) - - # TODO: Use C++ implementation of BiGraph() when available - # For now just serialize this to the disk - for bedfile in bedfiles: - bed = Bed(bedfile, include=allowed) - print_edges(bed, families) - - -def adjgraph(args): - """ - %prog adjgraph adjacency.txt subgraph.txt - - Construct adjacency graph for graphviz. The file may look like sample below. - The lines with numbers are chromosomes with gene order information. 
- - genome 0 - chr 0 - -1 -13 -16 3 4 -6126 -5 17 -6 7 18 5357 8 -5358 5359 -9 -10 -11 5362 5360 - chr 1 - 138 6133 -5387 144 -6132 -139 140 141 146 -147 6134 145 -170 -142 -143 - """ - import pygraphviz as pgv - - from jcvi.formats.base import SetFile - - p = OptionParser(adjgraph.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - infile, subgraph = args - subgraph = SetFile(subgraph) - subgraph = set(x.strip("-") for x in subgraph) - - G = pgv.AGraph(strict=False) # allow multi-edge - SG = pgv.AGraph(strict=False) - - palette = ("green", "magenta", "tomato", "peachpuff") - fp = open(infile) - genome_id = -1 - key = 0 - for row in fp: - if row.strip() == "": - continue - - atoms = row.split() - tag = atoms[0] - if tag in ("ChrNumber", "chr"): - continue - - if tag == "genome": - genome_id += 1 - gcolor = palette[genome_id] - continue - - nodeseq = [] - for p in atoms: - np = p.strip("-") - nodeL, nodeR = np + "L", np + "R" - if p[0] == "-": # negative strand - nodeseq += [nodeR, nodeL] - else: - nodeseq += [nodeL, nodeR] - - for a, b in pairwise(nodeseq): - G.add_edge(a, b, key, color=gcolor) - key += 1 - - na, nb = a[:-1], b[:-1] - if na not in subgraph and nb not in subgraph: - continue - - SG.add_edge(a, b, key, color=gcolor) - - G.graph_attr.update(dpi="300") - - fw = open("graph.dot", "w") - G.write(fw) - fw.close() - - fw = open("subgraph.dot", "w") - SG.write(fw) - fw.close() - - -def pairs(args): - """ - %prog pairs anchorsfile prefix - - Convert anchorsfile to pairsfile. 
- """ - p = OptionParser(pairs.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - anchorfile, prefix = args - outfile = prefix + ".pairs" - fw = open(outfile, "w") - - af = AnchorFile(anchorfile) - blocks = af.blocks - pad = len(str(len(blocks))) - npairs = 0 - for i, block in enumerate(blocks): - block_id = "{0}{1:0{2}d}".format(prefix, i + 1, pad) - lines = [] - for q, s, score in block: - npairs += 1 - score = score.replace("L", "") - lines.append("\t".join((q, s, score, block_id))) - print("\n".join(sorted(lines)), file=fw) - - fw.close() - logger.debug("A total of {0} pairs written to `{1}`.".format(npairs, outfile)) - - -def interleave_pairs(pairs): - a, b = pairs[0] - yield a - yield b - for c, d in pairs[1:]: - assert a < c - xx = range(a + 1, c) - yy = range(b + 1, d) if b < d else range(b - 1, d, -1) - for x, y in zip_longest(xx, yy): - if x: - yield x - if y: - yield y - a, b = c, d - yield a - yield b - - -def zipbed(args): - """ - %prog zipbed species.bed collinear.anchors - - Build ancestral contig from collinear blocks. For example, to build pre-rho - order, use `zipbed rice.bed rice.rice.1x1.collinear.anchors`. The algorithms - proceeds by interleaving the genes together. 
- """ - p = OptionParser(zipbed.__doc__) - p.add_argument("--prefix", default="b", help="Prefix for the new seqid") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedfile, anchorfile = args - prefix = opts.prefix - bed = Bed(bedfile) - order = bed.order - newbedfile = prefix + ".bed" - fw = open(newbedfile, "w") - - af = AnchorFile(anchorfile) - blocks = af.blocks - pad = len(str(len(blocks))) - for i, block in enumerate(blocks): - block_id = "{0}{1:0{2}d}".format(prefix, i + 1, pad) - pairs = [] - for q, s, score in block: - qi, q = order[q] - si, s = order[s] - pairs.append((qi, si)) - newbed = list(interleave_pairs(pairs)) - for i, b in enumerate(newbed): - accn = bed[b].accn - print("\t".join(str(x) for x in (block_id, i, i + 1, accn)), file=fw) - - logger.debug("Reconstructed bedfile written to `{0}`.".format(newbedfile)) - - -# Non-linear transformation of anchor scores -def score_convert(x): - return int(sqrt(x)) - - -def get_collinear(block): - # block contains (gene a, gene b, score) - asc_score, asc_chain = print_chain(block) - desc_score, desc_chain = print_chain(block, ascending=False) - return asc_chain if asc_score > desc_score else desc_chain - - -def print_chain(block, ascending=True): - - scope = 50 # reduce search complexity - if not ascending: - block = [(a, -b, c) for (a, b, c) in block] - - block.sort() - bsize = len(block) - fromm = [-1] * bsize - scores = [score_convert(c) for (a, b, c) in block] - - for i, (a, b, c) in enumerate(block): - for j in range(i + 1, i + scope): - if j >= bsize: - break - - d, e, f = block[j] - - # Ensure strictly collinear - if d == a or b >= e: - continue - - this_score = scores[i] + score_convert(f) - if this_score > scores[j]: - fromm[j] = i - scores[j] = this_score - - scoresfromm = list(zip(scores, fromm)) - maxchain = max(scoresfromm) - chainscore, chainend = maxchain - solution = [scoresfromm.index(maxchain), chainend] - last = chainend - while True: - _last = 
fromm[last] - if _last == -1: - break - last = _last - solution.append(last) - - solution.reverse() - solution = [block[x] for x in solution] - if not ascending: - solution = [(a, -b, c) for (a, b, c) in solution] - return chainscore, solution - - -def collinear(args): - """ - %prog collinear a.b.anchors - - Reduce synteny blocks to strictly collinear, use dynamic programming in a - procedure similar to DAGchainer. - """ - p = OptionParser(collinear.__doc__) - p.set_beds() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (anchorfile,) = args - qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) - - af = AnchorFile(anchorfile) - newanchorfile = anchorfile.rsplit(".", 1)[0] + ".collinear.anchors" - fw = open(newanchorfile, "w") - - blocks = af.blocks - for block in blocks: - print("#" * 3, file=fw) - iblock = [] - for q, s, score in block: - qi, q = qorder[q] - si, s = sorder[s] - score = get_number(score) - iblock.append([qi, si, score]) - - block = get_collinear(iblock) - - for q, s, score in block: - q = qbed[q].accn - s = sbed[s].accn - print("\t".join((q, s, str(score))), file=fw) - - fw.close() - - -if __name__ == "__main__": - main() diff --git a/jcvi/compara/synfind.py b/jcvi/compara/synfind.py deleted file mode 100755 index 62112bb1..00000000 --- a/jcvi/compara/synfind.py +++ /dev/null @@ -1,279 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog rice.sorghum.last --qbed=rice.bed --sbed=sorghum.bed - -Given a blast, we find the syntenic regions for every single gene. The -algorithm works by expanding the query gene to a window centered on the gene. A -single linkage algorithm follows that outputs the synteny block. - -The result looks like the following: -Os01g0698300 Sb03g032090 S 7 + -Os01g0698500 Sb03g032140 G 11 + - -The pairs (A, B) -- A is query, and then B is the syntenic region found. 
-G is "Gray gene", which means it does not have match to the region (fractionated -or inserted). In this case, a right flanker is used to represent the region. -S is "Syntelog", which means it has a match to the region. In this case, the match -itself is used to represent the region. The number in the 4th column is the -synteny score. For the same query, it is ordered with decreasing synteny score. -The last column means orientation. "+" is same direction. -""" -import os.path as op -import sqlite3 -import sys - -from bisect import bisect_left -from itertools import groupby, tee - -from ..algorithms.lis import ( - longest_increasing_subsequence, - longest_decreasing_subsequence, -) -from ..apps.base import OptionParser, logger -from ..formats.base import must_open -from ..utils.grouper import Grouper - -from .synteny import check_beds, read_blast - - -def transposed(data): - x, y = zip(*data) - return zip(y, x) - - -def get_flanker(group, query): - """ - >>> get_flanker([(370, 15184), (372, 15178), (373, 15176), (400, 15193)], 385) - ((373, 15176), (400, 15193), True) - - >>> get_flanker([(124, 13639), (137, 13625)], 138) - ((137, 13625), (137, 13625), False) - """ - group.sort() - pos = bisect_left(group, (query, 0)) - left_flanker = group[0] if pos == 0 else group[pos - 1] - right_flanker = group[-1] if pos == len(group) else group[pos] - # pick the closest flanker - if abs(query - left_flanker[0]) < abs(query - right_flanker[0]): - flanker, other = left_flanker, right_flanker - else: - flanker, other = right_flanker, left_flanker - - flanked = not (pos == 0 or pos == len(group) or flanker == query) - - return flanker, other, flanked - - -def find_synteny_region(query, sbed, data, window, cutoff, colinear=False): - """ - Get all synteny blocks for a query, algorithm is single linkage - anchors are a window centered on query - - Two categories of syntenic regions depending on what query is: - (Syntelog): syntenic region is denoted by the syntelog - (Gray gene): 
syntenic region is marked by the closest flanker - """ - regions = [] - ysorted = sorted(data, key=lambda x: x[1]) - g = Grouper() - - a, b = tee(ysorted) - next(b, None) - for ia, ib in zip(a, b): - pos1, pos2 = ia[1], ib[1] - if pos2 - pos1 < window and sbed[pos1].seqid == sbed[pos2].seqid: - g.join(ia, ib) - - for group in sorted(g): - (qflanker, syntelog), (far_flanker, far_syntelog), flanked = get_flanker( - group, query - ) - - # run a mini-dagchainer here, take the direction that gives us most anchors - if colinear: - y_indexed_group = [(y, i) for i, (x, y) in enumerate(group)] - lis = longest_increasing_subsequence(y_indexed_group) - lds = longest_decreasing_subsequence(y_indexed_group) - - if len(lis) >= len(lds): - track = lis - orientation = "+" - else: - track = lds - orientation = "-" - - group = [group[i] for (y, i) in track] - - xpos, ypos = zip(*group) - score = min(len(set(xpos)), len(set(ypos))) - - if qflanker == query: - gray = "S" - else: - gray = "G" if not flanked else "F" - score -= 1 # slight penalty for not finding syntelog - - if score < cutoff: - continue - - # y-boundary of the block - left, right = group[0][1], group[-1][1] - # this characterizes a syntenic region (left, right). 
- # syntelog is -1 if it's a gray gene - syn_region = (syntelog, far_syntelog, left, right, gray, orientation, score) - regions.append(syn_region) - - return sorted(regions, key=lambda x: -x[-1]) # decreasing synteny score - - -def batch_query(qbed, sbed, all_data, opts, fw=None, c=None, transpose=False): - - cutoff = int(opts.cutoff * opts.window) - window = opts.window / 2 - colinear = opts.scoring == "collinear" - qnote, snote = opts.qnote, opts.snote - if qnote == "null" or snote == "null": - qnote = op.basename(qbed.filename).split(".")[0] - snote = op.basename(sbed.filename).split(".")[0] - - # process all genes present in the bed file - if transpose: - all_data = transposed(all_data) - qbed, sbed = sbed, qbed - qnote, snote = snote, qnote - - all_data.sort() - - def simple_bed(x): - return sbed[x].seqid, sbed[x].start - - qsimplebed = qbed.simple_bed - - for seqid, ranks in groupby(qsimplebed, key=lambda x: x[0]): - ranks = [x[1] for x in ranks] - for r in ranks: - rmin = max(r - window, ranks[0]) - rmax = min(r + window + 1, ranks[-1]) - rmin_pos = bisect_left(all_data, (rmin, 0)) - rmax_pos = bisect_left(all_data, (rmax, 0)) - data = all_data[rmin_pos:rmax_pos] - regions = find_synteny_region( - r, sbed, data, window, cutoff, colinear=colinear - ) - for ( - syntelog, - far_syntelog, - left, - right, - gray, - orientation, - score, - ) in regions: - query = qbed[r].accn - - left_chr, left_pos = simple_bed(left) - right_chr, right_pos = simple_bed(right) - - anchor = sbed[syntelog].accn - anchor_chr, anchor_pos = simple_bed(syntelog) - # below is useful for generating the syntenic region in the coge url - left_dist = abs(anchor_pos - left_pos) if anchor_chr == left_chr else 0 - right_dist = ( - abs(anchor_pos - right_pos) if anchor_chr == right_chr else 0 - ) - flank_dist = (max(left_dist, right_dist) / 10000 + 1) * 10000 - - far_syntelog = sbed[far_syntelog].accn - - data = [ - query, - anchor, - gray, - score, - flank_dist, - orientation, - far_syntelog, - 
] - pdata = data[:6] + [qnote, snote] - if fw: - print("\t".join(str(x) for x in pdata), file=fw) - continue - c.execute("insert into synteny values (?,?,?,?,?,?,?,?)", pdata) - - -def main(blastfile, p, opts): - - sqlite = opts.sqlite - qbed, sbed, qorder, sorder, is_self = check_beds(blastfile, p, opts) - filtered_blast = read_blast( - blastfile, qorder, sorder, is_self=is_self, ostrip=opts.strip_names - ) - all_data = [(b.qi, b.si) for b in filtered_blast] - - c = None - if sqlite: - conn = sqlite3.connect(sqlite) - c = conn.cursor() - c.execute("drop table if exists synteny") - c.execute( - "create table synteny (query text, anchor text, " - "gray varchar(1), score integer, dr integer, " - "orientation varchar(1), qnote text, snote text)" - ) - fw = None - else: - fw = must_open(opts.outfile, "w") - - batch_query(qbed, sbed, all_data, opts, fw=fw, c=c, transpose=False) - if qbed.filename == sbed.filename: - logger.debug("Self comparisons, mirror ignored") - else: - batch_query(qbed, sbed, all_data, opts, fw=fw, c=c, transpose=True) - - if sqlite: - c.execute("create index q on synteny (query)") - conn.commit() - c.close() - else: - fw.close() - - -if __name__ == "__main__": - - p = OptionParser(__doc__) - p.set_beds() - p.set_stripnames() - p.set_outfile() - - coge_group = p.add_argument_group("CoGe-specific options") - coge_group.add_argument("--sqlite", help="Write sqlite database") - coge_group.add_argument("--qnote", default="null", help="Query dataset group id") - coge_group.add_argument("--snote", default="null", help="Subject dataset group id") - - params_group = p.add_argument_group("Synteny parameters") - params_group.add_argument( - "--window", type=int, default=40, help="Synteny window size" - ) - params_group.add_argument( - "--cutoff", - type=float, - default=0.1, - help="Minimum number of anchors to call synteny", - ) - supported_scoring = ("collinear", "density") - params_group.add_argument( - "--scoring", - choices=supported_scoring, - 
default="collinear", - help="Scoring scheme", - ) - - opts, args = p.parse_args() - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - main(blastfile, p, opts) diff --git a/jcvi/compara/synteny.py b/jcvi/compara/synteny.py deleted file mode 100755 index 7059d93f..00000000 --- a/jcvi/compara/synteny.py +++ /dev/null @@ -1,1883 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -"""Syntenty inference in comparative genomics -""" - -import os.path as op -import sys - -from collections import defaultdict -from collections.abc import Iterable - -import numpy as np - -from ..algorithms.lis import heaviest_increasing_subsequence as his -from ..apps.base import ActionDispatcher, OptionParser, cleanup, logger -from ..formats.base import BaseFile, SetFile, read_block, must_open -from ..formats.bed import Bed, BedLine -from ..formats.blast import Blast -from ..utils.cbook import gene_name, human_size -from ..utils.grouper import Grouper -from ..utils.range import range_chain - -from .base import AnchorFile - - -class BlockFile(BaseFile): - """Parse .blocks file which is the mcscan output with multiple columns as 'tracks'""" - - def __init__(self, filename, defaultcolor="#fb8072", header=False): - super().__init__(filename) - fp = must_open(filename) - hd = next(fp).rstrip().split("\t") - ncols = len(hd) - if header: - self.header = hd - else: - fp.seek(0) - self.header = range(ncols) - - data = [] - highlight = [] - for row in fp: - hl = "*" in row - # r* highlights the block in red color - if hl: - hl, row = row.split("*", 1) - hl = hl or defaultcolor - atoms = row.rstrip().split("\t") - atoms = [x.strip() for x in atoms] - atoms = ["." 
if x == "" else x for x in atoms] - if len(atoms) > ncols: - atoms = atoms[:ncols] - elif len(atoms) < ncols: - atoms = atoms + ["."] * (ncols - len(atoms)) - data.append(atoms) - highlight.append(hl) - - self.data = data - self.highlight = highlight - self.columns = list(zip(*data)) - self.ncols = ncols - - def get_extent(self, i, order, debug=True): - # Some blocks file, such as ones manually edited, will have garbled - # order, which prompts the hack below - acol = [order[x][0] for x in self.columns[0] if x in order] - bcol = [order[x][0] for x in self.columns[i] if x in order] - elen = min(len(acol), len(bcol)) - ia, ib = acol[:elen], bcol[:elen] - orientation = get_orientation(ia, ib) - - ocol = [order[x] for x in self.columns[i] if x in order] - # orientation = '+' if ocol[0][0] <= ocol[-1][0] else '-' - si, start = min(ocol) - ei, end = max(ocol) - same_chr = start.seqid == end.seqid - chr = start.seqid if same_chr else None - ngenes = ei - si + 1 - if debug: - r = "{0}:{1}-{2}".format(chr, start.start, end.end) - print( - "Column {0}: {1} - {2} ({3})".format(i, start.accn, end.accn, r), - file=sys.stderr, - ) - print( - " {0} .. {1} ({2}) features .. {3}".format( - chr, ngenes, len(ocol), orientation - ), - file=sys.stderr, - ) - - span = abs(start.start - end.end) - - return start, end, si, ei, chr, orientation, span - - def iter_pairs(self, i, j, highlight=False): - for h, d in zip(self.highlight, self.data): - if highlight and not h: - continue - - a, b = d[i], d[j] - if "." 
in (a, b) or "" in (a, b): - continue - - yield a, b, h - - def iter_all_pairs(self): - ncols = self.ncols - for i in range(ncols): - for j in range(i + 1, ncols): - for a, b, h in self.iter_pairs(i, j): - yield a, b, h - - def iter_gene_col(self): - for hd, col in zip(self.header, self.columns): - for g in col: - if g not in (".", ""): - yield g, hd - - def query_gene(self, gene, color=None, invert=False): - """ - Used in mcscanq() for query - """ - qi = self.columns[0].index(gene) - ndata = len(self.data) - for col in self.columns[1:]: - upstream_dist = downstream_dist = 1000 - # search upstream - for i in range(qi - 1, -1, -1): - if col[i] not in (".", ""): - upstream = col[i] - upstream_dist = qi - i - break - # search downstream - for i in range(qi, ndata): - if col[i] not in (".", ""): - downstream = col[i] - downstream_dist = i - qi - break - closest = upstream if upstream_dist < downstream_dist else downstream - # output in .simple format - if invert: - line = "\t".join(str(x) for x in (closest, closest, gene, gene, 0, "+")) - else: - line = "\t".join(str(x) for x in (gene, gene, closest, closest, 0, "+")) - if color is not None: - line = color + "*" + line - yield line - - def grouper(self) -> Grouper: - """Build orthogroup based on the gene matches.""" - grouper = Grouper() - for row in self.data: - if "." 
not in row: - grouper.join(*row) - logger.debug("A total of %d orthogroups formed", len(grouper)) - return grouper - - -class SimpleFile(object): - def __init__(self, simplefile, defaultcolor="#fb8072", order=None): - # Sometimes the simplefile has query and subject wrong - fp = open(simplefile) - self.blocks = [] - check = False - for row in fp: - if row[:2] == "##" or row.startswith("StartGeneA"): - continue - hl = "*" in row - if hl: - hl, row = row.split("*", 1) - hl = hl or defaultcolor - a, b, c, d, score, orientation = row.split() - if order and a not in order: - if c not in order: - check = True - print( - """{} {} {} {} can not found in bed files.""".format( - a, b, c, d - ), - file=sys.stderr, - ) - else: - a, b, c, d = c, d, a, b - if orientation == "-": - c, d = d, c - score = int(score) - self.blocks.append((a, b, c, d, score, orientation, hl)) - if check: - print( - "Error: some genes in blocks can't be found, please rerun after making sure that bed file agree with simple file.", - file=sys.stderr, - ) - exit(1) - - -def _score(cluster): - """ - score of the cluster, in this case, is the number of non-repetitive matches - """ - x, y = list(zip(*cluster))[:2] - return min(len(set(x)), len(set(y))) - - -def get_orientation(ia, ib): - """Infer the orientation of a pairwise block. 
- - Args: - ia (List[int]): List a - ib (List[int]): List b - - Returns: - str: plus (+) or minus (-) - """ - if len(ia) != len(ib) or len(ia) < 2: - return "+" # Just return a default orientation - - slope, _ = np.polyfit(ia, ib, 1) - return "+" if slope >= 0 else "-" - - -def group_hits(blasts): - if not blasts: - return {"": []} - - # Already in the form of (qi, si, score) - if isinstance(blasts[0], Iterable) and len(blasts[0]) == 3: - return {"": blasts} - - # grouping the hits based on chromosome pair - all_hits = defaultdict(list) - for b in blasts: - all_hits[(b.qseqid, b.sseqid)].append((b.qi, b.si, b.score)) - - return all_hits - - -def read_blast(blast_file, qorder, sorder, is_self=False, ostrip=True): - """Read the blast and convert name into coordinates""" - filtered_blast = [] - seen = set() - bl = Blast(blast_file) - for b in bl: - query, subject = b.query, b.subject - if is_self and query == subject: - continue - if ostrip: - query, subject = gene_name(query), gene_name(subject) - if query not in qorder or subject not in sorder: - continue - - qi, q = qorder[query] - si, s = sorder[subject] - - if is_self: - # remove redundant a<->b to one side when doing self-self BLAST - if qi > si: - query, subject = subject, query - qi, si = si, qi - q, s = s, q - # Too close to diagonal! 
possible tandem repeats - if q.seqid == s.seqid and si - qi < 40: - continue - - key = query, subject - if key in seen: - continue - seen.add(key) - - b.qseqid, b.sseqid = q.seqid, s.seqid - b.qi, b.si = qi, si - b.query, b.subject = query, subject - - filtered_blast.append(b) - - logger.debug( - "A total of %d BLAST imported from `%s`.", len(filtered_blast), blast_file - ) - - return filtered_blast - - -def read_anchors(ac, qorder, sorder, minsize=0): - """ - anchors file are just (geneA, geneB) pairs (with possible deflines) - """ - all_anchors = defaultdict(list) - nanchors = 0 - anchor_to_block = {} - - for a, b, idx in ac.iter_pairs(minsize=minsize): - if a not in qorder or b not in sorder: - continue - qi, q = qorder[a] - si, s = sorder[b] - pair = (qi, si) - - all_anchors[(q.seqid, s.seqid)].append(pair) - anchor_to_block[pair] = idx - nanchors += 1 - - logger.debug("A total of {0} anchors imported.".format(nanchors)) - assert nanchors == len(anchor_to_block) - - return all_anchors, anchor_to_block - - -def synteny_scan(points, xdist, ydist, N, is_self=False, intrabound=300): - """ - This is the core single linkage algorithm which behaves in O(n): - iterate through the pairs, foreach pair we look back on the - adjacent pairs to find links - """ - clusters = Grouper() - n = len(points) - points.sort() - for i in range(n): - for j in range(i - 1, -1, -1): - # x-axis distance - del_x = points[i][0] - points[j][0] - if del_x > xdist: - break - # y-axis distance - del_y = points[i][1] - points[j][1] - if abs(del_y) > ydist: - continue - # In self-comparison, ignore the anchors that are too close to the diagonal - if is_self: - intradist = min( - abs(points[i][0] - points[i][1]), abs(points[j][0] - points[j][1]) - ) - if intradist < intrabound: - continue - # otherwise join - clusters.join(points[i], points[j]) - - # select clusters that are at least >=N - clusters = [sorted(cluster) for cluster in list(clusters) if _score(cluster) >= N] - - return clusters - - 
-def batch_scan(points, xdist=20, ydist=20, N=5, is_self=False, intrabound=300): - """ - runs synteny_scan() per chromosome pair - """ - chr_pair_points = group_hits(points) - - clusters = [] - for chr_pair in sorted(chr_pair_points.keys()): - points = chr_pair_points[chr_pair] - clusters.extend( - synteny_scan( - points, xdist, ydist, N, is_self=is_self, intrabound=intrabound - ) - ) - - return clusters - - -def synteny_liftover(points, anchors, dist): - """ - This is to get the nearest anchors for all the points (useful for the - `liftover` operation below). - """ - from scipy.spatial import cKDTree - - points = np.array(points, dtype=int) - ppoints = points[:, :2] if points.shape[1] > 2 else points - tree = cKDTree(anchors, leafsize=16) - dists, idxs = tree.query(ppoints, p=1, distance_upper_bound=dist) - - for point, dist, idx in zip(points, dists, idxs): - if idx == tree.n: # nearest is out of range - continue - if dist == 0: # already in anchors - continue - - yield point, tuple(anchors[idx]) - - -def get_bed_filenames(hintfile, p, opts): - wd, hintfile = op.split(hintfile) - if not (opts.qbed and opts.sbed): - try: - q, s = hintfile.split(".", 2)[:2] - opts.qbed = op.join(wd, q + ".bed") - opts.sbed = op.join(wd, s + ".bed") - logger.debug("Assuming --qbed={0} --sbed={1}".format(opts.qbed, opts.sbed)) - except: - print("Options --qbed and --sbed are required", file=sys.stderr) - sys.exit(not p.print_help()) - - return opts.qbed, opts.sbed - - -def check_beds(hintfile, p, opts, sorted=True): - qbed_file, sbed_file = get_bed_filenames(hintfile, p, opts) - # is this a self-self blast? 
- is_self = qbed_file == sbed_file - if is_self: - logger.debug("Looks like self-self comparison.") - - qbed = Bed(opts.qbed, sorted=sorted) - sbed = Bed(opts.sbed, sorted=sorted) - qorder = qbed.order - sorder = sbed.order - - return qbed, sbed, qorder, sorder, is_self - - -def add_arguments(p, args, dist=10): - """ - scan and liftover has similar interfaces, so share common options - returns opts, files - """ - p.set_beds() - p.add_argument( - "--dist", default=dist, type=int, help="Extent of flanking regions to search" - ) - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - blast_file, anchor_file = args - - return blast_file, anchor_file, opts.dist, opts - - -def main(): - - actions = ( - ("scan", "get anchor list using single-linkage algorithm"), - ("summary", "provide statistics for pairwise blocks"), - ("liftover", "given anchor list, pull adjacent pairs from blast file"), - # Multiple synteny blocks inference - ("mcscan", "stack synteny blocks on a reference bed"), - ("mcscanq", "query multiple synteny blocks"), - # Assemble multiple synteny blocks - ("query", "collect matching region based on the query region"), - ("assemble", "build blocks from regions defined by start and end"), - # Filter synteny blocks - ("screen", "extract subset of blocks from anchorfile"), - ("simple", "convert anchorfile to simple block descriptions"), - ("stats", "provide statistics for mscan blocks"), - ("depth", "calculate the depths in the two genomes in comparison"), - ("breakpoint", "identify breakpoints where collinearity ends"), - ("matrix", "make oxford grid based on anchors file"), - ("coge", "convert CoGe file to anchors file"), - ("spa", "convert chr ordering from SPA to simple lists"), - ("layout", "compute layout based on .simple file"), - ("rebuild", "rebuild anchors file from prebuilt blocks file"), - # Formatting - ("fromaligns", "convert aligns file to anchors file"), - ("toaligns", "convert anchors file to aligns file"), 
- ) - - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def get_region_size(region, bed, order): - """Get a summary of a syntenic region, how many anchors it has and - how many genes it spans. - - Args: - region (List[str]): List of gene ids - order (Dict[str, BedLine]): Bed order to retrieve the positions - - Returns: - Tuple of three strs and two ints, start / end gene / seqid of the - region and total anchor counts and the span (number of genes) - """ - ris = [order[x] for x in region] - min_ri, min_r = min(ris) - max_ri, max_r = max(ris) - anchor_count = len(region) - span = max_ri - min_ri + 1 - min_seqid = min_r.seqid - max_seqid = max_r.seqid - assert min_seqid == max_seqid, "SeqId do not match, region invalid" - return min_r.accn, max_r.accn, min_seqid, span, anchor_count - - -def query(args): - """ - %prog query anchorsfile startGeneId endGeneId - - Collect matching region based on query region as given by startGeneId to - endGeneId. This can be considered a local version of mcscan(). The bedfile - must contain the range from startGeneId to endGeneId. - - Typical pipeline is to extract a set of pairwise syntenic regions to the - selected region of interest and then assemble them into .blocks file for - plotting purposes. 
- """ - p = OptionParser(query.__doc__) - p.set_beds() - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - anchorsfile, start_gene_id, end_gene_id = args - qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts) - - # Guess which is qbed, which is sbed - if start_gene_id in sorder: # flip query and subject - qbed, sbed = sbed, qbed - qorder, sorder = sorder, qorder - - ac = AnchorFile(anchorsfile) - blocks = ac.blocks - si, s = qorder[start_gene_id] - ei, e = qorder[end_gene_id] - target_region = qbed[si : ei + 1] - target_genes = set(x.accn for x in target_region) - - # Go through all the blocks and pick out all matching regions - regions = [] - for block in blocks: - matching_region = set() - for a, b, score in block: - if not (a in target_genes or b in target_genes): - continue - if a in target_genes: - matching_region.add(b) - else: - matching_region.add(a) - if len(matching_region) < 2: - continue - # Print a summary of the matching region - regions.append(get_region_size(matching_region, sbed, sorder)) - - for min_accn, max_accn, seqid, span, anchor_count in sorted( - regions, key=lambda x: (-x[-1], -x[-2]) # Sort by (anchor_count, span) DESC - ): - print( - "{} {} ({}): span {}, anchors {}".format( - min_accn, max_accn, seqid, span, anchor_count - ) - ) - - -def assemble(args): - """ - %prog assemble regionsfile all.bed all.cds - - Assemble blocks file based on regions file. Regions file may look like: - - amborella evm_27.model.AmTr_v1.0_scaffold00004.87 evm_27.model.AmTr_v1.0_scaffold00004.204 - apostasia Ash010455 Ash010479 (fragScaff_scaffold_5) - apostasia Ash018328 Ash018367 (original_scaffold_2912) - apostasia Ash007533 Ash007562 (fragScaff_scaffold_132) - apostasia Ash002281 Ash002299 (fragScaff_scaffold_86) - - Where each line lists a region, starting with the species name (species.bed - must be present in the current directory). Followed by start and end gene. 
- Contents after the 3rd field (end gene) are ignored. Using the example - above, the final .blocks file will contain 5 columns, one column for each line. - """ - import shutil - from tempfile import mkdtemp, mkstemp - - from jcvi.apps.align import last - from jcvi.formats.fasta import some - - p = OptionParser(assemble.__doc__) - p.add_argument( - "--no_strip_names", - default=False, - action="store_true", - help="Do not strip alternative splicing (e.g. At5g06540.1 -> At5g06540)", - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - strip_names = not opts.no_strip_names - regionsfile, bedfile, cdsfile = args - species_beds = {} - column_genes = [] - pivot = None - with open(regionsfile) as fp: - for row in fp: - species, start, end = row.split()[:3] - if pivot is None: - pivot = species - if species not in species_beds: - species_beds[species] = Bed(species + ".bed") - bed = species_beds[species] - order = bed.order - si, s = order[start] - ei, e = order[end] - genes = set(x.accn for x in bed[si : ei + 1]) - column_genes.append(genes) - - # Write gene ids - workdir = mkdtemp() - fd, idsfile = mkstemp(dir=workdir) - with open(idsfile, "w") as fw: - for genes in column_genes: - print(" ".join(genes), file=fw) - - logger.debug("Gene ids written to `{}`".format(idsfile)) - - # Extract FASTA - fd, fastafile = mkstemp(dir=workdir) - some_args = [cdsfile, idsfile, fastafile] - if not strip_names: - some_args += ["--no_strip_names"] - some(some_args) - - # Perform self-comparison and collect all pairs - last_output = last([fastafile, fastafile, "--outdir", workdir]) - blast = Blast(last_output) - pairs = set() - for b in blast: - query, subject = b.query, b.subject - if strip_names: - query, subject = gene_name(query), gene_name(subject) - pairs.add((query, subject)) - logger.debug("Extracted {} gene pairs from `{}`".format(len(pairs), last_output)) - - # Sort the pairs into columns - N = len(column_genes) - 
all_slots = [] - for i in range(N): - for j in range(i + 1, N): - genes_i = column_genes[i] - genes_j = column_genes[j] - for a, b in pairs: - if not (a in genes_i and b in genes_j): - continue - slots = ["."] * N - slots[i] = a - slots[j] = b - all_slots.append(slots) - - # Compress the pairwise results and merge when possible - # TODO: This is currently not optimized and inefficient - def is_compatible(slots1, slots2): - # At least intersects for one gene - assert len(slots1) == len(slots2) - flag = False - for a, b in zip(slots1, slots2): - if "." in (a, b): - continue - if a == b: - flag = True - else: - return False - return flag - - def merge(slots, processed): - for i, a in enumerate(slots): - if processed[i] == "." and a != ".": - processed[i] = a - - processed_slots = [] - all_slots.sort() - for slots in all_slots: - merged = False - for processed in processed_slots: - if is_compatible(slots, processed): - merge(slots, processed) # Merge into that line - merged = True - break - if not merged: # New information - processed_slots.append(slots) - - logger.debug( - "Before compression: {}, After compression: {}".format( - len(all_slots), len(processed_slots) - ) - ) - - pivot_order = species_beds[pivot].order - pivot_max = len(species_beds[pivot]) - pivot_sort_key = lambda x: pivot_order[x[0]][0] if x[0] != "." else pivot_max - processed_slots.sort(key=pivot_sort_key) - - with must_open(opts.outfile, "w") as fw: - for slots in processed_slots: - print("\t".join(slots), file=fw) - - # Cleanup - cleanup(workdir) - - -def colinear_evaluate_weights(tour, data): - tour = dict((s, i) for i, s in enumerate(tour)) - data = [(tour[x], score) for x, score in data if x in tour] - return (his(data)[-1],) - - -def layout(args): - """ - %prog layout query.subject.simple query.seqids subject.seqids - - Compute optimal seqids order in a second genome, based on seqids on one - genome, given the pairwise blocks in .simple format. 
- """ - from jcvi.algorithms.ec import GA_setup, GA_run - - p = OptionParser(layout.__doc__) - p.set_beds() - p.set_cpus(cpus=32) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - simplefile, qseqids, sseqids = args - qbed, sbed, qorder, sorder, is_self = check_beds(simplefile, p, opts) - - qseqids = qseqids.strip().split(",") - sseqids = sseqids.strip().split(",") - qseqids_ii = dict((s, i) for i, s in enumerate(qseqids)) - sseqids_ii = dict((s, i) for i, s in enumerate(sseqids)) - - blocks = SimpleFile(simplefile).blocks - scores = defaultdict(int) - for a, b, c, d, score, orientation, hl in blocks: - qi, q = qorder[a] - si, s = sorder[c] - qseqid, sseqid = q.seqid, s.seqid - if sseqid not in sseqids: - continue - scores[sseqids_ii[sseqid], qseqid] += score - - data = [] - for (a, b), score in sorted(scores.items()): - if b not in qseqids_ii: - continue - data.append((qseqids_ii[b], score)) - - tour = range(len(qseqids)) - toolbox = GA_setup(tour) - toolbox.register("evaluate", colinear_evaluate_weights, data=data) - tour, fitness = GA_run(toolbox, ngen=100, npop=100, cpus=opts.cpus) - tour = [qseqids[x] for x in tour] - - print(",".join(tour)) - - -def fromaligns(args): - """ - %prog fromaligns out.aligns - - Convert aligns file (old MCscan output) to anchors file. - """ - p = OptionParser(fromaligns.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (alignsfile,) = args - fp = must_open(alignsfile) - fw = must_open(opts.outfile, "w") - for row in fp: - if row.startswith("## Alignment"): - print("###", file=fw) - continue - if row[0] == "#" or not row.strip(): - continue - atoms = row.split(":")[-1].split() - print("\t".join(atoms[:2]), file=fw) - fw.close() - - -def toaligns(args): - """ - %prog fromaligns input.anchors - - Convert anchors file to tab-separated aligns file, adding the first column - with the Block ID. 
- """ - p = OptionParser(toaligns.__doc__) - p.add_argument("--prefix", default="b", help="Prefix to the block id") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - (anchorfile,) = args - ac = AnchorFile(anchorfile) - logger.debug("A total of {} blocks imported".format(len(ac.blocks))) - max_block_id_len = len(str(len(ac.blocks) - 1)) - header = "\t".join(("#Block ID", "Gene 1", "Gene 2")) - - with must_open(opts.outfile, "w") as fw: - print(header, file=fw) - for a, b, block_id in ac.iter_pairs(): - block_id = "{}{:0{}d}".format(opts.prefix, block_id, max_block_id_len) - print("\t".join((block_id, a, b)), file=fw) - - -def mcscanq(args): - """ - %prog mcscanq query.ids blocksfile - - Query multiple synteny blocks to get the closest alignment feature. Mostly - used for 'highlighting' the lines in the synteny plot, drawn by - graphics.karyotype and graphics.synteny. - """ - p = OptionParser(mcscanq.__doc__) - p.add_argument("--color", help="Add color highlight, used in plotting") - p.add_argument( - "--invert", default=False, action="store_true", help="Invert query and subject" - ) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - qids, blocksfile = args - b = BlockFile(blocksfile) - fp = open(qids) - for gene in fp: - gene = gene.strip() - for line in b.query_gene(gene, color=opts.color, invert=opts.invert): - print(line) - - -def spa(args): - """ - %prog spa spafiles - - Convert chromosome ordering from SPA to simple lists. First column is the - reference order. 
- """ - from jcvi.algorithms.graph import merge_paths - from jcvi.utils.cbook import uniqify - - p = OptionParser(spa.__doc__) - p.add_argument( - "--unmapped", - default=False, - action="store_true", - help="Include unmapped scaffolds in the list", - ) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - spafiles = args - paths = [] - mappings = [] - missings = [] - for spafile in spafiles: - fp = open(spafile) - path = [] - mapping = [] - missing = [] - for row in fp: - if row[0] == "#" or not row.strip(): - continue - - atoms = row.rstrip().split("\t") - if len(atoms) == 2: - a, c2 = atoms - assert a == "unmapped" - missing.append(c2) - continue - - c1, c2, orientation = atoms - path.append(c1) - mapping.append(c2) - - paths.append(uniqify(path)) - mappings.append(mapping) - missings.append(missing) - - ref = merge_paths(paths) - print("ref", len(ref), ",".join(ref)) - for spafile, mapping, missing in zip(spafiles, mappings, missings): - mapping = [x for x in mapping if "random" not in x] - mapping = uniqify(mapping) - if len(mapping) < 50 and opts.unmapped: - mapping = uniqify(mapping + missing) - - print(spafile, len(mapping), ",".join(mapping)) - - -def rebuild(args): - """ - %prog rebuild blocksfile blastfile - - Rebuild anchors file from pre-built blocks file. 
- """ - p = OptionParser(rebuild.__doc__) - p.add_argument( - "--header", default=False, action="store_true", help="First line is header" - ) - p.add_argument( - "--write_blast", - default=False, - action="store_true", - help="Get blast records of rebuilt anchors", - ) - p.set_beds() - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - blocksfile, blastfile = args - bk = BlockFile(blocksfile, header=opts.header) - fw = open("pairs", "w") - for a, b, h in bk.iter_all_pairs(): - print("\t".join((a, b)), file=fw) - fw.close() - - if opts.write_blast: - AnchorFile("pairs").blast(blastfile, "pairs.blast") - - fw = open("tracks", "w") - for g, col in bk.iter_gene_col(): - print("\t".join(str(x) for x in (g, col)), file=fw) - fw.close() - - -def coge(args): - """ - %prog coge cogefile - - Convert CoGe file to anchors file. - """ - p = OptionParser(coge.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (cogefile,) = args - fp = must_open(cogefile) - cogefile = cogefile.replace(".gz", "") - ksfile = cogefile + ".ks" - anchorsfile = cogefile + ".anchors" - fw_ks = must_open(ksfile, "w") - fw_ac = must_open(anchorsfile, "w") - - tag = "###" - print(tag, file=fw_ks) - for header, lines in read_block(fp, tag): - print(tag, file=fw_ac) - lines = list(lines) - for line in lines: - if line[0] == "#": - continue - ( - ks, - ka, - achr, - a, - astart, - astop, - bchr, - b, - bstart, - bstop, - ev, - ss, - ) = line.split() - a = a.split("||")[3] - b = b.split("||")[3] - print("\t".join((a, b, ev)), file=fw_ac) - print(",".join((";".join((a, b)), ks, ka, ks, ka)), file=fw_ks) - - fw_ks.close() - fw_ac.close() - - -def matrix(args): - """ - %prog matrix all.bed anchorfile matrixfile - - Make oxford grid based on anchors file. 
- """ - - p = OptionParser(matrix.__doc__) - p.add_argument("--seqids", help="File with seqids") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - bedfile, anchorfile, matrixfile = args - ac = AnchorFile(anchorfile) - seqidsfile = opts.seqids - if seqidsfile: - seqids = SetFile(seqidsfile, delimiter=",") - - order = Bed(bedfile).order - blocks = ac.blocks - m = defaultdict(int) - fw = open(matrixfile, "w") - aseqids = set() - bseqids = set() - for block in blocks: - a, b, scores = zip(*block) - ai, af = order[a[0]] - bi, bf = order[b[0]] - aseqid = af.seqid - bseqid = bf.seqid - if seqidsfile: - if (aseqid not in seqids) or (bseqid not in seqids): - continue - m[(aseqid, bseqid)] += len(block) - aseqids.add(aseqid) - bseqids.add(bseqid) - - aseqids = list(aseqids) - bseqids = list(bseqids) - print("\t".join(["o"] + bseqids), file=fw) - for aseqid in aseqids: - print("\t".join([aseqid] + [str(m[(aseqid, x)]) for x in bseqids]), file=fw) - - -def get_boundary_bases(start, end, order): - - from jcvi.utils.range import range_minmax - - (i, s), (j, e) = order[start], order[end] - seqid = s.seqid - assert seqid == e.seqid - - startbase, endbase = range_minmax([(s.start, s.end), (e.start, e.end)]) - - return seqid, startbase, endbase - - -def simple(args): - """ - %prog simple anchorfile --qbed=qbedfile --sbed=sbedfile [options] - - Write the block ends for each block in the anchorfile. 
- GeneA1 GeneA2 GeneB1 GeneB2 +/- score - - Optional additional columns: - orderA1 orderA2 orderB1 orderB2 sizeA sizeB size block_id - - With base coordinates (--coords): - block_id seqidA startA endA bpSpanA GeneA1 GeneA2 geneSpanA - block_id seqidB startB endB bpSpanB GeneB1 GeneB2 geneSpanB - """ - p = OptionParser(simple.__doc__) - p.add_argument( - "--rich", default=False, action="store_true", help="Output additional columns" - ) - p.add_argument( - "--coords", - default=False, - action="store_true", - help="Output columns with base coordinates", - ) - p.add_argument( - "--bed", - default=False, - action="store_true", - help="Generate BED file for the blocks", - ) - p.add_argument( - "--noheader", default=False, action="store_true", help="Don't output header" - ) - p.set_beds() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (anchorfile,) = args - additional = opts.rich - coords = opts.coords - header = not opts.noheader - bed = opts.bed - if bed: - coords = True - bbed = Bed() - - ac = AnchorFile(anchorfile) - simplefile = anchorfile.rsplit(".", 1)[0] + ".simple" - - qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) - pf = "-".join(anchorfile.split(".", 2)[:2]) - if ac.is_empty: - logger.error("No blocks found in `%s`. 
Aborting ..", anchorfile) - return - - if coords: - h = "Block|Chr|Start|End|Span|StartGene|EndGene|GeneSpan|Orientation" - else: - h = "StartGeneA|EndGeneA|StartGeneB|EndGeneB|Orientation|Score" - if additional: - h += "|StartOrderA|EndOrderA|StartOrderB|EndOrderB|SizeA|SizeB|Size|Block" - - fws = open(simplefile, "w") - if header: - print("\t".join(h.split("|")), file=fws) - - blocks = ac.blocks - atotalbase = btotalbase = 0 - for i, block in enumerate(blocks): - a, b, scores = zip(*block) - a = [qorder[x] for x in a] - b = [sorder[x] for x in b] - ia, oa = zip(*a) - ib, ob = zip(*b) - - astarti, aendi = min(ia), max(ia) - bstarti, bendi = min(ib), max(ib) - astart, aend = min(a)[1].accn, max(a)[1].accn - bstart, bend = min(b)[1].accn, max(b)[1].accn - - sizeA = len(set(ia)) - sizeB = len(set(ib)) - size = len(block) - - orientation = get_orientation(ia, ib) - aspan = aendi - astarti + 1 - bspan = bendi - bstarti + 1 - score = int((aspan * bspan) ** 0.5) - score = str(score) - block_id = pf + "-block-{0}".format(i) - - if coords: - - aseqid, astartbase, aendbase = get_boundary_bases(astart, aend, qorder) - bseqid, bstartbase, bendbase = get_boundary_bases(bstart, bend, sorder) - abase = aendbase - astartbase + 1 - bbase = bendbase - bstartbase + 1 - atotalbase += abase - btotalbase += bbase - - # Write dual lines - aargs = [ - block_id, - aseqid, - astartbase, - aendbase, - abase, - astart, - aend, - aspan, - "+", - ] - bargs = [ - block_id, - bseqid, - bstartbase, - bendbase, - bbase, - bstart, - bend, - bspan, - orientation, - ] - - if bed: - bbed.append( - BedLine( - "\t".join( - str(x) - for x in ( - bseqid, - bstartbase - 1, - bendbase, - "{}:{}-{}".format(aseqid, astartbase, aendbase), - size, - orientation, - ) - ) - ) - ) - - for args in (aargs, bargs): - print("\t".join(str(x) for x in args), file=fws) - continue - - args = [astart, aend, bstart, bend, score, orientation] - if additional: - args += [astarti, aendi, bstarti, bendi, sizeA, sizeB, size, 
block_id] - print("\t".join(str(x) for x in args), file=fws) - - fws.close() - logger.debug("A total of {0} blocks written to `{1}`.".format(i + 1, simplefile)) - - if coords: - print( - "Total block span in {0}: {1}".format( - qbed.filename, human_size(atotalbase, precision=2) - ), - file=sys.stderr, - ) - print( - "Total block span in {0}: {1}".format( - sbed.filename, human_size(btotalbase, precision=2) - ), - file=sys.stderr, - ) - print( - "Ratio: {0:.1f}x".format( - max(atotalbase, btotalbase) * 1.0 / min(atotalbase, btotalbase) - ), - file=sys.stderr, - ) - - if bed: - bedfile = simplefile + ".bed" - bbed.print_to_file(filename=bedfile, sorted=True) - logger.debug("Bed file written to `{}`".format(bedfile)) - - -def screen(args): - """ - %prog screen anchorfile newanchorfile --qbed=qbedfile --sbed=sbedfile [options] - - Extract subset of blocks from anchorfile. Provide several options: - - 1. Option --ids: a file with IDs, 0-based, comma separated, all in one line. - 2. Option --seqids: only allow seqids in this file. - 3. Option --seqpairs: only allow seqpairs in this file, one per line, e.g. "Chr01,Chr05". - 4. Option --minspan: remove blocks with less span than this. - 5. Option --minsize: remove blocks with less number of anchors than this. - 6. 
Option --intrabound: remove blocks that are too close to the diagonal on - self dot plot that are typically artifacts - """ - from jcvi.utils.range import range_distance - - p = OptionParser(screen.__doc__) - p.set_beds() - p.add_argument("--ids", help="File with block IDs (0-based)") - p.add_argument("--seqids", help="File with seqids") - p.add_argument("--seqpairs", help="File with seqpairs") - p.add_argument( - "--intrabound", - default=300, - type=int, - help="Lower bound of intra-chromosomal blocks (only for self comparison)", - ) - p.add_argument("--minspan", default=0, type=int, help="Only blocks with span >=") - p.add_argument("--minsize", default=0, type=int, help="Only blocks with anchors >=") - p.add_argument( - "--simple", action="store_true", help="Write simple anchorfile with block ends" - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - anchorfile, newanchorfile = args - ac = AnchorFile(anchorfile) - idsfile = opts.ids - seqidsfile = opts.seqids - seqpairsfile = opts.seqpairs - minspan = opts.minspan - minsize = opts.minsize - osimple = opts.simple - intrabound = opts.intrabound - ids, seqids, seqpairs = None, None, None - - if idsfile: - ids = SetFile(idsfile, delimiter=",") - ids = set(int(x) for x in ids) - if seqidsfile: - seqids = SetFile(seqidsfile, delimiter=",") - if seqpairsfile: - fp = open(seqpairsfile) - seqpairs = set() - for row in fp: - a, b = row.strip().split(",") - seqpairs.add((a, b)) - seqpairs.add((b, a)) - - qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) - blocks = ac.blocks - selected = 0 - fw = open(newanchorfile, "w") - - for i, block in enumerate(blocks): - if ids and i not in ids: - continue - - a, b, scores = zip(*block) - a = [qorder[x] for x in a] - b = [sorder[x] for x in b] - ia, oa = zip(*a) - ib, ob = zip(*b) - min_ia, max_ia = min(ia), max(ia) - min_ib, max_ib = min(ib), max(ib) - aspan = max_ia - min_ia + 1 - bspan = max_ib - min_ib + 1 - aseqid = 
oa[0].seqid - bseqid = ob[0].seqid - - if seqids: - if (aseqid not in seqids) or (bseqid not in seqids): - continue - - if seqpairs: - if (aseqid, bseqid) not in seqpairs: - continue - - same_chromosome = is_self and (aseqid == bseqid) - - if same_chromosome: - dist, _ = range_distance( - (aseqid, min_ia, max_ia, "?"), (bseqid, min_ib, max_ib, "?") - ) - if dist < intrabound: - continue - - if minsize: - if len(block) < minsize: - continue - - if minspan: - if aspan < minspan or bspan < minspan: - continue - - selected += 1 - print("###", file=fw) - for line in block: - print("\t".join(line), file=fw) - - fw.close() - - if osimple: - simple( - [ - newanchorfile, - "--noheader", - "--qbed=" + qbed.filename, - "--sbed=" + sbed.filename, - ] - ) - - logger.debug("Before: {0} blocks, After: {1} blocks".format(len(blocks), selected)) - - -def summary(args): - """ - %prog summary anchorfile - - Provide statistics for pairwise blocks. - """ - from jcvi.utils.cbook import SummaryStats - - p = OptionParser(summary.__doc__) - p.add_argument("--prefix", help="Generate per block stats") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (anchorfile,) = args - ac = AnchorFile(anchorfile) - clusters = ac.blocks - if clusters == [[]]: - logger.debug("A total of 0 anchor was found. Aborted.") - raise ValueError("A total of 0 anchor was found. 
Aborted.") - - nclusters = len(clusters) - nanchors = [len(c) for c in clusters] - nranchors = [_score(c) for c in clusters] # non-redundant anchors - print( - "A total of {0} (NR:{1}) anchors found in {2} clusters.".format( - sum(nanchors), sum(nranchors), nclusters - ), - file=sys.stderr, - ) - print("Stats:", SummaryStats(nanchors), file=sys.stderr) - print("NR stats:", SummaryStats(nranchors), file=sys.stderr) - - prefix = opts.prefix - if prefix: - pad = len(str(nclusters)) - for i, c in enumerate(clusters): - block_id = "{0}{1:0{2}d}".format(prefix, i + 1, pad) - print("\t".join((block_id, str(len(c))))) - - -def stats(args): - """ - %prog stats blocksfile - - Provide statistics for MCscan-style blocks. The count of homologs in each - pivot gene is recorded. - """ - from jcvi.utils.cbook import percentage - - p = OptionParser(stats.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blocksfile,) = args - fp = open(blocksfile) - counts = defaultdict(int) - total = orthologous = 0 - for row in fp: - atoms = row.rstrip().split("\t") - hits = [x for x in atoms[1:] if x != "."] - counts[len(hits)] += 1 - total += 1 - if atoms[1] != ".": - orthologous += 1 - - print("Total lines: {0}".format(total), file=sys.stderr) - for i, n in sorted(counts.items()): - print("Count {0}: {1}".format(i, percentage(n, total)), file=sys.stderr) - - print(file=sys.stderr) - - matches = sum(n for i, n in counts.items() if i != 0) - print( - "Total lines with matches: {0}".format(percentage(matches, total)), - file=sys.stderr, - ) - for i, n in sorted(counts.items()): - if i == 0: - continue - - print("Count {0}: {1}".format(i, percentage(n, matches)), file=sys.stderr) - - print(file=sys.stderr) - print( - "Orthologous matches: {0}".format(percentage(orthologous, matches)), - file=sys.stderr, - ) - - -def mcscan(args): - """ - %prog mcscan bedfile anchorfile [options] - - Stack synteny blocks on a reference bed, MCSCAN style. 
The first column in - the output is the reference order, given in the bedfile. Then each column - next to it are separate 'tracks'. - - If --mergetandem=tandem_file is specified, tandem_file should have each - tandem cluster as one line, tab separated. - """ - p = OptionParser(mcscan.__doc__) - p.add_argument( - "--iter", default=100, type=int, help="Max number of chains to output" - ) - p.add_argument( - "--ascii", - default=False, - action="store_true", - help="Output symbols rather than gene names", - ) - p.add_argument( - "--Nm", default=10, type=int, help="Clip block ends to allow slight overlaps" - ) - p.add_argument( - "--trackids", action="store_true", help="Track block IDs in separate file" - ) - p.add_argument( - "--mergetandem", - default=None, - help="merge tandems genes in output acoording to PATH-TO-TANDEM_FILE, " - "cannot be used with --ascii", - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedfile, anchorfile = args - ascii = opts.ascii - clip = opts.Nm - trackids = opts.trackids - ofile = opts.outfile - mergetandem = opts.mergetandem - bed = Bed(bedfile) - order = bed.order - - if trackids: - olog = ofile + ".tracks" - fwlog = must_open(olog, "w") - - if mergetandem: - assert not ascii - tandems = {} - for row in open(mergetandem): - row = row.split() - s = ";".join(row) - for atom in row: - tandems[atom] = s - - ac = AnchorFile(anchorfile) - ranges, block_pairs = ac.make_ranges(order, clip=clip) - - fw = must_open(ofile, "w") - - tracks = [] - print("Chain started: {0} blocks".format(len(ranges)), file=sys.stderr) - iteration = 0 - while ranges: - if iteration >= opts.iter: - break - - selected, score = range_chain(ranges) - tracks.append(selected) - selected = set(x.id for x in selected) - if trackids: - print(",".join(str(x) for x in sorted(selected)), file=fwlog) - - ranges = [x for x in ranges if x.id not in selected] - msg = "Chain {0}: score={1}".format(iteration, score) - 
if ranges: - msg += " {0} blocks remained..".format(len(ranges)) - else: - msg += " done!" - - print(msg, file=sys.stderr) - iteration += 1 - - mbed = [] - for b in bed: - id = b.accn - atoms = [] - for track in tracks: - track_ids = [x.id for x in track] - for tid in track_ids: - pairs = block_pairs[tid] - anchor = pairs.get(id, ".") - if anchor != ".": - break - if ascii and anchor != ".": - anchor = "x" - atoms.append(anchor) - mbed.append((id, atoms)) - - for id, atoms in mbed: - sep = "" if ascii else "\t" - if mergetandem: - for i, atom in enumerate(atoms): - atoms[i] = tandems.get(atom, atom) - print("\t".join((id, sep.join(atoms))), file=fw) - - logger.debug("MCscan blocks written to `{0}`.".format(ofile)) - if trackids: - logger.debug("Block IDs written to `{0}`.".format(olog)) - - -def write_details(fw, details, bed): - """ - Write per gene depth to file - """ - for a, b, depth in details: - for i in range(a, b): - gi = bed[i].accn - print("\t".join((gi, str(depth))), file=fw) - - -def depth(args): - """ - %prog depth anchorfile --qbed qbedfile --sbed sbedfile - - Calculate the depths in the two genomes in comparison, given in --qbed and - --sbed. The synteny blocks will be layered on the genomes, and the - multiplicity will be summarized to stderr. - """ - from jcvi.utils.range import range_depth - from jcvi.graphics.base import latex - - p = OptionParser(depth.__doc__) - p.add_argument("--depthfile", help="Generate file with gene and depth") - p.add_argument( - "--histogram", default=False, action="store_true", help="Plot histograms in PDF" - ) - p.add_argument("--xmax", type=int, help="x-axis maximum to display in plot") - p.add_argument("--title", default=None, help="Title to display in plot") - p.add_argument("--quota", help="Force to use this quota, e.g. 
1:1, 1:2 ...") - p.set_beds() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (anchorfile,) = args - qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) - depthfile = opts.depthfile - ac = AnchorFile(anchorfile) - qranges = [] - sranges = [] - blocks = ac.blocks - for ib in blocks: - q, s, t = zip(*ib) - q = [qorder[x] for x in q] - s = [sorder[x] for x in s] - qrange = (min(q)[0], max(q)[0]) - srange = (min(s)[0], max(s)[0]) - qranges.append(qrange) - sranges.append(srange) - if is_self: - qranges.append(srange) - - qgenome = op.basename(qbed.filename).split(".")[0] - sgenome = op.basename(sbed.filename).split(".")[0] - qtag = "Genome {0} depths".format(qgenome) - print("{}:".format(qtag), file=sys.stderr) - dsq, details = range_depth(qranges, len(qbed)) - if depthfile: - fw = open(depthfile, "w") - write_details(fw, details, qbed) - - if is_self: - return - - stag = "Genome {0} depths".format(sgenome) - print("{}:".format(stag), file=sys.stderr) - dss, details = range_depth(sranges, len(sbed)) - if depthfile: - write_details(fw, details, sbed) - fw.close() - logger.debug("Depth written to `{0}`.".format(depthfile)) - - if not opts.histogram: - return - - from jcvi.graphics.base import plt, quickplot_ax, savefig, normalize_axes - - # Plot two histograms one for query genome, one for subject genome - plt.figure(1, (6, 3)) - f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) - - xmax = opts.xmax or max(4, max(list(dsq.keys()) + list(dss.keys()))) - if opts.quota: - speak, qpeak = opts.quota.split(":") - qpeak, speak = int(qpeak), int(speak) - else: - qpeak = find_peak(dsq) - speak = find_peak(dss) - - qtag = "# of {} blocks per {} gene".format(sgenome, qgenome) - stag = "# of {} blocks per {} gene".format(qgenome, sgenome) - quickplot_ax( - ax1, - dss, - 0, - xmax, - stag, - ylabel="Percentage of genome", - highlight=range(1, speak + 1), - ) - quickplot_ax(ax2, dsq, 0, xmax, qtag, ylabel=None, 
highlight=range(1, qpeak + 1)) - - title = opts.title or "{} vs {} syntenic depths\n{}:{} pattern".format( - qgenome, sgenome, speak, qpeak - ) - root = f.add_axes([0, 0, 1, 1]) - vs, pattern = latex(title).split("\n") - root.text(0.5, 0.97, vs, ha="center", va="center", color="darkslategray") - root.text(0.5, 0.925, pattern, ha="center", va="center", color="tomato", size=16) - print(title, file=sys.stderr) - - normalize_axes(root) - - pf = anchorfile.rsplit(".", 1)[0] + ".depth" - image_name = pf + ".pdf" - savefig(image_name) - - -def find_peak(data, cutoff=0.9): - """ - This will look for the point where cumulative cutoff is reached. For - example: - - >>> find_peak({0: 27, 1: 71, 2: 1}) - 1 - """ - total_length = sum(data.values()) - count_cutoff = cutoff * total_length - cum_sum = 0 - for i, count in sorted(data.items()): - cum_sum += count - if cum_sum > count_cutoff: - return i - - -def get_blocks(scaffold, bs, order, xdist=20, ydist=20, N=6): - points = [] - for b in bs: - accn = b.accn.rsplit(".", 1)[0] - if accn not in order: - continue - x, xx = order[accn] - y = (b.start + b.end) / 2 - points.append((x, y)) - - # print scaffold, points - blocks = synteny_scan(points, xdist, ydist, N) - return blocks - - -def breakpoint(args): - """ - %prog breakpoint blastfile bedfile [options] - - Identify breakpoints where collinearity ends. `blastfile` contains mapping - from markers (query) to scaffolds (subject). `bedfile` contains marker - locations in the related species. 
- """ - from jcvi.formats.blast import bed - from jcvi.utils.range import range_interleave - - p = OptionParser(breakpoint.__doc__) - p.add_argument( - "--xdist", type=int, default=20, help="xdist (in related genome) cutoff" - ) - p.add_argument( - "--ydist", type=int, default=200000, help="ydist (in current genome) cutoff" - ) - p.add_argument("-n", type=int, default=5, help="number of markers in a block") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - blastfile, bedfile = args - order = Bed(bedfile).order - blastbedfile = bed([blastfile]) - bbed = Bed(blastbedfile) - for scaffold, bs in bbed.sub_beds(): - blocks = get_blocks( - scaffold, bs, order, xdist=opts.xdist, ydist=opts.ydist, N=opts.n - ) - sblocks = [] - for block in blocks: - xx, yy = zip(*block) - sblocks.append((scaffold, min(yy), max(yy))) - iblocks = range_interleave(sblocks) - for ib in iblocks: - ch, start, end = ib - print("{0}\t{1}\t{2}".format(ch, start - 1, end)) - - -def scan(args): - """ - %prog scan blastfile anchor_file [options] - - pull out syntenic anchors from blastfile based on single-linkage algorithm - """ - p = OptionParser(scan.__doc__) - p.add_argument( - "-n", - "--min_size", - dest="n", - type=int, - default=4, - help="minimum number of anchors in a cluster", - ) - p.add_argument( - "--intrabound", - default=300, - type=int, - help="Lower bound of intra-chromosomal blocks (only for self comparison)", - ) - p.add_argument("--liftover", help="Scan BLAST file to find extra anchors") - p.add_argument( - "--liftover_dist", - type=int, - help="Distance to extend from liftover. 
Defaults to half of --dist", - ) - p.set_stripnames() - - blast_file, anchor_file, dist, opts = add_arguments(p, args, dist=20) - qbed, sbed, qorder, sorder, is_self = check_beds(blast_file, p, opts) - - intrabound = opts.intrabound - filtered_blast = read_blast( - blast_file, qorder, sorder, is_self=is_self, ostrip=False - ) - - fw = open(anchor_file, "w") - logger.debug("Chaining distance = {0}".format(dist)) - - clusters = batch_scan( - filtered_blast, - xdist=dist, - ydist=dist, - N=opts.n, - is_self=is_self, - intrabound=intrabound, - ) - for cluster in clusters: - print("###", file=fw) - for qi, si, score in cluster: - query, subject = qbed[qi].accn, sbed[si].accn - print("\t".join((query, subject, str(int(score)))), file=fw) - - fw.close() - summary([anchor_file]) - - lo = opts.liftover - if not lo: - return anchor_file - - dargs = ["--qbed=" + opts.qbed, "--sbed=" + opts.sbed] - if not opts.strip_names: - dargs += ["--no_strip_names"] - liftover_dist = opts.liftover_dist or dist // 2 - dargs += ["--dist={}".format(liftover_dist)] - newanchorfile = liftover([lo, anchor_file] + dargs) - return newanchorfile - - -def liftover(args): - """ - %prog liftover blastfile anchorfile [options] - - Typical use for this program is given a list of anchors (syntennic - genes), choose from the blastfile the pairs that are close to the anchors. - - Anchorfile has the following format, each row defines a pair. 
- - geneA geneB - geneC geneD - """ - p = OptionParser(liftover.__doc__) - p.set_stripnames() - - blast_file, anchor_file, dist, opts = add_arguments(p, args) - qbed, sbed, qorder, sorder, is_self = check_beds(blast_file, p, opts) - - filtered_blast = read_blast( - blast_file, qorder, sorder, is_self=is_self, ostrip=opts.strip_names - ) - blast_to_score = dict(((b.qi, b.si), int(b.score)) for b in filtered_blast) - accepted = dict(((b.query, b.subject), str(int(b.score))) for b in filtered_blast) - - ac = AnchorFile(anchor_file) - all_hits = group_hits(filtered_blast) - all_anchors, anchor_to_block = read_anchors(ac, qorder, sorder) - - # select hits that are close to the anchor list - lifted = 0 - for chr_pair in sorted(all_anchors.keys()): - hits = np.array(all_hits[chr_pair]) - anchors = np.array(all_anchors[chr_pair]) - - if not len(hits): - continue - - for point, nearest in synteny_liftover(hits, anchors, dist): - qi, si = point[:2] - block_id = anchor_to_block[nearest] - query, subject = qbed[qi].accn, sbed[si].accn - score = blast_to_score[(qi, si)] - - ac.blocks[block_id].append((query, subject, str(score) + "L")) - lifted += 1 - - logger.debug("%d new pairs found (dist=%d).", lifted, dist) - newanchorfile = anchor_file.rsplit(".", 1)[0] + ".lifted.anchors" - if accepted: - ac.filter_blocks(accepted) - ac.print_to_file(filename=newanchorfile) - summary([newanchorfile]) - - return newanchorfile - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/__init__.py b/jcvi/formats/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/jcvi/formats/__main__.py b/jcvi/formats/__main__.py deleted file mode 100644 index 483e4cb9..00000000 --- a/jcvi/formats/__main__.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Array of data parsers for bioinformatics file formats, such as: GFF3, BED, SAM/BAM, VCF, PSL, AGP, FASTA/FASTQ, BLAST, etc. 
-""" - -from ..apps.base import dmain - - -if __name__ == "__main__": - dmain(__file__) diff --git a/jcvi/formats/agp.py b/jcvi/formats/agp.py deleted file mode 100644 index 58c79362..00000000 --- a/jcvi/formats/agp.py +++ /dev/null @@ -1,2188 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Genbank AGP file format, see spec here -http://www.ncbi.nlm.nih.gov/projects/genome/assembly/agp -""" -import re -import shutil -import sys - -from collections import defaultdict -from copy import deepcopy -from itertools import groupby, zip_longest - -from Bio.Seq import Seq -from Bio.SeqRecord import SeqRecord -from Bio import SeqIO -from more_itertools import pairwise - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - cleanup, - flatten, - logger, - need_update, -) -from ..assembly.base import calculate_A50 -from ..utils.range import range_intersect - -from .base import LineFile, must_open -from .bed import Bed -from .fasta import Fasta - - -Supported_AGP_Version = "2.1" -AGP_Version_Pragma = "##agp-version " + Supported_AGP_Version -Valid_component_type = list("ADFGNOPUW") - -Valid_gap_type = ( - "scaffold", - "fragment", # in v2.0, obsolete in v2.1 - "clone", # in v1.1, obsolete in v2.0 - "contig", - "centromere", - "short_arm", # in both versions - "heterochromatin", - "telomere", - "repeat", # in both versions - "contamination", -) # new in v2.0 - -Valid_orientation = ("+", "-", "0", "?", "na") - -Valid_evidence = ( - "", - "na", - "paired-ends", - "align_genus", - "align_xgenus", - "align_trnscpt", - "within_clone", - "clone_contig", - "map", - "pcr", # new in v2.1 - "proximity_ligation", # new in v2.1 - "strobe", - "unspecified", -) - -component_RGB = {"O": "0,100,0", "F": "0,100,0", "D": "50,205,50", "N": "255,255,255"} - -""" -phase 0 - (P)refinish; phase 1,2 - (D)raft; -phase 3 - (F)inished; 4 - (O)thers -""" -Phases = "PDDFO" - - -class AGPLine(object): - def __init__(self, row, validate=True): - - atoms = row.split("\t") - atoms[-1] 
= atoms[-1].strip() - self.object = atoms[0] - self.object_beg = int(atoms[1]) - self.object_end = int(atoms[2]) - self.object_span = self.object_end - self.object_beg + 1 - self.part_number = atoms[3] - self.component_type = atoms[4] - self.is_gap = self.component_type in ("N", "U") - - if not self.is_gap: - self.component_id = atoms[5] - self.component_beg = int(atoms[6]) - self.component_end = int(atoms[7]) - self.component_span = self.component_end - self.component_beg + 1 - self.orientation = atoms[8].strip() - else: - self.gap_length = int(atoms[5]) - self.gap_type = atoms[6] - self.linkage = atoms[7] - self.linkage_evidence = [] - if len(atoms) > 8: - linkage_evidence = atoms[8].strip() - if linkage_evidence: - self.linkage_evidence = linkage_evidence.split(";") - self.orientation = "na" - self.component_id = "{0}.gap{1:03d}".format( - self.gap_type, int(self.part_number) - ) - - if validate: - try: - self.validate() - except AssertionError as b: - logger.error("%s\nerror when validating this line:\n%s", b, row) - - self.sign = {"+": 1, "-": -1, "?": 0}.get(self.orientation) - - def __str__(self): - - fields = [ - self.object, - self.object_beg, - self.object_end, - self.part_number, - self.component_type, - ] - - if not self.is_gap: - fields += [ - self.component_id, - self.component_beg, - self.component_end, - self.orientation, - ] - else: - fields += [ - self.gap_length, - self.gap_type, - self.linkage, - ";".join(self.linkage_evidence), - ] - - return "\t".join(str(x) for x in fields) - - __repr__ = __str__ - - @property - def bedline(self): - # bed formatted line - gid = self.component_id if not self.is_gap else self.gap_type - return "\t".join( - ( - self.object, - str(self.object_beg - 1), - str(self.object_end), - gid, - self.component_type, - self.orientation, - ) - ) - - @property - def bedextra(self): - # extra lines for bed12 - return "\t".join( - str(x) - for x in ( - self.object_beg - 1, - self.object_end, - component_RGB[self.component_type], 
- 1, - str(self.object_end - self.object_beg + 1) + ",", - "0,", - ) - ) - - @property - def bed12line(self): - # bed12 formatted line - return self.bedline + "\t" + self.bedextra - - def gffline(self, gff_source="MGSC", gff_feat_type="golden_path_fragment"): - # gff3 formatted line - gff_feat_id = "".join( - str(x) for x in (self.object, ".", "{0:03d}".format(int(self.part_number))) - ) - attributes = ";".join( - ( - "ID=" + gff_feat_id, - "Name=" + self.component_id, - "phase=" + self.component_type, - ) - ) - gff_feat_type = "gap" if self.component_type in ["N", "U"] else gff_feat_type - orientation = "." if self.orientation == "na" else self.orientation - - return "\t".join( - str(x) - for x in ( - self.object, - gff_source, - gff_feat_type, - str(self.object_beg), - str(self.object_end), - ".", - orientation, - ".", - attributes, - ) - ) - - @property - def isCloneGap(self): - return self.is_gap and self.gap_type != "fragment" - - def validate(self): - assert ( - self.orientation in Valid_orientation - ), "orientation must be one of {0}".format("|".join(Valid_orientation)) - assert ( - self.component_type in Valid_component_type - ), "component_type must be one of {0}".format("|".join(Valid_component_type)) - assert ( - self.object_beg <= self.object_end - ), "object_beg needs to be <= object_end" - - if not self.is_gap: - assert ( - self.component_beg <= self.component_end - ), "component_begin must be <= component_end" - assert ( - self.object_span == self.component_span - ), f"object_span ({self.object_span}) must be same as component_span ({self.component_span})" - else: - assert self.gap_length >= 1, "gap_length must be >= 1" - assert ( - self.object_span == self.gap_length - ), "object span (%d) must be same as gap_length (%d)" % ( - self.object_span, - self.gap_length, - ) - assert ( - self.gap_type in Valid_gap_type - ), "gap_type must be one of {}, you have {}".format( - "|".join(Valid_gap_type), self.gap_type - ) - - assert all( - x in Valid_evidence 
for x in self.linkage_evidence - ), "linkage_evidence must be one of {0}, you have {1}".format( - "|".join(Valid_evidence), self.linkage_evidence - ) - - if self.linkage == "no": - assert not self.linkage_evidence or self.linkage_evidence[0] in ( - "", - "na", - ), "linkage no is incompatible with evidence {0}".format( - self.linkage_evidence - ) - - @classmethod - def agpline(cls, tuple): - return AGPLine("\t".join(str(x) for x in tuple), validate=False) - - @classmethod - def cline(cls, object, cid, sizes, o): - line = [object, 0, 0, 0] - line += ["W", cid, 1, sizes[cid], o] - return AGPLine.agpline(line) - - @classmethod - def gline(cls, object, gap, unknown=100): - line = [object, 0, 0, 0] - gtype = "N" - if gap < unknown: - gtype = "U" - gap = unknown # Reset it to 100 - line += [gtype, gap, "scaffold", "yes", "paired-ends"] - return AGPLine.agpline(line) - - -class AGP(LineFile): - def __init__(self, filename, nogaps=False, validate=True, sorted=True): - super().__init__(filename) - - fp = must_open(filename) - self.header = [] - for row in fp: - if row[0] == "#": - self.header.append(row.strip()) - continue - if row.strip() == "": - continue - a = AGPLine(row, validate=validate) - if nogaps and a.is_gap: - continue - self.append(a) - - self.validate = validate - if validate: - if not sorted: - self.sort(key=lambda x: (x.object, x.object_beg)) - self.validate_all() - - @property - def order(self): - """ - Returns a dict with component_id => (i, agpline) - """ - d = {} - for i, x in enumerate(self): - if x.is_gap: - continue - xid = x.component_id - d[xid] = (i, x) - - xid = xid.rsplit(".", 1)[0] # Remove Genbank version - if xid not in d: - d[xid] = (i, x) - - return d - - def getAdjacentClone(self, i, south=True): - """ - Returns the adjacent clone name. 
- """ - rr = range(i + 1, len(self)) if south else range(i - 1, -1, -1) - a = self[i] - for ix in rr: - x = self[ix] - if x.object != a.object: - break - if x.is_gap: - if x.isCloneGap: - return x - else: - continue - else: - return x - return None - - def getNorthSouthClone(self, i): - """ - Returns the adjacent clone name from both sides. - """ - north = self.getAdjacentClone(i, south=False) - south = self.getAdjacentClone(i) - return north, south - - def transfer_header(self, fw=sys.stdout): - """ - transfer_header() copies header to a new file. - print_header() creates a new header. - """ - print("\n".join(self.header), file=fw) - - @classmethod - def print_header( - cls, fw=sys.stdout, organism=None, taxid=None, source=None, comment=None - ): - print(AGP_Version_Pragma, file=fw) - # these comments are entirely optional, modeled after maize AGP - if organism: - print("# ORGANISM: {0}".format(organism), file=fw) - if taxid: - print("# TAX_ID: {0}".format(taxid), file=fw) - if source: - print("# GENOME CENTER: {0}".format(source), file=fw) - if comment: - print("# COMMENT: {0}".format(comment), file=fw) - fields = ( - "object object_beg object_end part_number component_type " - "component_id/gap_length component_beg/gap_type " - "component_end/linkage orientation/linkage_evidence" - ) - print("# FIELDS: {0}".format(", ".join(fields.split())), file=fw) - - def rstats(self, object, bacs, components, scaffold_sizes, length): - from jcvi.utils.cbook import human_size - - nbacs = len(bacs) - nscaffolds = len(scaffold_sizes) - a50, l50, n50 = calculate_A50(scaffold_sizes) - l50 = human_size(l50) - length = human_size(length) - - return (object, nbacs, components, nscaffolds, n50, l50, length) - - def iter_object(self): - for ob, lines_with_same_ob in groupby(self, key=lambda x: x.object): - yield ob, list(lines_with_same_ob) - - def iter_paired_components(self): - for object, lines in self.iter_object(): - lines = [x for x in lines if not x.is_gap] - for a, b in 
pairwise(lines): - qreverse = a.orientation == "-" - yield a, b, qreverse - - def print_to_file(self, filename, index=True): - fw = open(filename, "w") - for a in self: - print(a, file=fw) - fw.close() - logger.debug("AGP file written to `%s`.", filename) - if index: - reindex([filename, "--inplace"]) - - def summary_one(self, object, lines): - bacs = set() - components = 0 - scaffold_sizes = [] - _scaffold_key = lambda x: x.is_gap and x.linkage == "no" - length = max(x.object_end for x in lines) - - for is_gap, scaffold in groupby(lines, key=_scaffold_key): - if is_gap: - continue - - scaffold = list(scaffold) - scaffold_size = 0 - for b in scaffold: - if b.is_gap: - scaffold_size += b.gap_length - else: - bacs.add(b.component_id) - components += 1 - scaffold_size += b.component_span - - scaffold_sizes.append(scaffold_size) - - return ( - self.rstats(object, bacs, components, scaffold_sizes, length), - (bacs, components, scaffold_sizes, length), - ) - - def summary_all(self): - - all_bacs = set() - all_scaffold_sizes = [] - all_components = 0 - all_length = 0 - for ob, lines in self.iter_object(): - s, bstats = self.summary_one(ob, lines) - yield s - - bacs, components, scaffold_sizes, length = bstats - all_components += components - all_bacs |= bacs - all_scaffold_sizes.extend(scaffold_sizes) - all_length += length - - yield self.rstats( - "Total", all_bacs, all_components, all_scaffold_sizes, all_length - ) - - def validate_one(self, object, lines): - object_beg = lines[0].object_beg - assert object_beg == 1, "object %s must start at 1 (instead of %d)" % ( - object, - object_beg, - ) - - for a, b in pairwise(lines): - assert ( - b.object_beg - a.object_end == 1 - ), "lines not continuous coords between:\n%s\n%s" % (a, b) - - def validate_all(self): - for ob, lines in self.iter_object(): - self.validate_one(ob, lines) - - def build_one(self, object, lines, fasta, fw, newagp=None): - """ - Construct molecule using component fasta sequence - """ - components = [] - 
- total_bp = 0 - for line in lines: - - if line.is_gap: - seq = "N" * line.gap_length - if newagp: - print(line, file=newagp) - else: - seq = fasta.sequence( - dict( - chr=line.component_id, - start=line.component_beg, - stop=line.component_end, - strand=line.orientation, - ) - ) - # Check for dangling N's - if newagp: - trimNs(seq, line, newagp) - - components.append(seq) - total_bp += len(seq) - - if self.validate: - assert ( - total_bp == line.object_end - ), "cumulative base pairs (%d) does not match (%d)" % ( - total_bp, - line.object_end, - ) - - if not newagp: - rec = SeqRecord(Seq("".join(components)), id=object, description="") - SeqIO.write([rec], fw, "fasta") - if len(rec) > 1000000: - logger.debug("Write object %s to `%s`", object, fw.name) - - def build_all(self, componentfasta, targetfasta, newagp=None): - f = Fasta(componentfasta, index=False) - fw = open(targetfasta, "w") - - for ob, lines in self.iter_object(): - self.build_one(ob, lines, f, fw, newagp=newagp) - - @property - def graph(self): - from jcvi.algorithms.graph import BiGraph - - g = BiGraph() - for ob, lines in self.iter_object(): - components = [x for x in lines if not x.is_gap] - gaps = [x for x in lines if x.is_gap] - for i, (a, b) in enumerate(pairwise(components)): - g.add_edge( - a.component_id, - b.component_id, - a.orientation, - b.orientation, - length=gaps[i].gap_length, - ) - if len(components) == 1: # Singleton object - a = components[0] - g.add_node(a.component_id) - - return g - - def get_line(self, cid): - for i, a in enumerate(self): - if not a.is_gap and a.component_id == cid: - return i, a - return None, None - - # Update AGP on the fly - def delete_line(self, a, verbose=False): - ai, ax = self.get_line(a) - if ai is None: - return - - if verbose: - msg = "* Delete line:\n{0}".format(ax) - print(msg, file=sys.stderr) - - del self[ai] - - def delete_lines(self, lines, verbose=False): - deleted = set() - for r in lines: - if r.is_gap: - continue - cid = r.component_id - 
self.delete_line(cid, verbose=verbose) - deleted.add(cid) - return deleted - - def insert_lines(self, a, lines, after=False, delete=False, verbose=False): - if delete: - deleted = self.delete_lines(lines, verbose=verbose) - - ai, ax = self.get_line(a) - if after: - ai += 1 - for i, x in enumerate(lines): - self.insert(ai + i, x) - if verbose: - tag = "after" if after else "before" - msg = "* Insert {0} line:\n".format(tag) - msg += "\n".join([str(ax), "-" * 60]) + "\n" - msg += "\n".join(str(x) for x in lines) - print(msg, file=sys.stderr) - return deleted - - def update_between(self, a, b, lines, delete=True, verbose=False): - if delete: - deleted = self.delete_lines(lines, verbose=verbose) - - ai, ax = self.get_line(a) - bi, bx = self.get_line(b) - # Update - self[ai + 1 : bi] = lines - if verbose: - msg = "* Update between:\n" - msg += "\n".join([str(ax), str(bx), "-" * 60]) + "\n" - msg += "\n".join(str(x) for x in lines) - print(msg, file=sys.stderr) - return deleted - - def convert_to_gap(self, a, verbose=False): - ai, ax = self.get_line(a) - gline = AGPLine.gline(ax.object, 100) - self[ai] = gline - if verbose: - msg = "* Convert from/to:\n" - msg += "\n".join([str(ax), str(gline), "-" * 60]) + "\n" - print(msg, file=sys.stderr) - - def delete_between(self, a, b, verbose=True): - return self.update_between(a, b, [], verbose=verbose) - - def switch_between(self, a, b, verbose=True): - ai, ax = self.get_line(a) - bi, bx = self.get_line(b) - self[ai] = bx - self[bi] = ax - if verbose: - msg = "* Switch between:\n" - msg += "\n".join([str(ax), str(bx)]) - print(msg, file=sys.stderr) - - -class TPFLine(object): - def __init__(self, line): - args = line.split() - self.component_id = args[0] - self.object = args[1] - if self.is_gap: - self.gap_type = self.component_id - self.orientation = args[2] - - def __str__(self): - return "\t".join((self.component_id, self.object_id, self.orientation)) - - @property - def is_gap(self): - return self.component_id in 
Valid_gap_type - - @property - def isCloneGap(self): - return self.is_gap and self.gap_type != "fragment" - - -class TPF(LineFile): - def __init__(self, filename): - super().__init__(filename) - fp = open(filename) - for row in fp: - if row[0] == "#": - continue - self.append(TPFLine(row)) - - def getAdjacentClone(self, i, south=True): - """ - Returns adjacent clone name, either the line before or after the current - line. - """ - rr = range(i + 1, len(self)) if south else range(i - 1, -1, -1) - a = self[i] - for ix in rr: - x = self[ix] - if x.object != a.object: - break - return x - return None - - def getNorthSouthClone(self, i): - """ - Returns adjacent clone name on both sides. - """ - north = self.getAdjacentClone(i, south=False) - south = self.getAdjacentClone(i) - return north, south - - -class OOLine(object): - def __init__(self, id, component_id, component_size, strand): - self.id = id - self.component_id = component_id - self.component_size = component_size - self.strand = strand - - -class OO(LineFile): - def __init__(self, filename=None, ctgsizes=None): - super().__init__(filename) - - if filename is None: - return - - from jcvi.formats.base import read_block - - fp = open(filename) - prefix = "contig_" - self.contigs = set() - for header, block in read_block(fp, ">"): - header = header[1:] # Trim the '>' - header = header.split()[0] - for b in block: - ctg, orientation = b.split() - if ctg.startswith(prefix): - ctg = ctg[len(prefix) :] - - assert orientation in ("BE", "EB") - - strand = "+" if orientation == "BE" else "-" - ctgsize = ctgsizes[ctg] - self.add(header, ctg, ctgsize, strand) - self.contigs.add(ctg) - - def add(self, scaffold, ctg, ctgsize, strand="0"): - self.append(OOLine(scaffold, ctg, ctgsize, strand)) - - def sub_beds(self): - for scaffold, beds in groupby(self, key=lambda x: x.id): - yield scaffold, list(beds) - - def write_AGP( - self, fw=sys.stdout, gapsize=100, phases={}, gaptype="scaffold", evidence="" - ): - - linkage = "yes" - 
- for object, beds in self.sub_beds(): - object_beg = 1 - part_number = 0 - for b in beds: - component_id = b.component_id - size = b.component_size - if ( - part_number > 0 and gapsize > 0 - ): # Print gap except for the first one - object_end = object_beg + gapsize - 1 - part_number += 1 - component_type = "U" if gapsize == 100 else "N" - print( - "\t".join( - str(x) - for x in ( - object, - object_beg, - object_end, - part_number, - component_type, - gapsize, - gaptype, - linkage, - evidence, - ) - ), - file=fw, - ) - - object_beg += gapsize - - object_end = object_beg + size - 1 - part_number += 1 - strand = "?" if b.strand == "0" else b.strand - print( - "\t".join( - str(x) - for x in ( - object, - object_beg, - object_end, - part_number, - phases.get(component_id, "W"), - component_id, - 1, - size, - strand, - ) - ), - file=fw, - ) - - object_beg += size - - -def order_to_agp( - object, ctgorder, sizes, fwagp, gapsize=100, gaptype="scaffold", evidence="" -): - - o = OO() # Without a filename - for scaffold_number, (ctg, strand) in enumerate(ctgorder): - size = sizes[ctg] - o.add(object, ctg, size, strand) - - o.write_AGP(fwagp, gapsize=gapsize, gaptype=gaptype, phases={}, evidence=evidence) - - -def trimNs(seq, line, newagp): - """ - Test if the sequences contain dangling N's on both sides. This component - needs to be adjusted to the 'actual' sequence range. 
- """ - start, end = line.component_beg, line.component_end - size = end - start + 1 - leftNs, rightNs = 0, 0 - lid, lo = line.component_id, line.orientation - for s in seq: - if s in "nN": - leftNs += 1 - else: - break - for s in seq[::-1]: - if s in "nN": - rightNs += 1 - else: - break - - if lo == "-": - trimstart = start + rightNs - trimend = end - leftNs - else: - trimstart = start + leftNs - trimend = end - rightNs - - trimrange = (trimstart, trimend) - oldrange = (start, end) - - if trimrange != oldrange: - logger.debug("{0} trimmed of N's: {1} => {2}".format(lid, oldrange, trimrange)) - - if leftNs: - print( - "\t".join( - str(x) - for x in (line.object, 0, 0, 0, "N", leftNs, "fragment", "yes", "") - ), - file=newagp, - ) - if trimend > trimstart: - print( - "\t".join( - str(x) - for x in ( - line.object, - 0, - 0, - 0, - line.component_type, - lid, - trimstart, - trimend, - lo, - ) - ), - file=newagp, - ) - if rightNs and rightNs != size: - print( - "\t".join( - str(x) - for x in (line.object, 0, 0, 0, "N", rightNs, "fragment", "yes", "") - ), - file=newagp, - ) - else: - print(line, file=newagp) - - -def main(): - - actions = ( - ("summary", "print out a table of scaffold statistics"), - ("stats", "print out a report for length of gaps and components"), - ("phase", "given genbank file, get the phase for the HTG BAC record"), - ("bed", "print out the tiling paths in bed/gff3 format"), - ("frombed", "generate AGP file based on bed file"), - ("fromcsv", "generate AGP file based on simple csv file"), - ( - "extendbed", - "extend the components to fill the component range and output bed/gff3 format file", - ), - ("gaps", "print out the distribution of gap sizes"), - ("tpf", "print out a list of accessions, aka Tiling Path File"), - ("cut", "cut at the boundaries of given ranges"), - ("mask", "mask given ranges in components to gaps"), - ("swap", "swap objects and components"), - ("format", "reformat AGP file"), - ("reindex", "assume accurate component order, 
reindex coordinates"), - ("tidy", "run trim=>reindex=>merge sequentially"), - ( - "build", - "given agp file and component fasta file, build " + "pseudomolecule fasta", - ), - ( - "validate", - "given agp file, component and pseudomolecule fasta, " - + "validate if the build is correct", - ), - ("infer", "infer where the components are in the genome"), - ("compress", "compress coordinates based on multiple AGP files"), - ) - - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def fromcsv(args): - """ - %prog fromcsv contigs.fasta map.csv map.agp - - Convert csv which contains list of scaffolds/contigs to AGP file. - """ - import csv - from jcvi.formats.sizes import Sizes - - p = OptionParser(fromcsv.__doc__) - p.add_argument("--evidence", default="map", help="Linkage evidence to add in AGP") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - contigsfasta, mapcsv, mapagp = args - reader = csv.reader(open(mapcsv)) - sizes = Sizes(contigsfasta).mapping - next(reader) # Header - fwagp = must_open(mapagp, "w") - o = OO() - for row in reader: - if len(row) == 2: - object, ctg = row - strand = "?" - elif len(row) == 3: - object, ctg, strand = row - size = sizes[ctg] - o.add(object, ctg, size, strand) - - o.write_AGP( - fwagp, gapsize=100, gaptype="scaffold", phases={}, evidence=opts.evidence - ) - - -def compress(args): - """ - %prog compress a.agp b.agp - - Convert coordinates based on multiple AGP files. Useful to simplify multiple - liftOvers to compress multiple chain files into a single chain file, in - upgrading locations of genomic features. 
- - Example: - `a.agp` could contain split scaffolds: - scaffold_0.1 1 600309 1 W scaffold_0 1 600309 + - - `b.agp` could contain mapping to chromosomes: - LG05 6435690 7035998 53 W scaffold_0.1 1 600309 + - - The final AGP we want is: - LG05 6435690 7035998 53 W scaffold_0 1 600309 + - """ - p = OptionParser(compress.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - aagpfile, bagpfile = args - # First AGP provides the mapping - store = {} - agp = AGP(aagpfile) - for a in agp: - if a.is_gap: - continue - # Ignore '?' in the mapping - if a.sign == 0: - a.sign = 1 - store[(a.object, a.object_beg, a.object_end)] = ( - a.component_id, - a.component_beg, - a.component_end, - a.sign, - ) - - # Second AGP forms the backbone - agp = AGP(bagpfile) - fw = must_open(opts.outfile, "w") - print("\n".join(agp.header), file=fw) - for a in agp: - if a.is_gap: - print(a, file=fw) - continue - component_id, component_beg, component_end, sign = store[ - (a.component_id, a.component_beg, a.component_end) - ] - - orientation = {1: "+", -1: "-", 0: "?"}.get(sign * a.sign) - atoms = ( - a.object, - a.object_beg, - a.object_end, - a.part_number, - a.component_type, - component_id, - component_beg, - component_end, - orientation, - ) - a = AGPLine("\t".join(str(x) for x in atoms)) - print(a, file=fw) - - -def map_one_scaffold_1way(scaffold, genome, orientation="+"): - if orientation == "-": - scaffold = scaffold.reverse_complement() - - scaffold = str(scaffold) - for obj_name, obj in genome.iteritems(): - obj_idx = obj.find(scaffold) - if obj_idx == -1: - continue - else: - return obj_name, obj_idx, orientation - return -1, -1, orientation # unmapped scaffolds - - -def map_one_scaffold(opts): - scaffold_name, scaffold, genome = opts - scaffold = scaffold.seq - obj_name, obj_idx, objo = map_one_scaffold_1way(scaffold, genome) - if obj_name == -1: - obj_name, obj_idx, objo = map_one_scaffold_1way( - scaffold, genome, 
orientation="-" - ) - if obj_name == -1: - return "" - - obj_end = obj_idx + len(scaffold) - return "\t".join( - str(x) for x in (obj_name, obj_idx, obj_end, scaffold_name, 1000, objo) - ) - - -def check_seen(r, seen): - from jcvi.utils.range import range_overlap - - for s in seen: - if range_overlap(r, s): - return True - return False - - -def infer(args): - """ - %prog infer scaffolds.fasta genome.fasta - - Infer where the components are in the genome. This function is rarely used, - but can be useful when distributor does not ship an AGP file. - """ - from jcvi.apps.grid import WriteJobs - from jcvi.formats.bed import sort - - p = OptionParser(infer.__doc__) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - scaffoldsf, genomef = args - inferbed = "infer-components.bed" - if need_update((scaffoldsf, genomef), inferbed): - scaffolds = Fasta(scaffoldsf, lazy=True) - genome = Fasta(genomef) - genome = genome.tostring() - args = [ - (scaffold_name, scaffold, genome) - for scaffold_name, scaffold in scaffolds.iteritems_ordered() - ] - - pool = WriteJobs(map_one_scaffold, args, inferbed, cpus=opts.cpus) - pool.run() - - sort([inferbed, "-i"]) - bed = Bed(inferbed) - inferagpbed = "infer.bed" - fw = open(inferagpbed, "w") - seen = [] - for b in bed: - r = (b.seqid, b.start, b.end) - if check_seen(r, seen): - continue - print( - "\t".join(str(x) for x in (b.accn, 0, b.span, b.seqid, b.score, b.strand)), - file=fw, - ) - seen.append(r) - fw.close() - - frombed([inferagpbed]) - - -def format(args): - """ - %prog format oldagpfile newagpfile - - Reformat AGP file. --switch will replace the ids in the AGP file. 
- """ - from jcvi.formats.base import DictFile - - p = OptionParser(format.__doc__) - p.add_argument("--switchcomponent", help="Switch component id based on") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - oldagpfile, newagpfile = args - switchcomponent = opts.switchcomponent - if switchcomponent: - switchcomponent = DictFile(switchcomponent) - - agp = AGP(oldagpfile) - fw = open(newagpfile, "w") - nconverts = 0 - for i, a in enumerate(agp): - if not a.is_gap and a.component_id in switchcomponent: - oldid = a.component_id - newid = switchcomponent[a.component_id] - a.component_id = newid - logger.debug("Covert {0} to {1} on line {2}".format(oldid, newid, i + 1)) - nconverts += 1 - print(a, file=fw) - - logger.debug("Total converted records: {0}".format(nconverts)) - - -def frombed(args): - """ - %prog frombed bedfile - - Generate AGP file based on bed file. The bed file must have at least 6 - columns. With the 4-th column indicating the new object. 
- """ - p = OptionParser(frombed.__doc__) - p.add_argument( - "--gapsize", - default=100, - type=int, - help="Insert gaps of size", - ) - p.add_argument("--evidence", default="map", help="Linkage evidence to add in AGP") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - gapsize = opts.gapsize - agpfile = bedfile.replace(".bed", ".agp") - fw = open(agpfile, "w") - - bed = Bed(bedfile, sorted=False) - for object, beds in groupby(bed, key=lambda x: x.accn): - beds = list(beds) - for i, b in enumerate(beds): - if gapsize and i != 0: - print( - "\t".join( - str(x) - for x in ( - object, - 0, - 0, - 0, - "U", - gapsize, - "scaffold", - "yes", - opts.evidence, - ) - ), - file=fw, - ) - - print( - "\t".join( - str(x) - for x in (object, 0, 0, 0, "W", b.seqid, b.start, b.end, b.strand) - ), - file=fw, - ) - - fw.close() - - # Reindex - return reindex([agpfile, "--inplace"]) - - -def swap(args): - """ - %prog swap agpfile - - Swap objects and components. Will add gap lines. This is often used in - conjuction with formats.chain.fromagp() to convert between different - coordinate systems. 
- """ - from jcvi.utils.range import range_interleave - - p = OptionParser(swap.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (agpfile,) = args - - agp = AGP(agpfile, nogaps=True, validate=False) - agp.sort(key=lambda x: (x.component_id, x.component_beg)) - - newagpfile = agpfile.rsplit(".", 1)[0] + ".swapped.agp" - fw = open(newagpfile, "w") - agp.transfer_header(fw) - for cid, aa in groupby(agp, key=(lambda x: x.component_id)): - aa = list(aa) - aranges = [(x.component_id, x.component_beg, x.component_end) for x in aa] - gaps = range_interleave(aranges) - for a, g in zip_longest(aa, gaps): - a.object, a.component_id = a.component_id, a.object - a.component_beg = a.object_beg - a.component_end = a.object_end - print(a, file=fw) - if not g: - continue - - aline = [cid, 0, 0, 0] - gseq, ga, gb = g - cspan = gb - ga + 1 - aline += ["N", cspan, "fragment", "yes"] - print("\t".join(str(x) for x in aline), file=fw) - - fw.close() - # Reindex - reindex([newagpfile, "--inplace"]) - - return newagpfile - - -def stats(args): - """ - %prog stats agpfile - - Print out a report for length of gaps and components. 
- """ - from jcvi.utils.table import tabulate - - p = OptionParser(stats.__doc__) - p.add_argument( - "--warn", - default=False, - action="store_true", - help="Warnings on small component spans", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - (agpfile,) = args - - agp = AGP(agpfile) - gap_lengths = [] - component_lengths = [] - for a in agp: - span = a.object_span - if a.is_gap: - label = a.gap_type - gap_lengths.append((span, label)) - else: - label = "{0}:{1}-{2}".format( - a.component_id, a.component_beg, a.component_end - ) - component_lengths.append((span, label)) - if opts.warn and span < 50: - logger.error("component span too small ({0}):\n{1}".format(span, a)) - - table = dict() - for label, lengths in zip(("Gaps", "Components"), (gap_lengths, component_lengths)): - - if not lengths: - table[(label, "Min")] = table[(label, "Max")] = table[(label, "Sum")] = ( - "n.a." - ) - continue - - table[(label, "Min")] = "{0} ({1})".format(*min(lengths)) - table[(label, "Max")] = "{0} ({1})".format(*max(lengths)) - table[(label, "Sum")] = sum(x[0] for x in lengths) - - print(tabulate(table), file=sys.stderr) - - -def cut(args): - """ - %prog cut agpfile bedfile - - Cut at the boundaries of the ranges in the bedfile. 
- """ - p = OptionParser(cut.__doc__) - p.add_argument("--sep", default=".", help="Separator for splits") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - agpfile, bedfile = args - sep = opts.sep - - agp = AGP(agpfile) - bed = Bed(bedfile) - simple_agp = agp.order - newagpfile = agpfile.replace(".agp", ".cut.agp") - fw = open(newagpfile, "w") - - agp_fixes = defaultdict(list) - for component, intervals in bed.sub_beds(): - i, a = simple_agp[component] - object = a.object - component_span = a.component_span - orientation = a.orientation - - assert a.component_beg, a.component_end - cuts = set() - for i in intervals: - start, end = i.start, i.end - end -= 1 - - assert start <= end - cuts.add(start) - cuts.add(end) - - cuts.add(0) - cuts.add(component_span) - cuts = list(sorted(cuts)) - - sum_of_spans = 0 - for i, (a, b) in enumerate(pairwise(cuts)): - oid = object + "{0}{1}".format(sep, i + 1) - aline = [oid, 0, 0, 0] - cspan = b - a - aline += ["D", component, a + 1, b, orientation] - sum_of_spans += cspan - - aline = "\t".join(str(x) for x in aline) - agp_fixes[component].append(aline) - - assert component_span == sum_of_spans - - # Finally write the masked agp - for a in agp: - if not a.is_gap and a.component_id in agp_fixes: - print("\n".join(agp_fixes[a.component_id]), file=fw) - else: - print(a, file=fw) - - fw.close() - # Reindex - reindex([newagpfile, "--inplace"]) - - return newagpfile - - -def mask(args): - """ - %prog mask agpfile bedfile - - Mask given ranges in components to gaps. When the bedfile contains a single - base pair, this position can be a point of split and no base is lost - (--splitsingle). 
- """ - p = OptionParser(mask.__doc__) - p.add_argument( - "--splitobject", - default=False, - action="store_true", - help="Create new names for object", - ) - p.add_argument( - "--splitcomponent", - default=False, - action="store_true", - help="Create new names for component", - ) - p.add_argument( - "--splitsingle", - default=False, - action="store_true", - help="Do not remove base on single point", - ) - p.add_argument( - "--gaptype", - default="scaffold", - help="Masked region has gap type of", - ) - p.add_argument( - "--noretain", - default=False, - action="store_true", - help="Do not retain old names for non-split objects", - ) - p.add_argument("--sep", default=".", help="Separator for splits") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(p.print_help()) - - agpfile, bedfile = args - gaptype = opts.gaptype - splitobject = opts.splitobject - splitcomponent = opts.splitcomponent - sep = opts.sep - - assert not ( - splitobject and splitcomponent - ), "Options --splitobject and --splitcomponent conflict" - - agp = AGP(agpfile) - bed = Bed(bedfile) - simple_agp = agp.order - # agp lines to replace original ones, keyed by the component - agp_fixes = defaultdict(list) - - newagpfile = agpfile.replace(".agp", ".masked.agp") - fw = open(newagpfile, "w") - - if splitcomponent: - componentindex = defaultdict(int) - - for component, intervals in bed.sub_beds(): - i, a = simple_agp[component] - object = a.object - orientation = a.orientation - - assert a.component_beg, a.component_end - arange = a.component_beg, a.component_end - - # Make sure `ivs` contain DISJOINT ranges, and located within `arange` - ivs = [] - points = set() - for i in intervals: - start, end = i.start, i.end - if opts.splitsingle: - points.add(start) - iv = range_intersect(arange, (start, end)) - if iv is not None: - ivs.append(iv) - - # Sort the ends of `ivs` as well as the arange - arange = a.component_beg - 1, a.component_end + 1 - endpoints = sorted(flatten(ivs + [arange])) 
- # reverse if component on negative strand - if orientation == "-": - endpoints.reverse() - - sum_of_spans = 0 - # assign complements as sequence components - for i, (a, b) in enumerate(pairwise(endpoints)): - if orientation == "-": - a, b = b, a - if orientation not in ("+", "-"): - orientation = "+" - - oid = object + "{0}{1}".format(sep, i // 2 + 1) if splitobject else object - aline = [oid, 0, 0, 0] - if i % 2 == 0: - cspan = b - a - 1 - if splitcomponent: - cid = component + "{0}{1}".format( - sep, componentindex[component] + 1 - ) - componentindex[component] += 1 - aline += ["W", cid, 1, cspan, orientation] - else: - end = b if (opts.splitsingle and b in points) else b - 1 - aline += ["W", component, a + 1, end, orientation] - is_gap = False - else: - cspan = b - a + 1 - aline += ["N", cspan, gaptype, "yes", "paired-ends"] - is_gap = True - if cspan <= 0: - continue - - sum_of_spans += cspan - aline = "\t".join(str(x) for x in aline) - if not (splitobject and is_gap): - agp_fixes[component].append(aline) - - retain = not opts.noretain - # Finally write the masked agp - for a in agp: - if a.is_gap: - print(a, file=fw) - elif a.component_id in agp_fixes: - print("\n".join(agp_fixes[a.component_id]), file=fw) - else: - if not retain: - if splitobject: - a.object += sep + "0" - elif splitcomponent: - a.component_id += sep + "0" - print(a, file=fw) - - fw.close() - - # Reindex - reindex([newagpfile, "--inplace"]) - - return newagpfile - - -def reindex(args): - """ - %prog agpfile - - assume the component line order is correct, modify coordinates, this is - necessary mostly due to manual edits (insert/delete) that disrupts - the target coordinates. 
- """ - p = OptionParser(reindex.__doc__) - p.add_argument( - "--nogaps", - default=False, - action="store_true", - help="Remove all gap lines", - ) - p.add_argument( - "--inplace", - default=False, - action="store_true", - help="Replace input file", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - (agpfile,) = args - inplace = opts.inplace - agp = AGP(agpfile, validate=False) - pf = agpfile.rsplit(".", 1)[0] - newagpfile = pf + ".reindexed.agp" - - fw = open(newagpfile, "w") - agp.transfer_header(fw) - for chr, chr_agp in groupby(agp, lambda x: x.object): - chr_agp = list(chr_agp) - object_beg = 1 - for i, b in enumerate(chr_agp): - b.object_beg = object_beg - b.part_number = i + 1 - if opts.nogaps and b.is_gap: - continue - - if b.is_gap: - b.object_end = object_beg + b.gap_length - 1 - else: - b.object_end = object_beg + b.component_span - 1 - - object_beg = b.object_end + 1 - - print(str(b), file=fw) - - # Last step: validate the new agpfile - fw.close() - AGP(newagpfile, validate=True) - - if inplace: - shutil.move(newagpfile, agpfile) - logger.debug("Rename file `{0}` to `{1}`".format(newagpfile, agpfile)) - newagpfile = agpfile - - return newagpfile - - -def summary(args): - """ - %prog summary agpfile - - print a table of scaffold statistics, number of BACs, no of scaffolds, - scaffold N50, scaffold L50, actual sequence, PSMOL NNNs, PSMOL-length, % of - PSMOL sequenced. 
- """ - from jcvi.utils.table import write_csv - - p = OptionParser(summary.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - (agpfile,) = args - header = ( - "Chromosome #_Distinct #_Components #_Scaffolds " - "Scaff_N50 Scaff_L50 Length".split() - ) - - agp = AGP(agpfile) - data = list(agp.summary_all()) - write_csv(header, data, sep=" ") - - -chr_pat = re.compile(r"chromosome (\d)", re.I) -clone_pat = re.compile(r"clone ([^, ]*\d)[ ,]", re.I) - - -def get_clone(rec): - """ - >>> get_clone("Medicago truncatula chromosome 2 clone mth2-48e18") - ('2', 'mth2-48e18') - """ - s = rec.description - chr = re.search(chr_pat, s) - clone = re.search(clone_pat, s) - chr = chr.group(1) if chr else "" - clone = clone.group(1) if clone else "" - - return chr, clone - - -def get_phase(rec): - keywords = rec.annotations["keywords"] - description = rec.description.upper() - - if "HTGS_PHASE1" in keywords: - phase = 1 - elif "HTGS_PHASE2" in keywords: - phase = 2 - elif len(keywords) == 1 and "HTG" in keywords: - phase = 3 - elif "PLN" in keywords: # EMBL BACs - if "DRAFT" in description: - if "UNORDERED" in description: - phase = 1 - else: - phase = 2 - else: - assert "COMPLETE" in description, description - phase = 3 - else: - phase = 3 - - return phase, keywords - - -def phase(args): - """ - %prog phase genbankfiles - - Input has to be gb file. Search the `KEYWORDS` section to look for PHASE. - Also look for "chromosome" and "clone" in the definition line. 
- """ - p = OptionParser(phase.__doc__) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - fw = must_open(opts.outfile, "w") - for gbfile in args: - for rec in SeqIO.parse(gbfile, "gb"): - bac_phase, keywords = get_phase(rec) - chr, clone = get_clone(rec) - keyword_field = ";".join(keywords) - print( - "\t".join((rec.id, str(bac_phase), keyword_field, chr, clone)), file=fw - ) - - -def tpf(args): - """ - %prog tpf agpfile - - Print out a list of ids, one per line. Also known as the Tiling Path. - - AC225490.9 chr6 - - Can optionally output scaffold gaps. - """ - p = OptionParser(tpf.__doc__) - p.add_argument( - "--noversion", - default=False, - action="store_true", - help="Remove trailing accession versions", - ) - p.add_argument( - "--gaps", - default=False, - action="store_true", - help="Include gaps in the output", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (agpfile,) = args - agp = AGP(agpfile) - for a in agp: - object = a.object - if a.is_gap: - if opts.gaps and a.isCloneGap: - print("\t".join((a.gap_type, object, "na"))) - continue - - component_id = a.component_id - orientation = a.orientation - - if opts.noversion: - component_id = component_id.rsplit(".", 1)[0] - - print("\t".join((component_id, object, orientation))) - - -def bed(args): - """ - %prog bed agpfile - - print out the tiling paths in bed/gff3 format - """ - from jcvi.formats.obo import validate_term - - p = OptionParser(bed.__doc__) - p.add_argument( - "--gaps", - default=False, - action="store_true", - help="Only print bed lines for gaps", - ) - p.add_argument( - "--nogaps", - default=False, - action="store_true", - help="Do not print bed lines for gaps", - ) - p.add_argument( - "--bed12", - default=False, - action="store_true", - help="Produce bed12 formatted output", - ) - p.add_argument( - "--component", - default=False, - action="store_true", - help="Generate bed file for 
components", - ) - p.set_outfile() - g1 = p.add_argument_group( - "GFF specific parameters", - "Note: If not specified, output will be in `bed` format", - ) - g1.add_argument( - "--gff", - default=False, - action="store_true", - help="Produce gff3 formatted output. By default, ignores AGP gap lines", - ) - g1.add_argument("--source", default="MGSC", help="Specify a gff3 source") - g1.add_argument( - "--feature", - default="golden_path_fragment", - help="Specify a gff3 feature type", - ) - p.set_SO_opts() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - if opts.component: - opts.nogaps = True - - # If output format is gff3 and 'verifySO' option is invoked, validate the SO term - if opts.gff and opts.verifySO: - validate_term(opts.feature, method=opts.verifySO) - - (agpfile,) = args - agp = AGP(agpfile) - fw = must_open(opts.outfile, "w") - if opts.gff: - print("##gff-version 3", file=fw) - - for a in agp: - if opts.nogaps and a.is_gap: - continue - if opts.gaps and not a.is_gap: - continue - if opts.bed12: - print(a.bed12line, file=fw) - elif opts.gff: - print( - a.gffline(gff_source=opts.source, gff_feat_type=opts.feature), file=fw - ) - elif opts.component: - name = "{0}:{1}-{2}".format( - a.component_id, a.component_beg, a.component_end - ) - print( - "\t".join( - str(x) - for x in ( - a.component_id, - a.component_beg - 1, - a.component_end, - name, - a.component_type, - a.orientation, - ) - ), - file=fw, - ) - else: - print(a.bedline, file=fw) - fw.close() - - return fw.name - - -def extendbed(args): - """ - %prog extend agpfile componentfasta - - Extend the components to fill the component range. For example, a bed/gff3 file - that was converted from the agp will contain only the BAC sequence intervals - that are 'represented' - sometimes leaving the 5` and 3` out (those that - overlap with adjacent sequences. This script fill up those ranges, - potentially to make graphics for tiling path. 
- """ - from jcvi.formats.sizes import Sizes - - p = OptionParser(extendbed.__doc__) - p.add_argument( - "--nogaps", - default=False, - action="store_true", - help="Do not print bed lines for gaps", - ) - p.add_argument( - "--bed12", - default=False, - action="store_true", - help="Produce bed12 formatted output", - ) - p.add_argument( - "--gff", - default=False, - action="store_true", - help="Produce gff3 formatted output. By default, ignores " + " AGP gap lines.", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - # If output format is GFF3, ignore AGP gap lines. - if opts.gff: - opts.nogaps = True - - agpfile, fastafile = args - agp = AGP(agpfile) - fw = must_open(opts.outfile, "w") - if opts.gff: - print("##gff-version 3", file=fw) - - ranges = defaultdict(list) - thickCoords = [] # These are the coordinates before modify ranges - # Make the first pass to record all the component ranges - for a in agp: - thickCoords.append((a.object_beg, a.object_end)) - if a.is_gap: - continue - ranges[a.component_id].append(a) - - # Modify the ranges - sizes = Sizes(fastafile).mapping - for accn, rr in ranges.items(): - alen = sizes[accn] - - a = rr[0] - if a.orientation == "+": - hang = a.component_beg - 1 - else: - hang = alen - a.component_end - a.object_beg -= hang - - a = rr[-1] - if a.orientation == "+": - hang = alen - a.component_end - else: - hang = a.component_beg - 1 - a.object_end += hang - - for a, (ts, te) in zip(agp, thickCoords): - if opts.nogaps and a.is_gap: - continue - if opts.bed12: - line = a.bedline - a.object_beg, a.object_end = ts, te - line += "\t" + a.bedextra - print(line, file=fw) - elif opts.gff: - print(a.gffline(), file=fw) - else: - print(a.bedline, file=fw) - - -def gaps(args): - """ - %prog gaps agpfile - - Print out the distribution of gapsizes. Option --merge allows merging of - adjacent gaps which is used by tidy(). 
- """ - from jcvi.graphics.histogram import loghistogram - - p = OptionParser(gaps.__doc__) - p.add_argument( - "--merge", - dest="merge", - default=False, - action="store_true", - help="Merge adjacent gaps (to conform to AGP specification)", - ) - p.add_argument( - "--header", - default=False, - action="store_true", - help="Produce an AGP header", - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - merge = opts.merge - (agpfile,) = args - - if merge: - merged_agpfile = agpfile.replace(".agp", ".merged.agp") - fw = open(merged_agpfile, "w") - - agp = AGP(agpfile) - sizes = [] - data = [] # store merged AGPLine's - priorities = ("centromere", "telomere", "scaffold", "contig", "clone", "fragment") - - for is_gap, alines in groupby(agp, key=lambda x: (x.object, x.is_gap)): - alines = list(alines) - is_gap = is_gap[1] - if is_gap: - gap_size = sum(x.gap_length for x in alines) - gap_types = set(x.gap_type for x in alines) - for gtype in ("centromere", "telomere"): - if gtype in gap_types: - gap_size = gtype - - sizes.append(gap_size) - b = deepcopy(alines[0]) - b.object_beg = min(x.object_beg for x in alines) - b.object_end = max(x.object_end for x in alines) - b.gap_length = sum(x.gap_length for x in alines) - - assert b.gap_length == b.object_end - b.object_beg + 1 - b.component_type = "U" if b.gap_length == 100 else "N" - - gtypes = [x.gap_type for x in alines] - for gtype in priorities: - if gtype in gtypes: - b.gap_type = gtype - break - - linkages = [x.linkage for x in alines] - for linkage in ("no", "yes"): - if linkage in linkages: - b.linkage = linkage - break - - alines = [b] - - data.extend(alines) - - loghistogram(sizes) - - if opts.header: - AGP.print_header( - fw, - organism="Medicago truncatula", - taxid=3880, - source="J. 
Craig Venter Institute", - ) - - if merge: - for ob, bb in groupby(data, lambda x: x.object): - for i, b in enumerate(bb): - b.part_number = i + 1 - print(b, file=fw) - return merged_agpfile - - -def tidy(args): - """ - %prog tidy agpfile componentfasta - - Given an agp file, run through the following steps: - 1. Trim components with dangling N's - 2. Merge adjacent gaps - 3. Trim gaps at the end of an object - 4. Reindex the agp - - Final output is in `.tidy.agp`. - """ - p = OptionParser(tidy.__doc__) - p.add_argument( - "--nogaps", - default=False, - action="store_true", - help="Remove all gap lines", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(p.print_help()) - - agpfile, componentfasta = args - originalagpfile = agpfile - - # Step 1: Trim terminal Ns - tmpfasta = "tmp.fasta" - trimmed_agpfile = build( - [agpfile, componentfasta, tmpfasta, "--newagp", "--novalidate"] - ) - cleanup(tmpfasta) - agpfile = trimmed_agpfile - agpfile = reindex([agpfile, "--inplace"]) - - # Step 2: Merge adjacent gaps - merged_agpfile = gaps([agpfile, "--merge"]) - cleanup(agpfile) - - # Step 3: Trim gaps at the end of object - agpfile = merged_agpfile - agp = AGP(agpfile) - newagpfile = agpfile.replace(".agp", ".fixed.agp") - fw = open(newagpfile, "w") - for object, a in groupby(agp, key=lambda x: x.object): - a = list(a) - if a[0].is_gap: - g, a = a[0], a[1:] - logger.debug("Trim beginning Ns({0}) of {1}".format(g.gap_length, object)) - if a and a[-1].is_gap: - a, g = a[:-1], a[-1] - logger.debug("Trim trailing Ns({0}) of {1}".format(g.gap_length, object)) - print("\n".join(str(x) for x in a), file=fw) - fw.close() - cleanup(agpfile) - - # Step 4: Final reindex - agpfile = newagpfile - reindex_opts = [agpfile, "--inplace"] - if opts.nogaps: - reindex_opts += ["--nogaps"] - agpfile = reindex(reindex_opts) - - tidyagpfile = originalagpfile.replace(".agp", ".tidy.agp") - shutil.move(agpfile, tidyagpfile) - - logger.debug("File written to `%s`.", tidyagpfile) 
- return tidyagpfile - - -def build(args): - """ - %prog build agpfile componentfasta targetfasta - - Build targetfasta based on info from agpfile - """ - p = OptionParser(build.__doc__) - p.add_argument( - "--newagp", - dest="newagp", - default=False, - action="store_true", - help="Check components to trim dangling N's", - ) - p.add_argument( - "--novalidate", - dest="novalidate", - default=False, - action="store_true", - help="Don't validate the agpfile", - ) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - agpfile, componentfasta, targetfasta = args - validate = not opts.novalidate - - if opts.newagp: - assert agpfile.endswith(".agp") - newagpfile = agpfile.replace(".agp", ".trimmed.agp") - newagp = open(newagpfile, "w") - else: - newagpfile = None - newagp = None - - agp = AGP(agpfile, validate=validate, sorted=True) - agp.build_all(componentfasta=componentfasta, targetfasta=targetfasta, newagp=newagp) - logger.debug("Target fasta written to `%s`.", targetfasta) - - return newagpfile - - -def validate(args): - """ - %prog validate agpfile componentfasta targetfasta - - validate consistency between agpfile and targetfasta - """ - p = OptionParser(validate.__doc__) - - opts, args = p.parse_args(args) - if len(args) < 3: - sys.exit(not p.print_help()) - - agpfile, componentfasta, targetfasta = args - agp = AGP(agpfile) - build = Fasta(targetfasta) - bacs = Fasta(componentfasta, index=False) - - # go through this line by line - for aline in agp: - try: - build_seq = build.sequence( - dict(chr=aline.object, start=aline.object_beg, stop=aline.object_end) - ) - - if aline.is_gap: - assert build_seq.upper() == aline.gap_length * "N", ( - "gap mismatch: %s" % aline - ) - else: - bac_seq = bacs.sequence( - dict( - chr=aline.component_id, - start=aline.component_beg, - stop=aline.component_end, - strand=aline.orientation, - ) - ) - - assert ( - build_seq.upper() == bac_seq.upper() - ), f"sequence mismatch: {aline}" - - 
logger.debug( - "%s:%d-%d verified", aline.object, aline.object_beg, aline.object_end - ) - - except Exception as e: - logger.error(e) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/base.py b/jcvi/formats/base.py deleted file mode 100644 index f711b9ea..00000000 --- a/jcvi/formats/base.py +++ /dev/null @@ -1,1196 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -import fileinput -import math -import os -import os.path as op -import sys - -from collections import OrderedDict -from itertools import cycle, groupby, islice -from typing import IO, Union - - -from Bio import SeqIO -from ..apps.base import ( - OptionParser, - ActionDispatcher, - cleanup, - logger, - mkdir, - need_update, - popen, - sh, -) - - -FastaExt = ("fasta", "fa", "fna", "cds", "pep", "faa", "fsa", "seq", "nt", "aa") -FastqExt = ("fastq", "fq") - - -class BaseFile(object): - def __init__(self, filename): - self.filename = filename - if filename: - logger.debug("Load file `%s`", filename) - - -class LineFile(BaseFile, list): - """ - Generic file parser for line-based files - """ - - def __init__(self, filename, comment=None, load=False): - super().__init__(filename) - - if load: - fp = must_open(filename) - self.lines = [l.strip() for l in fp if l[0] != comment] - logger.debug("Load %d lines from `%s`", len(self.lines), filename) - - -class DictFile(BaseFile, OrderedDict): - """ - Generic file parser for multi-column files, keyed by a particular index. 
- """ - - def __init__( - self, - filename, - keypos=0, - valuepos=1, - delimiter=None, - strict=True, - keycast=None, - cast=None, - ): - BaseFile.__init__(self, filename) - OrderedDict.__init__(self) - self.keypos = keypos - - fp = must_open(filename) - ncols = (max(keypos, valuepos) if valuepos else keypos) + 1 - thiscols = 0 - for lineno, row in enumerate(fp): - row = row.rstrip() - atoms = row.split(delimiter) - atoms = [x.strip() for x in atoms] - thiscols = len(atoms) - if thiscols < ncols: - action = "Aborted" if strict else "Skipped" - - msg = "Must contain >= {0} columns. {1}.\n".format(ncols, action) - msg += " --> Line {0}: {1}".format(lineno + 1, row) - logger.error(msg) - if strict: - sys.exit(1) - else: - continue - - key = atoms[keypos] - value = atoms[valuepos] if (valuepos is not None) else atoms - if keycast: - key = keycast(key) - if cast: - value = cast(value) - self[key] = value - - assert thiscols, "File empty" - self.ncols = thiscols - logger.debug("Imported %d records from `%s`", len(self), filename) - - @classmethod - def num_columns(cls, filename, delimiter=None): - """Return the column number of the csv file. - - Args: - filename (str): Path to the file. - delimiter (str, optional): Separator of the csv file. Defaults to None. - - Returns: - int: Column number. 
- """ - fp = must_open(filename) - return max(len(row.split(delimiter)) for row in fp) - - -class SetFile(BaseFile, set): - def __init__(self, filename, column=-1, delimiter=None): - super().__init__(filename) - fp = open(filename) - for row in fp: - if not row.strip(): - continue - keys = [x.strip() for x in row.split(delimiter)] - if column >= 0: - keys = [keys[column]] - self.update(keys) - - -class FileMerger(object): - """ - Same as cat * > filename - """ - - def __init__(self, filelist, outfile): - self.filelist = filelist - self.outfile = outfile - self.ingz = filelist[0].endswith(".gz") - self.outgz = outfile.endswith(".gz") - - def merge(self, checkexists=False): - outfile = self.outfile - if checkexists and not need_update(self.filelist, outfile, warn=True): - return - - files = " ".join(self.filelist) - ingz, outgz = self.ingz, self.outgz - if ingz and outgz: # can merge gz files directly - cmd = "cat {}".format(files) - else: - cmd = "zcat" if self.ingz else "cat" - cmd += " " + files - sh(cmd, outfile=outfile) - - return outfile - - -class FileSplitter(object): - def __init__(self, filename, outputdir=None, format="fasta", mode="cycle"): - self.filename = filename - self.outputdir = outputdir - self.mode = mode - - format = format or self._guess_format(filename) - logger.debug("format is %s", format) - - if format in ("fasta", "fastq"): - self.klass = "seqio" - elif format == "clust": - self.klass = "clust" - else: - self.klass = "txt" - - self.format = format - mkdir(outputdir) - - def _open(self, filename): - if self.klass == "seqio": - handle = SeqIO.parse(open(filename), self.format) - elif self.klass == "clust": - from jcvi.apps.uclust import ClustFile - - handle = iter(ClustFile(filename)) - else: - handle = open(filename) - return handle - - @property - def num_records(self): - handle = self._open(self.filename) - return sum(1 for x in handle) - - def _guess_format(self, filename): - root, ext = op.splitext(filename) - ext = ext.strip(".") - - 
if ext in FastaExt: - format = "fasta" - elif ext in FastqExt: - format = "fastq" - else: - format = "txt" - return format - - def _batch_iterator(self, N=1): - """Returns N lists of records. - - This can be used on any iterator, for example to batch up - SeqRecord objects from Bio.SeqIO.parse(...), or to batch - Alignment objects from Bio.AlignIO.parse(...), or simply - lines from a file handle. - - This is a generator function, and it returns lists of the - entries from the supplied iterator. Each list will have - batch_size entries, although the final list may be shorter. - """ - batch_size = math.ceil(self.num_records / float(N)) - handle = self._open(self.filename) - while True: - batch = list(islice(handle, batch_size)) - if not batch: - break - yield batch - - @classmethod - def get_names(cls, filename, N): - root, ext = op.splitext(op.basename(filename)) - - names = [] - pad0 = len(str(int(N - 1))) - for i in range(N): - name = "{0}_{1:0{2}d}{3}".format(root, i, pad0, ext) - names.append(name) - - return names - - def write(self, fw, batch): - if self.klass == "seqio": - SeqIO.write(batch, fw, self.format) - elif self.klass == "clust": - for b in batch: - print(b, file=fw) - else: - for line in batch: - fw.write(line) - return len(batch) - - def split(self, N, force=False): - """ - There are two modes of splitting the records - - batch: splitting is sequentially to records/N chunks - - cycle: placing each record in the splitted files and cycles - - use `cycle` if the len of the record is not evenly distributed - """ - mode = self.mode - assert mode in ("batch", "cycle", "optimal") - logger.debug("set split mode=%s", mode) - - self.names = self.__class__.get_names(self.filename, N) - if self.outputdir: - self.names = [op.join(self.outputdir, x) for x in self.names] - - if not need_update(self.filename, self.names) and not force: - logger.error("file %s already existed, skip file splitting", self.names[0]) - return - - filehandles = [open(x, "w") for x in 
self.names] - - if mode == "batch": - for batch, fw in zip(self._batch_iterator(N), filehandles): - count = self.write(fw, batch) - logger.debug("write %d records to %s", count, fw.name) - - elif mode == "cycle": - handle = self._open(self.filename) - for record, fw in zip(handle, cycle(filehandles)): - count = self.write(fw, [record]) - - elif mode == "optimal": - """ - This mode is based on Longest Processing Time (LPT) algorithm: - - A simple, often-used algorithm is the LPT algorithm (Longest - Processing Time) which sorts the jobs by its processing time and - then assigns them to the machine with the earliest end time so far. - This algorithm achieves an upper bound of 4/3 - 1/(3m) OPT. - - Citation: - """ - endtime = [0] * N - handle = self._open(self.filename) - for record in handle: - mt, mi = min((x, i) for (i, x) in enumerate(endtime)) - fw = filehandles[mi] - count = self.write(fw, [record]) - endtime[mi] += len(record) - - for fw in filehandles: - fw.close() - - -def longest_unique_prefix(query, targets, remove_self=True): - """ - Find the longest unique prefix for filename, when compared against a list of - filenames. Useful to simplify file names in a pool of files. See usage in - formats.fasta.pool(). - """ - query = op.basename(query) - targets = [op.basename(x) for x in targets] - prefix_lengths = [len(op.commonprefix([query, name])) for name in targets] - if remove_self and len(query) in prefix_lengths: - prefix_lengths.remove(len(query)) - longest_length = max(prefix_lengths) - return query[: longest_length + 1] - - -def check_exists(filename, oappend=False): - """ - Avoid overwriting some files accidentally. 
- """ - from jcvi.utils.console import console - - if op.exists(filename): - if oappend: - return oappend - overwrite = ( - console.input("`{}` found, overwrite (Y/n)?".format(filename)) == "Y" - ) - else: - overwrite = True - - return overwrite - - -def timestamp(): - from datetime import datetime as dt - - return "{0}{1:02d}{2:02d}".format(dt.now().year, dt.now().month, dt.now().day) - - -def must_open( - filename: str, - mode: str = "r", - checkexists: bool = False, - skipcheck: bool = False, - oappend: bool = False, -) -> Union[IO, fileinput.FileInput]: - """ - Accepts filename and returns filehandle. - - Checks on multiple files, stdin/stdout/stderr, .gz or .bz2 file. - """ - if isinstance(filename, list): - assert "r" in mode - - if filename[0].endswith((".gz", ".bz2")): - filename = " ".join(filename) # allow opening multiple gz/bz2 files - else: - return fileinput.input(filename) - - if filename.startswith("s3://"): - from jcvi.utils.aws import pull_from_s3 - - filename = pull_from_s3(filename) - - if filename in ("-", "stdin"): - assert "r" in mode - fp = sys.stdin - - elif filename == "stdout": - assert "w" in mode - fp = sys.stdout - - elif filename == "stderr": - assert "w" in mode - fp = sys.stderr - - elif filename == "tmp" and mode == "w": - from tempfile import NamedTemporaryFile - - fp = NamedTemporaryFile(mode=mode, delete=False) - - elif filename.endswith(".gz"): - import gzip - - if "r" in mode: - fp = gzip.open(filename, mode + "t") - elif "w" in mode: - fp = gzip.open(filename, mode) - - elif filename.endswith(".bz2"): - if "r" in mode: - cmd = f"bzcat {filename}" - fp = popen(cmd, debug=False) - elif "w" in mode: - import bz2 - - fp = bz2.BZ2File(filename, mode) - - else: - if checkexists: - assert mode == "w" - overwrite = ( - (not op.exists(filename)) - if skipcheck - else check_exists(filename, oappend) - ) - if overwrite: - if oappend: - fp = open(filename, "a") - else: - fp = open(filename, "w") - else: - logger.debug("File `%s` already 
exists. Skipped.", filename) - return None - else: - fp = open(filename, mode) - - return fp - - -bash_shebang = "#!/bin/bash" -python_shebang = """#!/usr/bin/env python -# -*- coding: UTF-8 -*-""" - - -def write_file(filename, contents, meta=None, skipcheck=False, append=False, tee=False): - if not meta: - suffix = filename.rsplit(".", 1)[-1] - if suffix == "sh": - meta = "run script" - elif suffix == "py": - meta = "python script" - else: - meta = "file" - - meta_choices = ("file", "run script", "python script") - assert meta in meta_choices, "meta must be one of {0}".format( - "|".join(meta_choices) - ) - - contents = contents.strip() - shebang = "\n" - if "script" in meta: - if not append: - if meta == "run script": - shebang = bash_shebang - elif meta == "python script": - shebang = python_shebang - contents = "\n\n".join((shebang, contents)) - - fw = must_open(filename, "w", checkexists=True, skipcheck=skipcheck, oappend=append) - if fw: - print(contents, file=fw) - fw.close() - if tee: - print(contents, file=sys.stderr) - - fileop = "appended" if append else "written" - message = "{0} {1} to `{2}`.".format(meta, fileop, filename) - logger.debug(message.capitalize()) - if meta == "run script" and not append: - sh("chmod u+x {0}".format(filename)) - - -def read_until(handle, start): - # read each line until a certain start, then puts the start tag back - while 1: - pos = handle.tell() - line = handle.readline() - if not line: - break - if line.startswith(start): - handle.seek(pos) - return - - -def read_block(handle, signal): - """ - Useful for reading block-like file formats, for example FASTA or OBO file, - such file usually startswith some signal, and in-between the signals are a - record - """ - signal_len = len(signal) - it = ( - x[1] - for x in groupby(handle, key=lambda row: row.strip()[:signal_len] == signal) - ) - found_signal = False - for header in it: - header = list(header) - for h in header[:-1]: - h = h.strip() - if h[:signal_len] != signal: - 
continue - yield h, [] # Header only, no contents - header = header[-1].strip() - if header[:signal_len] != signal: - continue - found_signal = True - seq = list(s.strip() for s in next(it)) - yield header, seq - - if not found_signal: - handle.seek(0) - seq = list(s.strip() for s in handle) - yield None, seq - - -def is_number(s, cast=float): - """ - Check if a string is a number. Use cast=int to check if s is an integer. - """ - try: - cast(s) # for int, long and float - except ValueError: - return False - - return True - - -def get_number(s, cast=int): - """ - Try to get a number out of a string, and cast it. - """ - import string - - d = "".join(x for x in str(s) if x in string.digits) - return cast(d) if d else s - - -def flexible_cast(s): - if is_number(s, cast=int): - return int(s) - elif is_number(s, cast=float): - return float(s) - return s - - -def main(): - actions = ( - ("pairwise", "convert a list of IDs into all pairs"), - ("split", "split large file into N chunks"), - ("reorder", "reorder columns in tab-delimited files"), - ("flatten", "convert a list of IDs into one per line"), - ("unflatten", "convert lines to a list of IDs on single line"), - ("group", "group elements in a table based on key (groupby) column"), - ("setop", "set operations on files"), - ("join", "join tabular-like files based on common column"), - ("subset", "subset tabular-like files based on common column"), - ("truncate", "remove lines from end of file"), - ("append", "append a column with fixed value"), - ("seqids", "make a list of seqids for graphics.karyotype"), - ("mergecsv", "merge a set of tsv files"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def seqids(args): - """ - %prog seqids prefix start end - - Make a list of seqids for graphics.karyotype. 
For example: - - $ python -m jcvi.formats.base seqids chromosome_ 1 3 - chromosome_1,chromosome_2,chromosome_3 - $ python -m jcvi.formats.base seqids A 3 1 --pad0=2 - A03,A02,A01 - """ - p = OptionParser(seqids.__doc__) - p.add_argument("--pad0", default=0, help="How many zeros to pad") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - prefix, start, end = args - pad0 = opts.pad0 - start, end = int(start), int(end) - step = 1 if start <= end else -1 - - print( - ",".join( - [ - "{}{:0{}d}".format(prefix, x, pad0) - for x in range(start, end + step, step) - ] - ) - ) - - -def pairwise(args): - """ - %prog pairwise ids - - Convert a list of IDs into all pairs. - """ - from itertools import combinations - - p = OptionParser(pairwise.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (idsfile,) = args - ids = SetFile(idsfile) - ids = sorted(ids) - fw = open(idsfile + ".pairs", "w") - for a, b in combinations(ids, 2): - print("\t".join((a, b)), file=fw) - fw.close() - - -def append(args): - """ - %prog append csvfile [tag] - - Append a column with fixed value. If tag is missing then just append the - filename. - """ - p = OptionParser(append.__doc__) - p.set_sep() - p.set_outfile() - opts, args = p.parse_args(args) - - nargs = len(args) - if nargs not in (1, 2): - sys.exit(not p.print_help()) - - csvfile = args[0] - tag = args[1] if nargs == 2 else csvfile - fp = must_open(csvfile) - fw = must_open(opts.outfile, "w") - for row in fp: - row = row.rstrip("\r\n") - row = opts.sep.join((row, tag)) - print(row, file=fw) - - -def truncate(args): - """ - %prog truncate linecount filename - - Remove linecount lines from the end of the file in-place. 
Borrowed from: - - """ - p = OptionParser(truncate.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - number, filename = args - number = int(number) - count = 0 - - f = open(filename, "r+b") - f.seek(0, os.SEEK_END) - while f.tell() > 0: - f.seek(-1, os.SEEK_CUR) - char = f.read(1) - if char == "\n": - count += 1 - if count == number + 1: - f.truncate() - print("Removed {0} lines from end of file".format(number), file=sys.stderr) - return number - - f.seek(-1, os.SEEK_CUR) - - if count < number + 1: - print("No change: requested removal would leave empty file", file=sys.stderr) - return -1 - - -def flatten(args): - """ - %prog flatten filename > ids - - Convert a list of IDs (say, multiple IDs per line) and move them into one - per line. - - For example, convert this, to this: - A,B,C | A - 1 | B - a,4 | C - | 1 - | a - | 4 - - If multi-column file with multiple elements per column, zip then flatten like so: - A,B,C 2,10,gg | A,2 - 1,3 4 | B,10 - | C,gg - | 1,4 - | 3,na - """ - from itertools import zip_longest - - p = OptionParser(flatten.__doc__) - p.set_sep(sep=",") - p.add_argument( - "--zipflatten", - default=None, - dest="zipsep", - help="Specify if columns of the file should be zipped before" - + " flattening. If so, specify delimiter separating column elements", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (tabfile,) = args - zipsep = opts.zipsep - - fp = must_open(tabfile) - for row in fp: - if zipsep: - row = row.rstrip() - atoms = row.split(opts.sep) - frows = [] - for atom in atoms: - frows.append(atom.split(zipsep)) - print( - "\n".join( - [zipsep.join(x) for x in list(zip_longest(*frows, fillvalue="na"))] - ) - ) - else: - print(row.strip().replace(opts.sep, "\n")) - - -def unflatten(args): - """ - %prog unflatten idsfile > unflattened - - Given a list of ids, one per line, unflatten the list onto a single line with sep. 
- """ - p = OptionParser(unflatten.__doc__) - p.add_argument("--sep", default=",", help="Separator when joining ids") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (idsfile,) = args - ids = must_open(idsfile).read().split() - with must_open(opts.outfile, "w") as fw: - print(opts.sep.join(ids), file=fw) - - -def group(args): - """ - %prog group tabfile > tabfile.grouped - - Given a tab-delimited file, either group all elements within the file or - group the elements in the value column(s) based on the key (groupby) column - - For example, convert this | into this - --------------------------------------- - a 2 3 4 | a,2,3,4,5,6 - a 5 6 | b,7,8 - b 7 8 | c,9,10,11 - c 9 | - c 10 11 | - - If grouping by a particular column, - convert this | into this: - --------------------------------------------- - a 2 3 4 | a 2,5 3,6 4 - a 5 6 | b 7 8 - b 7 8 | c 9,10 11 - c 9 | - c 10 11 | - - By default, it uniqifies all the grouped elements - """ - from jcvi.utils.cbook import AutoVivification - from jcvi.utils.grouper import Grouper - - p = OptionParser(group.__doc__) - p.set_sep() - p.add_argument( - "--groupby", default=None, type=int, help="Default column to groupby" - ) - p.add_argument( - "--groupsep", default=",", help="Separator to join the grouped elements" - ) - p.add_argument( - "--nouniq", - default=False, - action="store_true", - help="Do not uniqify the grouped elements", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (tabfile,) = args - sep = opts.sep - groupby = opts.groupby - groupsep = opts.groupsep - - cols = [] - grouper = AutoVivification() if groupby is not None else Grouper() - fp = must_open(tabfile) - for row in fp: - row = row.rstrip() - atoms = row.split(sep) - if groupby is not None: - if len(cols) < len(atoms): - cols = [x for x in range(len(atoms))] - if groupby not in cols: - logger.error("groupby col index `%s` is out of range", 
groupby) - sys.exit() - - key = atoms[groupby] - for col in cols: - if col == groupby: - continue - if not grouper[key][col]: - grouper[key][col] = [] if opts.nouniq else set() - if col < len(atoms): - if groupsep in atoms[col]: - for atom in atoms[col].split(groupsep): - if opts.nouniq: - grouper[key][col].append(atom) - else: - grouper[key][col].add(atom) - else: - if opts.nouniq: - grouper[key][col].append(atoms[col]) - else: - grouper[key][col].add(atoms[col]) - else: - grouper.join(*atoms) - - for key in grouper: - if groupby is not None: - line = [] - for col in cols: - if col == groupby: - line.append(key) - elif col in grouper[key].keys(): - line.append(groupsep.join(grouper[key][col])) - else: - line.append("na") - print(sep.join(line)) - else: - print(groupsep.join(key)) - - -def reorder(args): - """ - %prog reorder tabfile 1,2,4,3 > newtabfile - - Reorder columns in tab-delimited files. The above syntax will print out a - new file with col-1,2,4,3 from the old file. - """ - import csv - - p = OptionParser(reorder.__doc__) - p.set_sep() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - tabfile, order = args - sep = opts.sep - order = [int(x) - 1 for x in order.split(",")] - reader = csv.reader(must_open(tabfile), delimiter=sep) - writer = csv.writer(sys.stdout, delimiter=sep) - for row in reader: - newrow = [row[x] for x in order] - writer.writerow(newrow) - - -def split(args): - """ - %prog split file outdir N - - Split file into N records. This allows splitting FASTA/FASTQ/TXT file - properly at boundary of records. Split is useful for parallelization - on input chunks. - - Option --mode is useful on how to break into chunks. - 1. chunk - chunk records sequentially, 1-100 in file 1, 101-200 in file 2, etc. - 2. cycle - chunk records in Round Robin fashion - 3. optimal - try to make split file of roughly similar sizes, using LPT - algorithm. This is the default. 
- """ - p = OptionParser(split.__doc__) - mode_choices = ("batch", "cycle", "optimal") - p.add_argument( - "--all", default=False, action="store_true", help="split all records" - ) - p.add_argument( - "--mode", - default="optimal", - choices=mode_choices, - help="Mode when splitting records", - ) - p.add_argument( - "--format", choices=("fasta", "fastq", "txt", "clust"), help="input file format" - ) - - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - filename, outdir, N = args - fs = FileSplitter(filename, outputdir=outdir, format=opts.format, mode=opts.mode) - - if opts.all: - logger.debug("option -all override N") - N = fs.num_records - else: - N = min(fs.num_records, int(N)) - assert N > 0, "N must be > 0" - - logger.debug("split file into %d chunks", N) - fs.split(N) - - return fs - - -def join(args): - """ - %prog join file1.txt(pivotfile) file2.txt .. - - Join tabular-like files based on common column. - --column specifies the column index to pivot on. - Use comma to separate multiple values if the pivot column is different - in each file. Maintain the order in the first file. - --sep specifies the column separators, default to tab. - Use comma to separate multiple values if the column separator is different - in each file. 
- """ - p = OptionParser(join.__doc__) - p.add_argument( - "--column", default="0", help="0-based column id, multiple values allowed" - ) - p.set_sep(multiple=True) - p.add_argument( - "--noheader", default=False, action="store_true", help="Do not print header" - ) - p.add_argument("--na", default="na", help="Value for unjoined data") - p.add_argument( - "--compact", - default=False, - action="store_true", - help="Do not repeat pivotal columns in output", - ) - p.add_argument( - "--keysep", - default=",", - help="specify separator joining multiple elements in the key column" - + " of the pivot file", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - nargs = len(args) - - keysep = opts.keysep - compact = opts.compact - - if len(args) < 2: - sys.exit(not p.print_help()) - - na = opts.na - c = opts.column - if "," in c: - cc = [int(x) for x in c.split(",")] - else: - cc = [int(c)] * nargs - - assert len(cc) == nargs, "Column index number != File number" - - s = opts.sep - if "," in s: - ss = [x for x in s.split(",")] - else: - ss = [s] * nargs - - assert len(ss) == nargs, "column separator number != File number" - - # Maintain the first file line order, and combine other files into it - pivotfile = args[0] - files = [ - DictFile(f, keypos=c, valuepos=None, delimiter=s) - for f, c, s in zip(args, cc, ss) - ] - otherfiles = files[1:] - # The header contains filenames - headers = [] - for i, x in enumerate(files): - ncols = x.ncols - if i and compact: - ncols -= 1 - headers += [op.basename(x.filename)] * ncols - header = "\t".join(headers) - - fp = must_open(pivotfile) - fw = must_open(opts.outfile, "w") - if not opts.noheader: - print(header, file=fw) - - for row in fp: - row = row.rstrip() - atoms = row.split(ss[0]) - newrow = atoms - key = atoms[cc[0]] - keys = key.split(keysep) if keysep in key else [key] - for d in otherfiles: - drows = list() - for key in keys: - krow = d.get(key, [na] * d.ncols) - if compact: - krow.pop(d.keypos) - drows.append(krow) - 
drow = [keysep.join(x) for x in list(zip(*drows))] - newrow += drow - print("\t".join(newrow), file=fw) - - -def subset(args): - """ - %prog subset file1.txt(pivotfile) file2.txt .. - - subset tabular-like file1 based on common column with file 2. - Normally file1 should have unique row entries. - If more than one file2 are provided, they must have same column separators. - Multiple file2's will be concatenated in the output. - - --column specifies the column index (0-based) to pivot on. - Use comma to separate multiple values if the pivot column is different - in each file. Maintain the order in the first file. - --sep specifies the column separators, default to tab. - Use comma to separate multiple values if the column separator is different - in each file. - """ - - p = OptionParser(subset.__doc__) - p.add_argument( - "--column", default="0", help="0-based column id, multiple values allowed" - ) - p.set_sep(multiple=True) - p.add_argument( - "--pivot", - default=1, - type=int, - help="1 for using order in file1, 2 for using order in \ - file2", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - nargs = len(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - c = opts.column - if "," in c: - cc = [int(x) for x in c.split(",")] - assert len(set(cc[1:])) == 1, "Multiple file2's must have same column index." - cc = cc[0:2] - else: - cc = [int(c)] * 2 - - s = opts.sep - if "," in s: - ss = [x for x in s.split(",")] - assert ( - len(set(cc[1:])) == 1 - ), "Multiple file2's must have same column separator." 
- ss = ss[0:2] - else: - ss = [s] * 2 - - if nargs > 2: - file2 = FileMerger(args[1:], outfile="concatenatedFile2").merge() - else: - file2 = args[1] - newargs = [args[0], file2] - - files = [ - DictFile(f, keypos=c, valuepos=None, delimiter=s) - for f, c, s in zip(newargs, cc, ss) - ] - - pivot = 0 if opts.pivot == 1 else 1 - fp = open(newargs[pivot]) - fw = must_open(opts.outfile, "w") - - for row in fp: - row = row.rstrip() - atoms = row.split(ss[pivot]) - key = atoms[cc[pivot]] - d = files[1 - pivot] - if key in d: - print(ss[0].join(files[0][key]), file=fw) - - if nargs > 2: - cleanup(file2) - - -def setop(args): - """ - %prog setop "fileA & fileB" > newfile - - Perform set operations, except on files. The files (fileA and fileB) contain - list of ids. The operator is one of the four: - - |: union (elements found in either file) - &: intersection (elements found in both) - -: difference (elements in fileA but not in fileB) - ^: symmetric difference (elementes found in either set but not both) - - Please quote the argument to avoid shell interpreting | and &. - """ - from natsort import natsorted - - p = OptionParser(setop.__doc__) - p.add_argument( - "--column", - default=0, - type=int, - help="The column to extract, 0-based, -1 to disable", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (statement,) = args - fa, op, fb = statement.split() - assert op in ("|", "&", "-", "^") - - column = opts.column - fa = SetFile(fa, column=column) - fb = SetFile(fb, column=column) - - if op == "|": - t = fa | fb - elif op == "&": - t = fa & fb - elif op == "-": - t = fa - fb - elif op == "^": - t = fa ^ fb - - for x in natsorted(t): - print(x) - - -def mergecsv(args): - """ - %prog mergecsv *.tsv - - Merge a set of tsv files. 
- """ - p = OptionParser(mergecsv.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - tsvfiles = args - outfile = opts.outfile - - cleanup(outfile) - - fw = must_open(opts.outfile, "w") - for i, tsvfile in enumerate(tsvfiles): - fp = open(tsvfile) - if i > 0: - next(fp) - for row in fp: - fw.write(row) - fw.close() - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/bed.py b/jcvi/formats/bed.py deleted file mode 100755 index ac656a58..00000000 --- a/jcvi/formats/bed.py +++ /dev/null @@ -1,2504 +0,0 @@ -""" -Classes to handle the .bed files -""" - -import math -import os -import os.path as op -import shutil -import sys - -from collections import defaultdict, OrderedDict -from itertools import groupby -from typing import Optional, Tuple - -import numpy as np - -from more_itertools import pairwise -from natsort import natsorted, natsort_key - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - cleanup, - logger, - need_update, - popen, - sh, -) -from ..utils.cbook import SummaryStats, percentage, thousands -from ..utils.grouper import Grouper -from ..utils.range import ( - Range, - range_chain, - range_distance, - range_intersect, - range_union, -) - -from .base import DictFile, LineFile, get_number, is_number, must_open -from .sizes import Sizes - - -class BedLine(object): - # the Bed format supports more columns. we only need - # the first 4, but keep the information in 'extra'. 
- __slots__ = ( - "seqid", - "start", - "end", - "accn", - "extra", - "score", - "strand", - "args", - "nargs", - ) - - def __init__(self, sline): - args = sline.strip().split("\t") - self.nargs = nargs = len(args) - self.seqid = args[0] - self.start = int(args[1]) + 1 - self.end = int(args[2]) - assert self.start <= self.end, "start={0} end={1}".format(self.start, self.end) - self.extra = self.accn = self.score = self.strand = None - - if nargs > 3: - self.accn = args[3] - if nargs > 4: - self.score = args[4] - if nargs > 5: - self.strand = args[5] - if nargs > 6: - self.extra = args[6:] - - self.args = args - - def __str__(self): - args = [self.seqid, self.start - 1, self.end] - if self.accn is not None: - args += [self.accn] - if self.score is not None: - args += [self.score] - if self.strand is not None: - args += [self.strand] - if self.extra is not None: - args += self.extra - - s = "\t".join(str(x) for x in args) - return s - - __repr__ = __str__ - - def __getitem__(self, key): - return getattr(self, key) - - @property - def span(self): - return self.end - self.start + 1 - - @property - def range(self): - strand = self.strand or "+" - return self.seqid, self.start, self.end, strand - - @property - def tag(self): - return "{0}:{1}-{2}".format(self.seqid, self.start, self.end) - - def reverse_complement(self, sizes): - size = sizes.get_size(self.seqid) - - start = size - self.end + 1 - end = size - self.start + 1 - self.start, self.end = start, end - assert self.start <= self.end, "start={0} end={1}".format(self.start, self.end) - - if self.strand: - strand = {"+": "-", "-": "+"}[self.strand] - - def gffline(self, type="match", source="default"): - score = ( - "." - if not self.score or (self.score and not is_number(self.score)) - else self.score - ) - strand = "." 
if not self.strand else self.strand - row = "\t".join( - ( - self.seqid, - source, - type, - str(self.start), - str(self.end), - score, - strand, - ".", - f"ID={self.accn}", - ) - ) - return row - - -class Bed(LineFile): - def __init__(self, filename=None, key=None, sorted=True, juncs=False, include=None): - super().__init__(filename) - - # the sorting key provides some flexibility in ordering the features - # for example, user might not like the lexico-order of seqid - self.nullkey = lambda x: (natsort_key(x.seqid), x.start, x.accn) - self.key = key or self.nullkey - - if not filename: - return - - for line in must_open(filename): - if ( - line.strip() == "" - or line[0] == "#" - or line.startswith("browser ") - or line.startswith("track name") - ): - continue - b = BedLine(line) - if include and b.accn not in include: - continue - self.append(b) - - if sorted: - self.sort(key=self.key) - - def add(self, row): - self.append(BedLine(row)) - - def print_to_file(self, filename="stdout", sorted=False): - if sorted: - self.sort(key=self.key) - - fw = must_open(filename, "w") - for b in self: - if b.start < 1: - logger.error("Start < 1. 
Reset start for `%s`.", b.accn) - b.start = 1 - print(b, file=fw) - fw.close() - - def sum(self, seqid=None, unique=True): - return bed_sum(self, seqid=seqid, unique=unique) - - @property - def seqids(self): - return natsorted(set(b.seqid for b in self)) - - @property - def accns(self): - return natsorted(set(b.accn for b in self)) - - @property - def order(self): - # get the gene order given a Bed object - return dict((f.accn, (i, f)) for (i, f) in enumerate(self)) - - @property - def order_in_chr(self): - # get the gene order on a particular seqid - res = {} - self.sort(key=self.nullkey) - for seqid, beds in groupby(self, key=lambda x: x.seqid): - for i, f in enumerate(beds): - res[f.accn] = (seqid, i, f) - return res - - @property - def bp_in_chr(self): - # get the bp position on a particular seqid - res = {} - self.sort(key=self.nullkey) - for seqid, beds in groupby(self, key=lambda x: x.seqid): - for i, f in enumerate(beds): - res[f.accn] = (seqid, (f.start + f.end) / 2, f) - return res - - @property - def max_bp_in_chr(self): - # Get the maximum bp position on a particular seqid - res = OrderedDict() - self.sort(key=self.nullkey) - for seqid, beds in groupby(self, key=lambda x: x.seqid): - res[seqid] = max(x.end for x in beds) - return res - - @property - def simple_bed(self): - return [(b.seqid, i) for (i, b) in enumerate(self)] - - @property - def links(self): - r = [] - for s, sb in self.sub_beds(): - for a, b in pairwise(sb): - r.append(((a.accn, a.strand), (b.accn, b.strand))) - return r - - def extract(self, seqid, start, end): - # get all features within certain range - for b in self: - if b.seqid != seqid: - continue - if b.start < start or b.end > end: - continue - yield b - - def sub_bed(self, seqid): - # get all the beds on one chromosome - for b in self: - if b.seqid == seqid: - yield b - - def sub_beds(self): - self.sort(key=self.nullkey) - # get all the beds on all chromosomes, emitting one at a time - for bs, sb in groupby(self, key=lambda x: 
x.seqid): - yield bs, list(sb) - - def get_breaks(self): - # get chromosome break positions - simple_bed = self.simple_bed - for seqid, ranks in groupby(simple_bed, key=lambda x: x[0]): - ranks = list(ranks) - # chromosome, extent of the chromosome - yield seqid, ranks[0][1], ranks[-1][1] - - -class BedpeLine(object): - def __init__(self, sline): - args = sline.strip().split("\t") - self.seqid1 = args[0] - self.start1 = int(args[1]) + 1 - self.end1 = int(args[2]) - self.seqid2 = args[3] - self.start2 = int(args[4]) + 1 - self.end2 = int(args[5]) - self.accn = args[6] - self.score = args[7] - self.strand1 = args[8] - self.strand2 = args[9] - self.isdup = False - - @property - def innerdist(self): - if self.seqid1 != self.seqid2: - return -1 - return abs(self.start2 - self.end1) - - @property - def outerdist(self): - if self.seqid1 != self.seqid2: - return -1 - return abs(self.end2 - self.start1) - - @property - def is_innie(self): - return (self.strand1, self.strand2) == ("+", "-") - - def rc(self): - self.strand1 = "+" if self.strand1 == "-" else "-" - self.strand2 = "+" if self.strand2 == "-" else "-" - - def _extend(self, rlen, size, start, end, strand): - if strand == "+": - end = start + rlen - 1 - if end > size: - end = size - start = end - rlen + 1 - else: - start = end - rlen + 1 - if start < 1: - start = 1 - end = start + rlen - 1 - return start, end, strand - - def extend(self, rlen, size): - self.start1, self.end1, self.strand1 = self._extend( - rlen, size, self.start1, self.end1, self.strand1 - ) - self.start2, self.end2, self.strand2 = self._extend( - rlen, size, self.start2, self.end2, self.strand2 - ) - - def __str__(self): - args = ( - self.seqid1, - self.start1 - 1, - self.end1, - self.seqid2, - self.start2 - 1, - self.end2, - self.accn, - self.score, - self.strand1, - self.strand2, - ) - return "\t".join(str(x) for x in args) - - @property - def bedline(self): - assert self.seqid1 == self.seqid2 - assert self.start1 <= self.end2 - args = 
(self.seqid1, self.start1 - 1, self.end2, self.accn) - return "\t".join(str(x) for x in args) - - -class BedEvaluate(object): - def __init__(self, TPbed, FPbed, FNbed, TNbed): - self.TP = Bed(TPbed).sum(unique=True) - self.FP = Bed(FPbed).sum(unique=True) - self.FN = Bed(FNbed).sum(unique=True) - self.TN = Bed(TNbed).sum(unique=True) - - def __str__(self): - from jcvi.utils.table import tabulate - - table = { - ("Prediction-True", "Reality-True"): self.TP, - ("Prediction-True", "Reality-False"): self.FP, - ("Prediction-False", "Reality-True"): self.FN, - ("Prediction-False", "Reality-False"): self.TN, - } - msg = str(tabulate(table)) - - msg += "\nSensitivity [TP / (TP + FN)]: {0:.1f} %\n".format( - self.sensitivity * 100 - ) - msg += "Specificity [TP / (TP + FP)]: {0:.1f} %\n".format( - self.specificity * 100 - ) - msg += "Accuracy [(TP + TN) / (TP + FP + FN + TN)]: {0:.1f} %".format( - self.accuracy * 100 - ) - return msg - - @property - def sensitivity(self): - if self.TP + self.FN == 0: - return 0 - return self.TP * 1.0 / (self.TP + self.FN) - - @property - def specificity(self): - if self.TP + self.FP == 0: - return 0 - return self.TP * 1.0 / (self.TP + self.FP) - - @property - def accuracy(self): - if self.TP + self.FP + self.FN + self.TN == 0: - return 0 - return (self.TP + self.TN) * 1.0 / (self.TP + self.FP + self.FN + self.TN) - - @property - def score(self): - return "|".join( - ( - "{0:.3f}".format(x) - for x in (self.sensitivity, self.specificity, self.accuracy) - ) - ) - - -class BedSummary(object): - def __init__(self, bed): - mspans = [(x.span, x.accn) for x in bed] - spans, accns = zip(*mspans) - self.mspans = mspans - self.stats = SummaryStats(spans) - self.nseqids = len(set(x.seqid for x in bed)) - self.nfeats = len(bed) - self.total_bases = bed_sum(bed, unique=False) - self.unique_bases = bed_sum(bed) - self.coverage = self.total_bases * 1.0 / self.unique_bases - - def report(self): - print("Total seqids: {0}".format(self.nseqids), 
file=sys.stderr) - print("Total ranges: {0}".format(self.nfeats), file=sys.stderr) - print( - "Total unique bases: {0} bp".format(thousands(self.unique_bases)), - file=sys.stderr, - ) - print( - "Total bases: {0} bp".format(thousands(self.total_bases)), file=sys.stderr - ) - print("Estimated coverage: {0:.1f}x".format(self.coverage), file=sys.stderr) - print(self.stats, file=sys.stderr) - maxspan, maxaccn = max(self.mspans) - minspan, minaccn = min(self.mspans) - print("Longest: {0} ({1})".format(maxaccn, maxspan), file=sys.stderr) - print("Shortest: {0} ({1})".format(minaccn, minspan), file=sys.stderr) - - def __str__(self): - return "\t".join(str(x) for x in (self.nfeats, self.unique_bases)) - - -def bed_sum(beds, seqid=None, unique=True): - if seqid: - ranges = [(x.seqid, x.start, x.end) for x in beds if x.seqid == seqid] - else: - ranges = [(x.seqid, x.start, x.end) for x in beds] - - unique_sum = range_union(ranges) - raw_sum = sum(x.span for x in beds) - return unique_sum if unique else raw_sum - - -def main(): - actions = ( - ("bedpe", "convert to bedpe format"), - ("bins", "bin bed lengths into each window"), - ("chain", "chain bed segments together"), - ("closest", "find closest BED feature"), - ("density", "calculates density of features per seqid"), - ("depth", "calculate average depth per feature using coverageBed"), - ("distance", "calculate distance between bed features"), - ("evaluate", "make truth table and calculate sensitivity and specificity"), - ("filter", "filter bedfile to retain records between size range"), - ("filterbedgraph", "filter bedgraph to extract unique regions"), - ("fix", "fix non-standard bed files"), - ("flanking", "get n flanking features for a given position"), - ("format", "reformat BED file"), - ("gaps", "define gaps in BED file using complementBed"), - ("index", "index bed file using tabix"), - ("juncs", "trim junctions.bed overhang to get intron, merge multiple beds"), - ("longest", "select longest feature within 
overlapping piles"), - ("mates", "print paired reads from bedfile"), - ("merge", "merge bed files"), - ("mergebydepth", "returns union of features beyond certain depth"), - ("pairs", "estimate insert size between paired reads from bedfile"), - ("pile", "find the ids that intersect"), - ("random", "extract a random subset of features"), - ("refine", "refine bed file using a second bed file"), - ("sample", "sample bed file and remove high-coverage regions"), - ("seqids", "print out all seqids on one line"), - ("sizes", "infer the sizes for each seqid"), - ("some", "get a subset of bed features given a list"), - ("sort", "sort bed file"), - ("summary", "summarize the lengths of the intervals"), - ("tiling", "compute the minimum tiling path"), - ("uniq", "remove overlapping features with higher scores"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def gaps(args): - """ - %prog gaps bedfile reference.fasta - - This is used to define gaps in BED file using complementBed. One use case is - to define gaps in a BED file that was derived from a pairwise BLAST, for - example between two genomes. The reference.fasta is the reference genome. - The bedfile contains 'covered' features by BLAST hits, while the output - bedfile will contain 'uncovered' (i.e. gap) features, in that case use - --missing to note if gap is missing in one or more seqids. - """ - from pybedtools import BedTool - - p = OptionParser(gaps.__doc__) - p.add_argument( - "--na_in", - help="Add '_na_in_xxx' to gap name, use comma to separate, " - + "e.g. --na_in=chr1,chr2 to note if gap is missing in chr1 or " - + "chr2, default is to not add anything. 
Note that if one of the " - + "missing seqids happens to be the seqid of the current feature, " - + "it will not be reported.", - ) - p.add_argument("--minsize", default=1000, type=int, help="Minimum gap size") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - inputbed, ref_fasta = args - ref_sizes = Sizes(ref_fasta).mapping - minsize = opts.minsize - fw = must_open(opts.outfile, "w") - na_in = set(opts.na_in.split(",")) if opts.na_in else set() - comp = BedTool(inputbed).complement(genome=ref_fasta, L=True, stream=True) - n_gaps = 0 - all_gaps = defaultdict(list) - for f in comp: - seqid = f[0] - start = f[1] - end = f[2] - size = int(end) - int(start) - if size < minsize: - continue - all_gaps[seqid].append(size) - gap_name = f"{seqid}_{start}_L{size}" - miss = "_".join(na_in - set([seqid])) - if miss: - gap_name += f"_na_in_{miss}" - print("\t".join((seqid, start, end, gap_name)), file=fw) - n_gaps += 1 - for seqid, gap_sizes in all_gaps.items(): - total_gap_size = sum(gap_sizes) - logger.debug( - "Total gaps in %s: %d, %s", - seqid, - len(gap_sizes), - percentage(total_gap_size, ref_sizes[seqid]), - ) - - -def closest(args): - """ - %prog closest input.bed features.bed - - Find the closest feature in `features.bed` to `input.bed`. - `features.bed` must be sorted using `jcvi.formats.bed sort`. 
- """ - from pybedtools import BedTool - - p = OptionParser(closest.__doc__) - p.add_argument("--maxdist", default=5000, help="Maximum distance") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - inputbed, featuresbed = args - maxdist = opts.maxdist - sort([inputbed, "-i"]) - inputs = BedTool(inputbed) - features = BedTool(featuresbed) - nearby = inputs.closest(features, d=True, t="first", stream=True) - accn_column = inputs.field_count() + features.field_count() - 3 - for f in nearby: - seqid = f[0] - start = f[1] - end = f[2] - accn = f[3] - feat = f[accn_column].split(":")[0] - dist = int(f[-1]) - if dist > maxdist: - feat = "." - print("\t".join((seqid, start, end, "{}:{}".format(accn, feat)))) - - -def format(args): - """ - %prog format input.bed - - Re-format BED file, e.g. switch sequence ids. - """ - p = OptionParser(format.__doc__) - p.add_argument("--chrprefix", help="Add prefix to seqid") - p.add_argument("--prefix", help="Add prefix to name column (4th)") - p.add_argument("--switch", help="Switch seqids based on two-column file") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - switch = DictFile(opts.switch, delimiter="\t") if opts.switch else None - prefix = opts.prefix - chrprefix = opts.chrprefix - bed = Bed(bedfile) - with must_open(opts.outfile, "w") as fw: - for b in bed: - if prefix: - b.accn = prefix + b.accn - if chrprefix: - b.seqid = chrprefix + b.seqid - if switch and b.seqid in switch: - b.seqid = switch[b.seqid] - print(b, file=fw) - - -def filterbedgraph(args): - """ - %prog filterbedgraph a.bedgraph 1 - - Filter the bedGraph, typically from the gem-mappability pipeline. Unique - regions are 1, two copies .5, etc. 
- """ - p = OptionParser(filterbedgraph.__doc__) - _, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedgraphfile, cutoff = args - c = float(cutoff) - fp = open(bedgraphfile) - pf = bedgraphfile.rsplit(".", 1)[0] - filteredbed = pf + ".filtered-{}.bed".format(cutoff) - fw = open(filteredbed, "w") - nfiltered = ntotal = 0 - for row in fp: - b = BedLine(row) - ntotal += 1 - if float(b.accn) >= c: - print(b, file=fw) - nfiltered += 1 - fw.close() - logger.debug( - "A total of %s intervals (score >= %.2f) written to `%s`", - percentage(nfiltered, ntotal), - cutoff, - filteredbed, - ) - - mergeBed(filteredbed, sorted=True, delim=None) - - -def tiling(args): - """ - %prog tiling bedfile - - Compute minimum tiling path using as few clones as possible. Implemented - with dynamic programming. Greedy algorithm may also work according a - stackoverflow source. - """ - p = OptionParser(tiling.__doc__) - p.add_argument( - "--overlap", - default=3000, - type=int, - help="Minimum amount of overlaps required", - ) - p.set_verbose() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - ov = opts.overlap - - bed = Bed(bedfile) - inf = len(bed) - selected = Bed() - for seqid, sbed in bed.sub_beds(): - g = Grouper() - current = sbed[0] - # Partition connected features - for a in sbed: - g.join(a) - # requires a real overlap - if a.start < current.end - ov: - g.join(a, current) - if a.end > current.end: - current = a - - # Process per partition - for gbed in g: - end = max(x.end for x in gbed) - gbed.sort(key=lambda x: (x.start, -x.end)) - entries = len(gbed) - counts = [inf] * entries - counts[0] = 1 - traceback = [-1] * entries - for i, a in enumerate(gbed): - for j in range(i + 1, entries): - b = gbed[j] - if b.start >= a.end - ov: - break - # Two ranges overlap! 
- if counts[i] + 1 < counts[j]: - counts[j] = counts[i] + 1 - traceback[j] = i - endi = [i for i, a in enumerate(gbed) if a.end == end] - last = min((traceback[i], i) for i in endi)[1] - chain = [] - while last != -1: - chain.append(last) - last = traceback[last] - chain = chain[::-1] - selected.extend([gbed[x] for x in chain]) - - if opts.verbose: - print(counts) - print(traceback) - print(chain) - print("\n".join(str(x) for x in gbed)) - print("*" * 30) - print("\n".join(str(gbed[x]) for x in chain)) - print() - - tilingbedfile = bedfile.rsplit(".", 1)[0] + ".tiling.bed" - selected.print_to_file(filename=tilingbedfile, sorted=True) - logger.debug( - "A total of %d tiling features written to `%s`", len(selected), tilingbedfile - ) - - -def chain(args): - """ - %prog chain bedfile - - Chain BED segments together. - """ - p = OptionParser(chain.__doc__) - p.add_argument("--dist", default=100000, help="Chaining distance") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - cmd = "sort -k4,4 -k1,1 -k2,2n -k3,3n {0} -o {0}".format(bedfile) - sh(cmd) - bed = Bed(bedfile, sorted=False) - newbed = Bed() - for accn, bb in groupby(bed, key=lambda x: x.accn): - bb = list(bb) - g = Grouper() - for a in bb: - g.join(a) - for a, b in pairwise(bb): - if a.seqid == b.seqid and b.start - a.end < opts.dist: - g.join(a, b) - data = [] - for p in g: - seqid = p[0].seqid - start = min(x.start for x in p) - end = max(x.end for x in p) - score = sum(x.span for x in p) - data.append((seqid, start - 1, end, accn, score)) - - d = max(data, key=lambda x: x[-1]) - newbed.append(BedLine("\t".join(str(x) for x in d))) - - newbed.print_to_file(opts.outfile, sorted=True) - - -def density(args): - """ - %prog density bedfile ref.fasta - - Calculates density of features per seqid. 
- """ - p = OptionParser(density.__doc__) - _, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedfile, fastafile = args - bed = Bed(bedfile) - sizes = Sizes(fastafile).mapping - header = "seqid features size density_per_Mb".split() - print("\t".join(header)) - for seqid, bb in bed.sub_beds(): - nfeats = len(bb) - size = sizes[seqid] - ds = nfeats * 1e6 / size - print("\t".join(str(x) for x in (seqid, nfeats, size, "{0:.1f}".format(ds)))) - - -def sfa_to_fq(sfa, qvchar): - fq = sfa.rsplit(".", 1)[0] + ".fq" - fp = must_open(sfa) - fw = must_open(fq, "w") - total = 0 - for row in fp: - total += 1 - name, seq = row.split() - qual = len(seq) * qvchar - print("\n".join(("@" + name, seq, "+", qual)), file=fw) - logger.debug("A total of %d sequences written to `%s`.", total, fq) - return fq - - -def filter_bedpe(bedpe, filtered, ref, rc=False, rlen=None, minlen=2000, maxlen=8000): - tag = " after RC" if rc else "" - logger.debug( - "Filter criteria: innie%s, %d <= insertsize <= %d", tag, minlen, maxlen - ) - sizes = Sizes(ref).mapping - fp = must_open(bedpe) - fw = must_open(filtered, "w") - retained = total = 0 - for row in fp: - b = BedpeLine(row) - total += 1 - if rc: - b.rc() - if not b.is_innie: - continue - b.score = b.outerdist - if not minlen <= b.score <= maxlen: - continue - retained += 1 - if rlen: - b.extend(rlen, sizes[b.seqid1]) - print(b, file=fw) - logger.debug( - "A total of %d mates written to `%s`.", percentage(retained, total), filtered - ) - fw.close() - - -def rmdup_bedpe(filtered, rmdup, dupwiggle=10): - sortedfiltered = filtered + ".sorted" - if need_update(filtered, sortedfiltered): - sh("sort -k1,1 -k2,2n -i {0} -o {1}".format(filtered, sortedfiltered)) - - logger.debug("Rmdup criteria: wiggle <= %d", dupwiggle) - fp = must_open(sortedfiltered) - fw = must_open(rmdup, "w") - data = [BedpeLine(x) for x in fp] - retained = total = 0 - for _, ss in groupby(data, key=lambda x: x.seqid1): - ss = list(ss) - for i, a 
in enumerate(ss): - if a.isdup: - continue - for b in ss[i + 1 :]: - if b.start1 > a.start1 + dupwiggle: - break - if b.isdup: - continue - if ( - a.seqid2 == b.seqid2 - and a.start2 - dupwiggle <= b.start2 <= a.start2 + dupwiggle - ): - b.isdup = True - for a in ss: - total += 1 - if a.isdup: - continue - retained += 1 - print(a, file=fw) - logger.debug( - "A total of %s mates written to `%s`.", percentage(retained, total), rmdup - ) - fw.close() - - -def seqids(args): - """ - %prog seqids bedfile - - Print out all seqids on one line. Useful for graphics.karyotype. - """ - p = OptionParser(seqids.__doc__) - p.add_argument("--maxn", default=100, type=int, help="Maximum number of seqids") - p.add_argument("--prefix", help="Seqids must start with") - p.add_argument("--exclude", default="random", help="Seqids should not contain") - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - pf = opts.prefix - exclude = opts.exclude - bed = Bed(bedfile) - s = bed.seqids - if pf: - s = [x for x in s if x.startswith(pf)] - if exclude: - s = [x for x in s if exclude not in x] - s = s[: opts.maxn] - print(",".join(s)) - - -def juncs(args): - """ - %prog junctions junctions1.bed [junctions2.bed ...] 
- - Given a TopHat junctions.bed file, trim the read overhang to get intron span - - If more than one junction bed file is provided, uniq the junctions and - calculate cumulative (sum) junction support - """ - from tempfile import mkstemp - from pybedtools import BedTool - - p = OptionParser(juncs.__doc__) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - fh, trimbed = mkstemp(suffix=".bed") - fw = must_open(trimbed, "w") - for i, juncbed in enumerate(args): - bed = Bed(juncbed, juncs=True) - for b in bed: - ovh = [int(x) for x in b.extra[-2].split(",")] - b.start += ovh[0] - b.end -= ovh[1] - b.accn = "{0}-{1}".format(b.accn, i) - b.extra = None - print(b, file=fw) - fw.close() - - if len(args) > 1: - sh("sort -k1,1 -k2,2n {0} -o {0}".format(trimbed)) - - tbed = BedTool(trimbed) - grouptbed = tbed.groupby(g=[1, 2, 3, 6], c=5, ops=["sum"]) - - cmd = """awk -F $'\t' 'BEGIN { OFS = FS } { ID = sprintf("mJUNC%07d", NR); print $1,$2,$3,ID,$5,$4; }'""" - infile = grouptbed.fn - sh(cmd, infile=infile, outfile=opts.outfile) - else: - sort([trimbed, "-o", opts.outfile]) - - os.unlink(trimbed) - - -def random(args): - """ - %prog random bedfile number_of_features - - Extract a random subset of features. Number of features can be an integer - number, or a fractional number in which case a random fraction (for example - 0.1 = 10% of all features) will be extracted. 
- """ - from random import sample - from jcvi.formats.base import flexible_cast - - p = OptionParser(random.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedfile, N = args - assert is_number(N) - - b = Bed(bedfile) - NN = flexible_cast(N) - if NN < 1: - NN = int(round(NN * len(b))) - - beds = sample(b, NN) - new_bed = Bed() - new_bed.extend(beds) - - outfile = bedfile.rsplit(".", 1)[0] + ".{0}.bed".format(N) - new_bed.print_to_file(outfile) - logger.debug("Write %d features to `%s`", NN, outfile) - - -def filter(args): - """ - %prog filter bedfile - - Filter the bedfile to retain records between certain size range. - """ - p = OptionParser(filter.__doc__) - p.add_argument("--minsize", default=0, type=int, help="Minimum feature length") - p.add_argument( - "--maxsize", default=1000000000, type=int, help="Minimum feature length" - ) - p.add_argument( - "--minaccn", - type=int, - help="Minimum value of accn, useful to filter based on coverage", - ) - p.add_argument("--minscore", type=int, help="Minimum score") - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - fp = must_open(bedfile) - fw = must_open(opts.outfile, "w") - minsize, maxsize = opts.minsize, opts.maxsize - minaccn = opts.minaccn - minscore = opts.minscore - total = [] - keep = [] - for row in fp: - try: - b = BedLine(row) - except IndexError: - print(row.strip(), file=fw) - continue - span = b.span - total.append(span) - if not minsize <= span <= maxsize: - continue - if minaccn and int(b.accn) < minaccn: - continue - if minscore and int(b.score) < minscore: - continue - print(b, file=fw) - keep.append(span) - - logger.debug("Stats: %s features kept.", percentage(len(keep), len(total))) - logger.debug("Stats: %s bases kept.", percentage(sum(keep), sum(total))) - - -def make_bedgraph(bedfile, fastafile): - sizesfile = Sizes(fastafile).filename - pf = bedfile.rsplit(".", 
1)[0] - bedfile = sort([bedfile]) - bedgraph = pf + ".bedgraph" - if need_update(bedfile, bedgraph): - cmd = "genomeCoverageBed" - cmd += " -i {0} -g {1} -bga".format(bedfile, sizesfile) - sh(cmd, outfile=bedgraph) - - return bedgraph - - -def mergebydepth(args): - """ - %prog mergebydepth reads.bed genome.fasta - - Similar to mergeBed, but only returns regions beyond certain depth. - """ - p = OptionParser(mergebydepth.__doc__) - p.add_argument("--mindepth", default=3, type=int, help="Minimum depth required") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedfile, fastafile = args - mindepth = opts.mindepth - bedgraph = make_bedgraph(bedfile, fastafile) - - bedgraphfiltered = bedgraph + ".d{0}".format(mindepth) - if need_update(bedgraph, bedgraphfiltered): - filter( - [ - bedgraph, - "--minaccn={0}".format(mindepth), - "--outfile={0}".format(bedgraphfiltered), - ] - ) - - merged = bedgraphfiltered + ".merge.fasta" - if need_update(bedgraphfiltered, merged): - mergeBed(bedgraphfiltered, sorted=True) - - -def depth(args): - """ - %prog depth reads.bed features.bed - - Calculate depth depth per feature using coverageBed. - """ - p = OptionParser(depth.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - readsbed, featsbed = args - fp = open(featsbed) - nargs = len(fp.readline().split("\t")) - keepcols = ",".join(str(x) for x in range(1, nargs + 1)) - cmd = "coverageBed -a {0} -b {1} -d".format(readsbed, featsbed) - cmd += " | groupBy -g {0} -c {1} -o mean".format(keepcols, nargs + 2) - sh(cmd, outfile=opts.outfile) - - -def remove_isoforms(ids): - """ - This is more or less a hack to remove the GMAP multiple mappings. Multiple - GMAP mappings can be seen given the names .mrna1, .mrna2, etc. 
- """ - key = lambda x: x.rsplit(".", 1)[0] - iso_number = lambda x: get_number(x.split(".")[-1]) - ids = sorted(ids, key=key) - newids = [] - for k, ii in groupby(ids, key=key): - min_i = min(list(ii), key=iso_number) - newids.append(min_i) - return newids - - -def longest(args): - """ - %prog longest bedfile fastafile - - Select longest feature within overlapping piles. - """ - from jcvi.formats.sizes import Sizes - - p = OptionParser(longest.__doc__) - p.add_argument("--maxsize", default=20000, type=int, help="Limit max size") - p.add_argument("--minsize", default=60, type=int, help="Limit min size") - p.add_argument( - "--precedence", default="Medtr", help="Accessions with prefix take precedence" - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedfile, fastafile = args - maxsize = opts.maxsize - minsize = opts.minsize - prec = opts.precedence - mergedbed = mergeBed(bedfile, nms=True) - sizes = Sizes(fastafile).mapping - bed = Bed(mergedbed) - - pf = bedfile.rsplit(".", 1)[0] - ids = set() - for b in bed: - accns = b.accn.split(";") - prec_accns = [x for x in accns if x.startswith(prec)] - if prec_accns: - accns = prec_accns - accn_sizes = [(sizes.get(x, 0), x) for x in accns] - accn_sizes = [(size, x) for size, x in accn_sizes if size < maxsize] - if not accn_sizes: - continue - max_size, max_accn = max(accn_sizes) - if max_size < minsize: - continue - ids.add(max_accn) - - newids = remove_isoforms(ids) - logger.debug("Remove isoforms: before=%d after=%d", len(ids), len(newids)) - - longestidsfile = pf + ".longest.ids" - fw = open(longestidsfile, "w") - print("\n".join(newids), file=fw) - fw.close() - logger.debug("A total of %d records written to `%s`.", len(newids), longestidsfile) - - longestbedfile = pf + ".longest.bed" - some( - [ - bedfile, - longestidsfile, - "--outfile={0}".format(longestbedfile), - "--no_strip_names", - ] - ) - - -def merge(args): - """ - %prog merge bedfiles > newbedfile - - Concatenate 
bed files together. Performing seqid and name changes to avoid - conflicts in the new bed file. - """ - p = OptionParser(merge.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - bedfiles = args - fw = must_open(opts.outfile, "w") - for bedfile in bedfiles: - bed = Bed(bedfile) - pf = op.basename(bedfile).split(".")[0] - for b in bed: - b.seqid = "_".join((pf, b.seqid)) - print(b, file=fw) - - -def fix(args): - """ - %prog fix bedfile > newbedfile - - Fix non-standard bed files. One typical problem is start > end. - """ - p = OptionParser(fix.__doc__) - p.add_argument("--minspan", default=0, type=int, help="Enforce minimum span") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - minspan = opts.minspan - fp = open(bedfile) - fw = must_open(opts.outfile, "w") - nfixed = nfiltered = ntotal = 0 - for row in fp: - atoms = row.strip().split("\t") - assert len(atoms) >= 3, "Must be at least 3 columns" - seqid, start, end = atoms[:3] - start, end = int(start), int(end) - orientation = "+" - if start > end: - start, end = end, start - orientation = "-" - nfixed += 1 - - atoms[1:3] = [str(start), str(end)] - if len(atoms) > 6: - atoms[6] = orientation - line = "\t".join(atoms) - b = BedLine(line) - - if b.span >= minspan: - print(b, file=fw) - nfiltered += 1 - - ntotal += 1 - - if nfixed: - logger.debug("Total fixed: %s".format(percentage(nfixed, ntotal))) - if nfiltered: - logger.debug("Total filtered: %s".format(percentage(nfiltered, ntotal))) - - -def some(args): - """ - %prog some bedfile idsfile > newbedfile - - Retrieve a subset of bed features given a list of ids. 
- """ - from jcvi.formats.base import SetFile - from jcvi.utils.cbook import gene_name - - p = OptionParser(some.__doc__) - p.add_argument( - "-v", - dest="inverse", - default=False, - action="store_true", - help="Get the inverse, like grep -v", - ) - p.set_outfile() - p.set_stripnames() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedfile, idsfile = args - inverse = opts.inverse - ostrip = opts.strip_names - fw = must_open(opts.outfile, "w") - - ids = SetFile(idsfile) - if ostrip: - ids = set(gene_name(x) for x in ids) - bed = Bed(bedfile) - ntotal = nkeep = 0 - for b in bed: - ntotal += 1 - keep = b.accn in ids - if inverse: - keep = not keep - - if keep: - nkeep += 1 - print(b, file=fw) - - fw.close() - logger.debug("Stats: %s features kept.".format(percentage(nkeep, ntotal))) - - -def uniq(args): - """ - %prog uniq bedfile - - Remove overlapping features with higher scores. - """ - from jcvi.formats.sizes import Sizes - - p = OptionParser(uniq.__doc__) - p.add_argument("--sizes", help="Use sequence length as score") - p.add_argument( - "--mode", default="span", choices=("span", "score"), help="Pile mode" - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - uniqbedfile = bedfile.split(".")[0] + ".uniq.bed" - bed = Bed(bedfile) - - if opts.sizes: - sizes = Sizes(opts.sizes).mapping - ranges = [ - Range(x.seqid, x.start, x.end, sizes[x.accn], i) for i, x in enumerate(bed) - ] - else: - if opts.mode == "span": - ranges = [ - Range(x.seqid, x.start, x.end, x.end - x.start + 1, i) - for i, x in enumerate(bed) - ] - else: - ranges = [ - Range(x.seqid, x.start, x.end, float(x.score), i) - for i, x in enumerate(bed) - ] - - selected, score = range_chain(ranges) - selected = [x.id for x in selected] - selected_ids = set(selected) - selected = [bed[x] for x in selected] - notselected = [x for i, x in enumerate(bed) if i not in selected_ids] - - newbed = Bed() - 
newbed.extend(selected) - newbed.print_to_file(uniqbedfile, sorted=True) - - if notselected: - leftoverfile = bedfile.split(".")[0] + ".leftover.bed" - leftoverbed = Bed() - leftoverbed.extend(notselected) - leftoverbed.print_to_file(leftoverfile, sorted=True) - - logger.debug("Imported: %d, Exported: %d", len(bed), len(newbed)) - - return uniqbedfile - - -def subtractbins(binfile1, binfile2): - from jcvi.graphics.landscape import BinFile - - abin = BinFile(binfile1) - bbin = BinFile(binfile2) - - assert len(abin) == len(bbin) - - fw = open(binfile1, "w") - - for a, b in zip(abin, bbin): - assert a.chr == b.chr - assert a.binlen == b.binlen - - a.subtract(b) - print(a, file=fw) - - fw.close() - - return binfile1 - - -def get_nbins(clen: int, shift: int) -> Tuple[int, int]: - """ - Get the number of bins for a given chromosome length and shift. - """ - nbins, last_bin = divmod(clen, shift) - if last_bin: - nbins += 1 - return nbins, last_bin - - -def bins(args): - """ - %prog bins bedfile fastafile - - Bin bed lengths into each consecutive window. Use --subtract to remove bases - from window, e.g. --subtract gaps.bed ignores the gap sequences. 
- """ - - p = OptionParser(bins.__doc__) - p.add_argument("--binsize", default=100000, type=int, help="Size of the bins") - p.add_argument("--subtract", help="Subtract bases from window") - p.add_argument( - "--mode", - default="span", - choices=("span", "count", "score"), - help="Accumulate feature based on", - ) - p.add_argument( - "--nomerge", default=False, action="store_true", help="Do not merge features" - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedfile, fastafile = args - subtract = opts.subtract - mode = opts.mode - assert op.exists(bedfile), "File `{0}` not found".format(bedfile) - - binsize = opts.binsize - binfile = bedfile + ".{0}".format(binsize) - binfile += ".{0}.bins".format(mode) - - if not need_update(bedfile, binfile): - return binfile - - sz = Sizes(fastafile) - sizesfile = sz.filename - sizes = sz.mapping - fw = open(binfile, "w") - scores = "median" if mode == "score" else None - if not opts.nomerge: - bedfile = mergeBed(bedfile, nms=True, scores=scores) - if subtract: - subtractmerge = mergeBed(subtract) - subtract_complement = complementBed(subtractmerge, sizesfile) - bedfile = intersectBed(bedfile, subtract_complement) - - bedfile = sort([bedfile, "-i"]) - - bed = Bed(bedfile) - sbdict = dict(bed.sub_beds()) - for chr, chr_len in sorted(sizes.items()): - chr_len = sizes[chr] - subbeds = sbdict.get(chr, []) - nbins, last_bin = get_nbins(chr_len, binsize) - - a = np.zeros(nbins) # values - b = np.zeros(nbins, dtype=int) # bases - c = np.zeros(nbins, dtype=int) # count - b[:-1] = binsize - b[-1] = last_bin - - for bb in subbeds: - start, end = bb.start, bb.end - startbin = start // binsize - endbin = end // binsize - - assert startbin <= endbin - c[startbin : endbin + 1] += 1 - - if mode == "score": - a[startbin : endbin + 1] += float(bb.score) - - elif mode == "span": - if startbin == endbin: - a[startbin] += end - start + 1 - - if startbin < endbin: - firstsize = (startbin + 1) * binsize 
- start + 1 - lastsize = end - endbin * binsize - a[startbin] += firstsize - if startbin + 1 < endbin: - a[startbin + 1 : endbin] += binsize - a[endbin] += lastsize - - if mode == "count": - a = c - - for xa, xb in zip(a, b): - print("\t".join(str(x) for x in (chr, xa, xb)), file=fw) - - fw.close() - - if subtract: - subtractbinfile = bins([subtract, fastafile, "--binsize={0}".format(binsize)]) - binfile = subtractbins(binfile, subtractbinfile) - - return binfile - - -def pile(args): - """ - %prog pile abedfile bbedfile > piles - - Call intersectBed on two bedfiles. - """ - from jcvi.utils.grouper import Grouper - - p = OptionParser(pile.__doc__) - p.add_argument("--minOverlap", default=0, type=int, help="Minimum overlap required") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - abedfile, bbedfile = args - iw = intersectBed_wao(abedfile, bbedfile, minOverlap=opts.minOverlap) - groups = Grouper() - for a, b in iw: - groups.join(a.accn, b.accn) - - ngroups = 0 - for group in groups: - if len(group) > 1: - ngroups += 1 - print("|".join(group)) - - logger.debug("A total of %d piles (>= 2 members)", ngroups) - - -def index(args): - """ - %prog index bedfile - - Compress and index bedfile using `tabix`. Use --fasta to give a FASTA file - so that a bedgraph file can be generated and indexed. 
- """ - p = OptionParser(index.__doc__) - p.add_argument("--fasta", help="Generate bedgraph and index") - p.add_argument("--query", help="Chromosome location") - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - fastafile = opts.fasta - if fastafile: - bedfile = make_bedgraph(bedfile, fastafile) - - bedfile = sort([bedfile]) - - gzfile = bedfile + ".gz" - if need_update(bedfile, gzfile): - cmd = "bgzip {0}".format(bedfile) - sh(cmd) - - tbifile = gzfile + ".tbi" - if need_update(gzfile, tbifile): - cmd = "tabix -p bed {0}".format(gzfile) - sh(cmd) - - query = opts.query - if not query: - return - - cmd = "tabix {0} {1}".format(gzfile, query) - sh(cmd, outfile=opts.outfile) - - -def fastaFromBed(bedfile, fastafile, name=False, tab=False, stranded=False): - suffix = ".sfa" if tab else ".fasta" - outfile = op.basename(bedfile).rsplit(".", 1)[0] + suffix - cmd = "fastaFromBed -fi {0} -bed {1} -fo {2}".format(fastafile, bedfile, outfile) - if name: - cmd += " -name" - if tab: - cmd += " -tab" - if stranded: - cmd += " -s" - - if need_update([bedfile, fastafile], outfile): - sh(cmd, outfile=outfile) - - return outfile - - -def mergeBed( - bedfile: str, - d: int = 0, - sorted: bool = False, - nms: bool = False, - s: bool = False, - scores: Optional[str] = None, - delim: str = ";", - inplace: bool = False, -): - if not sorted: - bedfile = sort([bedfile, "-i"]) - cmd = "mergeBed -i {0}".format(bedfile) - if d: - cmd += " -d {0}".format(d) - if nms: - nargs = len(open(bedfile).readline().split()) - if nargs <= 3: - logger.debug("Only %d columns detected... 
set nms=True", nargs) - else: - cmd += " -c 4 -o collapse" - if s: - cmd += " -s" - if scores: - valid_opts = ( - "sum", - "min", - "max", - "mean", - "median", - "mode", - "antimode", - "collapse", - ) - if scores not in valid_opts: - scores = "mean" - cmd += " -scores {0}".format(scores) - - if nms and delim: - cmd += ' -delim "{0}"'.format(delim) - - pf = bedfile.rsplit(".", 1)[0] if bedfile.endswith(".bed") else bedfile - mergebedfile = op.basename(pf) + ".merge.bed" - - if need_update(bedfile, mergebedfile): - sh(cmd, outfile=mergebedfile) - - if inplace: - shutil.move(mergebedfile, bedfile) - return mergebedfile - - -def complementBed(bedfile, sizesfile): - cmd = "complementBed" - cmd += " -i {0} -g {1}".format(bedfile, sizesfile) - complementbedfile = "complement_" + op.basename(bedfile) - - if need_update([bedfile, sizesfile], complementbedfile): - sh(cmd, outfile=complementbedfile) - return complementbedfile - - -def intersectBed(bedfile1, bedfile2): - cmd = "intersectBed" - cmd += " -a {0} -b {1}".format(bedfile1, bedfile2) - suffix = ".intersect.bed" - - intersectbedfile = ( - ".".join( - (op.basename(bedfile1).split(".")[0], op.basename(bedfile2).split(".")[0]) - ) - + suffix - ) - - if need_update([bedfile1, bedfile2], intersectbedfile): - sh(cmd, outfile=intersectbedfile) - return intersectbedfile - - -def query_to_range(query, sizes): - # chr1:1-10000 => (chr1, 0, 10000) - if ":" in query: - a, bc = query.split(":", 1) - b, c = [int(x) for x in bc.split("-", 1)] - b -= 1 - else: - a = query - b, c = 0, sizes.mapping[a] - - return a, b, c - - -def evaluate(args): - """ - %prog evaluate prediction.bed reality.bed fastafile - - Make a truth table like: - True False --- Reality - True TP FP - False FN TN - |----Prediction - - Sn = TP / (all true in reality) = TP / (TP + FN) - Sp = TP / (all true in prediction) = TP / (TP + FP) - Ac = (TP + TN) / (TP + FP + FN + TN) - """ - from jcvi.formats.sizes import Sizes - - p = OptionParser(evaluate.__doc__) - 
p.add_argument("--query", help="Chromosome location") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - prediction, reality, fastafile = args - query = opts.query - prediction = mergeBed(prediction) - reality = mergeBed(reality) - sizes = Sizes(fastafile) - sizesfile = sizes.filename - - prediction_complement = complementBed(prediction, sizesfile) - reality_complement = complementBed(reality, sizesfile) - - TPbed = intersectBed(prediction, reality) - FPbed = intersectBed(prediction, reality_complement) - FNbed = intersectBed(prediction_complement, reality) - TNbed = intersectBed(prediction_complement, reality_complement) - beds = (TPbed, FPbed, FNbed, TNbed) - - if query: - subbeds = [] - rr = query_to_range(query, sizes) - ce = 'echo "{0}"'.format("\t".join(str(x) for x in rr)) - for b in beds: - subbed = ".".join((b, query)) - cmd = ce + " | intersectBed -a stdin -b {0}".format(b) - sh(cmd, outfile=subbed) - subbeds.append(subbed) - beds = subbeds - - be = BedEvaluate(*beds) - print(be, file=sys.stderr) - - if query: - cleanup(subbeds) - - return be - - -def intersectBed_wao(abedfile, bbedfile, minOverlap=0): - abed = Bed(abedfile) - bbed = Bed(bbedfile) - print("`{0}` has {1} features.".format(abedfile, len(abed)), file=sys.stderr) - print("`{0}` has {1} features.".format(bbedfile, len(bbed)), file=sys.stderr) - - cmd = "intersectBed -wao -a {0} -b {1}".format(abedfile, bbedfile) - acols = abed[0].nargs - bcols = bbed[0].nargs - fp = popen(cmd) - for row in fp: - atoms = row.split() - aline = "\t".join(atoms[:acols]) - bline = "\t".join(atoms[acols : acols + bcols]) - c = int(atoms[-1]) - if c < minOverlap: - continue - a = BedLine(aline) - try: - b = BedLine(bline) - except AssertionError: - b = None - - yield a, b - - -def refine(args): - """ - %prog refine bedfile1 bedfile2 refinedbed - - Refine bed file using a second bed file. 
The final bed is keeping all the - intervals in bedfile1, but refined by bedfile2 whenever they have - intersection. - """ - p = OptionParser(refine.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - abedfile, bbedfile, refinedbed = args - fw = open(refinedbed, "w") - intersected = refined = 0 - for a, b in intersectBed_wao(abedfile, bbedfile): - if b is None: - print(a, file=fw) - continue - - intersected += 1 - aspan_before = a.span - arange = (a.start, a.end) - brange = (b.start, b.end) - irange = range_intersect(arange, brange) - a.start, a.end = irange - aspan_after = a.span - if aspan_before > aspan_after: - refined += 1 - print(a, file=fw) - - fw.close() - print("Total intersected: {0}".format(intersected), file=sys.stderr) - print("Total refined: {0}".format(refined), file=sys.stderr) - summary([abedfile]) - summary([refinedbed]) - - -def distance(args): - """ - %prog distance bedfile - - Calculate distance between bed features. The output file is a list of - distances, which can be used to plot histogram, etc. - """ - p = OptionParser(distance.__doc__) - p.add_argument( - "--distmode", - default="ss", - choices=("ss", "ee"), - help="Distance mode between paired reads. ss is outer distance, " - "ee is inner distance", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - sortedbedfile = sort([bedfile]) - valid = total = 0 - fp = open(sortedbedfile) - for a, b in pairwise(fp): - a = BedLine(a) - b = BedLine(b) - ar = (a.seqid, a.start, a.end, "+") - br = (b.seqid, b.start, b.end, "+") - dist, oo = range_distance(ar, br, distmode=opts.distmode) - total += 1 - if dist > 0: - print(dist) - valid += 1 - - logger.debug("Total valid (> 0) distances: %s.", percentage(valid, total)) - - -def sample(args): - """ - %prog sample bedfile sizesfile - - Sample bed file and remove high-coverage regions. 
- - When option --targetsize is used, this program uses a differnent mode. It - first calculates the current total bases from all ranges and then compare to - targetsize, if more, then sample down as close to targetsize as possible. - - Selection via --raindrop has the effect of making coverage even. Selected - reads have the property that their end points are not within a certain - window from one another. One sweep goes from left to right, the other in - the reverse direction. - """ - import random - from jcvi.assembly.coverage import Coverage - - p = OptionParser(sample.__doc__) - p.add_argument( - "--raindrop", - default=0, - type=int, - help="Raindrop selection, ignores all other options", - ) - p.add_argument("--max", default=10, type=int, help="Max depth allowed") - p.add_argument( - "--targetsize", type=int, help="Sample bed file to get target base number" - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedfile, sizesfile = args - pf = bedfile.rsplit(".", 1)[0] - raindrop = opts.raindrop - - # Raindrop method - if raindrop: - bed = Bed(bedfile) - forward = [] - for b in bed: - if not forward or abs(b.start - forward[-1].start) >= raindrop: - forward.append(b) - - reverse = [] - bed.sort(key=lambda x: -x.end) - for b in bed: - if not reverse or abs(b.end - reverse[-1].end) >= raindrop: - reverse.append(b) - - for tag, L in zip(("forward", "reverse"), (forward, reverse)): - logger.debug( - "Selected %d features in %s direction, span: %d", - len(L), - tag, - sum(x.span for x in L), - ) - - selected = Bed() - selected.extend(set(forward + reverse)) - selected.print_to_file(opts.outfile, sorted=True) - return - - targetsize = opts.targetsize - if targetsize: - bed = Bed(bedfile) - samplebed = pf + ".sample.bed" - fw = open(samplebed, "w") - nfeats = len(bed) - nbases = bed.sum(unique=False) - targetfeats = int(round(nfeats * targetsize / nbases)) - sub_bed = random.sample(bed, targetfeats) - for b 
in sub_bed: - print(b, file=fw) - - logger.debug("File written to `%s`.", samplebed) - return - - c = Coverage(bedfile, sizesfile) - coveragefile = c.filename - samplecoveragefile = pf + ".sample.coverage" - fw = open(samplecoveragefile, "w") - fp = open(coveragefile) - for row in fp: - seqid, start, end, cov = row.split() - cov = int(cov) - if cov <= opts.max: - fw.write(row) - fw.close() - - samplebedfile = pf + ".sample.bed" - cmd = "intersectBed -a {0} -b {1} -wa -u".format(bedfile, samplecoveragefile) - sh(cmd, outfile=samplebedfile) - logger.debug("Sampled bedfile written to `%s`.", samplebedfile) - - -def bedpe(args): - """ - %prog bedpe bedfile - - Convert to bedpe format. Use --span to write another bed file that contain - the span of the read pairs. - """ - from jcvi.assembly.coverage import bed_to_bedpe - - p = OptionParser(bedpe.__doc__) - p.add_argument( - "--span", default=False, action="store_true", help="Write span bed file" - ) - p.add_argument( - "--strand", default=False, action="store_true", help="Write the strand columns" - ) - p.add_argument("--mates", help="Check the library stats from .mates file") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - pf = bedfile.rsplit(".", 1)[0] - bedpefile = pf + ".bedpe" - bedspanfile = pf + ".spans.bed" if opts.span else None - bed_to_bedpe( - bedfile, - bedpefile, - pairsbedfile=bedspanfile, - matesfile=opts.mates, - strand=opts.strand, - ) - return bedpefile, bedspanfile - - -def sizes(args): - """ - %prog sizes bedfile - - Infer the sizes for each seqid. Useful before dot plots. 
- """ - p = OptionParser(sizes.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - assert op.exists(bedfile) - - sizesfile = bedfile.rsplit(".", 1)[0] + ".sizes" - - fw = must_open(sizesfile, "w", checkexists=True, skipcheck=True) - if fw: - b = Bed(bedfile) - for s, sbeds in b.sub_beds(): - print("{0}\t{1}".format(s, max(x.end for x in sbeds)), file=fw) - logger.debug("Sizes file written to `%s`.", sizesfile) - - return sizesfile - - -def analyze_dists(dists, cutoff=1000, alpha=0.1): - """ - The dists can show bimodal distribution if they come from a mate-pair - library. Assume bimodal distribution and then separate the two peaks. Based - on the percentage in each peak, we can decide if it is indeed one peak or - two peaks, and report the median respectively. - """ - peak0 = [d for d in dists if d < cutoff] - peak1 = [d for d in dists if d >= cutoff] - c0, c1 = len(peak0), len(peak1) - logger.debug("Component counts: %d %d", c0, c1) - if c0 == 0 or c1 == 0 or float(c1) / len(dists) < alpha: - logger.debug("Single peak identified (%d / %d < %.1f)", c1, len(dists), alpha) - return np.median(dists) - - peak0_median = np.median(peak0) - peak1_median = np.median(peak1) - logger.debug( - "Dual peaks identified: %dbp (%d), %dbp (%d) (selected)", - int(peak0_median), - c0, - int(peak1_median), - c1, - ) - - return peak1_median - - -def report_pairs( - data, - cutoff=0, - mateorientation=None, - pairsfile=None, - insertsfile=None, - rclip=1, - ascii=False, - bins=20, - distmode="ss", - mpcutoff=1000, -): - """ - This subroutine is used by the pairs function in blast.py and cas.py. 
- Reports number of fragments and pairs as well as linked pairs - """ - allowed_mateorientations = ("++", "--", "+-", "-+") - - if mateorientation: - assert mateorientation in allowed_mateorientations - - num_fragments, num_pairs = 0, 0 - - all_dist = [] - linked_dist = [] - # +- (forward-backward) is `innie`, -+ (backward-forward) is `outie` - orientations = defaultdict(int) - - # clip how many chars from end of the read name to get pair name - key = (lambda x: x.accn[:-rclip]) if rclip else (lambda x: x.accn) - data.sort(key=key) - - if pairsfile: - pairsfw = open(pairsfile, "w") - if insertsfile: - insertsfw = open(insertsfile, "w") - - for pe, lines in groupby(data, key=key): - lines = list(lines) - if len(lines) != 2: - num_fragments += len(lines) - continue - - num_pairs += 1 - a, b = lines - - asubject, astart, astop = a.seqid, a.start, a.end - bsubject, bstart, bstop = b.seqid, b.start, b.end - - aquery, bquery = a.accn, b.accn - astrand, bstrand = a.strand, b.strand - - dist, orientation = range_distance( - (asubject, astart, astop, astrand), - (bsubject, bstart, bstop, bstrand), - distmode=distmode, - ) - - if dist >= 0: - all_dist.append((dist, orientation, aquery, bquery)) - - # select only pairs with certain orientations - e.g. innies, outies, etc. 
- if mateorientation: - all_dist = [x for x in all_dist if x[1] == mateorientation] - - # try to infer cutoff as twice the median until convergence - if cutoff <= 0: - dists = np.array([x[0] for x in all_dist], dtype=int) - p0 = analyze_dists(dists, cutoff=mpcutoff) - cutoff = int(2 * p0) # initial estimate - cutoff = int(math.ceil(cutoff / bins)) * bins - logger.debug("Insert size cutoff set to %d, use '--cutoff' to override", cutoff) - - for dist, orientation, aquery, bquery in all_dist: - if dist > cutoff: - continue - if cutoff > 2 * mpcutoff and dist < mpcutoff: - continue - - linked_dist.append(dist) - if pairsfile: - print("{0}\t{1}\t{2}".format(aquery, bquery, dist), file=pairsfw) - orientations[orientation] += 1 - - print( - "{0} fragments, {1} pairs ({2} total)".format( - num_fragments, num_pairs, num_fragments + num_pairs * 2 - ), - file=sys.stderr, - ) - - s = SummaryStats(linked_dist, dtype=int) - num_links = s.size - - meandist, stdev = s.mean, s.sd - p0, p1, p2 = s.median, s.p1, s.p2 - - print( - "%d pairs (%.1f%%) are linked (cutoff=%d)" - % (num_links, num_links * 100.0 / num_pairs, cutoff), - file=sys.stderr, - ) - print( - "mean distance between mates: {0} +/- {1}".format(meandist, stdev), - file=sys.stderr, - ) - print("median distance between mates: {0}".format(p0), file=sys.stderr) - print("95% distance range: {0} - {1}".format(p1, p2), file=sys.stderr) - print("\nOrientations:", file=sys.stderr) - - orientation_summary = [] - for orientation, count in sorted(orientations.items()): - o = "{0}:{1}".format(orientation, percentage(count, num_links, mode=1)) - orientation_summary.append(o.split()[0]) - print(o, file=sys.stderr) - - if insertsfile: - from jcvi.graphics.histogram import histogram - - print("\n".join(str(x) for x in linked_dist), file=insertsfw) - insertsfw.close() - prefix = insertsfile.rsplit(".", 1)[0] - if prefix > 10: - prefix = prefix.split("-")[0] - osummary = " ".join(orientation_summary) - title = "{0} ({1}; median:{2} 
bp)".format(prefix, osummary, p0) - histogram( - insertsfile, - vmin=0, - vmax=cutoff, - bins=bins, - xlabel="Insertsize", - title=title, - ascii=ascii, - ) - cleanup(insertsfile) - - return s - - -def pairs(args): - """ - See __doc__ for OptionParser.set_pairs(). - """ - p = OptionParser(pairs.__doc__) - p.set_pairs() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - - basename = bedfile.split(".")[0] - insertsfile = ".".join((basename, "inserts")) - bedfile = sort([bedfile, "--accn"]) - - fp = open(bedfile) - data = [BedLine(row) for i, row in enumerate(fp) if i < opts.nrows] - - ascii = not opts.pdf - return ( - bedfile, - report_pairs( - data, - opts.cutoff, - opts.mateorientation, - pairsfile=opts.pairsfile, - insertsfile=insertsfile, - rclip=opts.rclip, - ascii=ascii, - bins=opts.bins, - distmode=opts.distmode, - ), - ) - - -def summary(args): - """ - %prog summary bedfile - - Sum the total lengths of the intervals. - """ - p = OptionParser(summary.__doc__) - p.add_argument( - "--sizes", default=False, action="store_true", help="Write .sizes file" - ) - p.add_argument( - "--all", - default=False, - action="store_true", - help="Write summary stats per seqid", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - bed = Bed(bedfile) - bs = BedSummary(bed) - if opts.sizes: - sizesfile = bedfile + ".sizes" - fw = open(sizesfile, "w") - for span, accn in bs.mspans: - print(span, file=fw) - fw.close() - logger.debug("Spans written to `%s`.", sizesfile) - return bs - - if not opts.all: - bs.report() - return bs - - for seqid, subbeds in bed.sub_beds(): - bs = BedSummary(subbeds) - print("\t".join((seqid, str(bs)))) - - -def sort(args): - """ - %prog sort bedfile - - Sort bed file to have ascending order of seqid, then start. It uses the - `sort` command. 
- """ - p = OptionParser(sort.__doc__) - p.add_argument( - "-i", - "--inplace", - dest="inplace", - default=False, - action="store_true", - help="Sort bed file in place", - ) - p.add_argument( - "-u", - dest="unique", - default=False, - action="store_true", - help="Uniqify the bed file", - ) - p.add_argument( - "--accn", - default=False, - action="store_true", - help="Sort based on the accessions", - ) - p.add_argument( - "--num", - default=False, - action="store_true", - help="Numerically sort seqid column, e.g. chr1,chr2,...", - ) - p.set_outfile(outfile=None) - p.set_tmpdir() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - inplace = opts.inplace - - if opts.num: - bed = Bed(bedfile) - bed.print_to_file(opts.outfile or "stdout", sorted=True) - return - - if not inplace and ".sorted." in bedfile: - return bedfile - - sortedbed = opts.outfile - if inplace: - sortedbed = bedfile - elif opts.outfile is None: - pf, sf = op.basename(bedfile).rsplit(".", 1) - sortedbed = pf + ".sorted." + sf - - sortopt = ( - "-k1,1 -k2,2n -k3,3n -k4,4" if not opts.accn else "-k4,4 -k1,1 -k2,2n -k3,3n" - ) - cmd = "sort" - if opts.tmpdir: - cmd += " -T {0}".format(opts.tmpdir) - if opts.unique: - cmd += " -u" - cmd += " {0} {1} -o {2}".format(sortopt, bedfile, sortedbed) - - if inplace or need_update(bedfile, sortedbed): - sh(cmd) - - return sortedbed - - -def mates(args): - """ - %prog mates bedfile - - Generate the mates file by inferring from the names. 
- """ - p = OptionParser(mates.__doc__) - p.add_argument( - "--lib", - default=False, - action="store_true", - help="Output library information along with pairs", - ) - p.add_argument( - "--nointra", - default=False, - action="store_true", - help="Remove mates that are intra-scaffold", - ) - p.add_argument( - "--prefix", - default=False, - action="store_true", - help="Only keep links between IDs with same prefix", - ) - p.set_mates() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - rclip = opts.rclip - - key = (lambda x: x.accn[:-rclip]) if rclip else (lambda x: x.accn) - bed = Bed(bedfile, key=key) - - pf = bedfile.rsplit(".", 1)[0] - matesfile = pf + ".mates" - lib = pf if opts.lib else None - fw = open(matesfile, "w") - if lib: - bedfile, stats = pairs( - [bedfile, "--rclip={0}".format(rclip), "--cutoff={0}".format(opts.cutoff)] - ) - sv = int(2 * stats.sd) - mindist = max(stats.mean - sv, 1) - maxdist = stats.mean + sv - print("\t".join(str(x) for x in ("library", pf, mindist, maxdist)), file=fw) - - num_fragments = num_pairs = 0 - matesbedfile = matesfile + ".bed" - fwm = open(matesbedfile, "w") - for _, lines in groupby(bed, key=key): - lines = list(lines) - if len(lines) != 2: - num_fragments += len(lines) - continue - - a, b = lines - - if opts.nointra and a.seqid == b.seqid: - continue - - # Use --prefix to limit the links between seqids with the same prefix - # For example, contigs of the same BAC, mth2-23j10_001, mth-23j10_002 - if opts.prefix: - aprefix = a.seqid.split("_")[0] - bprefix = b.seqid.split("_")[0] - if aprefix != bprefix: - continue - - num_pairs += 1 - pair = [a.accn, b.accn] - if lib: - pair.append(lib) - print("\t".join(pair), file=fw) - - print(a, file=fwm) - print(b, file=fwm) - - logger.debug( - "Discard %d frags and write %d pairs to `%s` and `%s`.", - num_fragments, - num_pairs, - matesfile, - matesbedfile, - ) - - fw.close() - fwm.close() - - return matesfile, 
matesbedfile - - -def flanking(args): - """ - %prog flanking bedfile [options] - - Get up to n features (upstream or downstream or both) flanking a given position. - """ - from numpy import array, argsort - - p = OptionParser(flanking.__doc__) - p.add_argument( - "--chrom", - default=None, - type=str, - help="chrom name of the position in query. Make sure it matches bedfile.", - ) - p.add_argument( - "--coord", default=None, type=int, help="coordinate of the position in query." - ) - p.add_argument( - "-n", default=10, type=int, help="number of flanking features to get" - ) - p.add_argument( - "--side", - default="both", - choices=("upstream", "downstream", "both"), - help="which side to get flanking features", - ) - p.add_argument( - "--max_d", default=None, type=int, help="features <= max_d away from position" - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if any([len(args) != 1, opts.chrom is None, opts.coord is None]): - sys.exit(not p.print_help()) - - (bedfile,) = args - position = (opts.chrom, opts.coord) - n, side, maxd = opts.n, opts.side, opts.max_d - - chrombed = Bed(bedfile).sub_bed(position[0]) - - if side == "upstream": - data = [ - (abs(f.start - position[1]), f) for f in chrombed if f.start <= position[1] - ] - elif side == "downstream": - data = [ - (abs(f.start - position[1]), f) for f in chrombed if f.start >= position[1] - ] - else: - data = [(abs(f.start - position[1]), f) for f in chrombed] - - if maxd: - data = [f for f in data if f[0] <= maxd] - - n += 1 # not counting self - n = min(n, len(data)) - distances, subbed = zip(*data) - distances = array(distances) - idx = argsort(distances)[:n] - flankingbed = [f for (i, f) in enumerate(subbed) if i in idx] - - fw = must_open(opts.outfile, "w") - for atom in flankingbed: - print(str(atom), file=fw) - - return position, flankingbed - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/blast.py b/jcvi/formats/blast.py deleted file mode 100644 index 54b2ede0..00000000 
--- a/jcvi/formats/blast.py +++ /dev/null @@ -1,1543 +0,0 @@ -""" -parses tabular BLAST -m8 (-format 6 in BLAST+) format -""" - -import os.path as op -import sys - -from itertools import groupby -from collections import defaultdict - -from ..apps.base import ActionDispatcher, OptionParser, logger, popen, sh -from ..assembly.base import calculate_A50 -from ..compara.base import AnchorFile -from ..utils.cbook import percentage -from ..utils.grouper import Grouper -from ..utils.orderedcollections import OrderedDict -from ..utils.range import range_distance - -from .base import LineFile, BaseFile, must_open -from .bed import Bed -from .sizes import Sizes - - -try: - from .cblast import BlastLine -except ImportError as e: - logger.error(f"Failed to import cblast: {e}") - from .pyblast import BlastLine - logger.warning("Fall back to Python implementation of BlastLine") - - -class BlastSlow(LineFile): - """ - Load entire blastfile into memory - """ - - def __init__(self, filename, sorted=False): - super().__init__(filename) - fp = must_open(filename) - for row in fp: - self.append(BlastLine(row)) - self.sorted = sorted - if not sorted: - self.sort(key=lambda x: x.query) - - def iter_hits(self): - for query, blines in groupby(self, key=lambda x: x.query): - yield query, blines - - def iter_hits_pair(self): - key = lambda x: (x.query, x.subject) - if not self.sorted: - self.sort(key=key) - for qs, blines in groupby(self, key=key): - yield qs, blines - - def to_dict(self): - # for multiple HSPs pick the one with highest score - d = OrderedDict() - for line in self: - if (line.query, line.subject) not in d: - d[(line.query, line.subject)] = line - else: - cur_score = d[(line.query, line.subject)].score - if line.score > cur_score: - d[(line.query, line.subject)] = line - return d - - -class Blast(BaseFile): - """ - We can have a Blast class that loads entire file into memory, this is - not very efficient for big files (BlastSlow); when the BLAST file is - generated by 
BLAST/BLAT, the file is already sorted - """ - - def __init__(self, filename): - super().__init__(filename) - self.fp = must_open(filename) - - def __iter__(self): - self.fp.seek(0) - for row in self.fp: - if row[0] == "#": - continue - yield BlastLine(row) - - def iter_hits(self): - for query, blines in groupby(self.fp, key=lambda x: BlastLine(x).query): - blines = [BlastLine(x) for x in blines] - blines.sort(key=lambda x: -x.score) # descending score - yield query, blines - - def iter_best_hit(self, N=1, hsps=False, ref="query"): - if ref == "query": - ref, hit = "query", "subject" - elif ref == "subject": - ref, hit = "subject", "query" - else: - sys.exit("`ref` must be either `query` or `subject`.") - - for bref, blines in groupby(self.fp, key=lambda x: getattr(BlastLine(x), ref)): - blines = [BlastLine(x) for x in blines] - blines.sort(key=lambda x: -x.score) - counter = 0 - selected = set() - for b in blines: - if hsps: - selected.add(getattr(b, hit)) - counter = len(selected) - if counter > N: - selected.remove(getattr(b, hit)) - continue - else: - counter += 1 - if counter > N: - break - - yield bref, b - - @property - def hits(self): - """ - returns a dict with query => blastline - """ - return dict(self.iter_hits()) - - @property - def best_hits(self): - """ - returns a dict with query => best blasthit - """ - return dict(self.iter_best_hit()) - - -class BlastLineByConversion(BlastLine): - """ - make BlastLine object from tab delimited line objects with - BlastLine-like up to 12 fields formats - """ - - def __init__(self, sline, mode="1" * 12): - if int(mode, 2) == 4095: - super().__init__(sline) - elif 3072 <= int(mode, 2) < 4095: - args = sline.split("\t") - atoms = args[:2] - mode = list(mode) - if len(args) == 12: - for i in range(2, 12): - if mode[i] == "1": - atoms.append(args[i]) - else: - atoms.append("-1") - if len(args) < 12: - for i in range(2, 12): - if mode[i] == "1": - atoms.append(args[i - mode[:i].count("0")]) - else: - atoms.append("-1") 
- sline = "\t".join(atoms) - super().__init__(sline) - else: - m = "mode can only contain 0 or 1 \n" - m += "first two fields (query, subject) cannot be empty" - sys.exit(m) - - -class AlignStats: - """ - Stores the alignment statistics that is used in formats.blast.summary() - and formats.coords.summary() - """ - - def __init__( - self, filename, qrycovered, refcovered, qryspan, refspan, identicals, AL50 - ): - self.filename = filename - self.qrycovered = qrycovered - self.refcovered = refcovered - self.qryspan = qryspan - self.refspan = refspan - self.identicals = identicals - self.AL50 = AL50 - - def __str__(self): - pp = lambda x, d: "{:.2f}".format(x * 100.0 / d) - return "\t".join( - str(x) - for x in ( - self.filename, - self.identicals, - self.qrycovered, - pp(self.identicals, self.qrycovered), - self.refcovered, - pp(self.identicals, self.refcovered), - self.qryspan, - pp(self.identicals, self.qryspan), - self.refspan, - pp(self.identicals, self.refspan), - ) - ) - - def print_stats(self): - qrycovered = self.qrycovered - refcovered = self.refcovered - qryspan = self.qryspan - refspan = self.refspan - m0 = "AL50 (>=50% of bases in alignment blocks >= this size): {}".format( - self.AL50 - ) - m1 = "Query coverage: {}".format(percentage(self.identicals, qrycovered)) - m2 = "Reference coverage: {}".format(percentage(self.identicals, refcovered)) - m3 = "Query span: {}".format(percentage(self.identicals, qryspan)) - m4 = "Reference span: {}".format(percentage(self.identicals, refspan)) - print("\n".join((m0, m1, m2, m3, m4)), file=sys.stderr) - - -def get_stats(blastfile, strict=False): - from jcvi.utils.range import range_union, range_span - from .pyblast import BlastLine - - logger.debug("Report stats on `%s`" % blastfile) - fp = open(blastfile) - ref_ivs = [] - qry_ivs = [] - identicals = 0 - ngaps = 0 - alignlens = [] - - for row in fp: - c = BlastLine(row) - qstart, qstop = c.qstart, c.qstop - if qstart > qstop: - qstart, qstop = qstop, qstart - 
qry_ivs.append((c.query, qstart, qstop)) - - sstart, sstop = c.sstart, c.sstop - if sstart > sstop: - sstart, sstop = sstop, sstart - ref_ivs.append((c.subject, sstart, sstop)) - - alen = c.hitlen - ngaps += c.ngaps - identicals += c.hitlen - c.nmismatch - c.ngaps - alignlens.append(alen) - - qrycovered = range_union(qry_ivs) - refcovered = range_union(ref_ivs) - if strict: - # We discount gaps in counting covered bases, since we - # did not track individually gaps in qry and ref, we assume - # the gaps are opened evenly in the two sequences - qrycovered -= ngaps / 2 - refcovered -= ngaps / 2 - qryspan = range_span(qry_ivs) - refspan = range_span(ref_ivs) - _, AL50, _ = calculate_A50(alignlens) - filename = op.basename(blastfile) - alignstats = AlignStats( - filename, qrycovered, refcovered, qryspan, refspan, identicals, AL50 - ) - - return alignstats - - -def filtered_blastfile_name( - blastfile: str, - pctid: float, - hitlen: int, - inverse: bool = False, -) -> str: - """ - Return a filtered filename for LAST output, with the given similarity cutoff. - """ - pctid_str = f"{pctid:.1f}".replace(".", "_").replace("_0", "") - newblastfile = blastfile + ".P{0}L{1}".format(pctid_str, hitlen) - if inverse: - newblastfile += ".inverse" - return newblastfile - - -def filter(args): - """ - %prog filter test.blast - - Produce a new blast file and filter based on: - - score: >= cutoff - - pctid: >= cutoff - - hitlen: >= cutoff - - evalue: <= cutoff - - ids: valid ids - - Use --inverse to obtain the complementary records for the criteria above. 
- - - noself: remove self-self hits - """ - p = OptionParser(filter.__doc__) - p.add_argument("--score", dest="score", default=0, type=int, help="Score cutoff") - p.set_align(pctid=95, hitlen=100, evalue=0.01) - p.add_argument( - "--noself", default=False, action="store_true", help="Remove self-self hits" - ) - p.add_argument("--ids", help="Path to file with ids to retain") - p.add_argument( - "--inverse", - default=False, - action="store_true", - help="Similar to grep -v, inverse", - ) - p.set_outfile(outfile=None) - - opts, args = p.parse_args(args) - if len(args) != 1: - sys.exit(not p.print_help()) - - if opts.ids: - ids = set() - for row in must_open(opts.ids): - if row[0] == "#": - continue - row = row.replace(",", "\t") - ids.update(row.split()) - else: - ids = None - - (blastfile,) = args - inverse = opts.inverse - fp = must_open(blastfile) - - score, pctid, hitlen, evalue, noself = ( - opts.score, - opts.pctid, - opts.hitlen, - opts.evalue, - opts.noself, - ) - blastfile = opts.outfile or blastfile - newblastfile = filtered_blastfile_name(blastfile, pctid, hitlen, inverse) - fw = must_open(newblastfile, "w") - for row in fp: - if row[0] == "#": - continue - c = BlastLine(row) - - if ids: - if c.query in ids and c.subject in ids: - noids = False - else: - noids = True - else: - noids = None - - remove = ( - c.score < score - or c.pctid < pctid - or c.hitlen < hitlen - or c.evalue > evalue - or noids - ) - - if inverse: - remove = not remove - - remove = remove or (noself and c.query == c.subject) - - if not remove: - print(row.rstrip(), file=fw) - - fw.close() - - return newblastfile - - -def main(): - actions = ( - ("summary", "provide summary on id% and cov%"), - ("completeness", "print completeness statistics for each query"), - ("annotation", "create tabular file with the annotations"), - ("top10", "count the most frequent 10 hits"), - ("filter", "filter BLAST file (based on score, id%, alignlen)"), - ("covfilter", "filter BLAST file (based on id% and 
cov%)"), - ("cscore", "calculate C-score for BLAST pairs"), - ("best", "get best BLAST hit per query"), - ("anchors", "keep only the BLAST pairs that are in the anchors file"), - ("pairs", "print paired-end reads of BLAST tabular file"), - ("bed", "get bed file from BLAST tabular file"), - ("condense", "group HSPs together for same query-subject pair"), - ("chain", "chain adjacent HSPs together"), - ("swap", "swap query and subjects in BLAST tabular file"), - ("sort", "sort lines so that query grouped together and scores desc"), - ("subset", "extract hits from some query and subject chrs"), - ("mismatches", "print out histogram of mismatches of HSPs"), - ("annotate", "annotate overlap types in BLAST tabular file"), - ("score", "add up the scores for each query seq"), - ("rbbh", "find reciprocal-best blast hits"), - ("gaps", "find distribution of gap sizes between adjacent HSPs"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def collect_gaps(blast, use_subject=False): - """ - Collect the gaps between adjacent HSPs in the BLAST file. - """ - key = lambda x: x.sstart if use_subject else x.qstart - blast.sort(key=key) - - for a, b in zip(blast, blast[1:]): - if use_subject: - if a.sstop < b.sstart: - yield b.sstart - a.sstop - else: - if a.qstop < b.qstart: - yield b.qstart - a.qstop - - -def gaps(args): - """ - %prog gaps A_vs_B.blast - - Find distribution of gap sizes betwen adjacent HSPs. 
- """ - p = OptionParser(gaps.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - blast = BlastSlow(blastfile) - logger.debug("A total of {} records imported".format(len(blast))) - - query_gaps = list(collect_gaps(blast)) - subject_gaps = list(collect_gaps(blast, use_subject=True)) - logger.debug( - "Query gaps: {} Subject gaps: {}".format(len(query_gaps), len(subject_gaps)) - ) - - from jcvi.graphics.base import savefig - import seaborn as sns - - sns.distplot(query_gaps) - savefig("query_gaps.pdf") - - -def rbbh(args): - """ - %prog rbbh A_vs_B.blast B_vs_A.blast - - Identify the reciprocal best blast hit for each query sequence in set A - when compared to set B. - - This program assumes that the BLAST results have already been filtered - based on a combination of %id, %cov, e-value cutoffs. BLAST output should - be in tabular `-m 8` format. - """ - p = OptionParser(rbbh.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - ( - abfile, - bafile, - ) = args - ab = Blast(abfile) - ba = Blast(bafile) - - ab_hits = ab.best_hits - ba_hits = ba.best_hits - - for aquery in ab_hits: - ahit = ab_hits[aquery].subject - ba_bline = ba_hits.get(ahit) - if ba_bline: - bhit = ba_bline.subject - if bhit == aquery: - print("\t".join(str(x) for x in (aquery, ahit))) - - -def score(args): - """ - %prog score blastfile query.fasta A.ids - - Add up the scores for each query seq. Go through the lines and for each - query sequence, add up the scores when subject is in each pile by A.ids. 
- """ - from jcvi.formats.base import SetFile - from jcvi.formats.fasta import Fasta - - p = OptionParser(score.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - blastfile, fastafile, idsfile = args - ids = SetFile(idsfile) - - blast = Blast(blastfile) - scores = defaultdict(int) - for b in blast: - query = b.query - subject = b.subject - if subject not in ids: - continue - scores[query] += b.score - - logger.debug("A total of {0} ids loaded.".format(len(ids))) - - f = Fasta(fastafile) - for s in f.iterkeys_ordered(): - sc = scores.get(s, 0) - print("\t".join((s, str(sc)))) - - -def annotation(args): - """ - %prog annotation blastfile > annotations - - Create simple two column files from the first two coluns in blastfile. Use - --queryids and --subjectids to switch IDs or descriptions. - """ - from jcvi.formats.base import DictFile - - p = OptionParser(annotation.__doc__) - p.add_argument("--queryids", help="Query IDS file to switch") - p.add_argument("--subjectids", help="Subject IDS file to switch") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - - d = "\t" - qids = DictFile(opts.queryids, delimiter=d) if opts.queryids else None - sids = DictFile(opts.subjectids, delimiter=d) if opts.subjectids else None - blast = Blast(blastfile) - for b in blast: - query, subject = b.query, b.subject - if qids: - query = qids[query] - if sids: - subject = sids[subject] - print("\t".join((query, subject))) - - -def completeness(args): - """ - %prog completeness blastfile ref.fasta > outfile - - Print statistics for each gene, the coverage of the alignment onto the best hit, - as an indicator for completeness of the gene model. For example, one might - BLAST sugarcane ESTs against sorghum annotations as reference, to find - full-length transcripts. 
- """ - from jcvi.utils.range import range_minmax - from jcvi.utils.cbook import SummaryStats - - p = OptionParser(completeness.__doc__) - p.add_argument("--ids", help="Save ids that are over 50% complete") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - blastfile, fastafile = args - idsfile = opts.ids - f = Sizes(fastafile).mapping - - b = BlastSlow(blastfile) - valid = [] - data = [] - cutoff = 50 - for query, blines in groupby(b, key=lambda x: x.query): - blines = list(blines) - ranges = [(x.sstart, x.sstop) for x in blines] - b = blines[0] - query, subject = b.query, b.subject - - rmin, rmax = range_minmax(ranges) - subject_len = f[subject] - - nterminal_dist = rmin - 1 - cterminal_dist = subject_len - rmax - covered = (rmax - rmin + 1) * 100 / subject_len - if covered > cutoff: - valid.append(query) - - data.append((nterminal_dist, cterminal_dist, covered)) - print( - "\t".join( - str(x) - for x in (query, subject, nterminal_dist, cterminal_dist, covered) - ) - ) - - nd, cd, cv = zip(*data) - m = "Total: {0}, Coverage > {1}%: {2}\n".format(len(data), cutoff, len(valid)) - m += "N-terminal: {0}\n".format(SummaryStats(nd)) - m += "C-terminal: {0}\n".format(SummaryStats(cd)) - m += "Coverage: {0}".format(SummaryStats(cv)) - print(m, file=sys.stderr) - - if idsfile: - fw = open(idsfile, "w") - print("\n".join(valid), file=fw) - logger.debug( - "A total of {0} ids (cov > {1} %) written to `{2}`.".format( - len(valid), cutoff, idsfile - ) - ) - fw.close() - - -def annotate(args): - """ - %prog annotate blastfile query.fasta subject.fasta - - Annotate overlap types (dovetail, contained, etc) in BLAST tabular file. 
- """ - from jcvi.assembly.goldenpath import Cutoff, Overlap, Overlap_types - - p = OptionParser(annotate.__doc__) - p.set_align(pctid=94, hitlen=500) - p.add_argument("--hang", default=500, type=int, help="Maximum overhang length") - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - blastfile, afasta, bfasta = args - fp = must_open(blastfile) - asizes = Sizes(afasta).mapping - bsizes = Sizes(bfasta).mapping - cutoff = Cutoff(opts.pctid, opts.hitlen, opts.hang) - logger.debug(str(cutoff)) - for row in fp: - b = BlastLine(row) - asize = asizes[b.query] - bsize = bsizes[b.subject] - if b.query == b.subject: - continue - ov = Overlap(b, asize, bsize, cutoff) - if ov.otype: - ov.print_graphic() - print("{0}\t{1}".format(b, Overlap_types[ov.otype])) - - -def top10(args): - """ - %prog top10 blastfile.best - - Count the most frequent 10 hits. Usually the BLASTFILE needs to be screened - the get the best match. You can also provide an .ids file to query the ids. - For example the ids file can contain the seqid to species mapping. - - The ids file is two-column, and can sometimes be generated by - `jcvi.formats.fasta ids --description`. 
- """ - from jcvi.formats.base import DictFile - - p = OptionParser(top10.__doc__) - p.add_argument( - "--top", - default=10, - type=int, - help="Top N taxa to extract", - ) - p.add_argument( - "--ids", - default=None, - help="Two column ids file to query seqid", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - mapping = DictFile(opts.ids, delimiter="\t") if opts.ids else {} - - cmd = "cut -f2 {0}".format(blastfile) - cmd += " | sort | uniq -c | sort -k1,1nr | head -n {0}".format(opts.top) - fp = popen(cmd) - for row in fp: - count, seqid = row.split() - nseqid = mapping.get(seqid, seqid) - print("\t".join((count, nseqid))) - - -def sort(args): - """ - %prog sort - - Sort lines so that same query grouped together with scores descending. The - sort is 'in-place'. - """ - p = OptionParser(sort.__doc__) - p.add_argument( - "--query", - default=False, - action="store_true", - help="Sort by query position", - ) - p.add_argument( - "--ref", - default=False, - action="store_true", - help="Sort by reference position", - ) - p.add_argument( - "--refscore", - default=False, - action="store_true", - help="Sort by reference name, then score descending", - ) - p.add_argument( - "--coords", - default=False, - action="store_true", - help="File is .coords generated by NUCMER", - ) - p.set_tmpdir() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - - if opts.coords: - if opts.query: - key = "-k13,13 -k3,3n" - elif opts.ref: - key = "-k12,12 -k1,1n" - - else: - if opts.query: - key = "-k1,1 -k7,7n" - elif opts.ref: - key = "-k2,2 -k9,9n" - elif opts.refscore: - key = "-k2,2 -k12,12gr" - else: - key = "-k1,1 -k12,12gr" - - cmd = "sort" - if opts.tmpdir: - cmd += " -T {0}".format(opts.tmpdir) - cmd += " {0} {1} -o {1}".format(key, blastfile) - sh(cmd) - - -def cscore(args): - """ - %prog cscore blastfile > cscoreOut - - See supplementary info for sea 
anemone genome paper, C-score formula: - - cscore(A,B) = score(A,B) / - max(best score for A, best score for B) - - A C-score of one is the same as reciprocal best hit (RBH). - - Output file will be 3-column (query, subject, cscore). Use --cutoff to - select a different cutoff. - """ - from jcvi.utils.cbook import gene_name - - p = OptionParser(cscore.__doc__) - p.add_argument( - "--cutoff", - default=0.9999, - type=float, - help="Minimum C-score to report", - ) - p.add_argument( - "--pct", - default=False, - action="store_true", - help="Also include pct as last column", - ) - p.add_argument( - "--writeblast", - default=False, - action="store_true", - help="Also write filtered blast file", - ) - p.set_stripnames() - p.set_outfile() - - opts, args = p.parse_args(args) - ostrip = opts.strip_names - writeblast = opts.writeblast - outfile = opts.outfile - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - - blast = Blast(blastfile) - logger.debug("Register best scores ..") - best_score = defaultdict(float) - for b in blast: - query, subject = b.query, b.subject - if ostrip: - query, subject = gene_name(query), gene_name(subject) - - score = b.score - if score > best_score[query]: - best_score[query] = score - if score > best_score[subject]: - best_score[subject] = score - - blast = Blast(blastfile) - pairs = {} - cutoff = opts.cutoff - for b in blast: - query, subject = b.query, b.subject - if ostrip: - query, subject = gene_name(query), gene_name(subject) - - score = b.score - pctid = b.pctid - s = score / max(best_score[query], best_score[subject]) - if s > cutoff: - pair = (query, subject) - if pair not in pairs or s > pairs[pair][0]: - pairs[pair] = (s, pctid, b) - - fw = must_open(outfile, "w") - if writeblast: - fwb = must_open(outfile + ".filtered.blast", "w") - pct = opts.pct - for (query, subject), (s, pctid, b) in sorted(pairs.items()): - args = [query, subject, "{0:.2f}".format(s)] - if pct: - args.append("{0:.1f}".format(pctid)) - 
print("\t".join(args), file=fw) - if writeblast: - print(b, file=fwb) - fw.close() - if writeblast: - fwb.close() - - -def get_distance(a, b, xaxis=True): - """ - Returns the distance between two blast HSPs. - """ - if xaxis: - arange = ("0", a.qstart, a.qstop, a.orientation) # 0 is the dummy chromosome - brange = ("0", b.qstart, b.qstop, b.orientation) - else: - arange = ("0", a.sstart, a.sstop, a.orientation) - brange = ("0", b.sstart, b.sstop, b.orientation) - - dist, oo = range_distance(arange, brange, distmode="ee") - dist = abs(dist) - - return dist - - -def combine_HSPs(a): - """ - Combine HSPs into a single BlastLine. - """ - m = a[0] - if len(a) == 1: - return m - - for b in a[1:]: - assert m.query == b.query - assert m.subject == b.subject - m.hitlen += b.hitlen - m.nmismatch += b.nmismatch - m.ngaps += b.ngaps - m.qstart = min(m.qstart, b.qstart) - m.qstop = max(m.qstop, b.qstop) - m.sstart = min(m.sstart, b.sstart) - m.sstop = max(m.sstop, b.sstop) - if m.has_score: - m.score += b.score - - m.pctid = 100 - (m.nmismatch + m.ngaps) * 100.0 / m.hitlen - return m - - -def chain_HSPs(blast, xdist=100, ydist=100): - """ - Take a list of BlastLines (or a BlastSlow instance), and returns a list of - BlastLines. 
- """ - key = lambda x: (x.query, x.subject) - blast.sort(key=key) - - clusters = Grouper() - for qs, points in groupby(blast, key=key): - points = sorted( - list(points), key=lambda x: (x.qstart, x.qstop, x.sstart, x.sstop) - ) - - n = len(points) - for i in range(n): - a = points[i] - clusters.join(a) - for j in range(i + 1, n): - b = points[j] - - # x-axis distance - del_x = get_distance(a, b) - if del_x > xdist: - break - # y-axis distance - del_y = get_distance(a, b, xaxis=False) - if del_y > ydist: - continue - # otherwise join - clusters.join(a, b) - - chained_hsps = [combine_HSPs(x) for x in clusters] - key = lambda x: (x.query, -x.score if x.has_score else 0) - chained_hsps = sorted(chained_hsps, key=key) - - return chained_hsps - - -def chain(args): - """ - %prog chain blastfile - - Chain adjacent HSPs together to form larger HSP. - """ - p = OptionParser(chain.__doc__) - p.add_argument( - "--dist", - dest="dist", - default=100, - type=int, - help="extent of flanking regions to search", - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - dist = opts.dist - assert dist > 0 - - blast = BlastSlow(blastfile) - logger.debug("A total of {} records imported".format(len(blast))) - chained_hsps = chain_HSPs(blast, xdist=dist, ydist=dist) - logger.debug("A total of {} records after chaining".format(len(chained_hsps))) - - for b in chained_hsps: - print(b) - - -def condense(args): - """ - %prog condense blastfile > blastfile.condensed - - Condense HSPs that belong to the same query-subject pair into one. 
- """ - p = OptionParser(condense.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - blast = BlastSlow(blastfile) - key = lambda x: x.query - blast.sort(key=key) - - clusters = [] - for q, lines in groupby(blast, key=key): - lines = list(lines) - condenser = defaultdict(list) - - for b in lines: - condenser[(b.subject, b.orientation)].append(b) - - for bs in condenser.values(): - clusters.append(bs) - - chained_hsps = [combine_HSPs(x) for x in clusters] - chained_hsps = sorted(chained_hsps, key=lambda x: (x.query, -x.score)) - for b in chained_hsps: - print(b) - - -def mismatches(args): - """ - %prog mismatches blastfile - - Print out histogram of mismatches of HSPs, usually for evaluating SNP level. - """ - from jcvi.utils.cbook import percentage - from jcvi.graphics.histogram import stem_leaf_plot - - p = OptionParser(mismatches.__doc__) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - - data = [] - b = Blast(blastfile) - for query, bline in b.iter_best_hit(): - mm = bline.nmismatch + bline.ngaps - data.append(mm) - - nonzeros = [x for x in data if x != 0] - title = "Polymorphic sites: {0}".format(percentage(len(nonzeros), len(data))) - stem_leaf_plot(data, 0, 20, 20, title=title) - - -def covfilter(args): - """ - %prog covfilter blastfile fastafile - - Fastafile is used to get the sizes of the queries. Two filters can be - applied, the id% and cov%. 
- """ - from jcvi.algorithms.supermap import supermap - from jcvi.utils.range import range_union - - allowed_iterby = ("query", "query_sbjct") - - p = OptionParser(covfilter.__doc__) - p.set_align(pctid=95, pctcov=50) - p.add_argument( - "--scov", - default=False, - action="store_true", - help="Subject coverage instead of query", - ) - p.add_argument( - "--supermap", action="store_true", help="Use supermap instead of union" - ) - p.add_argument( - "--ids", - dest="ids", - default=None, - help="Print out the ids that satisfy", - ) - p.add_argument( - "--list", - dest="list", - default=False, - action="store_true", - help="List the id% and cov% per gene", - ) - p.add_argument( - "--iterby", - dest="iterby", - default="query", - choices=allowed_iterby, - help="Choose how to iterate through BLAST", - ) - p.set_outfile(outfile=None) - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - blastfile, fastafile = args - pctid = opts.pctid - pctcov = opts.pctcov - union = not opts.supermap - scov = opts.scov - sz = Sizes(fastafile) - sizes = sz.mapping - iterby = opts.iterby - qspair = iterby == "query_sbjct" - - if not union: - querysupermap = blastfile + ".query.supermap" - if not op.exists(querysupermap): - supermap(blastfile, filter="query") - - blastfile = querysupermap - - assert op.exists(blastfile) - - covered = 0 - mismatches = 0 - gaps = 0 - alignlen = 0 - queries = set() - valid = set() - blast = BlastSlow(blastfile) - iterator = blast.iter_hits_pair if qspair else blast.iter_hits - - covidstore = {} - for query, blines in iterator(): - blines = list(blines) - queries.add(query) - - # per gene report - this_covered = 0 - this_alignlen = 0 - this_mismatches = 0 - this_gaps = 0 - this_identity = 0 - - ranges = [] - for b in blines: - if scov: - s, start, stop = b.subject, b.sstart, b.sstop - else: - s, start, stop = b.query, b.qstart, b.qstop - cov_id = s - - if b.pctid < pctid: - continue - - if start > stop: - start, stop = 
stop, start - this_covered += stop - start + 1 - this_alignlen += b.hitlen - this_mismatches += b.nmismatch - this_gaps += b.ngaps - ranges.append(("1", start, stop)) - - if ranges: - this_identity = ( - 100.0 - (this_mismatches + this_gaps) * 100.0 / this_alignlen - ) - - if union: - this_covered = range_union(ranges) - - this_coverage = this_covered * 100.0 / sizes[cov_id] - covidstore[query] = (this_identity, this_coverage) - if this_identity >= pctid and this_coverage >= pctcov: - valid.add(query) - - covered += this_covered - mismatches += this_mismatches - gaps += this_gaps - alignlen += this_alignlen - - if opts.list: - if qspair: - allpairs = defaultdict(list) - for q, s in covidstore: - allpairs[q].append((q, s)) - allpairs[s].append((q, s)) - - for id, size in sz.iter_sizes(): - if id not in allpairs: - print("\t".join((id, "na", "0", "0"))) - else: - for qs in allpairs[id]: - this_identity, this_coverage = covidstore[qs] - print( - "{0}\t{1:.1f}\t{2:.1f}".format( - "\t".join(qs), this_identity, this_coverage - ) - ) - else: - for query, size in sz.iter_sizes(): - this_identity, this_coverage = covidstore.get(query, (0, 0)) - print( - "{0}\t{1:.1f}\t{2:.1f}".format(query, this_identity, this_coverage) - ) - - mapped_count = len(queries) - valid_count = len(valid) - cutoff_message = "(id={0.pctid}% cov={0.pctcov}%)".format(opts) - - m = "Identity: {0} mismatches, {1} gaps, {2} alignlen\n".format( - mismatches, gaps, alignlen - ) - total = len(sizes.keys()) - m += "Total mapped: {0} ({1:.1f}% of {2})\n".format( - mapped_count, mapped_count * 100.0 / total, total - ) - m += "Total valid {0}: {1} ({2:.1f}% of {3})\n".format( - cutoff_message, valid_count, valid_count * 100.0 / total, total - ) - m += "Average id = {0:.2f}%\n".format(100 - (mismatches + gaps) * 100.0 / alignlen) - - queries_combined = sz.totalsize - m += "Coverage: {0} covered, {1} total\n".format(covered, queries_combined) - m += "Average coverage = {0:.2f}%".format(covered * 100.0 / 
queries_combined) - - logfile = blastfile + ".covfilter.log" - fw = open(logfile, "w") - for f in (sys.stderr, fw): - print(m, file=f) - fw.close() - - if opts.ids: - filename = opts.ids - fw = must_open(filename, "w") - for id in valid: - print(id, file=fw) - logger.debug( - "Queries beyond cutoffs {0} written to `{1}`.".format( - cutoff_message, filename - ) - ) - - outfile = opts.outfile - if not outfile: - return - - fw = must_open(outfile, "w") - blast = Blast(blastfile) - for b in blast: - query = (b.query, b.subject) if qspair else b.query - if query in valid: - print(b, file=fw) - - -def swap(args): - """ - %prog swap blastfile - - Print out a new blast file with query and subject swapped. - """ - p = OptionParser(swap.__doc__) - - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - swappedblastfile = blastfile + ".swapped" - fp = must_open(blastfile) - fw = must_open(swappedblastfile, "w") - for row in fp: - b = BlastLine(row) - print(b.swapped, file=fw) - - fw.close() - sort([swappedblastfile]) - - -def bed(args): - """ - %prog bed blastfile - - Print out bed file based on coordinates in BLAST report. By default, write - out subject positions. Use --swap to write query positions. 
- """ - from .bed import sort as bed_sort, mergeBed - - p = OptionParser(bed.__doc__) - p.add_argument( - "--swap", - default=False, - action="store_true", - help="Write query positions", - ) - p.add_argument( - "--both", - default=False, - action="store_true", - help="Generate one line for each of query and subject", - ) - p.add_argument( - "--merge", - default=None, - type=int, - help="Merge hits within this distance", - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - (blastfile,) = args - positive = (not opts.swap) or opts.both - negative = opts.swap or opts.both - - fp = must_open(blastfile) - bedfile = ( - "{0}.bed".format(blastfile.rsplit(".", 1)[0]) - if blastfile.endswith(".blast") - else "{0}.bed".format(blastfile) - ) - fw = open(bedfile, "w") - for row in fp: - b = BlastLine(row) - if positive: - print(b.bedline, file=fw) - if negative: - print(b.swapped.bedline, file=fw) - - logger.debug("File written to `%s`.", bedfile) - fw.close() - bed_sort([bedfile, "-i"]) - if opts.merge: - mergeBed(bedfile, sorted=True, d=opts.merge, inplace=True) - - return bedfile - - -def pairs(args): - """ - See __doc__ for OptionParser.set_pairs(). - """ - import jcvi.formats.bed - - p = OptionParser(pairs.__doc__) - p.set_pairs() - opts, targs = p.parse_args(args) - - if len(targs) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = targs - bedfile = bed([blastfile]) - args[args.index(blastfile)] = bedfile - - return jcvi.formats.bed.pairs(args) - - -def anchors(args): - """ - %prog anchors blastfile anchorsfile - - Extract a subset of the BLAST file based on the anchors file. The anchors - file is a tab-delimited file with two columns, likely generated from synteny - pipeline. This is useful to filter down BLAST. 
- """ - p = OptionParser(anchors.__doc__) - p.set_outfile() - p.add_argument( - "--best", default=False, action="store_true", help="Keep only the best hit" - ) - opts, args = p.parse_args(args) - if len(args) != 2: - sys.exit(not p.print_help()) - - blastfile, anchorsfile = args - anchor_file = AnchorFile(anchorsfile) - anchor_pairs = set((a, b) for a, b, _ in anchor_file.iter_pairs()) - blast = Blast(blastfile) - found, total = 0, 0 - fw = must_open(opts.outfile, "w") - seen = set() - for rec in blast: - pp = (rec.query, rec.subject) - if pp in anchor_pairs: - found += 1 - if opts.best and pp in seen: - continue - print(rec, file=fw) - seen.add(pp) - total += 1 - logger.info("Found %s", percentage(found, total)) - - -def best(args): - """ - %prog best blastfile - - print the best hit for each query in the blastfile - """ - p = OptionParser(best.__doc__) - - p.add_argument("-n", default=1, type=int, help="get best N hits") - p.add_argument( - "--nosort", - default=False, - action="store_true", - help="assume BLAST is already sorted", - ) - p.add_argument( - "--hsps", - default=False, - action="store_true", - help="get all HSPs for the best pair", - ) - p.add_argument( - "--subject", - default=False, - action="store_true", - help="get best hit(s) for subject genome instead", - ) - p.set_tmpdir() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - n = opts.n - hsps = opts.hsps - tmpdir = opts.tmpdir - ref = "query" if not opts.subject else "subject" - - if not opts.nosort: - sargs = [blastfile] - if tmpdir: - sargs += ["-T {0}".format(tmpdir)] - if ref != "query": - sargs += ["--refscore"] - sort(sargs) - else: - logger.debug("Assuming sorted BLAST") - - if not opts.subject: - bestblastfile = blastfile + ".best" - else: - bestblastfile = blastfile + ".subject.best" - fw = open(bestblastfile, "w") - - b = Blast(blastfile) - for q, bline in b.iter_best_hit(N=n, hsps=hsps, ref=ref): - print(bline, file=fw) - 
- return bestblastfile - - -def summary(args): - """ - %prog summary blastfile - - Provide summary on id% and cov%, for both query and reference. Often used in - comparing genomes (based on NUCMER results). - - Columns: - filename, identicals, qrycovered, pct_qrycovered, refcovered, pct_refcovered, - qryspan, pct_qryspan, refspan, pct_refspan - """ - p = OptionParser(summary.__doc__) - p.add_argument( - "--strict", - default=False, - action="store_true", - help="Strict 'gapless' mode. Exclude gaps from covered base.", - ) - p.add_argument( - "--tabular", - default=False, - action="store_true", - help="Print succint tabular output", - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (blastfile,) = args - - alignstats = get_stats(blastfile, strict=opts.strict) - if opts.tabular: - print(str(alignstats)) - else: - alignstats.print_stats() - - -def subset(args): - """ - %prog subset blastfile qbedfile sbedfile - - Extract blast hits between given query and subject chrs. - - If --qchrs or --schrs is not given, then all chrs from q/s genome will - be included. However one of --qchrs and --schrs must be specified. - Otherwise the script will do nothing. - """ - p = OptionParser(subset.__doc__) - p.add_argument( - "--qchrs", - default=None, - help="query chrs to extract, comma sep", - ) - p.add_argument( - "--schrs", - default=None, - help="subject chrs to extract, comma sep", - ) - p.add_argument( - "--convert", - default=False, - action="store_true", - help="convert accns to chr_rank", - ) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - blastfile, qbedfile, sbedfile = args - qchrs = opts.qchrs - schrs = opts.schrs - assert qchrs or schrs, p.print_help() - convert = opts.convert - - outfile = blastfile + "." - if qchrs: - outfile += qchrs + "." 
- qchrs = set(qchrs.split(",")) - else: - qchrs = set(Bed(qbedfile).seqids) - if schrs: - schrs = set(schrs.split(",")) - if qbedfile != sbedfile or qchrs != schrs: - outfile += ",".join(schrs) + "." - else: - schrs = set(Bed(sbedfile).seqids) - outfile += "blast" - - qo = Bed(qbedfile).order - so = Bed(sbedfile).order - - fw = must_open(outfile, "w") - for b in Blast(blastfile): - q, s = b.query, b.subject - if qo[q][1].seqid in qchrs and so[s][1].seqid in schrs: - if convert: - b.query = qo[q][1].seqid + "_" + "{0:05d}".format(qo[q][0]) - b.subject = so[s][1].seqid + "_" + "{0:05d}".format(so[s][0]) - print(b, file=fw) - fw.close() - logger.debug("Subset blastfile written to `{0}`".format(outfile)) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/cblast.c b/jcvi/formats/cblast.c deleted file mode 100644 index 8b92b238..00000000 --- a/jcvi/formats/cblast.c +++ /dev/null @@ -1,16862 +0,0 @@ -/* Generated by Cython 3.0.11 */ - -/* BEGIN: Cython Metadata -{ - "distutils": { - "depends": [], - "extra_compile_args": [ - "-O3" - ], - "name": "jcvi.formats.cblast", - "sources": [ - "src/jcvi/formats/cblast.pyx" - ] - }, - "module_name": "jcvi.formats.cblast" -} -END: Cython Metadata */ - -#ifndef PY_SSIZE_T_CLEAN -#define PY_SSIZE_T_CLEAN -#endif /* PY_SSIZE_T_CLEAN */ -#if defined(CYTHON_LIMITED_API) && 0 - #ifndef Py_LIMITED_API - #if CYTHON_LIMITED_API+0 > 0x03030000 - #define Py_LIMITED_API CYTHON_LIMITED_API - #else - #define Py_LIMITED_API 0x03030000 - #endif - #endif -#endif - -#include "Python.h" -#ifndef Py_PYTHON_H - #error Python headers needed to compile C extensions, please install development version of Python. -#elif PY_VERSION_HEX < 0x02070000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) - #error Cython requires Python 2.7+ or Python 3.3+. 
-#else -#if defined(CYTHON_LIMITED_API) && CYTHON_LIMITED_API -#define __PYX_EXTRA_ABI_MODULE_NAME "limited" -#else -#define __PYX_EXTRA_ABI_MODULE_NAME "" -#endif -#define CYTHON_ABI "3_0_11" __PYX_EXTRA_ABI_MODULE_NAME -#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI -#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "." -#define CYTHON_HEX_VERSION 0x03000BF0 -#define CYTHON_FUTURE_DIVISION 0 -#include -#ifndef offsetof - #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) -#endif -#if !defined(_WIN32) && !defined(WIN32) && !defined(MS_WINDOWS) - #ifndef __stdcall - #define __stdcall - #endif - #ifndef __cdecl - #define __cdecl - #endif - #ifndef __fastcall - #define __fastcall - #endif -#endif -#ifndef DL_IMPORT - #define DL_IMPORT(t) t -#endif -#ifndef DL_EXPORT - #define DL_EXPORT(t) t -#endif -#define __PYX_COMMA , -#ifndef HAVE_LONG_LONG - #define HAVE_LONG_LONG -#endif -#ifndef PY_LONG_LONG - #define PY_LONG_LONG LONG_LONG -#endif -#ifndef Py_HUGE_VAL - #define Py_HUGE_VAL HUGE_VAL -#endif -#define __PYX_LIMITED_VERSION_HEX PY_VERSION_HEX -#if defined(GRAALVM_PYTHON) - /* For very preliminary testing purposes. Most variables are set the same as PyPy. 
- The existence of this section does not imply that anything works or is even tested */ - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 1 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #undef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 0 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #undef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 1 - #undef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 0 - #undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) - #endif - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #undef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif - 
#undef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 -#elif defined(PYPY_VERSION) - #define CYTHON_COMPILING_IN_PYPY 1 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #ifndef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #endif - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 0 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #undef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 1 - #undef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 0 - #undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) - #endif - #if PY_VERSION_HEX < 0x03090000 - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #undef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00) - #undef 
CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif - #undef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 -#elif defined(CYTHON_LIMITED_API) - #ifdef Py_LIMITED_API - #undef __PYX_LIMITED_VERSION_HEX - #define __PYX_LIMITED_VERSION_HEX Py_LIMITED_API - #endif - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 1 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_CLINE_IN_TRACEBACK - #define CYTHON_CLINE_IN_TRACEBACK 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #undef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 1 - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 0 - #ifndef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #endif - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #undef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 0 - #undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS 1 - #endif - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #undef CYTHON_USE_MODULE_STATE - #define 
CYTHON_USE_MODULE_STATE 1 - #ifndef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #endif - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif - #undef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 -#elif defined(Py_GIL_DISABLED) || defined(Py_NOGIL) - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 1 - #ifndef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 1 - #endif - #ifndef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #endif - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #ifndef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #ifndef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #ifndef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 1 - #endif - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #ifndef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 1 - #endif - #ifndef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 1 - #endif - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #ifndef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 1 - #endif - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS 1 - #endif - #ifndef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #ifndef CYTHON_USE_MODULE_STATE - #define 
CYTHON_USE_MODULE_STATE 0 - #endif - #ifndef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 1 - #endif - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 - #endif - #ifndef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 0 - #endif -#else - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 1 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #ifndef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 1 - #endif - #ifndef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #endif - #ifndef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 1 - #endif - #if PY_MAJOR_VERSION < 3 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #ifndef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 1 - #endif - #ifndef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 1 - #endif - #ifndef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 1 - #endif - #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #elif !defined(CYTHON_USE_UNICODE_WRITER) - #define CYTHON_USE_UNICODE_WRITER 1 - #endif - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #ifndef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 1 - #endif - #ifndef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 1 - #endif - #ifndef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 1 - #endif - #ifndef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL (PY_MAJOR_VERSION < 3 || PY_VERSION_HEX >= 0x03060000 && PY_VERSION_HEX < 0x030C00A6) - #endif - 
#ifndef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL (PY_VERSION_HEX >= 0x030700A1) - #endif - #ifndef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 1 - #endif - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS 1 - #endif - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #ifndef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #endif - #if PY_VERSION_HEX < 0x030400a1 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #elif !defined(CYTHON_USE_TP_FINALIZE) - #define CYTHON_USE_TP_FINALIZE 1 - #endif - #if PY_VERSION_HEX < 0x030600B1 - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #elif !defined(CYTHON_USE_DICT_VERSIONS) - #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX < 0x030C00A5) - #endif - #if PY_VERSION_HEX < 0x030700A3 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #elif !defined(CYTHON_USE_EXC_INFO_STACK) - #define CYTHON_USE_EXC_INFO_STACK 1 - #endif - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 - #endif - #ifndef CYTHON_USE_FREELISTS - #define CYTHON_USE_FREELISTS 1 - #endif -#endif -#if !defined(CYTHON_FAST_PYCCALL) -#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) -#endif -#if !defined(CYTHON_VECTORCALL) -#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1) -#endif -#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1) -#if CYTHON_USE_PYLONG_INTERNALS - #if PY_MAJOR_VERSION < 3 - #include "longintrepr.h" - #endif - #undef SHIFT - #undef BASE - #undef MASK - #ifdef SIZEOF_VOID_P - enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; - #endif -#endif -#ifndef __has_attribute - #define __has_attribute(x) 0 -#endif -#ifndef 
__has_cpp_attribute - #define __has_cpp_attribute(x) 0 -#endif -#ifndef CYTHON_RESTRICT - #if defined(__GNUC__) - #define CYTHON_RESTRICT __restrict__ - #elif defined(_MSC_VER) && _MSC_VER >= 1400 - #define CYTHON_RESTRICT __restrict - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define CYTHON_RESTRICT restrict - #else - #define CYTHON_RESTRICT - #endif -#endif -#ifndef CYTHON_UNUSED - #if defined(__cplusplus) - /* for clang __has_cpp_attribute(maybe_unused) is true even before C++17 - * but leads to warnings with -pedantic, since it is a C++17 feature */ - #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) - #if __has_cpp_attribute(maybe_unused) - #define CYTHON_UNUSED [[maybe_unused]] - #endif - #endif - #endif -#endif -#ifndef CYTHON_UNUSED -# if defined(__GNUC__) -# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) -# define CYTHON_UNUSED __attribute__ ((__unused__)) -# else -# define CYTHON_UNUSED -# endif -# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) -# define CYTHON_UNUSED __attribute__ ((__unused__)) -# else -# define CYTHON_UNUSED -# endif -#endif -#ifndef CYTHON_UNUSED_VAR -# if defined(__cplusplus) - template void CYTHON_UNUSED_VAR( const T& ) { } -# else -# define CYTHON_UNUSED_VAR(x) (void)(x) -# endif -#endif -#ifndef CYTHON_MAYBE_UNUSED_VAR - #define CYTHON_MAYBE_UNUSED_VAR(x) CYTHON_UNUSED_VAR(x) -#endif -#ifndef CYTHON_NCP_UNUSED -# if CYTHON_COMPILING_IN_CPYTHON -# define CYTHON_NCP_UNUSED -# else -# define CYTHON_NCP_UNUSED CYTHON_UNUSED -# endif -#endif -#ifndef CYTHON_USE_CPP_STD_MOVE - #if defined(__cplusplus) && (\ - __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1600)) - #define CYTHON_USE_CPP_STD_MOVE 1 - #else - #define CYTHON_USE_CPP_STD_MOVE 0 - #endif -#endif -#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) -#ifdef _MSC_VER - #ifndef _MSC_STDINT_H_ - #if _MSC_VER < 
1300 - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - #else - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; - #endif - #endif - #if _MSC_VER < 1300 - #ifdef _WIN64 - typedef unsigned long long __pyx_uintptr_t; - #else - typedef unsigned int __pyx_uintptr_t; - #endif - #else - #ifdef _WIN64 - typedef unsigned __int64 __pyx_uintptr_t; - #else - typedef unsigned __int32 __pyx_uintptr_t; - #endif - #endif -#else - #include - typedef uintptr_t __pyx_uintptr_t; -#endif -#ifndef CYTHON_FALLTHROUGH - #if defined(__cplusplus) - /* for clang __has_cpp_attribute(fallthrough) is true even before C++17 - * but leads to warnings with -pedantic, since it is a C++17 feature */ - #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) - #if __has_cpp_attribute(fallthrough) - #define CYTHON_FALLTHROUGH [[fallthrough]] - #endif - #endif - #ifndef CYTHON_FALLTHROUGH - #if __has_cpp_attribute(clang::fallthrough) - #define CYTHON_FALLTHROUGH [[clang::fallthrough]] - #elif __has_cpp_attribute(gnu::fallthrough) - #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] - #endif - #endif - #endif - #ifndef CYTHON_FALLTHROUGH - #if __has_attribute(fallthrough) - #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) - #else - #define CYTHON_FALLTHROUGH - #endif - #endif - #if defined(__clang__) && defined(__apple_build_version__) - #if __apple_build_version__ < 7000000 - #undef CYTHON_FALLTHROUGH - #define CYTHON_FALLTHROUGH - #endif - #endif -#endif -#ifdef __cplusplus - template - struct __PYX_IS_UNSIGNED_IMPL {static const bool value = T(0) < T(-1);}; - #define __PYX_IS_UNSIGNED(type) (__PYX_IS_UNSIGNED_IMPL::value) -#else - #define __PYX_IS_UNSIGNED(type) (((type)-1) > 0) -#endif -#if CYTHON_COMPILING_IN_PYPY == 1 - #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x030A0000) -#else - #define __PYX_NEED_TP_PRINT_SLOT 
(PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000) -#endif -#define __PYX_REINTERPRET_FUNCION(func_pointer, other_pointer) ((func_pointer)(void(*)(void))(other_pointer)) - -#ifndef CYTHON_INLINE - #if defined(__clang__) - #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) - #elif defined(__GNUC__) - #define CYTHON_INLINE __inline__ - #elif defined(_MSC_VER) - #define CYTHON_INLINE __inline - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define CYTHON_INLINE inline - #else - #define CYTHON_INLINE - #endif -#endif - -#define __PYX_BUILD_PY_SSIZE_T "n" -#define CYTHON_FORMAT_SSIZE_T "z" -#if PY_MAJOR_VERSION < 3 - #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" - #define __Pyx_DefaultClassType PyClass_Type - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#else - #define __Pyx_BUILTIN_MODULE_NAME "builtins" - #define __Pyx_DefaultClassType PyType_Type -#if CYTHON_COMPILING_IN_LIMITED_API - static CYTHON_INLINE PyObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, - PyObject *code, PyObject *c, PyObject* n, PyObject *v, - PyObject *fv, PyObject *cell, PyObject* fn, - PyObject *name, int fline, PyObject *lnos) { - PyObject *exception_table = NULL; - PyObject *types_module=NULL, *code_type=NULL, *result=NULL; - #if __PYX_LIMITED_VERSION_HEX < 0x030B0000 - PyObject *version_info; - PyObject *py_minor_version = NULL; - #endif - long minor_version = 0; - PyObject *type, *value, *traceback; - PyErr_Fetch(&type, &value, &traceback); - #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000 - minor_version = 11; - #else - if (!(version_info = PySys_GetObject("version_info"))) goto end; - if (!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end; - minor_version = PyLong_AsLong(py_minor_version); - Py_DECREF(py_minor_version); - if (minor_version == -1 && PyErr_Occurred()) goto end; - #endif - if 
(!(types_module = PyImport_ImportModule("types"))) goto end; - if (!(code_type = PyObject_GetAttrString(types_module, "CodeType"))) goto end; - if (minor_version <= 7) { - (void)p; - result = PyObject_CallFunction(code_type, "iiiiiOOOOOOiOO", a, k, l, s, f, code, - c, n, v, fn, name, fline, lnos, fv, cell); - } else if (minor_version <= 10) { - result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOiOO", a,p, k, l, s, f, code, - c, n, v, fn, name, fline, lnos, fv, cell); - } else { - if (!(exception_table = PyBytes_FromStringAndSize(NULL, 0))) goto end; - result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOOiOO", a,p, k, l, s, f, code, - c, n, v, fn, name, name, fline, lnos, exception_table, fv, cell); - } - end: - Py_XDECREF(code_type); - Py_XDECREF(exception_table); - Py_XDECREF(types_module); - if (type) { - PyErr_Restore(type, value, traceback); - } - return result; - } - #ifndef CO_OPTIMIZED - #define CO_OPTIMIZED 0x0001 - #endif - #ifndef CO_NEWLOCALS - #define CO_NEWLOCALS 0x0002 - #endif - #ifndef CO_VARARGS - #define CO_VARARGS 0x0004 - #endif - #ifndef CO_VARKEYWORDS - #define CO_VARKEYWORDS 0x0008 - #endif - #ifndef CO_ASYNC_GENERATOR - #define CO_ASYNC_GENERATOR 0x0200 - #endif - #ifndef CO_GENERATOR - #define CO_GENERATOR 0x0020 - #endif - #ifndef CO_COROUTINE - #define CO_COROUTINE 0x0080 - #endif -#elif PY_VERSION_HEX >= 0x030B0000 - static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, - PyObject *code, PyObject *c, PyObject* n, PyObject *v, - PyObject *fv, PyObject *cell, PyObject* fn, - PyObject *name, int fline, PyObject *lnos) { - PyCodeObject *result; - PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); - if (!empty_bytes) return NULL; - result = - #if PY_VERSION_HEX >= 0x030C0000 - PyUnstable_Code_NewWithPosOnlyArgs - #else - PyCode_NewWithPosOnlyArgs - #endif - (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, empty_bytes); - Py_DECREF(empty_bytes); - return result; - } 
-#elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_NewWithPosOnlyArgs(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#else - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#endif -#endif -#if PY_VERSION_HEX >= 0x030900A4 || defined(Py_IS_TYPE) - #define __Pyx_IS_TYPE(ob, type) Py_IS_TYPE(ob, type) -#else - #define __Pyx_IS_TYPE(ob, type) (((const PyObject*)ob)->ob_type == (type)) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_Is) - #define __Pyx_Py_Is(x, y) Py_Is(x, y) -#else - #define __Pyx_Py_Is(x, y) ((x) == (y)) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsNone) - #define __Pyx_Py_IsNone(ob) Py_IsNone(ob) -#else - #define __Pyx_Py_IsNone(ob) __Pyx_Py_Is((ob), Py_None) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsTrue) - #define __Pyx_Py_IsTrue(ob) Py_IsTrue(ob) -#else - #define __Pyx_Py_IsTrue(ob) __Pyx_Py_Is((ob), Py_True) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsFalse) - #define __Pyx_Py_IsFalse(ob) Py_IsFalse(ob) -#else - #define __Pyx_Py_IsFalse(ob) __Pyx_Py_Is((ob), Py_False) -#endif -#define __Pyx_NoneAsNull(obj) (__Pyx_Py_IsNone(obj) ? 
NULL : (obj)) -#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o) -#else - #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o) -#endif -#ifndef CO_COROUTINE - #define CO_COROUTINE 0x80 -#endif -#ifndef CO_ASYNC_GENERATOR - #define CO_ASYNC_GENERATOR 0x200 -#endif -#ifndef Py_TPFLAGS_CHECKTYPES - #define Py_TPFLAGS_CHECKTYPES 0 -#endif -#ifndef Py_TPFLAGS_HAVE_INDEX - #define Py_TPFLAGS_HAVE_INDEX 0 -#endif -#ifndef Py_TPFLAGS_HAVE_NEWBUFFER - #define Py_TPFLAGS_HAVE_NEWBUFFER 0 -#endif -#ifndef Py_TPFLAGS_HAVE_FINALIZE - #define Py_TPFLAGS_HAVE_FINALIZE 0 -#endif -#ifndef Py_TPFLAGS_SEQUENCE - #define Py_TPFLAGS_SEQUENCE 0 -#endif -#ifndef Py_TPFLAGS_MAPPING - #define Py_TPFLAGS_MAPPING 0 -#endif -#ifndef METH_STACKLESS - #define METH_STACKLESS 0 -#endif -#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) - #ifndef METH_FASTCALL - #define METH_FASTCALL 0x80 - #endif - typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); - typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, - Py_ssize_t nargs, PyObject *kwnames); -#else - #if PY_VERSION_HEX >= 0x030d00A4 - # define __Pyx_PyCFunctionFast PyCFunctionFast - # define __Pyx_PyCFunctionFastWithKeywords PyCFunctionFastWithKeywords - #else - # define __Pyx_PyCFunctionFast _PyCFunctionFast - # define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords - #endif -#endif -#if CYTHON_METH_FASTCALL - #define __Pyx_METH_FASTCALL METH_FASTCALL - #define __Pyx_PyCFunction_FastCall __Pyx_PyCFunctionFast - #define __Pyx_PyCFunction_FastCallWithKeywords __Pyx_PyCFunctionFastWithKeywords -#else - #define __Pyx_METH_FASTCALL METH_VARARGS - #define __Pyx_PyCFunction_FastCall PyCFunction - #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords -#endif -#if CYTHON_VECTORCALL - #define __pyx_vectorcallfunc vectorcallfunc - 
#define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET - #define __Pyx_PyVectorcall_NARGS(n) PyVectorcall_NARGS((size_t)(n)) -#elif CYTHON_BACKPORT_VECTORCALL - typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args, - size_t nargsf, PyObject *kwnames); - #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1)) - #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(((size_t)(n)) & ~__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)) -#else - #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET 0 - #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(n)) -#endif -#if PY_MAJOR_VERSION >= 0x030900B1 -#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_CheckExact(func) -#else -#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_Check(func) -#endif -#define __Pyx_CyOrPyCFunction_Check(func) PyCFunction_Check(func) -#if CYTHON_COMPILING_IN_CPYTHON -#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) (((PyCFunctionObject*)(func))->m_ml->ml_meth) -#elif !CYTHON_COMPILING_IN_LIMITED_API -#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) PyCFunction_GET_FUNCTION(func) -#endif -#if CYTHON_COMPILING_IN_CPYTHON -#define __Pyx_CyOrPyCFunction_GET_FLAGS(func) (((PyCFunctionObject*)(func))->m_ml->ml_flags) -static CYTHON_INLINE PyObject* __Pyx_CyOrPyCFunction_GET_SELF(PyObject *func) { - return (__Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_STATIC) ? 
NULL : ((PyCFunctionObject*)func)->m_self; -} -#endif -static CYTHON_INLINE int __Pyx__IsSameCFunction(PyObject *func, void *cfunc) { -#if CYTHON_COMPILING_IN_LIMITED_API - return PyCFunction_Check(func) && PyCFunction_GetFunction(func) == (PyCFunction) cfunc; -#else - return PyCFunction_Check(func) && PyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; -#endif -} -#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCFunction(func, cfunc) -#if __PYX_LIMITED_VERSION_HEX < 0x030900B1 - #define __Pyx_PyType_FromModuleAndSpec(m, s, b) ((void)m, PyType_FromSpecWithBases(s, b)) - typedef PyObject *(*__Pyx_PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, size_t, PyObject *); -#else - #define __Pyx_PyType_FromModuleAndSpec(m, s, b) PyType_FromModuleAndSpec(m, s, b) - #define __Pyx_PyCMethod PyCMethod -#endif -#ifndef METH_METHOD - #define METH_METHOD 0x200 -#endif -#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) - #define PyObject_Malloc(s) PyMem_Malloc(s) - #define PyObject_Free(p) PyMem_Free(p) - #define PyObject_Realloc(p) PyMem_Realloc(p) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) - #define __Pyx_PyFrame_SetLineNumber(frame, lineno) -#else - #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) - #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyThreadState_Current PyThreadState_Get() -#elif !CYTHON_FAST_THREAD_STATE - #define __Pyx_PyThreadState_Current PyThreadState_GET() -#elif PY_VERSION_HEX >= 0x030d00A1 - #define __Pyx_PyThreadState_Current PyThreadState_GetUnchecked() -#elif PY_VERSION_HEX >= 0x03060000 - #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() -#elif PY_VERSION_HEX >= 0x03000000 - #define __Pyx_PyThreadState_Current PyThreadState_GET() -#else - #define __Pyx_PyThreadState_Current _PyThreadState_Current -#endif -#if 
CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_INLINE void *__Pyx_PyModule_GetState(PyObject *op) -{ - void *result; - result = PyModule_GetState(op); - if (!result) - Py_FatalError("Couldn't find the module state"); - return result; -} -#endif -#define __Pyx_PyObject_GetSlot(obj, name, func_ctype) __Pyx_PyType_GetSlot(Py_TYPE(obj), name, func_ctype) -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((func_ctype) PyType_GetSlot((type), Py_##name)) -#else - #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((type)->name) -#endif -#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) -#include "pythread.h" -#define Py_tss_NEEDS_INIT 0 -typedef int Py_tss_t; -static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { - *key = PyThread_create_key(); - return 0; -} -static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { - Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); - *key = Py_tss_NEEDS_INIT; - return key; -} -static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { - PyObject_Free(key); -} -static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { - return *key != Py_tss_NEEDS_INIT; -} -static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { - PyThread_delete_key(*key); - *key = Py_tss_NEEDS_INIT; -} -static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { - return PyThread_set_key_value(*key, value); -} -static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { - return PyThread_get_key_value(*key); -} -#endif -#if PY_MAJOR_VERSION < 3 - #if CYTHON_COMPILING_IN_PYPY - #if PYPY_VERSION_NUM < 0x07030600 - #if defined(__cplusplus) && __cplusplus >= 201402L - [[deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")]] - #elif defined(__GNUC__) || defined(__clang__) - __attribute__ ((__deprecated__("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6"))) - 
#elif defined(_MSC_VER) - __declspec(deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")) - #endif - static CYTHON_INLINE int PyGILState_Check(void) { - return 0; - } - #else // PYPY_VERSION_NUM < 0x07030600 - #endif // PYPY_VERSION_NUM < 0x07030600 - #else - static CYTHON_INLINE int PyGILState_Check(void) { - PyThreadState * tstate = _PyThreadState_Current; - return tstate && (tstate == PyGILState_GetThisThreadState()); - } - #endif -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 || defined(_PyDict_NewPresized) -#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) -#else -#define __Pyx_PyDict_NewPresized(n) PyDict_New() -#endif -#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION - #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) -#else - #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX > 0x030600B4 && PY_VERSION_HEX < 0x030d0000 && CYTHON_USE_UNICODE_INTERNALS -#define __Pyx_PyDict_GetItemStrWithError(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) -static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStr(PyObject *dict, PyObject *name) { - PyObject *res = __Pyx_PyDict_GetItemStrWithError(dict, name); - if (res == NULL) PyErr_Clear(); - return res; -} -#elif PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000) -#define __Pyx_PyDict_GetItemStrWithError PyDict_GetItemWithError -#define __Pyx_PyDict_GetItemStr PyDict_GetItem -#else -static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStrWithError(PyObject *dict, PyObject *name) { -#if CYTHON_COMPILING_IN_PYPY - return PyDict_GetItem(dict, name); -#else - PyDictEntry *ep; - PyDictObject *mp = (PyDictObject*) dict; - long hash = 
((PyStringObject *) name)->ob_shash; - assert(hash != -1); - ep = (mp->ma_lookup)(mp, name, hash); - if (ep == NULL) { - return NULL; - } - return ep->me_value; -#endif -} -#define __Pyx_PyDict_GetItemStr PyDict_GetItem -#endif -#if CYTHON_USE_TYPE_SLOTS - #define __Pyx_PyType_GetFlags(tp) (((PyTypeObject *)tp)->tp_flags) - #define __Pyx_PyType_HasFeature(type, feature) ((__Pyx_PyType_GetFlags(type) & (feature)) != 0) - #define __Pyx_PyObject_GetIterNextFunc(obj) (Py_TYPE(obj)->tp_iternext) -#else - #define __Pyx_PyType_GetFlags(tp) (PyType_GetFlags((PyTypeObject *)tp)) - #define __Pyx_PyType_HasFeature(type, feature) PyType_HasFeature(type, feature) - #define __Pyx_PyObject_GetIterNextFunc(obj) PyIter_Next -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_SetItemOnTypeDict(tp, k, v) PyObject_GenericSetAttr((PyObject*)tp, k, v) -#else - #define __Pyx_SetItemOnTypeDict(tp, k, v) PyDict_SetItem(tp->tp_dict, k, v) -#endif -#if CYTHON_USE_TYPE_SPECS && PY_VERSION_HEX >= 0x03080000 -#define __Pyx_PyHeapTypeObject_GC_Del(obj) {\ - PyTypeObject *type = Py_TYPE((PyObject*)obj);\ - assert(__Pyx_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE));\ - PyObject_GC_Del(obj);\ - Py_DECREF(type);\ -} -#else -#define __Pyx_PyHeapTypeObject_GC_Del(obj) PyObject_GC_Del(obj) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define CYTHON_PEP393_ENABLED 1 - #define __Pyx_PyUnicode_READY(op) (0) - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GetLength(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_ReadChar(u, i) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((void)u, 1114111U) - #define __Pyx_PyUnicode_KIND(u) ((void)u, (0)) - #define __Pyx_PyUnicode_DATA(u) ((void*)u) - #define __Pyx_PyUnicode_READ(k, d, i) ((void)k, PyUnicode_ReadChar((PyObject*)(d), i)) - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GetLength(u)) -#elif PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) - #define CYTHON_PEP393_ENABLED 1 - #if PY_VERSION_HEX >= 0x030C0000 - #define 
__Pyx_PyUnicode_READY(op) (0) - #else - #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ - 0 : _PyUnicode_Ready((PyObject *)(op))) - #endif - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) - #define __Pyx_PyUnicode_KIND(u) ((int)PyUnicode_KIND(u)) - #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) - #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) - #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, (Py_UCS4) ch) - #if PY_VERSION_HEX >= 0x030C0000 - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) - #else - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) - #else - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) - #endif - #endif -#else - #define CYTHON_PEP393_ENABLED 0 - #define PyUnicode_1BYTE_KIND 1 - #define PyUnicode_2BYTE_KIND 2 - #define PyUnicode_4BYTE_KIND 4 - #define __Pyx_PyUnicode_READY(op) (0) - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 
65535U : 1114111U) - #define __Pyx_PyUnicode_KIND(u) ((int)sizeof(Py_UNICODE)) - #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) - #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) - #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = (Py_UNICODE) ch) - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) -#endif -#if CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) - #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) -#else - #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) - #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ - PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) -#endif -#if CYTHON_COMPILING_IN_PYPY - #if !defined(PyUnicode_DecodeUnicodeEscape) - #define PyUnicode_DecodeUnicodeEscape(s, size, errors) PyUnicode_Decode(s, size, "unicode_escape", errors) - #endif - #if !defined(PyUnicode_Contains) || (PY_MAJOR_VERSION == 2 && PYPY_VERSION_NUM < 0x07030500) - #undef PyUnicode_Contains - #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) - #endif - #if !defined(PyByteArray_Check) - #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) - #endif - #if !defined(PyObject_Format) - #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) - #endif -#endif -#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) -#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? 
PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) -#if PY_MAJOR_VERSION >= 3 - #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) -#else - #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) -#endif -#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) - #define PyObject_ASCII(o) PyObject_Repr(o) -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyBaseString_Type PyUnicode_Type - #define PyStringObject PyUnicodeObject - #define PyString_Type PyUnicode_Type - #define PyString_Check PyUnicode_Check - #define PyString_CheckExact PyUnicode_CheckExact -#ifndef PyObject_Unicode - #define PyObject_Unicode PyObject_Str -#endif -#endif -#if PY_MAJOR_VERSION >= 3 - #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) - #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) -#else - #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) - #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) -#endif -#if CYTHON_COMPILING_IN_CPYTHON - #define __Pyx_PySequence_ListKeepNew(obj)\ - (likely(PyList_CheckExact(obj) && Py_REFCNT(obj) == 1) ? 
__Pyx_NewRef(obj) : PySequence_List(obj)) -#else - #define __Pyx_PySequence_ListKeepNew(obj) PySequence_List(obj) -#endif -#ifndef PySet_CheckExact - #define PySet_CheckExact(obj) __Pyx_IS_TYPE(obj, &PySet_Type) -#endif -#if PY_VERSION_HEX >= 0x030900A4 - #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) - #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) -#else - #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) - #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) -#endif -#if CYTHON_ASSUME_SAFE_MACROS - #define __Pyx_PySequence_ITEM(o, i) PySequence_ITEM(o, i) - #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) - #define __Pyx_PyTuple_SET_ITEM(o, i, v) (PyTuple_SET_ITEM(o, i, v), (0)) - #define __Pyx_PyList_SET_ITEM(o, i, v) (PyList_SET_ITEM(o, i, v), (0)) - #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_GET_SIZE(o) - #define __Pyx_PyList_GET_SIZE(o) PyList_GET_SIZE(o) - #define __Pyx_PySet_GET_SIZE(o) PySet_GET_SIZE(o) - #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_GET_SIZE(o) - #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_GET_SIZE(o) -#else - #define __Pyx_PySequence_ITEM(o, i) PySequence_GetItem(o, i) - #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) - #define __Pyx_PyTuple_SET_ITEM(o, i, v) PyTuple_SetItem(o, i, v) - #define __Pyx_PyList_SET_ITEM(o, i, v) PyList_SetItem(o, i, v) - #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_Size(o) - #define __Pyx_PyList_GET_SIZE(o) PyList_Size(o) - #define __Pyx_PySet_GET_SIZE(o) PySet_Size(o) - #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_Size(o) - #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_Size(o) -#endif -#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 - #define __Pyx_PyImport_AddModuleRef(name) PyImport_AddModuleRef(name) -#else - static CYTHON_INLINE PyObject *__Pyx_PyImport_AddModuleRef(const char *name) { - PyObject *module = PyImport_AddModule(name); - Py_XINCREF(module); - return module; - } -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyIntObject PyLongObject - #define 
PyInt_Type PyLong_Type - #define PyInt_Check(op) PyLong_Check(op) - #define PyInt_CheckExact(op) PyLong_CheckExact(op) - #define __Pyx_Py3Int_Check(op) PyLong_Check(op) - #define __Pyx_Py3Int_CheckExact(op) PyLong_CheckExact(op) - #define PyInt_FromString PyLong_FromString - #define PyInt_FromUnicode PyLong_FromUnicode - #define PyInt_FromLong PyLong_FromLong - #define PyInt_FromSize_t PyLong_FromSize_t - #define PyInt_FromSsize_t PyLong_FromSsize_t - #define PyInt_AsLong PyLong_AsLong - #define PyInt_AS_LONG PyLong_AS_LONG - #define PyInt_AsSsize_t PyLong_AsSsize_t - #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask - #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask - #define PyNumber_Int PyNumber_Long -#else - #define __Pyx_Py3Int_Check(op) (PyLong_Check(op) || PyInt_Check(op)) - #define __Pyx_Py3Int_CheckExact(op) (PyLong_CheckExact(op) || PyInt_CheckExact(op)) -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyBoolObject PyLongObject -#endif -#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY - #ifndef PyUnicode_InternFromString - #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) - #endif -#endif -#if PY_VERSION_HEX < 0x030200A4 - typedef long Py_hash_t; - #define __Pyx_PyInt_FromHash_t PyInt_FromLong - #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t -#else - #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t - #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t -#endif -#if CYTHON_USE_ASYNC_SLOTS - #if PY_VERSION_HEX >= 0x030500B1 - #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods - #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) - #else - #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) - #endif -#else - #define __Pyx_PyType_AsAsync(obj) NULL -#endif -#ifndef __Pyx_PyAsyncMethodsStruct - typedef struct { - unaryfunc am_await; - unaryfunc am_aiter; - unaryfunc am_anext; - } __Pyx_PyAsyncMethodsStruct; -#endif - -#if defined(_WIN32) || defined(WIN32) || 
defined(MS_WINDOWS) - #if !defined(_USE_MATH_DEFINES) - #define _USE_MATH_DEFINES - #endif -#endif -#include -#ifdef NAN -#define __PYX_NAN() ((float) NAN) -#else -static CYTHON_INLINE float __PYX_NAN() { - float value; - memset(&value, 0xFF, sizeof(value)); - return value; -} -#endif -#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) -#define __Pyx_truncl trunc -#else -#define __Pyx_truncl truncl -#endif - -#define __PYX_MARK_ERR_POS(f_index, lineno) \ - { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } -#define __PYX_ERR(f_index, lineno, Ln_error) \ - { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } - -#ifdef CYTHON_EXTERN_C - #undef __PYX_EXTERN_C - #define __PYX_EXTERN_C CYTHON_EXTERN_C -#elif defined(__PYX_EXTERN_C) - #ifdef _MSC_VER - #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") - #else - #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. 
- #endif -#else - #ifdef __cplusplus - #define __PYX_EXTERN_C extern "C" - #else - #define __PYX_EXTERN_C extern - #endif -#endif - -#define __PYX_HAVE__jcvi__formats__cblast -#define __PYX_HAVE_API__jcvi__formats__cblast -/* Early includes */ -#include -#include -#ifdef _OPENMP -#include -#endif /* _OPENMP */ - -#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) -#define CYTHON_WITHOUT_ASSERTIONS -#endif - -typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; - const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; - -#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 -#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0 -#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) -#define __PYX_DEFAULT_STRING_ENCODING "" -#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString -#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize -#define __Pyx_uchar_cast(c) ((unsigned char)c) -#define __Pyx_long_cast(x) ((long)x) -#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ - (sizeof(type) < sizeof(Py_ssize_t)) ||\ - (sizeof(type) > sizeof(Py_ssize_t) &&\ - likely(v < (type)PY_SSIZE_T_MAX ||\ - v == (type)PY_SSIZE_T_MAX) &&\ - (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ - v == (type)PY_SSIZE_T_MIN))) ||\ - (sizeof(type) == sizeof(Py_ssize_t) &&\ - (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ - v == (type)PY_SSIZE_T_MAX))) ) -static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { - return (size_t) i < (size_t) limit; -} -#if defined (__cplusplus) && __cplusplus >= 201103L - #include - #define __Pyx_sst_abs(value) std::abs(value) -#elif SIZEOF_INT >= SIZEOF_SIZE_T - #define __Pyx_sst_abs(value) abs(value) -#elif SIZEOF_LONG >= SIZEOF_SIZE_T - #define __Pyx_sst_abs(value) labs(value) -#elif defined (_MSC_VER) - #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) -#elif 
defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define __Pyx_sst_abs(value) llabs(value) -#elif defined (__GNUC__) - #define __Pyx_sst_abs(value) __builtin_llabs(value) -#else - #define __Pyx_sst_abs(value) ((value<0) ? -value : value) -#endif -static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s); -static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); -static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); -static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char*); -#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) -#define __Pyx_PyBytes_FromString PyBytes_FromString -#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); -#if PY_MAJOR_VERSION < 3 - #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString - #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize -#else - #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString - #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize -#endif -#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyObject_AsWritableString(s) ((char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsWritableSString(s) ((signed char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsSString(s) ((const 
signed char*) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) -#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) -#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) -#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) -#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) -#define __Pyx_PyUnicode_FromOrdinal(o) PyUnicode_FromOrdinal((int)o) -#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode -#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) -#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) -static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); -static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); -static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); -static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); -#define __Pyx_PySequence_Tuple(obj)\ - (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) -static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); -static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); -static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); -#if CYTHON_ASSUME_SAFE_MACROS -#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) -#else -#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) -#endif -#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) -#if PY_MAJOR_VERSION >= 3 -#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) -#else -#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? 
__Pyx_NewRef(x) : PyNumber_Int(x)) -#endif -#if CYTHON_USE_PYLONG_INTERNALS - #if PY_VERSION_HEX >= 0x030C00A7 - #ifndef _PyLong_SIGN_MASK - #define _PyLong_SIGN_MASK 3 - #endif - #ifndef _PyLong_NON_SIZE_BITS - #define _PyLong_NON_SIZE_BITS 3 - #endif - #define __Pyx_PyLong_Sign(x) (((PyLongObject*)x)->long_value.lv_tag & _PyLong_SIGN_MASK) - #define __Pyx_PyLong_IsNeg(x) ((__Pyx_PyLong_Sign(x) & 2) != 0) - #define __Pyx_PyLong_IsNonNeg(x) (!__Pyx_PyLong_IsNeg(x)) - #define __Pyx_PyLong_IsZero(x) (__Pyx_PyLong_Sign(x) & 1) - #define __Pyx_PyLong_IsPos(x) (__Pyx_PyLong_Sign(x) == 0) - #define __Pyx_PyLong_CompactValueUnsigned(x) (__Pyx_PyLong_Digits(x)[0]) - #define __Pyx_PyLong_DigitCount(x) ((Py_ssize_t) (((PyLongObject*)x)->long_value.lv_tag >> _PyLong_NON_SIZE_BITS)) - #define __Pyx_PyLong_SignedDigitCount(x)\ - ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * __Pyx_PyLong_DigitCount(x)) - #if defined(PyUnstable_Long_IsCompact) && defined(PyUnstable_Long_CompactValue) - #define __Pyx_PyLong_IsCompact(x) PyUnstable_Long_IsCompact((PyLongObject*) x) - #define __Pyx_PyLong_CompactValue(x) PyUnstable_Long_CompactValue((PyLongObject*) x) - #else - #define __Pyx_PyLong_IsCompact(x) (((PyLongObject*)x)->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS)) - #define __Pyx_PyLong_CompactValue(x) ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * (Py_ssize_t) __Pyx_PyLong_Digits(x)[0]) - #endif - typedef Py_ssize_t __Pyx_compact_pylong; - typedef size_t __Pyx_compact_upylong; - #else - #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0) - #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0) - #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0) - #define __Pyx_PyLong_IsPos(x) (Py_SIZE(x) > 0) - #define __Pyx_PyLong_CompactValueUnsigned(x) ((Py_SIZE(x) == 0) ? 
0 : __Pyx_PyLong_Digits(x)[0]) - #define __Pyx_PyLong_DigitCount(x) __Pyx_sst_abs(Py_SIZE(x)) - #define __Pyx_PyLong_SignedDigitCount(x) Py_SIZE(x) - #define __Pyx_PyLong_IsCompact(x) (Py_SIZE(x) == 0 || Py_SIZE(x) == 1 || Py_SIZE(x) == -1) - #define __Pyx_PyLong_CompactValue(x)\ - ((Py_SIZE(x) == 0) ? (sdigit) 0 : ((Py_SIZE(x) < 0) ? -(sdigit)__Pyx_PyLong_Digits(x)[0] : (sdigit)__Pyx_PyLong_Digits(x)[0])) - typedef sdigit __Pyx_compact_pylong; - typedef digit __Pyx_compact_upylong; - #endif - #if PY_VERSION_HEX >= 0x030C00A5 - #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit) - #else - #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->ob_digit) - #endif -#endif -#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII -#include -static int __Pyx_sys_getdefaultencoding_not_ascii; -static int __Pyx_init_sys_getdefaultencoding_params(void) { - PyObject* sys; - PyObject* default_encoding = NULL; - PyObject* ascii_chars_u = NULL; - PyObject* ascii_chars_b = NULL; - const char* default_encoding_c; - sys = PyImport_ImportModule("sys"); - if (!sys) goto bad; - default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); - Py_DECREF(sys); - if (!default_encoding) goto bad; - default_encoding_c = PyBytes_AsString(default_encoding); - if (!default_encoding_c) goto bad; - if (strcmp(default_encoding_c, "ascii") == 0) { - __Pyx_sys_getdefaultencoding_not_ascii = 0; - } else { - char ascii_chars[128]; - int c; - for (c = 0; c < 128; c++) { - ascii_chars[c] = (char) c; - } - __Pyx_sys_getdefaultencoding_not_ascii = 1; - ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); - if (!ascii_chars_u) goto bad; - ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); - if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { - PyErr_Format( - PyExc_ValueError, - "This module compiled with c_string_encoding=ascii, but default 
encoding '%.200s' is not a superset of ascii.", - default_encoding_c); - goto bad; - } - Py_DECREF(ascii_chars_u); - Py_DECREF(ascii_chars_b); - } - Py_DECREF(default_encoding); - return 0; -bad: - Py_XDECREF(default_encoding); - Py_XDECREF(ascii_chars_u); - Py_XDECREF(ascii_chars_b); - return -1; -} -#endif -#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 -#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) -#else -#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) -#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT -#include -static char* __PYX_DEFAULT_STRING_ENCODING; -static int __Pyx_init_sys_getdefaultencoding_params(void) { - PyObject* sys; - PyObject* default_encoding = NULL; - char* default_encoding_c; - sys = PyImport_ImportModule("sys"); - if (!sys) goto bad; - default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); - Py_DECREF(sys); - if (!default_encoding) goto bad; - default_encoding_c = PyBytes_AsString(default_encoding); - if (!default_encoding_c) goto bad; - __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); - if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; - strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); - Py_DECREF(default_encoding); - return 0; -bad: - Py_XDECREF(default_encoding); - return -1; -} -#endif -#endif - - -/* Test for GCC > 2.95 */ -#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) - #define likely(x) __builtin_expect(!!(x), 1) - #define unlikely(x) __builtin_expect(!!(x), 0) -#else /* !__GNUC__ or GCC < 2.95 */ - #define likely(x) (x) - #define unlikely(x) (x) -#endif /* __GNUC__ */ -static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } - -#if !CYTHON_USE_MODULE_STATE -static PyObject *__pyx_m = NULL; -#endif -static int __pyx_lineno; -static int __pyx_clineno = 0; 
-static const char * __pyx_cfilenm = __FILE__; -static const char *__pyx_filename; - -/* #### Code section: filename_table ### */ - -static const char *__pyx_f[] = { - "src/jcvi/formats/cblast.pyx", - "", -}; -/* #### Code section: utility_code_proto_before_types ### */ -/* ForceInitThreads.proto */ -#ifndef __PYX_FORCE_INIT_THREADS - #define __PYX_FORCE_INIT_THREADS 0 -#endif - -/* #### Code section: numeric_typedefs ### */ -/* #### Code section: complex_type_declarations ### */ -/* #### Code section: type_declarations ### */ - -/*--- Type declarations ---*/ -struct __pyx_obj_4jcvi_7formats_6cblast_Blast; -struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine; -struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; -struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; - -/* "jcvi/formats/cblast.pyx":21 - * - * - * cdef class Blast: # <<<<<<<<<<<<<< - * cdef: - * FILE* fh - */ -struct __pyx_obj_4jcvi_7formats_6cblast_Blast { - PyObject_HEAD - FILE *fh; - PyObject *filename; -}; - - -/* "jcvi/formats/cblast.pyx":66 - * - * - * cdef class BlastLine: # <<<<<<<<<<<<<< - * """ - * Given a string of tab-delimited (-m 8) blast output, parse it and create - */ -struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine { - PyObject_HEAD - char _query[0x80]; - char _subject[0x80]; - int hitlen; - int nmismatch; - int ngaps; - int qstart; - int qstop; - int sstart; - int sstop; - float pctid; - float score; - double evalue; - PyObject *qseqid; - PyObject *sseqid; - int qi; - int si; - char orientation; -}; - - -/* "jcvi/formats/cblast.pyx":172 - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - * b = "\t".join(str(x) for x in args) # <<<<<<<<<<<<<< - * return BlastLine(b) - * - */ -struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr { - PyObject_HEAD - PyObject *__pyx_genexpr_arg_0; - PyObject *__pyx_v_x; - PyObject 
*__pyx_t_0; - Py_ssize_t __pyx_t_1; -}; - - -/* "cfunc.to_py":66 - * - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - */ -struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc { - PyObject_HEAD - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*__pyx_v_f)(char *, char *, float, int, int, int, int, int, int, int, float, float); -}; - -/* #### Code section: utility_code_proto ### */ - -/* --- Runtime support code (head) --- */ -/* Refnanny.proto */ -#ifndef CYTHON_REFNANNY - #define CYTHON_REFNANNY 0 -#endif -#if CYTHON_REFNANNY - typedef struct { - void (*INCREF)(void*, PyObject*, Py_ssize_t); - void (*DECREF)(void*, PyObject*, Py_ssize_t); - void (*GOTREF)(void*, PyObject*, Py_ssize_t); - void (*GIVEREF)(void*, PyObject*, Py_ssize_t); - void* (*SetupContext)(const char*, Py_ssize_t, const char*); - void (*FinishContext)(void**); - } __Pyx_RefNannyAPIStruct; - static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; - static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); - #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; -#ifdef WITH_THREAD - #define 
__Pyx_RefNannySetupContext(name, acquire_gil)\ - if (acquire_gil) {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ - PyGILState_Release(__pyx_gilstate_save);\ - } else {\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ - } - #define __Pyx_RefNannyFinishContextNogil() {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __Pyx_RefNannyFinishContext();\ - PyGILState_Release(__pyx_gilstate_save);\ - } -#else - #define __Pyx_RefNannySetupContext(name, acquire_gil)\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__)) - #define __Pyx_RefNannyFinishContextNogil() __Pyx_RefNannyFinishContext() -#endif - #define __Pyx_RefNannyFinishContextNogil() {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __Pyx_RefNannyFinishContext();\ - PyGILState_Release(__pyx_gilstate_save);\ - } - #define __Pyx_RefNannyFinishContext()\ - __Pyx_RefNanny->FinishContext(&__pyx_refnanny) - #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_XINCREF(r) do { if((r) == NULL); else {__Pyx_INCREF(r); }} while(0) - #define __Pyx_XDECREF(r) do { if((r) == NULL); else {__Pyx_DECREF(r); }} while(0) - #define __Pyx_XGOTREF(r) do { if((r) == NULL); else {__Pyx_GOTREF(r); }} while(0) - #define __Pyx_XGIVEREF(r) do { if((r) == NULL); else {__Pyx_GIVEREF(r);}} while(0) -#else - #define __Pyx_RefNannyDeclarations - #define __Pyx_RefNannySetupContext(name, acquire_gil) - #define __Pyx_RefNannyFinishContextNogil() - #define __Pyx_RefNannyFinishContext() - #define __Pyx_INCREF(r) Py_INCREF(r) - 
#define __Pyx_DECREF(r) Py_DECREF(r) - #define __Pyx_GOTREF(r) - #define __Pyx_GIVEREF(r) - #define __Pyx_XINCREF(r) Py_XINCREF(r) - #define __Pyx_XDECREF(r) Py_XDECREF(r) - #define __Pyx_XGOTREF(r) - #define __Pyx_XGIVEREF(r) -#endif -#define __Pyx_Py_XDECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; Py_XDECREF(tmp);\ - } while (0) -#define __Pyx_XDECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; __Pyx_XDECREF(tmp);\ - } while (0) -#define __Pyx_DECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; __Pyx_DECREF(tmp);\ - } while (0) -#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) -#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) - -/* PyErrExceptionMatches.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) -static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); -#else -#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) -#endif - -/* PyThreadStateGet.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; -#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; -#if PY_VERSION_HEX >= 0x030C00A6 -#define __Pyx_PyErr_Occurred() (__pyx_tstate->current_exception != NULL) -#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->current_exception ? 
(PyObject*) Py_TYPE(__pyx_tstate->current_exception) : (PyObject*) NULL) -#else -#define __Pyx_PyErr_Occurred() (__pyx_tstate->curexc_type != NULL) -#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->curexc_type) -#endif -#else -#define __Pyx_PyThreadState_declare -#define __Pyx_PyThreadState_assign -#define __Pyx_PyErr_Occurred() (PyErr_Occurred() != NULL) -#define __Pyx_PyErr_CurrentExceptionType() PyErr_Occurred() -#endif - -/* PyErrFetchRestore.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) -#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) -#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) -#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) -#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); -static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A6 -#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) -#else -#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) -#endif -#else -#define __Pyx_PyErr_Clear() PyErr_Clear() -#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) -#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) -#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) -#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetch(type, value, tb) 
PyErr_Fetch(type, value, tb) -#endif - -/* PyObjectGetAttrStr.proto */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); -#else -#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) -#endif - -/* PyObjectGetAttrStrNoError.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name); - -/* GetBuiltinName.proto */ -static PyObject *__Pyx_GetBuiltinName(PyObject *name); - -/* TupleAndListFromArray.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n); -static CYTHON_INLINE PyObject* __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n); -#endif - -/* IncludeStringH.proto */ -#include - -/* BytesEquals.proto */ -static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); - -/* UnicodeEquals.proto */ -static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); - -/* fastcall.proto */ -#if CYTHON_AVOID_BORROWED_REFS - #define __Pyx_Arg_VARARGS(args, i) PySequence_GetItem(args, i) -#elif CYTHON_ASSUME_SAFE_MACROS - #define __Pyx_Arg_VARARGS(args, i) PyTuple_GET_ITEM(args, i) -#else - #define __Pyx_Arg_VARARGS(args, i) PyTuple_GetItem(args, i) -#endif -#if CYTHON_AVOID_BORROWED_REFS - #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg) - #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg) -#else - #define __Pyx_Arg_NewRef_VARARGS(arg) arg - #define __Pyx_Arg_XDECREF_VARARGS(arg) -#endif -#define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds) -#define __Pyx_KwValues_VARARGS(args, nargs) NULL -#define __Pyx_GetKwValue_VARARGS(kw, kwvalues, s) __Pyx_PyDict_GetItemStrWithError(kw, s) -#define __Pyx_KwargsAsDict_VARARGS(kw, kwvalues) PyDict_Copy(kw) -#if CYTHON_METH_FASTCALL - #define __Pyx_Arg_FASTCALL(args, i) args[i] - #define __Pyx_NumKwargs_FASTCALL(kwds) PyTuple_GET_SIZE(kwds) - #define 
__Pyx_KwValues_FASTCALL(args, nargs) ((args) + (nargs)) - static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s); -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 - CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues); - #else - #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw) - #endif - #define __Pyx_Arg_NewRef_FASTCALL(arg) arg /* no-op, __Pyx_Arg_FASTCALL is direct and this needs - to have the same reference counting */ - #define __Pyx_Arg_XDECREF_FASTCALL(arg) -#else - #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS - #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS - #define __Pyx_KwValues_FASTCALL __Pyx_KwValues_VARARGS - #define __Pyx_GetKwValue_FASTCALL __Pyx_GetKwValue_VARARGS - #define __Pyx_KwargsAsDict_FASTCALL __Pyx_KwargsAsDict_VARARGS - #define __Pyx_Arg_NewRef_FASTCALL(arg) __Pyx_Arg_NewRef_VARARGS(arg) - #define __Pyx_Arg_XDECREF_FASTCALL(arg) __Pyx_Arg_XDECREF_VARARGS(arg) -#endif -#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS -#define __Pyx_ArgsSlice_VARARGS(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_VARARGS(args, start), stop - start) -#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_FASTCALL(args, start), stop - start) -#else -#define __Pyx_ArgsSlice_VARARGS(args, start, stop) PyTuple_GetSlice(args, start, stop) -#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) PyTuple_GetSlice(args, start, stop) -#endif - -/* RaiseArgTupleInvalid.proto */ -static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, - Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); - -/* RaiseDoubleKeywords.proto */ -static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); - -/* ParseKeywords.proto */ -static int __Pyx_ParseOptionalKeywords(PyObject *kwds, 
PyObject *const *kwvalues, - PyObject **argnames[], - PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, - const char* function_name); - -/* IncludeStructmemberH.proto */ -#include - -/* FixUpExtensionType.proto */ -#if CYTHON_USE_TYPE_SPECS -static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type); -#endif - -/* FetchSharedCythonModule.proto */ -static PyObject *__Pyx_FetchSharedCythonABIModule(void); - -/* FetchCommonType.proto */ -#if !CYTHON_USE_TYPE_SPECS -static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type); -#else -static PyTypeObject* __Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases); -#endif - -/* PyMethodNew.proto */ -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { - PyObject *typesModule=NULL, *methodType=NULL, *result=NULL; - CYTHON_UNUSED_VAR(typ); - if (!self) - return __Pyx_NewRef(func); - typesModule = PyImport_ImportModule("types"); - if (!typesModule) return NULL; - methodType = PyObject_GetAttrString(typesModule, "MethodType"); - Py_DECREF(typesModule); - if (!methodType) return NULL; - result = PyObject_CallFunctionObjArgs(methodType, func, self, NULL); - Py_DECREF(methodType); - return result; -} -#elif PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { - CYTHON_UNUSED_VAR(typ); - if (!self) - return __Pyx_NewRef(func); - return PyMethod_New(func, self); -} -#else - #define __Pyx_PyMethod_New PyMethod_New -#endif - -/* PyVectorcallFastCallDict.proto */ -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw); -#endif - -/* CythonFunctionShared.proto */ -#define __Pyx_CyFunction_USED -#define __Pyx_CYFUNCTION_STATICMETHOD 0x01 -#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02 -#define __Pyx_CYFUNCTION_CCLASS 0x04 
-#define __Pyx_CYFUNCTION_COROUTINE 0x08 -#define __Pyx_CyFunction_GetClosure(f)\ - (((__pyx_CyFunctionObject *) (f))->func_closure) -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_CyFunction_GetClassObj(f)\ - (((__pyx_CyFunctionObject *) (f))->func_classobj) -#else - #define __Pyx_CyFunction_GetClassObj(f)\ - ((PyObject*) ((PyCMethodObject *) (f))->mm_class) -#endif -#define __Pyx_CyFunction_SetClassObj(f, classobj)\ - __Pyx__CyFunction_SetClassObj((__pyx_CyFunctionObject *) (f), (classobj)) -#define __Pyx_CyFunction_Defaults(type, f)\ - ((type *)(((__pyx_CyFunctionObject *) (f))->defaults)) -#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\ - ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g) -typedef struct { -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject_HEAD - PyObject *func; -#elif PY_VERSION_HEX < 0x030900B1 - PyCFunctionObject func; -#else - PyCMethodObject func; -#endif -#if CYTHON_BACKPORT_VECTORCALL - __pyx_vectorcallfunc func_vectorcall; -#endif -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API - PyObject *func_weakreflist; -#endif - PyObject *func_dict; - PyObject *func_name; - PyObject *func_qualname; - PyObject *func_doc; - PyObject *func_globals; - PyObject *func_code; - PyObject *func_closure; -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - PyObject *func_classobj; -#endif - void *defaults; - int defaults_pyobjects; - size_t defaults_size; - int flags; - PyObject *defaults_tuple; - PyObject *defaults_kwdict; - PyObject *(*defaults_getter)(PyObject *); - PyObject *func_annotations; - PyObject *func_is_coroutine; -} __pyx_CyFunctionObject; -#undef __Pyx_CyOrPyCFunction_Check -#define __Pyx_CyFunction_Check(obj) __Pyx_TypeCheck(obj, __pyx_CyFunctionType) -#define __Pyx_CyOrPyCFunction_Check(obj) __Pyx_TypeCheck2(obj, __pyx_CyFunctionType, &PyCFunction_Type) -#define __Pyx_CyFunction_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_CyFunctionType) -static CYTHON_INLINE int 
__Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc); -#undef __Pyx_IsSameCFunction -#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCyOrCFunction(func, cfunc) -static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml, - int flags, PyObject* qualname, - PyObject *closure, - PyObject *module, PyObject *globals, - PyObject* code); -static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj); -static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m, - size_t size, - int pyobjects); -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m, - PyObject *tuple); -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m, - PyObject *dict); -static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m, - PyObject *dict); -static int __pyx_CyFunction_init(PyObject *module); -#if CYTHON_METH_FASTCALL -static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -#if CYTHON_BACKPORT_VECTORCALL -#define __Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall) -#else -#define __Pyx_CyFunction_func_vectorcall(f) (((PyCFunctionObject*)f)->vectorcall) -#endif -#endif - -/* CythonFunction.proto */ -static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, - int flags, PyObject* qualname, - PyObject *closure, - PyObject *module, PyObject *globals, - PyObject* code); - -/* GetTopmostException.proto */ -#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE 
-static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate); -#endif - -/* SaveResetException.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); -#else -#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) -#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) -#endif - -/* FastTypeChecks.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) -#define __Pyx_TypeCheck2(obj, type1, type2) __Pyx_IsAnySubtype2(Py_TYPE(obj), (PyTypeObject *)type1, (PyTypeObject *)type2) -static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); -static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b); -static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); -static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); -#else -#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) -#define __Pyx_TypeCheck2(obj, type1, type2) (PyObject_TypeCheck(obj, (PyTypeObject *)type1) || PyObject_TypeCheck(obj, (PyTypeObject *)type2)) -#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) -#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) -#endif -#define __Pyx_PyErr_ExceptionMatches2(err1, err2) 
__Pyx_PyErr_GivenExceptionMatches2(__Pyx_PyErr_CurrentExceptionType(), err1, err2) -#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) - -/* KeywordStringCheck.proto */ -static int __Pyx_CheckKeywordStrings(PyObject *kw, const char* function_name, int kw_allowed); - -/* RaiseException.proto */ -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); - -/* PyObjectCall.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); -#else -#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) -#endif - -/* UnpackUnboundCMethod.proto */ -typedef struct { - PyObject *type; - PyObject **method_name; - PyCFunction func; - PyObject *method; - int flag; -} __Pyx_CachedCFunction; - -/* CallUnboundCMethod1.proto */ -static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg); -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg); -#else -#define __Pyx_CallUnboundCMethod1(cfunc, self, arg) __Pyx__CallUnboundCMethod1(cfunc, self, arg) -#endif - -/* RaiseUnexpectedTypeError.proto */ -static int __Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj); - -/* decode_c_string_utf16.proto */ -static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16(const char *s, Py_ssize_t size, const char *errors) { - int byteorder = 0; - return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); -} -static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16LE(const char *s, Py_ssize_t size, const char *errors) { - int byteorder = -1; - return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); -} -static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16BE(const char *s, Py_ssize_t size, const char *errors) { - int byteorder = 1; - return PyUnicode_DecodeUTF16(s, size, errors, &byteorder); -} 
- -/* decode_c_bytes.proto */ -static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes( - const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop, - const char* encoding, const char* errors, - PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)); - -/* decode_bytes.proto */ -static CYTHON_INLINE PyObject* __Pyx_decode_bytes( - PyObject* string, Py_ssize_t start, Py_ssize_t stop, - const char* encoding, const char* errors, - PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) { - char* as_c_string; - Py_ssize_t size; -#if CYTHON_ASSUME_SAFE_MACROS - as_c_string = PyBytes_AS_STRING(string); - size = PyBytes_GET_SIZE(string); -#else - if (PyBytes_AsStringAndSize(string, &as_c_string, &size) < 0) { - return NULL; - } -#endif - return __Pyx_decode_c_bytes( - as_c_string, size, - start, stop, encoding, errors, decode_func); -} - -/* ArgTypeTest.proto */ -#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ - ((likely(__Pyx_IS_TYPE(obj, type) | (none_allowed && (obj == Py_None)))) ? 
1 :\ - __Pyx__ArgTypeTest(obj, type, name, exact)) -static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); - -/* PyFunctionFastCall.proto */ -#if CYTHON_FAST_PYCALL -#if !CYTHON_VECTORCALL -#define __Pyx_PyFunction_FastCall(func, args, nargs)\ - __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) -static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); -#endif -#define __Pyx_BUILD_ASSERT_EXPR(cond)\ - (sizeof(char [1 - 2*!(cond)]) - 1) -#ifndef Py_MEMBER_SIZE -#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) -#endif -#if !CYTHON_VECTORCALL -#if PY_VERSION_HEX >= 0x03080000 - #include "frameobject.h" -#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API - #ifndef Py_BUILD_CORE - #define Py_BUILD_CORE 1 - #endif - #include "internal/pycore_frame.h" -#endif - #define __Pxy_PyFrame_Initialize_Offsets() - #define __Pyx_PyFrame_GetLocalsplus(frame) ((frame)->f_localsplus) -#else - static size_t __pyx_pyframe_localsplus_offset = 0; - #include "frameobject.h" - #define __Pxy_PyFrame_Initialize_Offsets()\ - ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ - (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) - #define __Pyx_PyFrame_GetLocalsplus(frame)\ - (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) -#endif -#endif -#endif - -/* PyObjectCallMethO.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); -#endif - -/* PyObjectFastCall.proto */ -#define __Pyx_PyObject_FastCall(func, args, nargs) __Pyx_PyObject_FastCallDict(func, args, (size_t)(nargs), NULL) -static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, 
PyObject **args, size_t nargs, PyObject *kwargs); - -/* PyObjectCallOneArg.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); - -/* SliceObject.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice( - PyObject* obj, Py_ssize_t cstart, Py_ssize_t cstop, - PyObject** py_start, PyObject** py_stop, PyObject** py_slice, - int has_cstart, int has_cstop, int wraparound); - -/* ListCompAppend.proto */ -#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS -static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) { - PyListObject* L = (PyListObject*) list; - Py_ssize_t len = Py_SIZE(list); - if (likely(L->allocated > len)) { - Py_INCREF(x); - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 - L->ob_item[len] = x; - #else - PyList_SET_ITEM(list, len, x); - #endif - __Pyx_SET_SIZE(list, len + 1); - return 0; - } - return PyList_Append(list, x); -} -#else -#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x) -#endif - -/* GetAttr.proto */ -static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *, PyObject *); - -/* SetItemInt.proto */ -#define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) :\ - (is_list ? 
(PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) :\ - __Pyx_SetItemInt_Generic(o, to_py_func(i), v))) -static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v); -static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, - int is_list, int wraparound, int boundscheck); - -/* HasAttr.proto */ -static CYTHON_INLINE int __Pyx_HasAttr(PyObject *, PyObject *); - -/* RaiseUnboundLocalError.proto */ -static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname); - -/* PyObject_Str.proto */ -#define __Pyx_PyObject_Str(obj)\ - (likely(PyString_CheckExact(obj)) ? __Pyx_NewRef(obj) : PyObject_Str(obj)) - -/* SliceObject.proto */ -#define __Pyx_PyObject_DelSlice(obj, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound)\ - __Pyx_PyObject_SetSlice(obj, (PyObject*)NULL, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound) -static CYTHON_INLINE int __Pyx_PyObject_SetSlice( - PyObject* obj, PyObject* value, Py_ssize_t cstart, Py_ssize_t cstop, - PyObject** py_start, PyObject** py_stop, PyObject** py_slice, - int has_cstart, int has_cstop, int wraparound); - -/* PyObjectCall2Args.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2); - -/* PyObjectGetMethod.proto */ -static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method); - -/* PyObjectCallMethod1.proto */ -static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg); - -/* StringJoin.proto */ -#if PY_MAJOR_VERSION < 3 -#define __Pyx_PyString_Join __Pyx_PyBytes_Join -#define __Pyx_PyBaseString_Join(s, v) (PyUnicode_CheckExact(s) ? 
PyUnicode_Join(s, v) : __Pyx_PyBytes_Join(s, v)) -#else -#define __Pyx_PyString_Join PyUnicode_Join -#define __Pyx_PyBaseString_Join PyUnicode_Join -#endif -static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values); - -/* PyObjectSetAttrStr.proto */ -#if CYTHON_USE_TYPE_SLOTS -#define __Pyx_PyObject_DelAttrStr(o,n) __Pyx_PyObject_SetAttrStr(o, n, NULL) -static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value); -#else -#define __Pyx_PyObject_DelAttrStr(o,n) PyObject_DelAttr(o,n) -#define __Pyx_PyObject_SetAttrStr(o,n,v) PyObject_SetAttr(o,n,v) -#endif - -/* PyObjectCallNoArg.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); - -/* PyObjectCallMethod0.proto */ -static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name); - -/* ValidateBasesTuple.proto */ -#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS -static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases); -#endif - -/* PyType_Ready.proto */ -CYTHON_UNUSED static int __Pyx_PyType_Ready(PyTypeObject *t); - -/* PyObject_GenericGetAttrNoDict.proto */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name); -#else -#define __Pyx_PyObject_GenericGetAttrNoDict PyObject_GenericGetAttr -#endif - -/* PyObject_GenericGetAttr.proto */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name); -#else -#define __Pyx_PyObject_GenericGetAttr PyObject_GenericGetAttr -#endif - -/* SetupReduce.proto */ -#if !CYTHON_COMPILING_IN_LIMITED_API -static int __Pyx_setup_reduce(PyObject* type_obj); -#endif - -/* Import.proto */ -static PyObject *__Pyx_Import(PyObject *name, PyObject 
*from_list, int level); - -/* ImportDottedModule.proto */ -static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple); -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple); -#endif - -/* ImportDottedModuleRelFirst.proto */ -static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject *name, PyObject *parts_tuple); - -/* PyDictVersioning.proto */ -#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS -#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) -#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) -#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ - (version_var) = __PYX_GET_DICT_VERSION(dict);\ - (cache_var) = (value); -#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ - static PY_UINT64_T __pyx_dict_version = 0;\ - static PyObject *__pyx_dict_cached_value = NULL;\ - if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ - (VAR) = __pyx_dict_cached_value;\ - } else {\ - (VAR) = __pyx_dict_cached_value = (LOOKUP);\ - __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ - }\ -} -static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); -static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); -static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); -#else -#define __PYX_GET_DICT_VERSION(dict) (0) -#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) -#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); -#endif - -/* CLineInTraceback.proto */ -#ifdef CYTHON_CLINE_IN_TRACEBACK -#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? 
c_line : 0) -#else -static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); -#endif - -/* CodeObjectCache.proto */ -#if !CYTHON_COMPILING_IN_LIMITED_API -typedef struct { - PyCodeObject* code_object; - int code_line; -} __Pyx_CodeObjectCacheEntry; -struct __Pyx_CodeObjectCache { - int count; - int max_count; - __Pyx_CodeObjectCacheEntry* entries; -}; -static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; -static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); -static PyCodeObject *__pyx_find_code_object(int code_line); -static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); -#endif - -/* AddTraceback.proto */ -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename); - -/* GCCDiagnostics.proto */ -#if !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) -#define __Pyx_HAS_GCC_DIAGNOSTIC -#endif - -/* CIntFromPy.proto */ -static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); - -/* CIntFromPy.proto */ -static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); - -/* CIntFromPy.proto */ -static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_char(char value); - -/* FormatTypeName.proto */ -#if CYTHON_COMPILING_IN_LIMITED_API -typedef PyObject *__Pyx_TypeName; -#define __Pyx_FMT_TYPENAME "%U" -static __Pyx_TypeName __Pyx_PyType_GetName(PyTypeObject* tp); -#define __Pyx_DECREF_TypeName(obj) Py_XDECREF(obj) -#else -typedef const char *__Pyx_TypeName; -#define __Pyx_FMT_TYPENAME "%.200s" -#define __Pyx_PyType_GetName(tp) ((tp)->tp_name) -#define __Pyx_DECREF_TypeName(obj) -#endif - -/* CIntFromPy.proto */ -static 
CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); - -/* SwapException.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_ExceptionSwap(type, value, tb) __Pyx__ExceptionSwap(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#else -static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb); -#endif - -/* CoroutineBase.proto */ -struct __pyx_CoroutineObject; -typedef PyObject *(*__pyx_coroutine_body_t)(struct __pyx_CoroutineObject *, PyThreadState *, PyObject *); -#if CYTHON_USE_EXC_INFO_STACK -#define __Pyx_ExcInfoStruct _PyErr_StackItem -#else -typedef struct { - PyObject *exc_type; - PyObject *exc_value; - PyObject *exc_traceback; -} __Pyx_ExcInfoStruct; -#endif -typedef struct __pyx_CoroutineObject { - PyObject_HEAD - __pyx_coroutine_body_t body; - PyObject *closure; - __Pyx_ExcInfoStruct gi_exc_state; - PyObject *gi_weakreflist; - PyObject *classobj; - PyObject *yieldfrom; - PyObject *gi_name; - PyObject *gi_qualname; - PyObject *gi_modulename; - PyObject *gi_code; - PyObject *gi_frame; - int resume_label; - char is_running; -} __pyx_CoroutineObject; -static __pyx_CoroutineObject *__Pyx__Coroutine_New( - PyTypeObject *type, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name); -static __pyx_CoroutineObject *__Pyx__Coroutine_NewInit( - __pyx_CoroutineObject *gen, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name); -static CYTHON_INLINE void __Pyx_Coroutine_ExceptionClear(__Pyx_ExcInfoStruct *self); -static int __Pyx_Coroutine_clear(PyObject *self); -static PyObject *__Pyx_Coroutine_Send(PyObject *self, PyObject *value); -static PyObject *__Pyx_Coroutine_Close(PyObject *self); -static PyObject *__Pyx_Coroutine_Throw(PyObject *gen, PyObject *args); -#if CYTHON_USE_EXC_INFO_STACK 
-#define __Pyx_Coroutine_SwapException(self) -#define __Pyx_Coroutine_ResetAndClearException(self) __Pyx_Coroutine_ExceptionClear(&(self)->gi_exc_state) -#else -#define __Pyx_Coroutine_SwapException(self) {\ - __Pyx_ExceptionSwap(&(self)->gi_exc_state.exc_type, &(self)->gi_exc_state.exc_value, &(self)->gi_exc_state.exc_traceback);\ - __Pyx_Coroutine_ResetFrameBackpointer(&(self)->gi_exc_state);\ - } -#define __Pyx_Coroutine_ResetAndClearException(self) {\ - __Pyx_ExceptionReset((self)->gi_exc_state.exc_type, (self)->gi_exc_state.exc_value, (self)->gi_exc_state.exc_traceback);\ - (self)->gi_exc_state.exc_type = (self)->gi_exc_state.exc_value = (self)->gi_exc_state.exc_traceback = NULL;\ - } -#endif -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyGen_FetchStopIterationValue(pvalue)\ - __Pyx_PyGen__FetchStopIterationValue(__pyx_tstate, pvalue) -#else -#define __Pyx_PyGen_FetchStopIterationValue(pvalue)\ - __Pyx_PyGen__FetchStopIterationValue(__Pyx_PyThreadState_Current, pvalue) -#endif -static int __Pyx_PyGen__FetchStopIterationValue(PyThreadState *tstate, PyObject **pvalue); -static CYTHON_INLINE void __Pyx_Coroutine_ResetFrameBackpointer(__Pyx_ExcInfoStruct *exc_state); - -/* PatchModuleWithCoroutine.proto */ -static PyObject* __Pyx_Coroutine_patch_module(PyObject* module, const char* py_code); - -/* PatchGeneratorABC.proto */ -static int __Pyx_patch_abc(void); - -/* Generator.proto */ -#define __Pyx_Generator_USED -#define __Pyx_Generator_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_GeneratorType) -#define __Pyx_Generator_New(body, code, closure, name, qualname, module_name)\ - __Pyx__Coroutine_New(__pyx_GeneratorType, body, code, closure, name, qualname, module_name) -static PyObject *__Pyx_Generator_Next(PyObject *self); -static int __pyx_Generator_init(PyObject *module); - -/* CheckBinaryVersion.proto */ -static unsigned long __Pyx_get_runtime_version(void); -static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer); - 
-/* InitStrings.proto */ -static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); - -/* #### Code section: module_declarations ### */ - -/* Module declarations from "libc.string" */ - -/* Module declarations from "libc.stdio" */ - -/* Module declarations from "jcvi.formats.cblast" */ -static char const *__pyx_v_4jcvi_7formats_6cblast_blast_format; -static char const *__pyx_v_4jcvi_7formats_6cblast_blast_format_line; -static char const *__pyx_v_4jcvi_7formats_6cblast_blast_output; -static char const *__pyx_v_4jcvi_7formats_6cblast_bed_output; -static PyObject *__pyx_f_4jcvi_7formats_6cblast_c_str(PyObject *); /*proto*/ -static PyObject *__pyx_f_4jcvi_7formats_6cblast_py_str(PyObject *); /*proto*/ -static struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_f_4jcvi_7formats_6cblast_create_blast_line(char *, char *, float, int, int, int, int, int, int, int, float, float); /*proto*/ -static PyObject *__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*)(char *, char *, float, int, int, int, int, int, int, int, float, float)); /*proto*/ -static int __Pyx_carray_from_py_char(PyObject *, char *, Py_ssize_t); /*proto*/ -/* #### Code section: typeinfo ### */ -/* #### Code section: before_global_var ### */ -#define __Pyx_MODULE_NAME "jcvi.formats.cblast" -extern int __pyx_module_is_main_jcvi__formats__cblast; -int __pyx_module_is_main_jcvi__formats__cblast = 0; - -/* Implementation of "jcvi.formats.cblast" */ -/* #### Code section: global_var ### */ -static PyObject *__pyx_builtin_StopIteration; -static PyObject *__pyx_builtin_TypeError; -static PyObject *__pyx_builtin_id; -static PyObject *__pyx_builtin_OverflowError; -static PyObject *__pyx_builtin_enumerate; -static PyObject *__pyx_builtin_IndexError; -/* #### Code section: string_decls ### */ -static const char __pyx_k_s[] = "s"; -static const char __pyx_k__5[] = "\t"; -static 
const char __pyx_k__6[] = "*"; -static const char __pyx_k_gc[] = "gc"; -static const char __pyx_k_id[] = "id"; -static const char __pyx_k_qi[] = "qi"; -static const char __pyx_k_si[] = "si"; -static const char __pyx_k__13[] = "?"; -static const char __pyx_k_sys[] = "sys"; -static const char __pyx_k_args[] = "args"; -static const char __pyx_k_join[] = "join"; -static const char __pyx_k_main[] = "__main__"; -static const char __pyx_k_name[] = "__name__"; -static const char __pyx_k_self[] = "self"; -static const char __pyx_k_send[] = "send"; -static const char __pyx_k_spec[] = "__spec__"; -static const char __pyx_k_test[] = "__test__"; -static const char __pyx_k_wrap[] = "wrap"; -static const char __pyx_k_Blast[] = "Blast"; -static const char __pyx_k_UTF_8[] = "UTF-8"; -static const char __pyx_k_close[] = "close"; -static const char __pyx_k_ngaps[] = "ngaps"; -static const char __pyx_k_pctid[] = "pctid"; -static const char __pyx_k_qstop[] = "qstop"; -static const char __pyx_k_query[] = "query"; -static const char __pyx_k_score[] = "score"; -static const char __pyx_k_slots[] = "__slots__"; -static const char __pyx_k_sstop[] = "sstop"; -static const char __pyx_k_throw[] = "throw"; -static const char __pyx_k_enable[] = "enable"; -static const char __pyx_k_encode[] = "encode"; -static const char __pyx_k_evalue[] = "evalue"; -static const char __pyx_k_hitlen[] = "hitlen"; -static const char __pyx_k_import[] = "__import__"; -static const char __pyx_k_qseqid[] = "qseqid"; -static const char __pyx_k_qstart[] = "qstart"; -static const char __pyx_k_reduce[] = "__reduce__"; -static const char __pyx_k_sseqid[] = "sseqid"; -static const char __pyx_k_sstart[] = "sstart"; -static const char __pyx_k_Blast_s[] = "Blast('%s')"; -static const char __pyx_k_disable[] = "disable"; -static const char __pyx_k_genexpr[] = "genexpr"; -static const char __pyx_k_richcmp[] = "__richcmp__"; -static const char __pyx_k_subject[] = "subject"; -static const char __pyx_k_filename[] = "filename"; 
-static const char __pyx_k_getstate[] = "__getstate__"; -static const char __pyx_k_setstate[] = "__setstate__"; -static const char __pyx_k_BlastLine[] = "BlastLine"; -static const char __pyx_k_TypeError[] = "TypeError"; -static const char __pyx_k_enumerate[] = "enumerate"; -static const char __pyx_k_isenabled[] = "isenabled"; -static const char __pyx_k_nmismatch[] = "nmismatch"; -static const char __pyx_k_pyx_state[] = "__pyx_state"; -static const char __pyx_k_reduce_ex[] = "__reduce_ex__"; -static const char __pyx_k_IndexError[] = "IndexError"; -static const char __pyx_k_cfunc_to_py[] = "cfunc.to_py"; -static const char __pyx_k_orientation[] = "orientation"; -static const char __pyx_k_initializing[] = "_initializing"; -static const char __pyx_k_is_coroutine[] = "_is_coroutine"; -static const char __pyx_k_stringsource[] = ""; -static const char __pyx_k_OverflowError[] = "OverflowError"; -static const char __pyx_k_StopIteration[] = "StopIteration"; -static const char __pyx_k_reduce_cython[] = "__reduce_cython__"; -static const char __pyx_k_setstate_cython[] = "__setstate_cython__"; -static const char __pyx_k_BlastLine___reduce[] = "BlastLine.__reduce__"; -static const char __pyx_k_asyncio_coroutines[] = "asyncio.coroutines"; -static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; -static const char __pyx_k_jcvi_formats_cblast[] = "jcvi.formats.cblast"; -static const char __pyx_k_Blast___reduce_cython[] = "Blast.__reduce_cython__"; -static const char __pyx_k_Blast___setstate_cython[] = "Blast.__setstate_cython__"; -static const char __pyx_k_src_jcvi_formats_cblast_pyx[] = "src/jcvi/formats/cblast.pyx"; -static const char __pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma[] = "__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc..wrap"; -static const char __pyx_k_Cythonized_fast_version_of_Blas[] = "\nCythonized (fast) version of BlastLine\n\nStolen from brentp's biostuff 
(thanks):\n\n"; -static const char __pyx_k_that_comparison_not_implemented[] = "that comparison not implemented"; -static const char __pyx_k_BlastLine___get___locals_genexpr[] = "BlastLine.__get__..genexpr"; -static const char __pyx_k_BlastLine_s_to_s_eval_3f_score_1[] = "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)"; -static const char __pyx_k_no_default___reduce___due_to_non[] = "no default __reduce__ due to non-trivial __cinit__"; -/* #### Code section: decls ### */ -static PyObject *__pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(PyObject *__pyx_self, char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, char *__pyx_v_filename); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ -static void __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, 
CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_s); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_other, size_t __pyx_v_op); /* proto */ -static Py_hash_t __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_genexpr_arg_0); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(struct 
__pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, 
PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); 
/* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject 
*__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self); /* proto */ -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_Blast(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static __Pyx_CachedCFunction __pyx_umethod_PyString_Type_encode = {0, 0, 0, 0, 0}; -/* #### Code section: late_includes ### */ -/* #### Code section: module_state ### */ -typedef struct { - PyObject *__pyx_d; - PyObject *__pyx_b; - PyObject *__pyx_cython_runtime; - PyObject *__pyx_empty_tuple; - PyObject *__pyx_empty_bytes; - PyObject *__pyx_empty_unicode; - #ifdef __Pyx_CyFunction_USED - PyTypeObject *__pyx_CyFunctionType; - #endif - #ifdef __Pyx_FusedFunction_USED - PyTypeObject *__pyx_FusedFunctionType; - #endif - #ifdef __Pyx_Generator_USED - PyTypeObject *__pyx_GeneratorType; - #endif - #ifdef __Pyx_IterableCoroutine_USED - PyTypeObject *__pyx_IterableCoroutineType; - #endif - #ifdef __Pyx_Coroutine_USED - PyTypeObject *__pyx_CoroutineAwaitType; - #endif - #ifdef __Pyx_Coroutine_USED - PyTypeObject *__pyx_CoroutineType; - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - PyObject *__pyx_type_4jcvi_7formats_6cblast_Blast; - PyObject 
*__pyx_type_4jcvi_7formats_6cblast_BlastLine; - PyObject *__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; - PyObject *__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; - #endif - PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast_Blast; - PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast_BlastLine; - PyTypeObject *__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; - PyTypeObject *__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; - PyObject *__pyx_n_s_Blast; - PyObject *__pyx_n_s_BlastLine; - PyObject *__pyx_n_s_BlastLine___get___locals_genexpr; - PyObject *__pyx_n_s_BlastLine___reduce; - PyObject *__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1; - PyObject *__pyx_n_s_Blast___reduce_cython; - PyObject *__pyx_n_s_Blast___setstate_cython; - PyObject *__pyx_kp_s_Blast_s; - PyObject *__pyx_n_s_IndexError; - PyObject *__pyx_n_s_OverflowError; - PyObject *__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma; - PyObject *__pyx_n_s_StopIteration; - PyObject *__pyx_n_s_TypeError; - PyObject *__pyx_kp_s_UTF_8; - PyObject *__pyx_n_s__13; - PyObject *__pyx_kp_s__5; - PyObject *__pyx_n_s__6; - PyObject *__pyx_n_s_args; - PyObject *__pyx_n_s_asyncio_coroutines; - PyObject *__pyx_n_s_cfunc_to_py; - PyObject *__pyx_n_s_cline_in_traceback; - PyObject *__pyx_n_s_close; - PyObject *__pyx_kp_u_disable; - PyObject *__pyx_kp_u_enable; - PyObject *__pyx_n_s_encode; - PyObject *__pyx_n_s_enumerate; - PyObject *__pyx_n_s_evalue; - PyObject *__pyx_n_s_filename; - PyObject *__pyx_kp_u_gc; - PyObject *__pyx_n_s_genexpr; - PyObject *__pyx_n_s_getstate; - PyObject *__pyx_n_s_hitlen; - PyObject *__pyx_n_s_id; - PyObject *__pyx_n_s_import; - PyObject *__pyx_n_s_initializing; - PyObject *__pyx_n_s_is_coroutine; - PyObject *__pyx_kp_u_isenabled; - PyObject 
*__pyx_n_s_jcvi_formats_cblast; - PyObject *__pyx_n_s_join; - PyObject *__pyx_n_s_main; - PyObject *__pyx_n_s_name; - PyObject *__pyx_n_s_ngaps; - PyObject *__pyx_n_s_nmismatch; - PyObject *__pyx_kp_s_no_default___reduce___due_to_non; - PyObject *__pyx_n_s_orientation; - PyObject *__pyx_n_s_pctid; - PyObject *__pyx_n_s_pyx_state; - PyObject *__pyx_n_s_qi; - PyObject *__pyx_n_s_qseqid; - PyObject *__pyx_n_s_qstart; - PyObject *__pyx_n_s_qstop; - PyObject *__pyx_n_s_query; - PyObject *__pyx_n_s_reduce; - PyObject *__pyx_n_s_reduce_cython; - PyObject *__pyx_n_s_reduce_ex; - PyObject *__pyx_n_s_richcmp; - PyObject *__pyx_n_s_s; - PyObject *__pyx_n_s_score; - PyObject *__pyx_n_s_self; - PyObject *__pyx_n_s_send; - PyObject *__pyx_n_s_setstate; - PyObject *__pyx_n_s_setstate_cython; - PyObject *__pyx_n_s_si; - PyObject *__pyx_n_s_slots; - PyObject *__pyx_n_s_spec; - PyObject *__pyx_kp_s_src_jcvi_formats_cblast_pyx; - PyObject *__pyx_n_s_sseqid; - PyObject *__pyx_n_s_sstart; - PyObject *__pyx_n_s_sstop; - PyObject *__pyx_kp_s_stringsource; - PyObject *__pyx_n_s_subject; - PyObject *__pyx_n_s_sys; - PyObject *__pyx_n_s_test; - PyObject *__pyx_kp_s_that_comparison_not_implemented; - PyObject *__pyx_n_s_throw; - PyObject *__pyx_n_s_wrap; - PyObject *__pyx_int_2; - PyObject *__pyx_int_12; - PyObject *__pyx_tuple_; - PyObject *__pyx_slice__4; - PyObject *__pyx_tuple__3; - PyObject *__pyx_tuple__7; - PyObject *__pyx_tuple__9; - PyObject *__pyx_tuple__11; - PyObject *__pyx_codeobj__2; - PyObject *__pyx_codeobj__8; - PyObject *__pyx_codeobj__10; - PyObject *__pyx_codeobj__12; -} __pyx_mstate; - -#if CYTHON_USE_MODULE_STATE -#ifdef __cplusplus -namespace { - extern struct PyModuleDef __pyx_moduledef; -} /* anonymous namespace */ -#else -static struct PyModuleDef __pyx_moduledef; -#endif - -#define __pyx_mstate(o) ((__pyx_mstate *)__Pyx_PyModule_GetState(o)) - -#define __pyx_mstate_global (__pyx_mstate(PyState_FindModule(&__pyx_moduledef))) - -#define __pyx_m 
(PyState_FindModule(&__pyx_moduledef)) -#else -static __pyx_mstate __pyx_mstate_global_static = -#ifdef __cplusplus - {}; -#else - {0}; -#endif -static __pyx_mstate *__pyx_mstate_global = &__pyx_mstate_global_static; -#endif -/* #### Code section: module_state_clear ### */ -#if CYTHON_USE_MODULE_STATE -static int __pyx_m_clear(PyObject *m) { - __pyx_mstate *clear_module_state = __pyx_mstate(m); - if (!clear_module_state) return 0; - Py_CLEAR(clear_module_state->__pyx_d); - Py_CLEAR(clear_module_state->__pyx_b); - Py_CLEAR(clear_module_state->__pyx_cython_runtime); - Py_CLEAR(clear_module_state->__pyx_empty_tuple); - Py_CLEAR(clear_module_state->__pyx_empty_bytes); - Py_CLEAR(clear_module_state->__pyx_empty_unicode); - #ifdef __Pyx_CyFunction_USED - Py_CLEAR(clear_module_state->__pyx_CyFunctionType); - #endif - #ifdef __Pyx_FusedFunction_USED - Py_CLEAR(clear_module_state->__pyx_FusedFunctionType); - #endif - Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast_Blast); - Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast_Blast); - Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); - Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast_BlastLine); - Py_CLEAR(clear_module_state->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); - Py_CLEAR(clear_module_state->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); - Py_CLEAR(clear_module_state->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); - Py_CLEAR(clear_module_state->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); - Py_CLEAR(clear_module_state->__pyx_n_s_Blast); - Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine); - Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine___get___locals_genexpr); - 
Py_CLEAR(clear_module_state->__pyx_n_s_BlastLine___reduce); - Py_CLEAR(clear_module_state->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1); - Py_CLEAR(clear_module_state->__pyx_n_s_Blast___reduce_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_Blast___setstate_cython); - Py_CLEAR(clear_module_state->__pyx_kp_s_Blast_s); - Py_CLEAR(clear_module_state->__pyx_n_s_IndexError); - Py_CLEAR(clear_module_state->__pyx_n_s_OverflowError); - Py_CLEAR(clear_module_state->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma); - Py_CLEAR(clear_module_state->__pyx_n_s_StopIteration); - Py_CLEAR(clear_module_state->__pyx_n_s_TypeError); - Py_CLEAR(clear_module_state->__pyx_kp_s_UTF_8); - Py_CLEAR(clear_module_state->__pyx_n_s__13); - Py_CLEAR(clear_module_state->__pyx_kp_s__5); - Py_CLEAR(clear_module_state->__pyx_n_s__6); - Py_CLEAR(clear_module_state->__pyx_n_s_args); - Py_CLEAR(clear_module_state->__pyx_n_s_asyncio_coroutines); - Py_CLEAR(clear_module_state->__pyx_n_s_cfunc_to_py); - Py_CLEAR(clear_module_state->__pyx_n_s_cline_in_traceback); - Py_CLEAR(clear_module_state->__pyx_n_s_close); - Py_CLEAR(clear_module_state->__pyx_kp_u_disable); - Py_CLEAR(clear_module_state->__pyx_kp_u_enable); - Py_CLEAR(clear_module_state->__pyx_n_s_encode); - Py_CLEAR(clear_module_state->__pyx_n_s_enumerate); - Py_CLEAR(clear_module_state->__pyx_n_s_evalue); - Py_CLEAR(clear_module_state->__pyx_n_s_filename); - Py_CLEAR(clear_module_state->__pyx_kp_u_gc); - Py_CLEAR(clear_module_state->__pyx_n_s_genexpr); - Py_CLEAR(clear_module_state->__pyx_n_s_getstate); - Py_CLEAR(clear_module_state->__pyx_n_s_hitlen); - Py_CLEAR(clear_module_state->__pyx_n_s_id); - Py_CLEAR(clear_module_state->__pyx_n_s_import); - Py_CLEAR(clear_module_state->__pyx_n_s_initializing); - Py_CLEAR(clear_module_state->__pyx_n_s_is_coroutine); - Py_CLEAR(clear_module_state->__pyx_kp_u_isenabled); - Py_CLEAR(clear_module_state->__pyx_n_s_jcvi_formats_cblast); - Py_CLEAR(clear_module_state->__pyx_n_s_join); - 
Py_CLEAR(clear_module_state->__pyx_n_s_main); - Py_CLEAR(clear_module_state->__pyx_n_s_name); - Py_CLEAR(clear_module_state->__pyx_n_s_ngaps); - Py_CLEAR(clear_module_state->__pyx_n_s_nmismatch); - Py_CLEAR(clear_module_state->__pyx_kp_s_no_default___reduce___due_to_non); - Py_CLEAR(clear_module_state->__pyx_n_s_orientation); - Py_CLEAR(clear_module_state->__pyx_n_s_pctid); - Py_CLEAR(clear_module_state->__pyx_n_s_pyx_state); - Py_CLEAR(clear_module_state->__pyx_n_s_qi); - Py_CLEAR(clear_module_state->__pyx_n_s_qseqid); - Py_CLEAR(clear_module_state->__pyx_n_s_qstart); - Py_CLEAR(clear_module_state->__pyx_n_s_qstop); - Py_CLEAR(clear_module_state->__pyx_n_s_query); - Py_CLEAR(clear_module_state->__pyx_n_s_reduce); - Py_CLEAR(clear_module_state->__pyx_n_s_reduce_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_reduce_ex); - Py_CLEAR(clear_module_state->__pyx_n_s_richcmp); - Py_CLEAR(clear_module_state->__pyx_n_s_s); - Py_CLEAR(clear_module_state->__pyx_n_s_score); - Py_CLEAR(clear_module_state->__pyx_n_s_self); - Py_CLEAR(clear_module_state->__pyx_n_s_send); - Py_CLEAR(clear_module_state->__pyx_n_s_setstate); - Py_CLEAR(clear_module_state->__pyx_n_s_setstate_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_si); - Py_CLEAR(clear_module_state->__pyx_n_s_slots); - Py_CLEAR(clear_module_state->__pyx_n_s_spec); - Py_CLEAR(clear_module_state->__pyx_kp_s_src_jcvi_formats_cblast_pyx); - Py_CLEAR(clear_module_state->__pyx_n_s_sseqid); - Py_CLEAR(clear_module_state->__pyx_n_s_sstart); - Py_CLEAR(clear_module_state->__pyx_n_s_sstop); - Py_CLEAR(clear_module_state->__pyx_kp_s_stringsource); - Py_CLEAR(clear_module_state->__pyx_n_s_subject); - Py_CLEAR(clear_module_state->__pyx_n_s_sys); - Py_CLEAR(clear_module_state->__pyx_n_s_test); - Py_CLEAR(clear_module_state->__pyx_kp_s_that_comparison_not_implemented); - Py_CLEAR(clear_module_state->__pyx_n_s_throw); - Py_CLEAR(clear_module_state->__pyx_n_s_wrap); - Py_CLEAR(clear_module_state->__pyx_int_2); - 
Py_CLEAR(clear_module_state->__pyx_int_12); - Py_CLEAR(clear_module_state->__pyx_tuple_); - Py_CLEAR(clear_module_state->__pyx_slice__4); - Py_CLEAR(clear_module_state->__pyx_tuple__3); - Py_CLEAR(clear_module_state->__pyx_tuple__7); - Py_CLEAR(clear_module_state->__pyx_tuple__9); - Py_CLEAR(clear_module_state->__pyx_tuple__11); - Py_CLEAR(clear_module_state->__pyx_codeobj__2); - Py_CLEAR(clear_module_state->__pyx_codeobj__8); - Py_CLEAR(clear_module_state->__pyx_codeobj__10); - Py_CLEAR(clear_module_state->__pyx_codeobj__12); - return 0; -} -#endif -/* #### Code section: module_state_traverse ### */ -#if CYTHON_USE_MODULE_STATE -static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) { - __pyx_mstate *traverse_module_state = __pyx_mstate(m); - if (!traverse_module_state) return 0; - Py_VISIT(traverse_module_state->__pyx_d); - Py_VISIT(traverse_module_state->__pyx_b); - Py_VISIT(traverse_module_state->__pyx_cython_runtime); - Py_VISIT(traverse_module_state->__pyx_empty_tuple); - Py_VISIT(traverse_module_state->__pyx_empty_bytes); - Py_VISIT(traverse_module_state->__pyx_empty_unicode); - #ifdef __Pyx_CyFunction_USED - Py_VISIT(traverse_module_state->__pyx_CyFunctionType); - #endif - #ifdef __Pyx_FusedFunction_USED - Py_VISIT(traverse_module_state->__pyx_FusedFunctionType); - #endif - Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast_Blast); - Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast_Blast); - Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); - Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast_BlastLine); - Py_VISIT(traverse_module_state->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); - Py_VISIT(traverse_module_state->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr); - 
Py_VISIT(traverse_module_state->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); - Py_VISIT(traverse_module_state->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc); - Py_VISIT(traverse_module_state->__pyx_n_s_Blast); - Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine); - Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine___get___locals_genexpr); - Py_VISIT(traverse_module_state->__pyx_n_s_BlastLine___reduce); - Py_VISIT(traverse_module_state->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1); - Py_VISIT(traverse_module_state->__pyx_n_s_Blast___reduce_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_Blast___setstate_cython); - Py_VISIT(traverse_module_state->__pyx_kp_s_Blast_s); - Py_VISIT(traverse_module_state->__pyx_n_s_IndexError); - Py_VISIT(traverse_module_state->__pyx_n_s_OverflowError); - Py_VISIT(traverse_module_state->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma); - Py_VISIT(traverse_module_state->__pyx_n_s_StopIteration); - Py_VISIT(traverse_module_state->__pyx_n_s_TypeError); - Py_VISIT(traverse_module_state->__pyx_kp_s_UTF_8); - Py_VISIT(traverse_module_state->__pyx_n_s__13); - Py_VISIT(traverse_module_state->__pyx_kp_s__5); - Py_VISIT(traverse_module_state->__pyx_n_s__6); - Py_VISIT(traverse_module_state->__pyx_n_s_args); - Py_VISIT(traverse_module_state->__pyx_n_s_asyncio_coroutines); - Py_VISIT(traverse_module_state->__pyx_n_s_cfunc_to_py); - Py_VISIT(traverse_module_state->__pyx_n_s_cline_in_traceback); - Py_VISIT(traverse_module_state->__pyx_n_s_close); - Py_VISIT(traverse_module_state->__pyx_kp_u_disable); - Py_VISIT(traverse_module_state->__pyx_kp_u_enable); - Py_VISIT(traverse_module_state->__pyx_n_s_encode); - Py_VISIT(traverse_module_state->__pyx_n_s_enumerate); - Py_VISIT(traverse_module_state->__pyx_n_s_evalue); - 
Py_VISIT(traverse_module_state->__pyx_n_s_filename); - Py_VISIT(traverse_module_state->__pyx_kp_u_gc); - Py_VISIT(traverse_module_state->__pyx_n_s_genexpr); - Py_VISIT(traverse_module_state->__pyx_n_s_getstate); - Py_VISIT(traverse_module_state->__pyx_n_s_hitlen); - Py_VISIT(traverse_module_state->__pyx_n_s_id); - Py_VISIT(traverse_module_state->__pyx_n_s_import); - Py_VISIT(traverse_module_state->__pyx_n_s_initializing); - Py_VISIT(traverse_module_state->__pyx_n_s_is_coroutine); - Py_VISIT(traverse_module_state->__pyx_kp_u_isenabled); - Py_VISIT(traverse_module_state->__pyx_n_s_jcvi_formats_cblast); - Py_VISIT(traverse_module_state->__pyx_n_s_join); - Py_VISIT(traverse_module_state->__pyx_n_s_main); - Py_VISIT(traverse_module_state->__pyx_n_s_name); - Py_VISIT(traverse_module_state->__pyx_n_s_ngaps); - Py_VISIT(traverse_module_state->__pyx_n_s_nmismatch); - Py_VISIT(traverse_module_state->__pyx_kp_s_no_default___reduce___due_to_non); - Py_VISIT(traverse_module_state->__pyx_n_s_orientation); - Py_VISIT(traverse_module_state->__pyx_n_s_pctid); - Py_VISIT(traverse_module_state->__pyx_n_s_pyx_state); - Py_VISIT(traverse_module_state->__pyx_n_s_qi); - Py_VISIT(traverse_module_state->__pyx_n_s_qseqid); - Py_VISIT(traverse_module_state->__pyx_n_s_qstart); - Py_VISIT(traverse_module_state->__pyx_n_s_qstop); - Py_VISIT(traverse_module_state->__pyx_n_s_query); - Py_VISIT(traverse_module_state->__pyx_n_s_reduce); - Py_VISIT(traverse_module_state->__pyx_n_s_reduce_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_reduce_ex); - Py_VISIT(traverse_module_state->__pyx_n_s_richcmp); - Py_VISIT(traverse_module_state->__pyx_n_s_s); - Py_VISIT(traverse_module_state->__pyx_n_s_score); - Py_VISIT(traverse_module_state->__pyx_n_s_self); - Py_VISIT(traverse_module_state->__pyx_n_s_send); - Py_VISIT(traverse_module_state->__pyx_n_s_setstate); - Py_VISIT(traverse_module_state->__pyx_n_s_setstate_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_si); - 
Py_VISIT(traverse_module_state->__pyx_n_s_slots); - Py_VISIT(traverse_module_state->__pyx_n_s_spec); - Py_VISIT(traverse_module_state->__pyx_kp_s_src_jcvi_formats_cblast_pyx); - Py_VISIT(traverse_module_state->__pyx_n_s_sseqid); - Py_VISIT(traverse_module_state->__pyx_n_s_sstart); - Py_VISIT(traverse_module_state->__pyx_n_s_sstop); - Py_VISIT(traverse_module_state->__pyx_kp_s_stringsource); - Py_VISIT(traverse_module_state->__pyx_n_s_subject); - Py_VISIT(traverse_module_state->__pyx_n_s_sys); - Py_VISIT(traverse_module_state->__pyx_n_s_test); - Py_VISIT(traverse_module_state->__pyx_kp_s_that_comparison_not_implemented); - Py_VISIT(traverse_module_state->__pyx_n_s_throw); - Py_VISIT(traverse_module_state->__pyx_n_s_wrap); - Py_VISIT(traverse_module_state->__pyx_int_2); - Py_VISIT(traverse_module_state->__pyx_int_12); - Py_VISIT(traverse_module_state->__pyx_tuple_); - Py_VISIT(traverse_module_state->__pyx_slice__4); - Py_VISIT(traverse_module_state->__pyx_tuple__3); - Py_VISIT(traverse_module_state->__pyx_tuple__7); - Py_VISIT(traverse_module_state->__pyx_tuple__9); - Py_VISIT(traverse_module_state->__pyx_tuple__11); - Py_VISIT(traverse_module_state->__pyx_codeobj__2); - Py_VISIT(traverse_module_state->__pyx_codeobj__8); - Py_VISIT(traverse_module_state->__pyx_codeobj__10); - Py_VISIT(traverse_module_state->__pyx_codeobj__12); - return 0; -} -#endif -/* #### Code section: module_state_defines ### */ -#define __pyx_d __pyx_mstate_global->__pyx_d -#define __pyx_b __pyx_mstate_global->__pyx_b -#define __pyx_cython_runtime __pyx_mstate_global->__pyx_cython_runtime -#define __pyx_empty_tuple __pyx_mstate_global->__pyx_empty_tuple -#define __pyx_empty_bytes __pyx_mstate_global->__pyx_empty_bytes -#define __pyx_empty_unicode __pyx_mstate_global->__pyx_empty_unicode -#ifdef __Pyx_CyFunction_USED -#define __pyx_CyFunctionType __pyx_mstate_global->__pyx_CyFunctionType -#endif -#ifdef __Pyx_FusedFunction_USED -#define __pyx_FusedFunctionType 
__pyx_mstate_global->__pyx_FusedFunctionType -#endif -#ifdef __Pyx_Generator_USED -#define __pyx_GeneratorType __pyx_mstate_global->__pyx_GeneratorType -#endif -#ifdef __Pyx_IterableCoroutine_USED -#define __pyx_IterableCoroutineType __pyx_mstate_global->__pyx_IterableCoroutineType -#endif -#ifdef __Pyx_Coroutine_USED -#define __pyx_CoroutineAwaitType __pyx_mstate_global->__pyx_CoroutineAwaitType -#endif -#ifdef __Pyx_Coroutine_USED -#define __pyx_CoroutineType __pyx_mstate_global->__pyx_CoroutineType -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#define __pyx_type_4jcvi_7formats_6cblast_Blast __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast_Blast -#define __pyx_type_4jcvi_7formats_6cblast_BlastLine __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast_BlastLine -#define __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr __pyx_mstate_global->__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr -#define __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc __pyx_mstate_global->__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc -#endif -#define __pyx_ptype_4jcvi_7formats_6cblast_Blast __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast_Blast -#define __pyx_ptype_4jcvi_7formats_6cblast_BlastLine __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast_BlastLine -#define __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr __pyx_mstate_global->__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr -#define __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc 
__pyx_mstate_global->__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc -#define __pyx_n_s_Blast __pyx_mstate_global->__pyx_n_s_Blast -#define __pyx_n_s_BlastLine __pyx_mstate_global->__pyx_n_s_BlastLine -#define __pyx_n_s_BlastLine___get___locals_genexpr __pyx_mstate_global->__pyx_n_s_BlastLine___get___locals_genexpr -#define __pyx_n_s_BlastLine___reduce __pyx_mstate_global->__pyx_n_s_BlastLine___reduce -#define __pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1 __pyx_mstate_global->__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1 -#define __pyx_n_s_Blast___reduce_cython __pyx_mstate_global->__pyx_n_s_Blast___reduce_cython -#define __pyx_n_s_Blast___setstate_cython __pyx_mstate_global->__pyx_n_s_Blast___setstate_cython -#define __pyx_kp_s_Blast_s __pyx_mstate_global->__pyx_kp_s_Blast_s -#define __pyx_n_s_IndexError __pyx_mstate_global->__pyx_n_s_IndexError -#define __pyx_n_s_OverflowError __pyx_mstate_global->__pyx_n_s_OverflowError -#define __pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma __pyx_mstate_global->__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma -#define __pyx_n_s_StopIteration __pyx_mstate_global->__pyx_n_s_StopIteration -#define __pyx_n_s_TypeError __pyx_mstate_global->__pyx_n_s_TypeError -#define __pyx_kp_s_UTF_8 __pyx_mstate_global->__pyx_kp_s_UTF_8 -#define __pyx_n_s__13 __pyx_mstate_global->__pyx_n_s__13 -#define __pyx_kp_s__5 __pyx_mstate_global->__pyx_kp_s__5 -#define __pyx_n_s__6 __pyx_mstate_global->__pyx_n_s__6 -#define __pyx_n_s_args __pyx_mstate_global->__pyx_n_s_args -#define __pyx_n_s_asyncio_coroutines __pyx_mstate_global->__pyx_n_s_asyncio_coroutines -#define __pyx_n_s_cfunc_to_py __pyx_mstate_global->__pyx_n_s_cfunc_to_py -#define __pyx_n_s_cline_in_traceback __pyx_mstate_global->__pyx_n_s_cline_in_traceback -#define __pyx_n_s_close __pyx_mstate_global->__pyx_n_s_close -#define __pyx_kp_u_disable __pyx_mstate_global->__pyx_kp_u_disable -#define 
__pyx_kp_u_enable __pyx_mstate_global->__pyx_kp_u_enable -#define __pyx_n_s_encode __pyx_mstate_global->__pyx_n_s_encode -#define __pyx_n_s_enumerate __pyx_mstate_global->__pyx_n_s_enumerate -#define __pyx_n_s_evalue __pyx_mstate_global->__pyx_n_s_evalue -#define __pyx_n_s_filename __pyx_mstate_global->__pyx_n_s_filename -#define __pyx_kp_u_gc __pyx_mstate_global->__pyx_kp_u_gc -#define __pyx_n_s_genexpr __pyx_mstate_global->__pyx_n_s_genexpr -#define __pyx_n_s_getstate __pyx_mstate_global->__pyx_n_s_getstate -#define __pyx_n_s_hitlen __pyx_mstate_global->__pyx_n_s_hitlen -#define __pyx_n_s_id __pyx_mstate_global->__pyx_n_s_id -#define __pyx_n_s_import __pyx_mstate_global->__pyx_n_s_import -#define __pyx_n_s_initializing __pyx_mstate_global->__pyx_n_s_initializing -#define __pyx_n_s_is_coroutine __pyx_mstate_global->__pyx_n_s_is_coroutine -#define __pyx_kp_u_isenabled __pyx_mstate_global->__pyx_kp_u_isenabled -#define __pyx_n_s_jcvi_formats_cblast __pyx_mstate_global->__pyx_n_s_jcvi_formats_cblast -#define __pyx_n_s_join __pyx_mstate_global->__pyx_n_s_join -#define __pyx_n_s_main __pyx_mstate_global->__pyx_n_s_main -#define __pyx_n_s_name __pyx_mstate_global->__pyx_n_s_name -#define __pyx_n_s_ngaps __pyx_mstate_global->__pyx_n_s_ngaps -#define __pyx_n_s_nmismatch __pyx_mstate_global->__pyx_n_s_nmismatch -#define __pyx_kp_s_no_default___reduce___due_to_non __pyx_mstate_global->__pyx_kp_s_no_default___reduce___due_to_non -#define __pyx_n_s_orientation __pyx_mstate_global->__pyx_n_s_orientation -#define __pyx_n_s_pctid __pyx_mstate_global->__pyx_n_s_pctid -#define __pyx_n_s_pyx_state __pyx_mstate_global->__pyx_n_s_pyx_state -#define __pyx_n_s_qi __pyx_mstate_global->__pyx_n_s_qi -#define __pyx_n_s_qseqid __pyx_mstate_global->__pyx_n_s_qseqid -#define __pyx_n_s_qstart __pyx_mstate_global->__pyx_n_s_qstart -#define __pyx_n_s_qstop __pyx_mstate_global->__pyx_n_s_qstop -#define __pyx_n_s_query __pyx_mstate_global->__pyx_n_s_query -#define __pyx_n_s_reduce 
__pyx_mstate_global->__pyx_n_s_reduce -#define __pyx_n_s_reduce_cython __pyx_mstate_global->__pyx_n_s_reduce_cython -#define __pyx_n_s_reduce_ex __pyx_mstate_global->__pyx_n_s_reduce_ex -#define __pyx_n_s_richcmp __pyx_mstate_global->__pyx_n_s_richcmp -#define __pyx_n_s_s __pyx_mstate_global->__pyx_n_s_s -#define __pyx_n_s_score __pyx_mstate_global->__pyx_n_s_score -#define __pyx_n_s_self __pyx_mstate_global->__pyx_n_s_self -#define __pyx_n_s_send __pyx_mstate_global->__pyx_n_s_send -#define __pyx_n_s_setstate __pyx_mstate_global->__pyx_n_s_setstate -#define __pyx_n_s_setstate_cython __pyx_mstate_global->__pyx_n_s_setstate_cython -#define __pyx_n_s_si __pyx_mstate_global->__pyx_n_s_si -#define __pyx_n_s_slots __pyx_mstate_global->__pyx_n_s_slots -#define __pyx_n_s_spec __pyx_mstate_global->__pyx_n_s_spec -#define __pyx_kp_s_src_jcvi_formats_cblast_pyx __pyx_mstate_global->__pyx_kp_s_src_jcvi_formats_cblast_pyx -#define __pyx_n_s_sseqid __pyx_mstate_global->__pyx_n_s_sseqid -#define __pyx_n_s_sstart __pyx_mstate_global->__pyx_n_s_sstart -#define __pyx_n_s_sstop __pyx_mstate_global->__pyx_n_s_sstop -#define __pyx_kp_s_stringsource __pyx_mstate_global->__pyx_kp_s_stringsource -#define __pyx_n_s_subject __pyx_mstate_global->__pyx_n_s_subject -#define __pyx_n_s_sys __pyx_mstate_global->__pyx_n_s_sys -#define __pyx_n_s_test __pyx_mstate_global->__pyx_n_s_test -#define __pyx_kp_s_that_comparison_not_implemented __pyx_mstate_global->__pyx_kp_s_that_comparison_not_implemented -#define __pyx_n_s_throw __pyx_mstate_global->__pyx_n_s_throw -#define __pyx_n_s_wrap __pyx_mstate_global->__pyx_n_s_wrap -#define __pyx_int_2 __pyx_mstate_global->__pyx_int_2 -#define __pyx_int_12 __pyx_mstate_global->__pyx_int_12 -#define __pyx_tuple_ __pyx_mstate_global->__pyx_tuple_ -#define __pyx_slice__4 __pyx_mstate_global->__pyx_slice__4 -#define __pyx_tuple__3 __pyx_mstate_global->__pyx_tuple__3 -#define __pyx_tuple__7 __pyx_mstate_global->__pyx_tuple__7 -#define __pyx_tuple__9 
__pyx_mstate_global->__pyx_tuple__9 -#define __pyx_tuple__11 __pyx_mstate_global->__pyx_tuple__11 -#define __pyx_codeobj__2 __pyx_mstate_global->__pyx_codeobj__2 -#define __pyx_codeobj__8 __pyx_mstate_global->__pyx_codeobj__8 -#define __pyx_codeobj__10 __pyx_mstate_global->__pyx_codeobj__10 -#define __pyx_codeobj__12 __pyx_mstate_global->__pyx_codeobj__12 -/* #### Code section: module_code ### */ - -/* "cfunc.to_py":67 - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap(PyObject *__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap, "wrap(query: 'char *', subject: 'char *', pctid: 'float', 
hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'"); -static PyMethodDef __pyx_mdef_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap = {"wrap", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap}; -static PyObject *__pyx_pw_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap(PyObject *__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - char *__pyx_v_query; - char *__pyx_v_subject; - float __pyx_v_pctid; - int __pyx_v_hitlen; - int __pyx_v_nmismatch; - int __pyx_v_ngaps; - int __pyx_v_qstart; - int __pyx_v_qstop; - int __pyx_v_sstart; - int __pyx_v_sstop; - float __pyx_v_evalue; - float __pyx_v_score; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[12] = {0,0,0,0,0,0,0,0,0,0,0,0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("wrap (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if 
(unlikely(__pyx_nargs < 0)) return NULL; - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_query,&__pyx_n_s_subject,&__pyx_n_s_pctid,&__pyx_n_s_hitlen,&__pyx_n_s_nmismatch,&__pyx_n_s_ngaps,&__pyx_n_s_qstart,&__pyx_n_s_qstop,&__pyx_n_s_sstart,&__pyx_n_s_sstop,&__pyx_n_s_evalue,&__pyx_n_s_score,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 12: values[11] = __Pyx_Arg_FASTCALL(__pyx_args, 11); - CYTHON_FALLTHROUGH; - case 11: values[10] = __Pyx_Arg_FASTCALL(__pyx_args, 10); - CYTHON_FALLTHROUGH; - case 10: values[9] = __Pyx_Arg_FASTCALL(__pyx_args, 9); - CYTHON_FALLTHROUGH; - case 9: values[8] = __Pyx_Arg_FASTCALL(__pyx_args, 8); - CYTHON_FALLTHROUGH; - case 8: values[7] = __Pyx_Arg_FASTCALL(__pyx_args, 7); - CYTHON_FALLTHROUGH; - case 7: values[6] = __Pyx_Arg_FASTCALL(__pyx_args, 6); - CYTHON_FALLTHROUGH; - case 6: values[5] = __Pyx_Arg_FASTCALL(__pyx_args, 5); - CYTHON_FALLTHROUGH; - case 5: values[4] = __Pyx_Arg_FASTCALL(__pyx_args, 4); - CYTHON_FALLTHROUGH; - case 4: values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3); - CYTHON_FALLTHROUGH; - case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_query)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_subject)) != 0)) { - 
(void)__Pyx_Arg_NewRef_FASTCALL(values[1]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 1); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 2: - if (likely((values[2] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pctid)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[2]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 2); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 3: - if (likely((values[3] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_hitlen)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[3]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 3); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 4: - if (likely((values[4] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_nmismatch)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[4]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 4); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 5: - if (likely((values[5] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_ngaps)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[5]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 5); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 6: - if (likely((values[6] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_qstart)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[6]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) 
- else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 6); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 7: - if (likely((values[7] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_qstop)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[7]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 7); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 8: - if (likely((values[8] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_sstart)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[8]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 8); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 9: - if (likely((values[9] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_sstop)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[9]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 9); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 10: - if (likely((values[10] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_evalue)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[10]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 10); __PYX_ERR(1, 67, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 11: - if (likely((values[11] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_score)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[11]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, 11); __PYX_ERR(1, 67, __pyx_L3_error) - } - } - if (unlikely(kw_args > 
0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "wrap") < 0)) __PYX_ERR(1, 67, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 12)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - values[3] = __Pyx_Arg_FASTCALL(__pyx_args, 3); - values[4] = __Pyx_Arg_FASTCALL(__pyx_args, 4); - values[5] = __Pyx_Arg_FASTCALL(__pyx_args, 5); - values[6] = __Pyx_Arg_FASTCALL(__pyx_args, 6); - values[7] = __Pyx_Arg_FASTCALL(__pyx_args, 7); - values[8] = __Pyx_Arg_FASTCALL(__pyx_args, 8); - values[9] = __Pyx_Arg_FASTCALL(__pyx_args, 9); - values[10] = __Pyx_Arg_FASTCALL(__pyx_args, 10); - values[11] = __Pyx_Arg_FASTCALL(__pyx_args, 11); - } - __pyx_v_query = __Pyx_PyObject_AsWritableString(values[0]); if (unlikely((!__pyx_v_query) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_subject = __Pyx_PyObject_AsWritableString(values[1]); if (unlikely((!__pyx_v_subject) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_pctid = __pyx_PyFloat_AsFloat(values[2]); if (unlikely((__pyx_v_pctid == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_hitlen = __Pyx_PyInt_As_int(values[3]); if (unlikely((__pyx_v_hitlen == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_nmismatch = __Pyx_PyInt_As_int(values[4]); if (unlikely((__pyx_v_nmismatch == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_ngaps = __Pyx_PyInt_As_int(values[5]); if (unlikely((__pyx_v_ngaps == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_qstart = __Pyx_PyInt_As_int(values[6]); if (unlikely((__pyx_v_qstart == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_qstop = __Pyx_PyInt_As_int(values[7]); if 
(unlikely((__pyx_v_qstop == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_sstart = __Pyx_PyInt_As_int(values[8]); if (unlikely((__pyx_v_sstart == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_sstop = __Pyx_PyInt_As_int(values[9]); if (unlikely((__pyx_v_sstop == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_evalue = __pyx_PyFloat_AsFloat(values[10]); if (unlikely((__pyx_v_evalue == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_v_score = __pyx_PyFloat_AsFloat(values[11]); if (unlikely((__pyx_v_score == (float)-1) && PyErr_Occurred())) __PYX_ERR(1, 67, __pyx_L3_error) - } - goto __pyx_L6_skip; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("wrap", 1, 12, 12, __pyx_nargs); __PYX_ERR(1, 67, __pyx_L3_error) - __pyx_L6_skip:; - goto __pyx_L4_argument_unpacking_done; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc.wrap", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(__pyx_self, __pyx_v_query, __pyx_v_subject, __pyx_v_pctid, __pyx_v_hitlen, __pyx_v_nmismatch, __pyx_v_ngaps, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_score); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); 
- } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_wrap(PyObject *__pyx_self, char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score) { - struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_cur_scope; - struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_outer_scope; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("wrap", 1); - __pyx_outer_scope = (struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *) __Pyx_CyFunction_GetClosure(__pyx_self); - __pyx_cur_scope = __pyx_outer_scope; - - /* "cfunc.to_py":69 - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) # <<<<<<<<<<<<<< - * return wrap - * - */ - 
__Pyx_XDECREF(__pyx_r); - __pyx_t_1 = ((PyObject *)__pyx_cur_scope->__pyx_v_f(__pyx_v_query, __pyx_v_subject, __pyx_v_pctid, __pyx_v_hitlen, __pyx_v_nmismatch, __pyx_v_ngaps, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_score)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 69, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "cfunc.to_py":67 - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc.wrap", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "cfunc.to_py":66 - * - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object 
__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - */ - -static PyObject *__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *(*__pyx_v_f)(char *, char *, float, int, int, int, int, int, int, int, float, float)) { - struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_cur_scope; - PyObject *__pyx_v_wrap = 0; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", 0); - __pyx_cur_scope = (struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc 
*)__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, __pyx_empty_tuple, NULL); - if (unlikely(!__pyx_cur_scope)) { - __pyx_cur_scope = ((struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *)Py_None); - __Pyx_INCREF(Py_None); - __PYX_ERR(1, 66, __pyx_L1_error) - } else { - __Pyx_GOTREF((PyObject *)__pyx_cur_scope); - } - __pyx_cur_scope->__pyx_v_f = __pyx_v_f; - - /* "cfunc.to_py":67 - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) - */ - __pyx_t_1 = __Pyx_CyFunction_New(&__pyx_mdef_11cfunc_dot_to_py_137__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_1wrap, 0, __pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma, 
((PyObject*)__pyx_cur_scope), __pyx_n_s_cfunc_to_py, __pyx_d, ((PyObject *)__pyx_codeobj__2)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 67, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_wrap = __pyx_t_1; - __pyx_t_1 = 0; - - /* "cfunc.to_py":70 - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) - * return wrap # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_wrap); - __pyx_r = __pyx_v_wrap; - goto __pyx_L0; - - /* "cfunc.to_py":66 - * - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): # <<<<<<<<<<<<<< - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("cfunc.to_py.__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_wrap); - __Pyx_DECREF((PyObject *)__pyx_cur_scope); - __Pyx_XGIVEREF(__pyx_r); - 
__Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "carray.from_py":79 - * - * @cname("__Pyx_carray_from_py_char") - * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: # <<<<<<<<<<<<<< - * cdef Py_ssize_t i = length - * try: - */ - -static int __Pyx_carray_from_py_char(PyObject *__pyx_v_o, char *__pyx_v_v, Py_ssize_t __pyx_v_length) { - Py_ssize_t __pyx_v_i; - PyObject *__pyx_v_item = NULL; - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - Py_ssize_t __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - PyObject *__pyx_t_7 = NULL; - Py_ssize_t __pyx_t_8; - PyObject *(*__pyx_t_9)(PyObject *); - PyObject *__pyx_t_10 = NULL; - char __pyx_t_11; - char const *__pyx_t_12; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__Pyx_carray_from_py_char", 1); - - /* "carray.from_py":80 - * @cname("__Pyx_carray_from_py_char") - * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: - * cdef Py_ssize_t i = length # <<<<<<<<<<<<<< - * try: - * i = len(o) - */ - __pyx_v_i = __pyx_v_length; - - /* "carray.from_py":81 - * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: - * cdef Py_ssize_t i = length - * try: # <<<<<<<<<<<<<< - * i = len(o) - * except (TypeError, OverflowError): - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_1); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_3); - /*try:*/ { - - /* "carray.from_py":82 - * cdef Py_ssize_t i = length - * try: - * i = len(o) # <<<<<<<<<<<<<< - * except (TypeError, OverflowError): - * pass - */ - __pyx_t_4 = PyObject_Length(__pyx_v_o); if (unlikely(__pyx_t_4 == ((Py_ssize_t)-1))) __PYX_ERR(1, 82, __pyx_L3_error) - __pyx_v_i = __pyx_t_4; - - /* "carray.from_py":81 - * 
cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: - * cdef Py_ssize_t i = length - * try: # <<<<<<<<<<<<<< - * i = len(o) - * except (TypeError, OverflowError): - */ - } - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - goto __pyx_L8_try_end; - __pyx_L3_error:; - - /* "carray.from_py":83 - * try: - * i = len(o) - * except (TypeError, OverflowError): # <<<<<<<<<<<<<< - * pass - * if i == length: - */ - __pyx_t_5 = __Pyx_PyErr_ExceptionMatches2(__pyx_builtin_TypeError, __pyx_builtin_OverflowError); - if (__pyx_t_5) { - __Pyx_ErrRestore(0,0,0); - goto __pyx_L4_exception_handled; - } - goto __pyx_L5_except_error; - - /* "carray.from_py":81 - * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: - * cdef Py_ssize_t i = length - * try: # <<<<<<<<<<<<<< - * i = len(o) - * except (TypeError, OverflowError): - */ - __pyx_L5_except_error:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - goto __pyx_L1_error; - __pyx_L4_exception_handled:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - __pyx_L8_try_end:; - } - - /* "carray.from_py":85 - * except (TypeError, OverflowError): - * pass - * if i == length: # <<<<<<<<<<<<<< - * for i, item in enumerate(o): - * if i >= length: - */ - __pyx_t_6 = (__pyx_v_i == __pyx_v_length); - if (__pyx_t_6) { - - /* "carray.from_py":86 - * pass - * if i == length: - * for i, item in enumerate(o): # <<<<<<<<<<<<<< - * if i >= length: - * break - */ - __pyx_t_4 = 0; - if (likely(PyList_CheckExact(__pyx_v_o)) || PyTuple_CheckExact(__pyx_v_o)) { - __pyx_t_7 = __pyx_v_o; __Pyx_INCREF(__pyx_t_7); - __pyx_t_8 = 0; - __pyx_t_9 = NULL; - } else { - __pyx_t_8 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_v_o); if 
(unlikely(!__pyx_t_7)) __PYX_ERR(1, 86, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_9 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 86, __pyx_L1_error) - } - for (;;) { - if (likely(!__pyx_t_9)) { - if (likely(PyList_CheckExact(__pyx_t_7))) { - { - Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_7); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(1, 86, __pyx_L1_error) - #endif - if (__pyx_t_8 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_10 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_8); __Pyx_INCREF(__pyx_t_10); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(1, 86, __pyx_L1_error) - #else - __pyx_t_10 = __Pyx_PySequence_ITEM(__pyx_t_7, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 86, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_10); - #endif - } else { - { - Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_7); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(1, 86, __pyx_L1_error) - #endif - if (__pyx_t_8 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_8); __Pyx_INCREF(__pyx_t_10); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(1, 86, __pyx_L1_error) - #else - __pyx_t_10 = __Pyx_PySequence_ITEM(__pyx_t_7, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 86, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_10); - #endif - } - } else { - __pyx_t_10 = __pyx_t_9(__pyx_t_7); - if (unlikely(!__pyx_t_10)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(1, 86, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_10); - } - __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_10); - __pyx_t_10 = 0; - __pyx_v_i = __pyx_t_4; - __pyx_t_4 = (__pyx_t_4 + 1); - - /* "carray.from_py":87 - * if i == 
length: - * for i, item in enumerate(o): - * if i >= length: # <<<<<<<<<<<<<< - * break - * v[i] = item - */ - __pyx_t_6 = (__pyx_v_i >= __pyx_v_length); - if (__pyx_t_6) { - - /* "carray.from_py":88 - * for i, item in enumerate(o): - * if i >= length: - * break # <<<<<<<<<<<<<< - * v[i] = item - * else: - */ - goto __pyx_L11_break; - - /* "carray.from_py":87 - * if i == length: - * for i, item in enumerate(o): - * if i >= length: # <<<<<<<<<<<<<< - * break - * v[i] = item - */ - } - - /* "carray.from_py":89 - * if i >= length: - * break - * v[i] = item # <<<<<<<<<<<<<< - * else: - * i += 1 # convert index to length - */ - __pyx_t_11 = __Pyx_PyInt_As_char(__pyx_v_item); if (unlikely((__pyx_t_11 == (char)-1) && PyErr_Occurred())) __PYX_ERR(1, 89, __pyx_L1_error) - (__pyx_v_v[__pyx_v_i]) = __pyx_t_11; - - /* "carray.from_py":86 - * pass - * if i == length: - * for i, item in enumerate(o): # <<<<<<<<<<<<<< - * if i >= length: - * break - */ - } - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - goto __pyx_L13_for_else; - __pyx_L11_break:; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - goto __pyx_L14_for_end; - /*else*/ { - __pyx_L13_for_else:; - - /* "carray.from_py":91 - * v[i] = item - * else: - * i += 1 # convert index to length # <<<<<<<<<<<<<< - * if i == length: - * return 0 - */ - __pyx_v_i = (__pyx_v_i + 1); - - /* "carray.from_py":92 - * else: - * i += 1 # convert index to length - * if i == length: # <<<<<<<<<<<<<< - * return 0 - * - */ - __pyx_t_6 = (__pyx_v_i == __pyx_v_length); - if (__pyx_t_6) { - - /* "carray.from_py":93 - * i += 1 # convert index to length - * if i == length: - * return 0 # <<<<<<<<<<<<<< - * - * PyErr_Format( - */ - __pyx_r = 0; - goto __pyx_L0; - - /* "carray.from_py":92 - * else: - * i += 1 # convert index to length - * if i == length: # <<<<<<<<<<<<<< - * return 0 - * - */ - } - } - __pyx_L14_for_end:; - - /* "carray.from_py":85 - * except (TypeError, OverflowError): - * pass - * if i == length: # <<<<<<<<<<<<<< - * for i, item in 
enumerate(o): - * if i >= length: - */ - } - - /* "carray.from_py":98 - * IndexError, - * ("too many values found during array assignment, expected %zd" - * if i >= length else # <<<<<<<<<<<<<< - * "not enough values found during array assignment, expected %zd, got %zd"), - * length, i) - */ - __pyx_t_6 = (__pyx_v_i >= __pyx_v_length); - if (__pyx_t_6) { - __pyx_t_12 = ((char const *)"too many values found during array assignment, expected %zd"); - } else { - __pyx_t_12 = ((char const *)"not enough values found during array assignment, expected %zd, got %zd"); - } - - /* "carray.from_py":95 - * return 0 - * - * PyErr_Format( # <<<<<<<<<<<<<< - * IndexError, - * ("too many values found during array assignment, expected %zd" - */ - __pyx_t_7 = PyErr_Format(__pyx_builtin_IndexError, __pyx_t_12, __pyx_v_length, __pyx_v_i); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 95, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "carray.from_py":79 - * - * @cname("__Pyx_carray_from_py_char") - * cdef int __Pyx_carray_from_py_char(object o, base_type *v, Py_ssize_t length) except -1: # <<<<<<<<<<<<<< - * cdef Py_ssize_t i = length - * try: - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_10); - __Pyx_AddTraceback("carray.from_py.__Pyx_carray_from_py_char", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_item); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":26 - * object filename - * - * def __cinit__(self, char* filename): # <<<<<<<<<<<<<< - * self.fh = fopen(filename, 'r') - * self.filename = filename - */ - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(PyObject *__pyx_v_self, PyObject 
*__pyx_args, PyObject *__pyx_kwds) { - char *__pyx_v_filename; - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return -1; - #endif - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_filename,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_filename)) != 0)) { - (void)__Pyx_Arg_NewRef_VARARGS(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 26, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__cinit__") < 0)) __PYX_ERR(0, 26, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - } - __pyx_v_filename = __Pyx_PyObject_AsWritableString(values[0]); if (unlikely((!__pyx_v_filename) && PyErr_Occurred())) __PYX_ERR(0, 26, __pyx_L3_error) - } - goto __pyx_L6_skip; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 26, __pyx_L3_error) - __pyx_L6_skip:; - goto 
__pyx_L4_argument_unpacking_done; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return -1; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self), __pyx_v_filename); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_5Blast___cinit__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, char *__pyx_v_filename) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__cinit__", 1); - - /* "jcvi/formats/cblast.pyx":27 - * - * def __cinit__(self, char* filename): - * self.fh = fopen(filename, 'r') # <<<<<<<<<<<<<< - * self.filename = filename - * - */ - __pyx_v_self->fh = fopen(__pyx_v_filename, ((char const *)"r")); - - /* "jcvi/formats/cblast.pyx":28 - * def __cinit__(self, char* filename): - * self.fh = fopen(filename, 'r') - * self.filename = filename # <<<<<<<<<<<<<< - * - * def __iter__(self): - */ - __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_filename); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 28, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __Pyx_GOTREF(__pyx_v_self->filename); - __Pyx_DECREF(__pyx_v_self->filename); - __pyx_v_self->filename = __pyx_t_1; - __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":26 - * object filename - * - * def 
__cinit__(self, char* filename): # <<<<<<<<<<<<<< - * self.fh = fopen(filename, 'r') - * self.filename = filename - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":30 - * self.filename = filename - * - * def __iter__(self): # <<<<<<<<<<<<<< - * rewind(self.fh) - * return self - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__iter__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_2__iter__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__iter__", 1); - - /* "jcvi/formats/cblast.pyx":31 - * - * def __iter__(self): - * rewind(self.fh) # <<<<<<<<<<<<<< - * return self - * - */ - rewind(__pyx_v_self->fh); - - /* "jcvi/formats/cblast.pyx":32 - * def __iter__(self): - * rewind(self.fh) - * return self # <<<<<<<<<<<<<< - * - * def __next__(self): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_self); - __pyx_r = ((PyObject *)__pyx_v_self); - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":30 - * self.filename = filename - * - * def __iter__(self): # <<<<<<<<<<<<<< - * 
rewind(self.fh) - * return self - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":34 - * return self - * - * def __next__(self): # <<<<<<<<<<<<<< - * cdef: - * float pct = 0.0, evalue = 0.0, bit = 0.0 - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__next__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_4__next__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { - float __pyx_v_pct; - float __pyx_v_evalue; - float __pyx_v_bit; - char __pyx_v_qname[0x80]; - char __pyx_v_sname[0x80]; - int __pyx_v_hlen; - int __pyx_v_nmiss; - int __pyx_v_ngap; - int __pyx_v_qstart; - int __pyx_v_qstop; - int __pyx_v_sstart; - int __pyx_v_sstop; - int __pyx_v_success; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_error_without_exception = 0; /* StopIteration */ - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__next__", 1); - - /* "jcvi/formats/cblast.pyx":36 - * def __next__(self): - * cdef: - * float pct = 0.0, evalue = 0.0, bit = 0.0 # <<<<<<<<<<<<<< - * char qname[128] - * char sname[128] - */ - __pyx_v_pct = 0.0; - __pyx_v_evalue = 0.0; - __pyx_v_bit = 0.0; - - /* "jcvi/formats/cblast.pyx":43 - * int success - * - * 
success = fscanf(self.fh, blast_format_line, qname, sname, \ # <<<<<<<<<<<<<< - * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ - * &sstart, &sstop, &evalue, &bit ) - */ - __pyx_v_success = fscanf(__pyx_v_self->fh, __pyx_v_4jcvi_7formats_6cblast_blast_format_line, __pyx_v_qname, __pyx_v_sname, (&__pyx_v_pct), (&__pyx_v_hlen), (&__pyx_v_nmiss), (&__pyx_v_ngap), (&__pyx_v_qstart), (&__pyx_v_qstop), (&__pyx_v_sstart), (&__pyx_v_sstop), (&__pyx_v_evalue), (&__pyx_v_bit)); - - /* "jcvi/formats/cblast.pyx":46 - * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ - * &sstart, &sstop, &evalue, &bit ) - * if success == EOF: # <<<<<<<<<<<<<< - * raise StopIteration - * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, - */ - __pyx_t_1 = (__pyx_v_success == EOF); - if (unlikely(__pyx_t_1)) { - - /* "jcvi/formats/cblast.pyx":47 - * &sstart, &sstop, &evalue, &bit ) - * if success == EOF: - * raise StopIteration # <<<<<<<<<<<<<< - * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, - * qstart, qstop, sstart, sstop, evalue, bit) - */ - __pyx_error_without_exception = 1; - goto __pyx_L1_error;; - - /* "jcvi/formats/cblast.pyx":46 - * &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ - * &sstart, &sstop, &evalue, &bit ) - * if success == EOF: # <<<<<<<<<<<<<< - * raise StopIteration - * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, - */ - } - - /* "jcvi/formats/cblast.pyx":48 - * if success == EOF: - * raise StopIteration - * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, # <<<<<<<<<<<<<< - * qstart, qstop, sstart, sstop, evalue, bit) - * - */ - __Pyx_XDECREF(__pyx_r); - - /* "jcvi/formats/cblast.pyx":49 - * raise StopIteration - * return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, - * qstart, qstop, sstart, sstop, evalue, bit) # <<<<<<<<<<<<<< - * - * def __dealloc__(self): - */ - __pyx_t_2 = ((PyObject *)__pyx_f_4jcvi_7formats_6cblast_create_blast_line(__pyx_v_qname, __pyx_v_sname, __pyx_v_pct, __pyx_v_hlen, 
__pyx_v_nmiss, __pyx_v_ngap, __pyx_v_qstart, __pyx_v_qstop, __pyx_v_sstart, __pyx_v_sstop, __pyx_v_evalue, __pyx_v_bit)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 48, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":34 - * return self - * - * def __next__(self): # <<<<<<<<<<<<<< - * cdef: - * float pct = 0.0, evalue = 0.0, bit = 0.0 - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - if (!__pyx_error_without_exception) { - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__next__", __pyx_clineno, __pyx_lineno, __pyx_filename); - } - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":51 - * qstart, qstop, sstart, sstop, evalue, bit) - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * fclose(self.fh) - * - */ - -/* Python wrapper */ -static void __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(PyObject *__pyx_v_self); /*proto*/ -static void __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -static void __pyx_pf_4jcvi_7formats_6cblast_5Blast_6__dealloc__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { - - /* "jcvi/formats/cblast.pyx":52 - * - * def __dealloc__(self): - * fclose(self.fh) # <<<<<<<<<<<<<< - * - * def __repr__(self): - */ - (void)(fclose(__pyx_v_self->fh)); - - /* "jcvi/formats/cblast.pyx":51 - * qstart, qstop, sstart, sstop, evalue, bit) - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * fclose(self.fh) - * - */ - - /* 
function exit code */ -} - -/* "jcvi/formats/cblast.pyx":54 - * fclose(self.fh) - * - * def __repr__(self): # <<<<<<<<<<<<<< - * return "Blast('%s')" % (self.filename, ) - * - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_8__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__repr__", 1); - - /* "jcvi/formats/cblast.pyx":55 - * - * def __repr__(self): - * return "Blast('%s')" % (self.filename, ) # <<<<<<<<<<<<<< - * - * # Python 2 and 3 differ in str and unicode handling - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 55, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_self->filename); - __Pyx_GIVEREF(__pyx_v_self->filename); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_self->filename)) __PYX_ERR(0, 55, __pyx_L1_error); - __pyx_t_2 = __Pyx_PyString_Format(__pyx_kp_s_Blast_s, __pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 55, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* 
"jcvi/formats/cblast.pyx":54 - * fclose(self.fh) - * - * def __repr__(self): # <<<<<<<<<<<<<< - * return "Blast('%s')" % (self.filename, ) - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__ = {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; - #endif - #endif - __pyx_kwvalues = 
__Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_10__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce_cython__", 1); - - /* "(tree fragment)":2 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 2, __pyx_L1_error) - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no 
default __reduce__ due to non-trivial __cinit__" - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__ = {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = 
__Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v___pyx_state = values[0]; - } - goto __pyx_L6_skip; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) - __pyx_L6_skip:; - goto __pyx_L4_argument_unpacking_done; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)__pyx_v_self), __pyx_v___pyx_state); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_5Blast_12__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_4jcvi_7formats_6cblast_Blast *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_r = NULL; - 
__Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setstate_cython__", 1); - - /* "(tree fragment)":4 - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 4, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.Blast.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":59 - * # Python 2 and 3 differ in str and unicode handling - * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython - * cdef bytes c_str(str s): # <<<<<<<<<<<<<< - * return s.encode("UTF-8") - * - */ - -static PyObject *__pyx_f_4jcvi_7formats_6cblast_c_str(PyObject *__pyx_v_s) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("c_str", 1); - - /* "jcvi/formats/cblast.pyx":60 - * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython - * cdef bytes c_str(str s): - * return s.encode("UTF-8") # <<<<<<<<<<<<<< - * - * cdef str py_str(bytes s): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PyString_Type_encode, __pyx_v_s, __pyx_kp_s_UTF_8); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(0, 60, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(PyBytes_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None) || __Pyx_RaiseUnexpectedTypeError("bytes", __pyx_t_1))) __PYX_ERR(0, 60, __pyx_L1_error) - __pyx_r = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":59 - * # Python 2 and 3 differ in str and unicode handling - * # https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython - * cdef bytes c_str(str s): # <<<<<<<<<<<<<< - * return s.encode("UTF-8") - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.c_str", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":62 - * return s.encode("UTF-8") - * - * cdef str py_str(bytes s): # <<<<<<<<<<<<<< - * return s.decode("UTF-8", "replace") - * - */ - -static PyObject *__pyx_f_4jcvi_7formats_6cblast_py_str(PyObject *__pyx_v_s) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("py_str", 1); - - /* "jcvi/formats/cblast.pyx":63 - * - * cdef str py_str(bytes s): - * return s.decode("UTF-8", "replace") # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - if (unlikely(__pyx_v_s == Py_None)) { - PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "decode"); - __PYX_ERR(0, 63, __pyx_L1_error) - } - __pyx_t_1 = __Pyx_decode_bytes(__pyx_v_s, 0, PY_SSIZE_T_MAX, NULL, ((char const *)"replace"), PyUnicode_DecodeUTF8); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 63, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(PyString_CheckExact(__pyx_t_1)) || __Pyx_RaiseUnexpectedTypeError("str", __pyx_t_1))) __PYX_ERR(0, 63, __pyx_L1_error) - __pyx_r = 
((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":62 - * return s.encode("UTF-8") - * - * cdef str py_str(bytes s): # <<<<<<<<<<<<<< - * return s.decode("UTF-8", "replace") - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.py_str", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":95 - * - * property query: - * def __get__(self): # <<<<<<<<<<<<<< - * return py_str(self._query) - * def __set__(self, val: str): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - - /* "jcvi/formats/cblast.pyx":96 - * property query: - * def __get__(self): - * return py_str(self._query) # <<<<<<<<<<<<<< - * def __set__(self, val: str): - * strcpy(self._query, c_str(val)) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = 
__Pyx_PyObject_FromString(__pyx_v_self->_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 96, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 96, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":95 - * - * property query: - * def __get__(self): # <<<<<<<<<<<<<< - * return py_str(self._query) - * def __set__(self, val: str): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.query.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":97 - * def __get__(self): - * return py_str(self._query) - * def __set__(self, val: str): # <<<<<<<<<<<<<< - * strcpy(self._query, c_str(val)) - * - */ - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_val), (&PyString_Type), 0, "val", 1))) __PYX_ERR(0, 97, __pyx_L1_error) - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject*)__pyx_v_val)); - - /* function exit code */ - goto __pyx_L0; - 
__pyx_L1_error:; - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - char const *__pyx_t_2; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__set__", 1); - - /* "jcvi/formats/cblast.pyx":98 - * return py_str(self._query) - * def __set__(self, val: str): - * strcpy(self._query, c_str(val)) # <<<<<<<<<<<<<< - * - * property subject: - */ - __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(__pyx_v_val); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 98, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (unlikely(__pyx_t_1 == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(0, 98, __pyx_L1_error) - } - __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_t_1); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 98, __pyx_L1_error) - (void)(strcpy(__pyx_v_self->_query, __pyx_t_2)); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":97 - * def __get__(self): - * return py_str(self._query) - * def __set__(self, val: str): # <<<<<<<<<<<<<< - * strcpy(self._query, c_str(val)) - * - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.query.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":101 - * - * property subject: - * def __get__(self): # <<<<<<<<<<<<<< - * return py_str(self._subject) - * def __set__(self, val: str): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(PyObject *__pyx_v_self); /*proto*/ 
-static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - - /* "jcvi/formats/cblast.pyx":102 - * property subject: - * def __get__(self): - * return py_str(self._subject) # <<<<<<<<<<<<<< - * def __set__(self, val: str): - * strcpy(self._subject, c_str(val)) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 102, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":101 - * - * property subject: - * def __get__(self): # <<<<<<<<<<<<<< - * return py_str(self._subject) - * def __set__(self, val: str): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.subject.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - 
__pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":103 - * def __get__(self): - * return py_str(self._subject) - * def __set__(self, val: str): # <<<<<<<<<<<<<< - * strcpy(self._subject, c_str(val)) - * - */ - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_val) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_val), (&PyString_Type), 0, "val", 1))) __PYX_ERR(0, 103, __pyx_L1_error) - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject*)__pyx_v_val)); - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_val) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - char const *__pyx_t_2; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__set__", 1); - - /* "jcvi/formats/cblast.pyx":104 - * return py_str(self._subject) - * def __set__(self, val: str): - * strcpy(self._subject, c_str(val)) # <<<<<<<<<<<<<< - * - * def __init__(self, s): - */ - __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(__pyx_v_val); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(0, 104, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (unlikely(__pyx_t_1 == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(0, 104, __pyx_L1_error) - } - __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_t_1); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 104, __pyx_L1_error) - (void)(strcpy(__pyx_v_self->_subject, __pyx_t_2)); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":103 - * def __get__(self): - * return py_str(self._subject) - * def __set__(self, val: str): # <<<<<<<<<<<<<< - * strcpy(self._subject, c_str(val)) - * - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.subject.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":106 - * strcpy(self._subject, c_str(val)) - * - * def __init__(self, s): # <<<<<<<<<<<<<< - * sline = c_str(s) - * sscanf(sline, blast_format, self._query, self._subject, - */ - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_s = 0; - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__init__ (wrapper)", 0); - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return -1; - #endif - __pyx_kwvalues = 
__Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_s,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_s)) != 0)) { - (void)__Pyx_Arg_NewRef_VARARGS(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 106, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__init__") < 0)) __PYX_ERR(0, 106, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - } - __pyx_v_s = values[0]; - } - goto __pyx_L6_skip; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 106, __pyx_L3_error) - __pyx_L6_skip:; - goto __pyx_L4_argument_unpacking_done; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return -1; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), __pyx_v_s); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - 
__Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine___init__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_s) { - PyObject *__pyx_v_sline = NULL; - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - char const *__pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__init__", 1); - - /* "jcvi/formats/cblast.pyx":107 - * - * def __init__(self, s): - * sline = c_str(s) # <<<<<<<<<<<<<< - * sscanf(sline, blast_format, self._query, self._subject, - * &self.pctid, &self.hitlen, &self.nmismatch, &self.ngaps, - */ - if (!(likely(PyString_CheckExact(__pyx_v_s))||((__pyx_v_s) == Py_None) || __Pyx_RaiseUnexpectedTypeError("str", __pyx_v_s))) __PYX_ERR(0, 107, __pyx_L1_error) - __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_c_str(((PyObject*)__pyx_v_s)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 107, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_sline = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":108 - * def __init__(self, s): - * sline = c_str(s) - * sscanf(sline, blast_format, self._query, self._subject, # <<<<<<<<<<<<<< - * &self.pctid, &self.hitlen, &self.nmismatch, &self.ngaps, - * &self.qstart, &self.qstop, - */ - if (unlikely(__pyx_v_sline == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(0, 108, __pyx_L1_error) - } - __pyx_t_2 = __Pyx_PyBytes_AsString(__pyx_v_sline); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) __PYX_ERR(0, 108, __pyx_L1_error) - - /* "jcvi/formats/cblast.pyx":112 - * &self.qstart, &self.qstop, - * &self.sstart, &self.sstop, - * &self.evalue, &self.score) # <<<<<<<<<<<<<< - * - * self.orientation = '+' - */ - (void)(sscanf(__pyx_t_2, __pyx_v_4jcvi_7formats_6cblast_blast_format, 
__pyx_v_self->_query, __pyx_v_self->_subject, (&__pyx_v_self->pctid), (&__pyx_v_self->hitlen), (&__pyx_v_self->nmismatch), (&__pyx_v_self->ngaps), (&__pyx_v_self->qstart), (&__pyx_v_self->qstop), (&__pyx_v_self->sstart), (&__pyx_v_self->sstop), (&__pyx_v_self->evalue), (&__pyx_v_self->score))); - - /* "jcvi/formats/cblast.pyx":114 - * &self.evalue, &self.score) - * - * self.orientation = '+' # <<<<<<<<<<<<<< - * if self.qstart > self.qstop: - * self.qstart, self.qstop = self.qstop, self.qstart - */ - __pyx_v_self->orientation = '+'; - - /* "jcvi/formats/cblast.pyx":115 - * - * self.orientation = '+' - * if self.qstart > self.qstop: # <<<<<<<<<<<<<< - * self.qstart, self.qstop = self.qstop, self.qstart - * self.orientation = '-' - */ - __pyx_t_3 = (__pyx_v_self->qstart > __pyx_v_self->qstop); - if (__pyx_t_3) { - - /* "jcvi/formats/cblast.pyx":116 - * self.orientation = '+' - * if self.qstart > self.qstop: - * self.qstart, self.qstop = self.qstop, self.qstart # <<<<<<<<<<<<<< - * self.orientation = '-' - * if self.sstart > self.sstop: - */ - __pyx_t_4 = __pyx_v_self->qstop; - __pyx_t_5 = __pyx_v_self->qstart; - __pyx_v_self->qstart = __pyx_t_4; - __pyx_v_self->qstop = __pyx_t_5; - - /* "jcvi/formats/cblast.pyx":117 - * if self.qstart > self.qstop: - * self.qstart, self.qstop = self.qstop, self.qstart - * self.orientation = '-' # <<<<<<<<<<<<<< - * if self.sstart > self.sstop: - * self.sstart, self.sstop = self.sstop, self.sstart - */ - __pyx_v_self->orientation = '-'; - - /* "jcvi/formats/cblast.pyx":115 - * - * self.orientation = '+' - * if self.qstart > self.qstop: # <<<<<<<<<<<<<< - * self.qstart, self.qstop = self.qstop, self.qstart - * self.orientation = '-' - */ - } - - /* "jcvi/formats/cblast.pyx":118 - * self.qstart, self.qstop = self.qstop, self.qstart - * self.orientation = '-' - * if self.sstart > self.sstop: # <<<<<<<<<<<<<< - * self.sstart, self.sstop = self.sstop, self.sstart - * self.orientation = '-' - */ - __pyx_t_3 = (__pyx_v_self->sstart > 
__pyx_v_self->sstop); - if (__pyx_t_3) { - - /* "jcvi/formats/cblast.pyx":119 - * self.orientation = '-' - * if self.sstart > self.sstop: - * self.sstart, self.sstop = self.sstop, self.sstart # <<<<<<<<<<<<<< - * self.orientation = '-' - * - */ - __pyx_t_5 = __pyx_v_self->sstop; - __pyx_t_4 = __pyx_v_self->sstart; - __pyx_v_self->sstart = __pyx_t_5; - __pyx_v_self->sstop = __pyx_t_4; - - /* "jcvi/formats/cblast.pyx":120 - * if self.sstart > self.sstop: - * self.sstart, self.sstop = self.sstop, self.sstart - * self.orientation = '-' # <<<<<<<<<<<<<< - * - * def __richcmp__(BlastLine self, BlastLine other, size_t op): - */ - __pyx_v_self->orientation = '-'; - - /* "jcvi/formats/cblast.pyx":118 - * self.qstart, self.qstop = self.qstop, self.qstart - * self.orientation = '-' - * if self.sstart > self.sstop: # <<<<<<<<<<<<<< - * self.sstart, self.sstop = self.sstop, self.sstart - * self.orientation = '-' - */ - } - - /* "jcvi/formats/cblast.pyx":106 - * strcpy(self._subject, c_str(val)) - * - * def __init__(self, s): # <<<<<<<<<<<<<< - * sline = c_str(s) - * sscanf(sline, blast_format, self._query, self._subject, - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_sline); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":122 - * self.orientation = '-' - * - * def __richcmp__(BlastLine self, BlastLine other, size_t op): # <<<<<<<<<<<<<< - * if op == 2: # == - * if self.query != other.query and self.qstart != other.qstart: - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__(PyObject *__pyx_v_self, PyObject *__pyx_v_other, int __pyx_arg_op); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_other, int __pyx_arg_op) { - size_t __pyx_v_op; - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__richcmp__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_v_op = __pyx_arg_op; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_other), __pyx_ptype_4jcvi_7formats_6cblast_BlastLine, 1, "other", 0))) __PYX_ERR(0, 122, __pyx_L1_error) - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_other), ((size_t)__pyx_v_op)); - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = NULL; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2__richcmp__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_other, size_t __pyx_v_op) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - int __pyx_t_5; - PyObject *__pyx_t_6 = NULL; - unsigned int __pyx_t_7; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__richcmp__", 1); - - /* "jcvi/formats/cblast.pyx":123 - * - * def __richcmp__(BlastLine self, BlastLine other, size_t op): - * if op == 2: # == # <<<<<<<<<<<<<< - * if self.query != other.query and self.qstart != other.qstart: - * return False - */ - switch (__pyx_v_op) { - case 2: - - /* "jcvi/formats/cblast.pyx":124 - * def __richcmp__(BlastLine self, BlastLine other, size_t op): - * if op == 2: # == - * if self.query != other.query and self.qstart != other.qstart: # 
<<<<<<<<<<<<<< - * return False - * return self.subject == other.subject and \ - */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 124, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_other), __pyx_n_s_query); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 124, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyObject_RichCompare(__pyx_t_2, __pyx_t_3, Py_NE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 124, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 124, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (__pyx_t_5) { - } else { - __pyx_t_1 = __pyx_t_5; - goto __pyx_L4_bool_binop_done; - } - __pyx_t_5 = (__pyx_v_self->qstart != __pyx_v_other->qstart); - __pyx_t_1 = __pyx_t_5; - __pyx_L4_bool_binop_done:; - if (__pyx_t_1) { - - /* "jcvi/formats/cblast.pyx":125 - * if op == 2: # == - * if self.query != other.query and self.qstart != other.qstart: - * return False # <<<<<<<<<<<<<< - * return self.subject == other.subject and \ - * self.qstop == other.qstop and \ - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(Py_False); - __pyx_r = Py_False; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":124 - * def __richcmp__(BlastLine self, BlastLine other, size_t op): - * if op == 2: # == - * if self.query != other.query and self.qstart != other.qstart: # <<<<<<<<<<<<<< - * return False - * return self.subject == other.subject and \ - */ - } - - /* "jcvi/formats/cblast.pyx":126 - * if self.query != other.query and self.qstart != other.qstart: - * return False - * return self.subject == other.subject and \ # <<<<<<<<<<<<<< - * self.qstop == other.qstop and \ - * self.sstop == other.sstop and \ - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = 
__Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 126, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_other), __pyx_n_s_subject); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 126, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_6 = PyObject_RichCompare(__pyx_t_3, __pyx_t_2, Py_EQ); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 126, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 126, __pyx_L1_error) - if (__pyx_t_1) { - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - } else { - __Pyx_INCREF(__pyx_t_6); - __pyx_t_4 = __pyx_t_6; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - goto __pyx_L6_bool_binop_done; - } - - /* "jcvi/formats/cblast.pyx":127 - * return False - * return self.subject == other.subject and \ - * self.qstop == other.qstop and \ # <<<<<<<<<<<<<< - * self.sstop == other.sstop and \ - * self.evalue == other.evalue and \ - */ - __pyx_t_1 = (__pyx_v_self->qstop == __pyx_v_other->qstop); - if (__pyx_t_1) { - } else { - __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 127, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_4 = __pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L6_bool_binop_done; - } - - /* "jcvi/formats/cblast.pyx":128 - * return self.subject == other.subject and \ - * self.qstop == other.qstop and \ - * self.sstop == other.sstop and \ # <<<<<<<<<<<<<< - * self.evalue == other.evalue and \ - * self.hitlen == other.hitlen - */ - __pyx_t_1 = (__pyx_v_self->sstop == __pyx_v_other->sstop); - if (__pyx_t_1) { - } else { - __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 128, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_4 = __pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L6_bool_binop_done; - } - - /* 
"jcvi/formats/cblast.pyx":129 - * self.qstop == other.qstop and \ - * self.sstop == other.sstop and \ - * self.evalue == other.evalue and \ # <<<<<<<<<<<<<< - * self.hitlen == other.hitlen - * - */ - __pyx_t_1 = (__pyx_v_self->evalue == __pyx_v_other->evalue); - if (__pyx_t_1) { - } else { - __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 129, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_4 = __pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L6_bool_binop_done; - } - - /* "jcvi/formats/cblast.pyx":130 - * self.sstop == other.sstop and \ - * self.evalue == other.evalue and \ - * self.hitlen == other.hitlen # <<<<<<<<<<<<<< - * - * elif op == 3: # != - */ - __pyx_t_1 = (__pyx_v_self->hitlen == __pyx_v_other->hitlen); - __pyx_t_6 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 130, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_4 = __pyx_t_6; - __pyx_t_6 = 0; - __pyx_L6_bool_binop_done:; - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":123 - * - * def __richcmp__(BlastLine self, BlastLine other, size_t op): - * if op == 2: # == # <<<<<<<<<<<<<< - * if self.query != other.query and self.qstart != other.qstart: - * return False - */ - break; - case 3: - - /* "jcvi/formats/cblast.pyx":133 - * - * elif op == 3: # != - * return not self.__richcmp__(other, 2) # <<<<<<<<<<<<<< - * else: - * raise Exception("that comparison not implemented") - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_richcmp); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 133, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_2 = NULL; - __pyx_t_7 = 0; - #if CYTHON_UNPACK_METHODS - if (likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_2)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_2); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); - 
__pyx_t_7 = 1; - } - } - #endif - { - PyObject *__pyx_callargs[3] = {__pyx_t_2, ((PyObject *)__pyx_v_other), __pyx_int_2}; - __pyx_t_4 = __Pyx_PyObject_FastCall(__pyx_t_6, __pyx_callargs+1-__pyx_t_7, 2+__pyx_t_7); - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 133, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - } - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 133, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyBool_FromLong((!__pyx_t_1)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 133, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":132 - * self.hitlen == other.hitlen - * - * elif op == 3: # != # <<<<<<<<<<<<<< - * return not self.__richcmp__(other, 2) - * else: - */ - break; - default: - - /* "jcvi/formats/cblast.pyx":135 - * return not self.__richcmp__(other, 2) - * else: - * raise Exception("that comparison not implemented") # <<<<<<<<<<<<<< - * - * def __hash__(self): - */ - __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])), __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 135, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_Raise(__pyx_t_4, 0, 0, 0); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __PYX_ERR(0, 135, __pyx_L1_error) - break; - } - - /* "jcvi/formats/cblast.pyx":122 - * self.orientation = '-' - * - * def __richcmp__(BlastLine self, BlastLine other, size_t op): # <<<<<<<<<<<<<< - * if op == 2: # == - * if self.query != other.query and self.qstart != other.qstart: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__richcmp__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - 
__Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":137 - * raise Exception("that comparison not implemented") - * - * def __hash__(self): # <<<<<<<<<<<<<< - * return id(self) - * - */ - -/* Python wrapper */ -static Py_hash_t __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__(PyObject *__pyx_v_self); /*proto*/ -static Py_hash_t __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - Py_hash_t __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__hash__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static Py_hash_t __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_4__hash__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - Py_hash_t __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - Py_hash_t __pyx_t_2; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__hash__", 1); - - /* "jcvi/formats/cblast.pyx":138 - * - * def __hash__(self): - * return id(self) # <<<<<<<<<<<<<< - * - * def __repr__(self): - */ - __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_builtin_id, ((PyObject *)__pyx_v_self)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 138, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_AsHash_t(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_hash_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 138, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":137 - * raise Exception("that comparison not implemented") - * - * def __hash__(self): # <<<<<<<<<<<<<< - * return id(self) - * - */ - - /* 
function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__hash__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - if (unlikely(__pyx_r == -1) && !PyErr_Occurred()) __pyx_r = -2; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":140 - * return id(self) - * - * def __repr__(self): # <<<<<<<<<<<<<< - * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ - * (self.query, self.subject, self.evalue, self.score) - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6__repr__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__repr__", 1); - - /* "jcvi/formats/cblast.pyx":141 - * - * def __repr__(self): - * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ # <<<<<<<<<<<<<< - * (self.query, self.subject, self.evalue, self.score) - * - */ - __Pyx_XDECREF(__pyx_r); - - /* "jcvi/formats/cblast.pyx":142 - * def __repr__(self): 
- * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ - * (self.query, self.subject, self.evalue, self.score) # <<<<<<<<<<<<<< - * - * def __str__(self): - */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 142, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 142, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 142, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 142, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyTuple_New(4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 142, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_1)) __PYX_ERR(0, 142, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_2)) __PYX_ERR(0, 142, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_3)) __PYX_ERR(0, 142, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_4); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_4)) __PYX_ERR(0, 142, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_2 = 0; - __pyx_t_3 = 0; - __pyx_t_4 = 0; - - /* "jcvi/formats/cblast.pyx":141 - * - * def __repr__(self): - * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ # <<<<<<<<<<<<<< - * (self.query, self.subject, self.evalue, self.score) - * - */ - __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1, __pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 141, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":140 - * return 
id(self) - * - * def __repr__(self): # <<<<<<<<<<<<<< - * return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ - * (self.query, self.subject, self.evalue, self.score) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":144 - * (self.query, self.subject, self.evalue, self.score) - * - * def __str__(self): # <<<<<<<<<<<<<< - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * if self.orientation == '-': - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__str__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8__str__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_v_args = NULL; - char __pyx_v_result[0x200]; - PyObject *__pyx_v_attr = NULL; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - Py_ssize_t __pyx_t_4; - PyObject *(*__pyx_t_5)(PyObject *); - int __pyx_t_6; - int __pyx_lineno = 0; - const char 
*__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__str__", 1); - - /* "jcvi/formats/cblast.pyx":145 - * - * def __str__(self): - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_n_s_slots); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyObject_GetSlice(__pyx_t_2, 0, 12, NULL, NULL, &__pyx_slice__4, 0, 1, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (likely(PyList_CheckExact(__pyx_t_3)) || PyTuple_CheckExact(__pyx_t_3)) { - __pyx_t_2 = __pyx_t_3; __Pyx_INCREF(__pyx_t_2); - __pyx_t_4 = 0; - __pyx_t_5 = NULL; - } else { - __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 145, __pyx_L1_error) - } - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - for (;;) { - if (likely(!__pyx_t_5)) { - if (likely(PyList_CheckExact(__pyx_t_2))) { - { - Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 145, __pyx_L1_error) - #endif - if (__pyx_t_4 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 145, __pyx_L1_error) - #else - __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, 
__pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - } else { - { - Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 145, __pyx_L1_error) - #endif - if (__pyx_t_4 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 145, __pyx_L1_error) - #else - __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - } - } else { - __pyx_t_3 = __pyx_t_5(__pyx_t_2); - if (unlikely(!__pyx_t_3)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 145, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_3); - } - __Pyx_XDECREF_SET(__pyx_v_attr, __pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttr(((PyObject *)__pyx_v_self), __pyx_v_attr); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_3))) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_args = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":146 - * def __str__(self): - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * if self.orientation == '-': # <<<<<<<<<<<<<< - * args[8], args[9] = args[9], args[8] - * - */ - __pyx_t_6 = (__pyx_v_self->orientation == '-'); - if (__pyx_t_6) { - - /* "jcvi/formats/cblast.pyx":147 - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] # <<<<<<<<<<<<<< - * - * cdef char result[512] - */ - 
__pyx_t_1 = PyList_GET_ITEM(__pyx_v_args, 9); - __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = PyList_GET_ITEM(__pyx_v_args, 8); - __Pyx_INCREF(__pyx_t_2); - if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 8, __pyx_t_1, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 147, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 9, __pyx_t_2, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 147, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "jcvi/formats/cblast.pyx":146 - * def __str__(self): - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * if self.orientation == '-': # <<<<<<<<<<<<<< - * args[8], args[9] = args[9], args[8] - * - */ - } - - /* "jcvi/formats/cblast.pyx":150 - * - * cdef char result[512] - * sprintf(result, blast_output, self._query, self._subject, # <<<<<<<<<<<<<< - * self.pctid, self.hitlen, self.nmismatch, self.ngaps, - * self.qstart, self.qstop, - */ - (void)(sprintf(__pyx_v_result, __pyx_v_4jcvi_7formats_6cblast_blast_output, __pyx_v_self->_query, __pyx_v_self->_subject, __pyx_v_self->pctid, __pyx_v_self->hitlen, __pyx_v_self->nmismatch, __pyx_v_self->ngaps, __pyx_v_self->qstart, __pyx_v_self->qstop, __pyx_v_self->sstart, __pyx_v_self->sstop, __pyx_v_self->evalue, __pyx_v_self->score)); - - /* "jcvi/formats/cblast.pyx":156 - * self.evalue, self.score) - * - * return py_str(result) # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __Pyx_PyObject_FromString(__pyx_v_result); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 156, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = __pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_2)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 156, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":144 - * (self.query, self.subject, self.evalue, self.score) - * - 
* def __str__(self): # <<<<<<<<<<<<<< - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * if self.orientation == '-': - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__str__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_args); - __Pyx_XDECREF(__pyx_v_attr); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":158 - * return py_str(result) - * - * @property # <<<<<<<<<<<<<< - * def has_score(self): - * return hasattr(self, "score") - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9has_score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - - /* "jcvi/formats/cblast.pyx":160 - * @property - * def has_score(self): - * return hasattr(self, "score") # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = 
__Pyx_HasAttr(((PyObject *)__pyx_v_self), __pyx_n_s_score); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(0, 160, __pyx_L1_error) - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 160, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":158 - * return py_str(result) - * - * @property # <<<<<<<<<<<<<< - * def has_score(self): - * return hasattr(self, "score") - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.has_score.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":162 - * return hasattr(self, "score") - * - * @property # <<<<<<<<<<<<<< - * def swapped(self): - * """ - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} -static PyObject *__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value); /* proto */ - -/* "jcvi/formats/cblast.pyx":172 - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - * b = "\t".join(str(x) for x in args) # 
<<<<<<<<<<<<<< - * return BlastLine(b) - * - */ - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_genexpr_arg_0) { - struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_cur_scope; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("genexpr", 0); - __pyx_cur_scope = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, __pyx_empty_tuple, NULL); - if (unlikely(!__pyx_cur_scope)) { - __pyx_cur_scope = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)Py_None); - __Pyx_INCREF(Py_None); - __PYX_ERR(0, 172, __pyx_L1_error) - } else { - __Pyx_GOTREF((PyObject *)__pyx_cur_scope); - } - __pyx_cur_scope->__pyx_genexpr_arg_0 = __pyx_genexpr_arg_0; - __Pyx_INCREF(__pyx_cur_scope->__pyx_genexpr_arg_0); - __Pyx_GIVEREF(__pyx_cur_scope->__pyx_genexpr_arg_0); - { - __pyx_CoroutineObject *gen = __Pyx_Generator_New((__pyx_coroutine_body_t) __pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator, NULL, (PyObject *) __pyx_cur_scope, __pyx_n_s_genexpr, __pyx_n_s_BlastLine___get___locals_genexpr, __pyx_n_s_jcvi_formats_cblast); if (unlikely(!gen)) __PYX_ERR(0, 172, __pyx_L1_error) - __Pyx_DECREF(__pyx_cur_scope); - __Pyx_RefNannyFinishContext(); - return (PyObject *) gen; - } - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.swapped.__get__.genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_DECREF((PyObject *)__pyx_cur_scope); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject 
*__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value) /* generator body */ -{ - struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_cur_scope = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)__pyx_generator->closure); - PyObject *__pyx_r = NULL; - PyObject *__pyx_t_1 = NULL; - Py_ssize_t __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("genexpr", 0); - switch (__pyx_generator->resume_label) { - case 0: goto __pyx_L3_first_run; - case 1: goto __pyx_L6_resume_from_yield; - default: /* CPython raises the right error here */ - __Pyx_RefNannyFinishContext(); - return NULL; - } - __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 172, __pyx_L1_error) - if (unlikely(!__pyx_cur_scope->__pyx_genexpr_arg_0)) { __Pyx_RaiseUnboundLocalError(".0"); __PYX_ERR(0, 172, __pyx_L1_error) } - __pyx_t_1 = __pyx_cur_scope->__pyx_genexpr_arg_0; __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = 0; - for (;;) { - { - Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_1); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 172, __pyx_L1_error) - #endif - if (__pyx_t_2 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely((0 < 0))) __PYX_ERR(0, 172, __pyx_L1_error) - #else - __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 172, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_x); - __Pyx_XDECREF_SET(__pyx_cur_scope->__pyx_v_x, __pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = 
__Pyx_PyObject_Str(__pyx_cur_scope->__pyx_v_x); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 172, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - __Pyx_XGIVEREF(__pyx_t_1); - __pyx_cur_scope->__pyx_t_0 = __pyx_t_1; - __pyx_cur_scope->__pyx_t_1 = __pyx_t_2; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - /* return from generator, yielding value */ - __pyx_generator->resume_label = 1; - return __pyx_r; - __pyx_L6_resume_from_yield:; - __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; - __pyx_cur_scope->__pyx_t_0 = 0; - __Pyx_XGOTREF(__pyx_t_1); - __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 172, __pyx_L1_error) - } - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - CYTHON_MAYBE_UNUSED_VAR(__pyx_cur_scope); - - /* function exit code */ - PyErr_SetNone(PyExc_StopIteration); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("genexpr", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_L0:; - __Pyx_XDECREF(__pyx_r); __pyx_r = 0; - #if !CYTHON_USE_EXC_INFO_STACK - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - #endif - __pyx_generator->resume_label = -1; - __Pyx_Coroutine_clear((PyObject*)__pyx_generator); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":162 - * return hasattr(self, "score") - * - * @property # <<<<<<<<<<<<<< - * def swapped(self): - * """ - */ - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_v_args = NULL; - PyObject *__pyx_v_b = NULL; - PyObject *__pyx_v_attr = NULL; - PyObject *__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator = 0; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; 
- Py_ssize_t __pyx_t_4; - PyObject *(*__pyx_t_5)(PyObject *); - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - int __pyx_t_8; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - - /* "jcvi/formats/cblast.pyx":167 - * Swap query and subject. - * """ - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< - * args[0:2] = [self.subject, self.query] - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_n_s_slots); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyObject_GetSlice(__pyx_t_2, 0, 12, NULL, NULL, &__pyx_slice__4, 0, 1, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (likely(PyList_CheckExact(__pyx_t_3)) || PyTuple_CheckExact(__pyx_t_3)) { - __pyx_t_2 = __pyx_t_3; __Pyx_INCREF(__pyx_t_2); - __pyx_t_4 = 0; - __pyx_t_5 = NULL; - } else { - __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 167, __pyx_L1_error) - } - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - for (;;) { - if (likely(!__pyx_t_5)) { - if (likely(PyList_CheckExact(__pyx_t_2))) { - { - Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 167, __pyx_L1_error) - #endif - if (__pyx_t_4 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); 
__Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 167, __pyx_L1_error) - #else - __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - } else { - { - Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2); - #if !CYTHON_ASSUME_SAFE_MACROS - if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 167, __pyx_L1_error) - #endif - if (__pyx_t_4 >= __pyx_temp) break; - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(0, 167, __pyx_L1_error) - #else - __pyx_t_3 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - } - } else { - __pyx_t_3 = __pyx_t_5(__pyx_t_2); - if (unlikely(!__pyx_t_3)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 167, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_3); - } - __Pyx_XDECREF_SET(__pyx_v_attr, __pyx_t_3); - __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetAttr(((PyObject *)__pyx_v_self), __pyx_v_attr); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_3))) __PYX_ERR(0, 167, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_args = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":168 - * """ - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * args[0:2] = [self.subject, self.query] # <<<<<<<<<<<<<< - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - * if self.orientation == '-': - */ - __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 168, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyList_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 168, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 0, __pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyList_SET_ITEM(__pyx_t_3, 1, __pyx_t_2)) __PYX_ERR(0, 168, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_2 = 0; - if (__Pyx_PyObject_SetSlice(__pyx_v_args, __pyx_t_3, 0, 2, NULL, NULL, NULL, 1, 1, 0) < 0) __PYX_ERR(0, 168, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "jcvi/formats/cblast.pyx":169 - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - * args[0:2] = [self.subject, self.query] - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] # <<<<<<<<<<<<<< - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - */ - __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = PyList_New(4); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 0, __pyx_t_3)) __PYX_ERR(0, 169, 
__pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 1, __pyx_t_2)) __PYX_ERR(0, 169, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 2, __pyx_t_1)) __PYX_ERR(0, 169, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_6); - if (__Pyx_PyList_SET_ITEM(__pyx_t_7, 3, __pyx_t_6)) __PYX_ERR(0, 169, __pyx_L1_error); - __pyx_t_3 = 0; - __pyx_t_2 = 0; - __pyx_t_1 = 0; - __pyx_t_6 = 0; - if (__Pyx_PyObject_SetSlice(__pyx_v_args, __pyx_t_7, 6, 10, NULL, NULL, NULL, 1, 1, 0) < 0) __PYX_ERR(0, 169, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "jcvi/formats/cblast.pyx":170 - * args[0:2] = [self.subject, self.query] - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - * if self.orientation == '-': # <<<<<<<<<<<<<< - * args[8], args[9] = args[9], args[8] - * b = "\t".join(str(x) for x in args) - */ - __pyx_t_8 = (__pyx_v_self->orientation == '-'); - if (__pyx_t_8) { - - /* "jcvi/formats/cblast.pyx":171 - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] # <<<<<<<<<<<<<< - * b = "\t".join(str(x) for x in args) - * return BlastLine(b) - */ - __pyx_t_7 = PyList_GET_ITEM(__pyx_v_args, 9); - __Pyx_INCREF(__pyx_t_7); - __pyx_t_6 = PyList_GET_ITEM(__pyx_v_args, 8); - __Pyx_INCREF(__pyx_t_6); - if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 8, __pyx_t_7, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 171, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (unlikely((__Pyx_SetItemInt(__pyx_v_args, 9, __pyx_t_6, long, 1, __Pyx_PyInt_From_long, 1, 0, 0) < 0))) __PYX_ERR(0, 171, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - - /* "jcvi/formats/cblast.pyx":170 - * args[0:2] = [self.subject, self.query] - * args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - * if self.orientation == '-': # <<<<<<<<<<<<<< - * args[8], args[9] = args[9], args[8] - * b = 
"\t".join(str(x) for x in args) - */ - } - - /* "jcvi/formats/cblast.pyx":172 - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - * b = "\t".join(str(x) for x in args) # <<<<<<<<<<<<<< - * return BlastLine(b) - * - */ - __pyx_t_6 = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___genexpr(NULL, __pyx_v_args); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 172, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyString_Join(__pyx_kp_s__5, __pyx_t_6); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 172, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_v_b = ((PyObject*)__pyx_t_7); - __pyx_t_7 = 0; - - /* "jcvi/formats/cblast.pyx":173 - * args[8], args[9] = args[9], args[8] - * b = "\t".join(str(x) for x in args) - * return BlastLine(b) # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyObject_CallOneArg(((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_v_b); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 173, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_r = __pyx_t_7; - __pyx_t_7 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":162 - * return hasattr(self, "score") - * - * @property # <<<<<<<<<<<<<< - * def swapped(self): - * """ - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.swapped.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_args); - __Pyx_XDECREF(__pyx_v_b); - __Pyx_XDECREF(__pyx_v_attr); - __Pyx_XDECREF(__pyx_gb_4jcvi_7formats_6cblast_9BlastLine_7swapped_7__get___2generator); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":175 - * return BlastLine(b) - * - * @property # <<<<<<<<<<<<<< - * def bedline(self): - * 
cdef char result[512] - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_7bedline___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - char __pyx_v_result[0x200]; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - - /* "jcvi/formats/cblast.pyx":178 - * def bedline(self): - * cdef char result[512] - * sprintf(result, bed_output, # <<<<<<<<<<<<<< - * self._subject, self.sstart - 1, self.sstop, - * self._query, self.qstart, self.qstop, - */ - (void)(sprintf(__pyx_v_result, __pyx_v_4jcvi_7formats_6cblast_bed_output, __pyx_v_self->_subject, (__pyx_v_self->sstart - 1), __pyx_v_self->sstop, __pyx_v_self->_query, __pyx_v_self->qstart, __pyx_v_self->qstop, __pyx_v_self->score, __pyx_v_self->orientation)); - - /* "jcvi/formats/cblast.pyx":183 - * self.score, self.orientation) - * - * return py_str(result) # <<<<<<<<<<<<<< - * - * def __reduce__(self): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_result); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 183, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = 
__pyx_f_4jcvi_7formats_6cblast_py_str(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 183, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":175 - * return BlastLine(b) - * - * @property # <<<<<<<<<<<<<< - * def bedline(self): - * cdef char result[512] - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.bedline.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":185 - * return py_str(result) - * - * def __reduce__(self): # <<<<<<<<<<<<<< - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyMethodDef __pyx_mdef_4jcvi_7formats_6cblast_9BlastLine_11__reduce__ = {"__reduce__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce__ (wrapper)", 0); - 
#if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce__", 0))) return NULL; - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_10__reduce__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - PyObject *__pyx_t_9 = NULL; - PyObject *__pyx_t_10 = NULL; - PyObject *__pyx_t_11 = NULL; - PyObject *__pyx_t_12 = NULL; - PyObject *__pyx_t_13 = NULL; - PyObject *__pyx_t_14 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce__", 1); - - /* "jcvi/formats/cblast.pyx":186 - * - * def __reduce__(self): - * return create_blast_line, ( # <<<<<<<<<<<<<< - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = 
__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(__pyx_f_4jcvi_7formats_6cblast_create_blast_line); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 186, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - - /* "jcvi/formats/cblast.pyx":187 - * def __reduce__(self): - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, # <<<<<<<<<<<<<< - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, - * self.evalue, self.score) - */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_query); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_subject); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyFloat_FromDouble(__pyx_v_self->pctid); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_self->hitlen); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_self->nmismatch); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - - /* "jcvi/formats/cblast.pyx":188 - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, # <<<<<<<<<<<<<< - * self.evalue, self.score) - * - */ - __pyx_t_7 = __Pyx_PyInt_From_int(__pyx_v_self->ngaps); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 188, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 188, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_9 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 188, 
__pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 188, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_11 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 188, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_11); - - /* "jcvi/formats/cblast.pyx":189 - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, - * self.evalue, self.score) # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_12 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 189, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_12); - __pyx_t_13 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 189, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - - /* "jcvi/formats/cblast.pyx":187 - * def __reduce__(self): - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, # <<<<<<<<<<<<<< - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, - * self.evalue, self.score) - */ - __pyx_t_14 = PyTuple_New(12); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 187, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_t_2)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 1, __pyx_t_3)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_4); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 2, __pyx_t_4)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_5); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 3, __pyx_t_5)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_6); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 4, __pyx_t_6)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_7); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 5, __pyx_t_7)) __PYX_ERR(0, 187, __pyx_L1_error); - 
__Pyx_GIVEREF(__pyx_t_8); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 6, __pyx_t_8)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_9); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 7, __pyx_t_9)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_10); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 8, __pyx_t_10)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_11); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 9, __pyx_t_11)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_12); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 10, __pyx_t_12)) __PYX_ERR(0, 187, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_13); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_14, 11, __pyx_t_13)) __PYX_ERR(0, 187, __pyx_L1_error); - __pyx_t_2 = 0; - __pyx_t_3 = 0; - __pyx_t_4 = 0; - __pyx_t_5 = 0; - __pyx_t_6 = 0; - __pyx_t_7 = 0; - __pyx_t_8 = 0; - __pyx_t_9 = 0; - __pyx_t_10 = 0; - __pyx_t_11 = 0; - __pyx_t_12 = 0; - __pyx_t_13 = 0; - - /* "jcvi/formats/cblast.pyx":186 - * - * def __reduce__(self): - * return create_blast_line, ( # <<<<<<<<<<<<<< - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - * self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, - */ - __pyx_t_13 = PyTuple_New(2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 186, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1)) __PYX_ERR(0, 186, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_14); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_t_14)) __PYX_ERR(0, 186, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_14 = 0; - __pyx_r = __pyx_t_13; - __pyx_t_13 = 0; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":185 - * return py_str(result) - * - * def __reduce__(self): # <<<<<<<<<<<<<< - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - 
__Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_XDECREF(__pyx_t_9); - __Pyx_XDECREF(__pyx_t_10); - __Pyx_XDECREF(__pyx_t_11); - __Pyx_XDECREF(__pyx_t_12); - __Pyx_XDECREF(__pyx_t_13); - __Pyx_XDECREF(__pyx_t_14); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.__reduce__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":85 - * - * cdef public: - * char _query[128] # <<<<<<<<<<<<<< - * char _subject[128] - * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 85, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto 
__pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._query.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6_query_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - char __pyx_t_1[0x80]; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - if (unlikely((__Pyx_carray_from_py_char(__pyx_v_value, __pyx_t_1, 0x80) < 0))) __PYX_ERR(0, 85, __pyx_L1_error) - if (unlikely((0x80) != (0x80))) { - PyErr_Format(PyExc_ValueError, "Assignment to slice of wrong length, expected %" CYTHON_FORMAT_SSIZE_T "d, got %" CYTHON_FORMAT_SSIZE_T "d", (Py_ssize_t)(0x80), (Py_ssize_t)(0x80)); - __PYX_ERR(0, 85, __pyx_L1_error) - } - memcpy(&(__pyx_v_self->_query[0]), __pyx_t_1, sizeof(__pyx_v_self->_query[0]) * (0x80)); - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._query.__set__", __pyx_clineno, __pyx_lineno, 
__pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":86 - * cdef public: - * char _query[128] - * char _subject[128] # <<<<<<<<<<<<<< - * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop - * float pctid, score - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_FromString(__pyx_v_self->_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 86, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._subject.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(PyObject *__pyx_v_self, 
PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_8_subject_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - char __pyx_t_1[0x80]; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - if (unlikely((__Pyx_carray_from_py_char(__pyx_v_value, __pyx_t_1, 0x80) < 0))) __PYX_ERR(0, 86, __pyx_L1_error) - if (unlikely((0x80) != (0x80))) { - PyErr_Format(PyExc_ValueError, "Assignment to slice of wrong length, expected %" CYTHON_FORMAT_SSIZE_T "d, got %" CYTHON_FORMAT_SSIZE_T "d", (Py_ssize_t)(0x80), (Py_ssize_t)(0x80)); - __PYX_ERR(0, 86, __pyx_L1_error) - } - memcpy(&(__pyx_v_self->_subject[0]), __pyx_t_1, sizeof(__pyx_v_self->_subject[0]) * (0x80)); - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine._subject.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":87 - * char _query[128] - * char _subject[128] - * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop # <<<<<<<<<<<<<< - * float pctid, score - * double evalue - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject 
*__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->hitlen); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.hitlen.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = 
__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6hitlen_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->hitlen = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.hitlen.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - 
__Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->nmismatch); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.nmismatch.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->nmismatch = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.nmismatch.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); 
- __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->ngaps); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.ngaps.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - 
__Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5ngaps_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->ngaps = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.ngaps.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - 
int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstart.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->qstart = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - 
__Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstart.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qstop); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstop.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5qstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->qstop = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qstop.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart___get__(struct 
__pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->sstart); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstart.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sstart_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - 
__pyx_v_self->sstart = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstart.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->sstop); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstop.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(PyObject *__pyx_v_self, PyObject 
*__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5sstop_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 87, __pyx_L1_error) - __pyx_v_self->sstop = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.sstop.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":88 - * char _subject[128] - * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop - * float pctid, score # <<<<<<<<<<<<<< - * double evalue - * object qseqid, sseqid - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - 
__pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->pctid); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 88, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.pctid.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5pctid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, 
PyObject *__pyx_v_value) { - int __pyx_r; - float __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __pyx_PyFloat_AsFloat(__pyx_v_value); if (unlikely((__pyx_t_1 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 88, __pyx_L1_error) - __pyx_v_self->pctid = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.pctid.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->score); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 88, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.score.__get__", 
__pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_5score_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - float __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __pyx_PyFloat_AsFloat(__pyx_v_value); if (unlikely((__pyx_t_1 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 88, __pyx_L1_error) - __pyx_v_self->score = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.score.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":89 - * int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop - * float pctid, score - * double evalue # <<<<<<<<<<<<<< - * object qseqid, sseqid - * int qi, si - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject 
*__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->evalue); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 89, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.evalue.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = 
__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6evalue_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - double __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __pyx_PyFloat_AsDouble(__pyx_v_value); if (unlikely((__pyx_t_1 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 89, __pyx_L1_error) - __pyx_v_self->evalue = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.evalue.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":90 - * float pctid, score - * double evalue - * object qseqid, sseqid # <<<<<<<<<<<<<< - * int qi, si - * char orientation - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - 
__Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_self->qseqid); - __pyx_r = __pyx_v_self->qseqid; - goto __pyx_L0; - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__", 1); - __Pyx_INCREF(__pyx_v_value); - __Pyx_GIVEREF(__pyx_v_value); - __Pyx_GOTREF(__pyx_v_self->qseqid); - __Pyx_DECREF(__pyx_v_self->qseqid); - __pyx_v_self->qseqid = __pyx_v_value; - - /* function exit code */ - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(PyObject *__pyx_v_self); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__del__ (wrapper)", 0); - __pyx_kwvalues = 
__Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6qseqid_4__del__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__del__", 1); - __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(Py_None); - __Pyx_GOTREF(__pyx_v_self->qseqid); - __Pyx_DECREF(__pyx_v_self->qseqid); - __pyx_v_self->qseqid = Py_None; - - /* function exit code */ - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_self->sseqid); - __pyx_r = __pyx_v_self->sseqid; - goto __pyx_L0; - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__", 1); - __Pyx_INCREF(__pyx_v_value); - __Pyx_GIVEREF(__pyx_v_value); - __Pyx_GOTREF(__pyx_v_self->sseqid); - __Pyx_DECREF(__pyx_v_self->sseqid); - __pyx_v_self->sseqid = __pyx_v_value; - - /* function exit code */ - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(PyObject *__pyx_v_self); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__del__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_6sseqid_4__del__(struct 
__pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__del__", 1); - __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(Py_None); - __Pyx_GOTREF(__pyx_v_self->sseqid); - __Pyx_DECREF(__pyx_v_self->sseqid); - __pyx_v_self->sseqid = Py_None; - - /* function exit code */ - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":91 - * double evalue - * object qseqid, sseqid - * int qi, si # <<<<<<<<<<<<<< - * char orientation - * - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->qi); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 91, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qi.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - 
__pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2qi_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 91, __pyx_L1_error) - __pyx_v_self->qi = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.qi.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - 
__pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->si); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 91, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.si.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_2si_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - 
int __pyx_r; - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_int(__pyx_v_value); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 91, __pyx_L1_error) - __pyx_v_self->si = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.si.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":92 - * object qseqid, sseqid - * int qi, si - * char orientation # <<<<<<<<<<<<<< - * - * property query: - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation___get__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 1); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_char(__pyx_v_self->orientation); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 92, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function 
exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.orientation.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* Python wrapper */ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_v_self), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_4jcvi_7formats_6cblast_9BlastLine_11orientation_2__set__(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_self, PyObject *__pyx_v_value) { - int __pyx_r; - char __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __pyx_t_1 = __Pyx_PyInt_As_char(__pyx_v_value); if (unlikely((__pyx_t_1 == (char)-1) && PyErr_Occurred())) __PYX_ERR(0, 92, __pyx_L1_error) - __pyx_v_self->orientation = __pyx_t_1; - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("jcvi.formats.cblast.BlastLine.orientation.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - return __pyx_r; -} - -/* "jcvi/formats/cblast.pyx":192 - * - * - * cdef BlastLine create_blast_line(char *query, char *subject, float pctid, int hitlen, # <<<<<<<<<<<<<< - * int nmismatch, int ngaps, int qstart, int qstop, - * int 
sstart, int sstop, float evalue, float score): - */ - -static struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_f_4jcvi_7formats_6cblast_create_blast_line(char *__pyx_v_query, char *__pyx_v_subject, float __pyx_v_pctid, int __pyx_v_hitlen, int __pyx_v_nmismatch, int __pyx_v_ngaps, int __pyx_v_qstart, int __pyx_v_qstop, int __pyx_v_sstart, int __pyx_v_sstop, float __pyx_v_evalue, float __pyx_v_score) { - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_v_b = 0; - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("create_blast_line", 1); - - /* "jcvi/formats/cblast.pyx":197 - * """ Factory method. - * """ - * cdef BlastLine b = BlastLine.__new__(BlastLine) # <<<<<<<<<<<<<< - * b.query = query - * b.subject = subject - */ - __pyx_t_1 = ((PyObject *)__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(((PyTypeObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine), __pyx_empty_tuple, NULL)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error) - __Pyx_GOTREF((PyObject *)__pyx_t_1); - __pyx_v_b = ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":198 - * """ - * cdef BlastLine b = BlastLine.__new__(BlastLine) - * b.query = query # <<<<<<<<<<<<<< - * b.subject = subject - * b.pctid = pctid - */ - __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_query); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 198, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (__Pyx_PyObject_SetAttrStr(((PyObject *)__pyx_v_b), __pyx_n_s_query, __pyx_t_1) < 0) __PYX_ERR(0, 198, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":199 - * cdef BlastLine b = BlastLine.__new__(BlastLine) - * b.query = query - * b.subject = subject # <<<<<<<<<<<<<< - * b.pctid = pctid - * b.hitlen = hitlen - */ - __pyx_t_1 = 
__Pyx_PyBytes_FromString(__pyx_v_subject); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 199, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (__Pyx_PyObject_SetAttrStr(((PyObject *)__pyx_v_b), __pyx_n_s_subject, __pyx_t_1) < 0) __PYX_ERR(0, 199, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "jcvi/formats/cblast.pyx":200 - * b.query = query - * b.subject = subject - * b.pctid = pctid # <<<<<<<<<<<<<< - * b.hitlen = hitlen - * b.nmismatch = nmismatch - */ - __pyx_v_b->pctid = __pyx_v_pctid; - - /* "jcvi/formats/cblast.pyx":201 - * b.subject = subject - * b.pctid = pctid - * b.hitlen = hitlen # <<<<<<<<<<<<<< - * b.nmismatch = nmismatch - * b.ngaps = ngaps - */ - __pyx_v_b->hitlen = __pyx_v_hitlen; - - /* "jcvi/formats/cblast.pyx":202 - * b.pctid = pctid - * b.hitlen = hitlen - * b.nmismatch = nmismatch # <<<<<<<<<<<<<< - * b.ngaps = ngaps - * b.qstart = qstart - */ - __pyx_v_b->nmismatch = __pyx_v_nmismatch; - - /* "jcvi/formats/cblast.pyx":203 - * b.hitlen = hitlen - * b.nmismatch = nmismatch - * b.ngaps = ngaps # <<<<<<<<<<<<<< - * b.qstart = qstart - * b.qstop = qstop - */ - __pyx_v_b->ngaps = __pyx_v_ngaps; - - /* "jcvi/formats/cblast.pyx":204 - * b.nmismatch = nmismatch - * b.ngaps = ngaps - * b.qstart = qstart # <<<<<<<<<<<<<< - * b.qstop = qstop - * b.sstart = sstart - */ - __pyx_v_b->qstart = __pyx_v_qstart; - - /* "jcvi/formats/cblast.pyx":205 - * b.ngaps = ngaps - * b.qstart = qstart - * b.qstop = qstop # <<<<<<<<<<<<<< - * b.sstart = sstart - * b.sstop = sstop - */ - __pyx_v_b->qstop = __pyx_v_qstop; - - /* "jcvi/formats/cblast.pyx":206 - * b.qstart = qstart - * b.qstop = qstop - * b.sstart = sstart # <<<<<<<<<<<<<< - * b.sstop = sstop - * b.evalue = evalue - */ - __pyx_v_b->sstart = __pyx_v_sstart; - - /* "jcvi/formats/cblast.pyx":207 - * b.qstop = qstop - * b.sstart = sstart - * b.sstop = sstop # <<<<<<<<<<<<<< - * b.evalue = evalue - * b.score = score - */ - __pyx_v_b->sstop = __pyx_v_sstop; - - /* "jcvi/formats/cblast.pyx":208 - * b.sstart 
= sstart - * b.sstop = sstop - * b.evalue = evalue # <<<<<<<<<<<<<< - * b.score = score - * return b - */ - __pyx_v_b->evalue = __pyx_v_evalue; - - /* "jcvi/formats/cblast.pyx":209 - * b.sstop = sstop - * b.evalue = evalue - * b.score = score # <<<<<<<<<<<<<< - * return b - */ - __pyx_v_b->score = __pyx_v_score; - - /* "jcvi/formats/cblast.pyx":210 - * b.evalue = evalue - * b.score = score - * return b # <<<<<<<<<<<<<< - */ - __Pyx_XDECREF((PyObject *)__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_b); - __pyx_r = __pyx_v_b; - goto __pyx_L0; - - /* "jcvi/formats/cblast.pyx":192 - * - * - * cdef BlastLine create_blast_line(char *query, char *subject, float pctid, int hitlen, # <<<<<<<<<<<<<< - * int nmismatch, int ngaps, int qstart, int qstop, - * int sstart, int sstop, float evalue, float score): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("jcvi.formats.cblast.create_blast_line", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_b); - __Pyx_XGIVEREF((PyObject *)__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_Blast(PyTypeObject *t, PyObject *a, PyObject *k) { - struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { - o = (*t->tp_alloc)(t, 0); - } else { - o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); - } - if (unlikely(!o)) return 0; - #endif - p = ((struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o); - p->filename = Py_None; Py_INCREF(Py_None); - if (unlikely(__pyx_pw_4jcvi_7formats_6cblast_5Blast_1__cinit__(o, a, k) < 0)) goto bad; - return o; - bad: - Py_DECREF(o); o = 0; - return NULL; -} - -static void 
__pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast(PyObject *o) { - struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - { - PyObject *etype, *eval, *etb; - PyErr_Fetch(&etype, &eval, &etb); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1); - __pyx_pw_4jcvi_7formats_6cblast_5Blast_7__dealloc__(o); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1); - PyErr_Restore(etype, eval, etb); - } - Py_CLEAR(p->filename); - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif -} - -static int __pyx_tp_traverse_4jcvi_7formats_6cblast_Blast(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; - if (p->filename) { - e = (*v)(p->filename, a); if (e) return e; - } - return 0; -} - -static int __pyx_tp_clear_4jcvi_7formats_6cblast_Blast(PyObject *o) { - PyObject* tmp; - struct __pyx_obj_4jcvi_7formats_6cblast_Blast *p = (struct __pyx_obj_4jcvi_7formats_6cblast_Blast *)o; - tmp = ((PyObject*)p->filename); - p->filename = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - return 0; -} - -static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(PyObject *self, CYTHON_UNUSED PyObject *arg) { - PyObject *res = __pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__(self); - if (!res && !PyErr_Occurred()) { PyErr_SetNone(PyExc_StopIteration); } - return res; -} 
-static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(PyObject *self, CYTHON_UNUSED PyObject *arg) { - return __pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__(self); -} - -static PyMethodDef __pyx_methods_4jcvi_7formats_6cblast_Blast[] = { - {"__next__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__, METH_NOARGS|METH_COEXIST, 0}, - {"__repr__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__, METH_NOARGS|METH_COEXIST, 0}, - {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_4jcvi_7formats_6cblast_Blast_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast}, - {Py_tp_repr, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast_Blast}, - {Py_tp_clear, (void *)__pyx_tp_clear_4jcvi_7formats_6cblast_Blast}, - {Py_tp_iter, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__}, - {Py_tp_iternext, (void *)__pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__}, - {Py_tp_methods, (void *)__pyx_methods_4jcvi_7formats_6cblast_Blast}, - {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast_Blast}, - {0, 0}, -}; -static PyType_Spec __pyx_type_4jcvi_7formats_6cblast_Blast_spec = { - "jcvi.formats.cblast.Blast", - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_Blast), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, - __pyx_type_4jcvi_7formats_6cblast_Blast_slots, -}; -#else - -static PyTypeObject 
__pyx_type_4jcvi_7formats_6cblast_Blast = { - PyVarObject_HEAD_INIT(0, 0) - "jcvi.formats.cblast.""Blast", /*tp_name*/ - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_Blast), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_4jcvi_7formats_6cblast_Blast, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - __pyx_pw_4jcvi_7formats_6cblast_5Blast_9__repr__, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_4jcvi_7formats_6cblast_Blast, /*tp_traverse*/ - __pyx_tp_clear_4jcvi_7formats_6cblast_Blast, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - __pyx_pw_4jcvi_7formats_6cblast_5Blast_3__iter__, /*tp_iter*/ - __pyx_pw_4jcvi_7formats_6cblast_5Blast_5__next__, /*tp_iternext*/ - __pyx_methods_4jcvi_7formats_6cblast_Blast, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_4jcvi_7formats_6cblast_Blast, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || 
PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if PY_VERSION_HEX >= 0x030d00A4 - 0, /*tp_versions_used*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { - o = (*t->tp_alloc)(t, 0); - } else { - o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); - } - if (unlikely(!o)) return 0; - #endif - p = ((struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o); - p->qseqid = Py_None; Py_INCREF(Py_None); - p->sseqid = Py_None; Py_INCREF(Py_None); - return o; -} - -static void __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine(PyObject *o) { - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - Py_CLEAR(p->qseqid); - Py_CLEAR(p->sseqid); - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) 
tp_free(o); - } - #endif -} - -static int __pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; - if (p->qseqid) { - e = (*v)(p->qseqid, a); if (e) return e; - } - if (p->sseqid) { - e = (*v)(p->sseqid, a); if (e) return e; - } - return 0; -} - -static int __pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine(PyObject *o) { - PyObject* tmp; - struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *p = (struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine *)o; - tmp = ((PyObject*)p->qseqid); - p->qseqid = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->sseqid); - p->sseqid = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - return 0; -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_query(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_query(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5query_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_subject(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_subject(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7subject_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_has_score(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9has_score_1__get__(o); -} - -static PyObject 
*__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_swapped(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7swapped_1__get__(o); -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_bedline(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7bedline_1__get__(o); -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__query(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__query(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6_query_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__subject(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__subject(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_8_subject_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_hitlen(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_hitlen(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6hitlen_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch(PyObject *o, CYTHON_UNUSED void *x) { - return 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9nmismatch_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_ngaps(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_ngaps(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5ngaps_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstart(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstart(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qstart_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstop(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstop(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5qstop_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstart(PyObject *o, CYTHON_UNUSED void *x) { - return 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstart(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sstart_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstop(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstop(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5sstop_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_pctid(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_pctid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5pctid_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_score(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_score(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5score_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_evalue(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_1__get__(o); -} 
- -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_evalue(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6evalue_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qseqid(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qseqid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_3__set__(o, v); - } - else { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6qseqid_5__del__(o); - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sseqid(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sseqid(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_3__set__(o, v); - } - else { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_6sseqid_5__del__(o); - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qi(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qi(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2qi_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_si(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_si(PyObject 
*o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_2si_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_orientation(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_1__get__(o); -} - -static int __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_orientation(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11orientation_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyObject *__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(PyObject *self, CYTHON_UNUSED PyObject *arg) { - return __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__(self); -} - -static PyMethodDef __pyx_methods_4jcvi_7formats_6cblast_BlastLine[] = { - {"__repr__", (PyCFunction)__pyx_specialmethod___pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__, METH_NOARGS|METH_COEXIST, 0}, - {"__reduce__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {0, 0, 0, 0} -}; - -static struct PyGetSetDef __pyx_getsets_4jcvi_7formats_6cblast_BlastLine[] = { - {(char *)"query", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_query, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_query, (char *)0, 0}, - {(char *)"subject", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_subject, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_subject, (char *)0, 0}, - {(char *)"has_score", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_has_score, 0, (char *)0, 0}, - {(char *)"swapped", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_swapped, 0, (char *)PyDoc_STR("\n Swap query and subject.\n "), 0}, - {(char *)"bedline", 
__pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_bedline, 0, (char *)0, 0}, - {(char *)"_query", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__query, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__query, (char *)0, 0}, - {(char *)"_subject", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine__subject, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine__subject, (char *)0, 0}, - {(char *)"hitlen", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_hitlen, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_hitlen, (char *)0, 0}, - {(char *)"nmismatch", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_nmismatch, (char *)0, 0}, - {(char *)"ngaps", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_ngaps, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_ngaps, (char *)0, 0}, - {(char *)"qstart", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstart, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstart, (char *)0, 0}, - {(char *)"qstop", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qstop, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qstop, (char *)0, 0}, - {(char *)"sstart", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstart, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstart, (char *)0, 0}, - {(char *)"sstop", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sstop, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sstop, (char *)0, 0}, - {(char *)"pctid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_pctid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_pctid, (char *)0, 0}, - {(char *)"score", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_score, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_score, (char *)0, 0}, - {(char *)"evalue", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_evalue, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_evalue, (char *)0, 0}, - {(char *)"qseqid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qseqid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qseqid, (char *)0, 0}, 
- {(char *)"sseqid", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_sseqid, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_sseqid, (char *)0, 0}, - {(char *)"qi", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_qi, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_qi, (char *)0, 0}, - {(char *)"si", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_si, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_si, (char *)0, 0}, - {(char *)"orientation", __pyx_getprop_4jcvi_7formats_6cblast_9BlastLine_orientation, __pyx_setprop_4jcvi_7formats_6cblast_9BlastLine_orientation, (char *)0, 0}, - {0, 0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_4jcvi_7formats_6cblast_BlastLine_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine}, - {Py_tp_repr, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__}, - {Py_tp_hash, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__}, - {Py_tp_str, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__}, - {Py_tp_doc, (void *)PyDoc_STR("\n Given a string of tab-delimited (-m 8) blast output, parse it and create\n an object with the usual attrs:\n\n >>> b = BlastLine(\"Os09g11510\tOs08g13650\t92.31\t39\t3\t0\t2273\t2311\t3237\t3199\t0.001\t54.0\")\n >>> b.query\n 'Os09g11510'\n >>> attrs = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', ... 
'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score')\n >>> [getattr(b, attr) for attr in attrs] # doctest: +ELLIPSIS\n ['Os09g11510', 'Os08g13650', 92.3..., 39, 3, 0, 2273, 2311, 3237, 3199, 0.001..., 54.0]\n ")}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine}, - {Py_tp_clear, (void *)__pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine}, - {Py_tp_richcompare, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__}, - {Py_tp_methods, (void *)__pyx_methods_4jcvi_7formats_6cblast_BlastLine}, - {Py_tp_getset, (void *)__pyx_getsets_4jcvi_7formats_6cblast_BlastLine}, - {Py_tp_init, (void *)__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__}, - {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast_BlastLine}, - {0, 0}, -}; -static PyType_Spec __pyx_type_4jcvi_7formats_6cblast_BlastLine_spec = { - "jcvi.formats.cblast.BlastLine", - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, - __pyx_type_4jcvi_7formats_6cblast_BlastLine_slots, -}; -#else - -static PyTypeObject __pyx_type_4jcvi_7formats_6cblast_BlastLine = { - PyVarObject_HEAD_INIT(0, 0) - "jcvi.formats.cblast.""BlastLine", /*tp_name*/ - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast_BlastLine), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_4jcvi_7formats_6cblast_BlastLine, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_7__repr__, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_5__hash__, /*tp_hash*/ - 0, /*tp_call*/ - 
__pyx_pw_4jcvi_7formats_6cblast_9BlastLine_9__str__, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - PyDoc_STR("\n Given a string of tab-delimited (-m 8) blast output, parse it and create\n an object with the usual attrs:\n\n >>> b = BlastLine(\"Os09g11510\tOs08g13650\t92.31\t39\t3\t0\t2273\t2311\t3237\t3199\t0.001\t54.0\")\n >>> b.query\n 'Os09g11510'\n >>> attrs = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', ... 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score')\n >>> [getattr(b, attr) for attr in attrs] # doctest: +ELLIPSIS\n ['Os09g11510', 'Os08g13650', 92.3..., 39, 3, 0, 2273, 2311, 3237, 3199, 0.001..., 54.0]\n "), /*tp_doc*/ - __pyx_tp_traverse_4jcvi_7formats_6cblast_BlastLine, /*tp_traverse*/ - __pyx_tp_clear_4jcvi_7formats_6cblast_BlastLine, /*tp_clear*/ - __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_3__richcmp__, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods_4jcvi_7formats_6cblast_BlastLine, /*tp_methods*/ - 0, /*tp_members*/ - __pyx_getsets_4jcvi_7formats_6cblast_BlastLine, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - __pyx_pw_4jcvi_7formats_6cblast_9BlastLine_1__init__, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_4jcvi_7formats_6cblast_BlastLine, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if 
__PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if PY_VERSION_HEX >= 0x030d00A4 - 0, /*tp_versions_used*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -#if CYTHON_USE_FREELISTS -static struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *__pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[8]; -static int __pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = 0; -#endif - -static PyObject *__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - #if CYTHON_USE_FREELISTS - if (likely((int)(__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr > 0) & (int)(t->tp_basicsize == sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)))) { - o = (PyObject*)__pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[--__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr]; - memset(o, 0, sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)); - (void) PyObject_INIT(o, t); - PyObject_GC_Track(o); - } else - #endif - { - o = (*t->tp_alloc)(t, 0); - if (unlikely(!o)) return 0; - } - #endif - return o; -} - -static void __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyObject *o) { - struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *p = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && 
!__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - Py_CLEAR(p->__pyx_genexpr_arg_0); - Py_CLEAR(p->__pyx_v_x); - Py_CLEAR(p->__pyx_t_0); - #if CYTHON_USE_FREELISTS - if (((int)(__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)))) { - __pyx_freelist_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr[__pyx_freecount_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr++] = ((struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o); - } else - #endif - { - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif - } -} - -static int __pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *p = (struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr *)o; - if (p->__pyx_genexpr_arg_0) { - e = (*v)(p->__pyx_genexpr_arg_0, a); if (e) return e; - } - if (p->__pyx_v_x) { - e = (*v)(p->__pyx_v_x, a); if (e) return e; - } - if (p->__pyx_t_0) { - e = (*v)(p->__pyx_t_0, a); if (e) return e; - } - return 0; -} -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, - {Py_tp_new, (void *)__pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr}, - {0, 0}, -}; -static PyType_Spec 
__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec = { - "jcvi.formats.cblast.__pyx_scope_struct__genexpr", - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, - __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_slots, -}; -#else - -static PyTypeObject __pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = { - PyVarObject_HEAD_INIT(0, 0) - "jcvi.formats.cblast.""__pyx_scope_struct__genexpr", /*tp_name*/ - sizeof(struct __pyx_obj_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - 0, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr, /*tp_new*/ - 0, 
/*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if PY_VERSION_HEX >= 0x030d00A4 - 0, /*tp_versions_used*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -#if CYTHON_USE_FREELISTS -static struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *__pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[8]; -static int __pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = 0; -#endif - -static PyObject *__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - #if CYTHON_USE_FREELISTS - if (likely((int)(__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc > 0) & 
(int)(t->tp_basicsize == sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)))) { - o = (PyObject*)__pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[--__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc]; - memset(o, 0, sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)); - (void) PyObject_INIT(o, t); - } else - #endif - { - o = (*t->tp_alloc)(t, 0); - if (unlikely(!o)) return 0; - } - #endif - return o; -} - -static void __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(PyObject *o) { - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && (!PyType_IS_GC(Py_TYPE(o)) || !__Pyx_PyObject_GC_IsFinalized(o))) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - #if CYTHON_USE_FREELISTS - if (((int)(__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct 
__pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)))) { - __pyx_freelist___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc[__pyx_freecount___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc++] = ((struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc *)o); - } else - #endif - { - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif - } -} -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc}, - {Py_tp_new, (void *)__pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc}, - {0, 0}, -}; -static PyType_Spec __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec = { - "jcvi.formats.cblast.__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", - sizeof(struct 
__pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_FINALIZE, - __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_slots, -}; -#else - -static PyTypeObject __pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = { - PyVarObject_HEAD_INIT(0, 0) - "jcvi.formats.cblast.""__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc", /*tp_name*/ - sizeof(struct __pyx_obj___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ - 0, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, 
/*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - 0, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if PY_VERSION_HEX >= 0x030d00A4 - 0, /*tp_versions_used*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static PyMethodDef __pyx_methods[] = { - {0, 0, 0, 0} -}; -#ifndef CYTHON_SMALL_CODE -#if defined(__clang__) - #define CYTHON_SMALL_CODE -#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) - #define CYTHON_SMALL_CODE __attribute__((cold)) -#else - #define CYTHON_SMALL_CODE -#endif -#endif -/* #### Code section: pystring_table ### */ - -static int __Pyx_CreateStringTabAndInitStrings(void) { - __Pyx_StringTabEntry __pyx_string_tab[] = { - {&__pyx_n_s_Blast, __pyx_k_Blast, sizeof(__pyx_k_Blast), 0, 0, 1, 1}, - {&__pyx_n_s_BlastLine, __pyx_k_BlastLine, sizeof(__pyx_k_BlastLine), 0, 0, 1, 1}, - {&__pyx_n_s_BlastLine___get___locals_genexpr, __pyx_k_BlastLine___get___locals_genexpr, 
sizeof(__pyx_k_BlastLine___get___locals_genexpr), 0, 0, 1, 1}, - {&__pyx_n_s_BlastLine___reduce, __pyx_k_BlastLine___reduce, sizeof(__pyx_k_BlastLine___reduce), 0, 0, 1, 1}, - {&__pyx_kp_s_BlastLine_s_to_s_eval_3f_score_1, __pyx_k_BlastLine_s_to_s_eval_3f_score_1, sizeof(__pyx_k_BlastLine_s_to_s_eval_3f_score_1), 0, 0, 1, 0}, - {&__pyx_n_s_Blast___reduce_cython, __pyx_k_Blast___reduce_cython, sizeof(__pyx_k_Blast___reduce_cython), 0, 0, 1, 1}, - {&__pyx_n_s_Blast___setstate_cython, __pyx_k_Blast___setstate_cython, sizeof(__pyx_k_Blast___setstate_cython), 0, 0, 1, 1}, - {&__pyx_kp_s_Blast_s, __pyx_k_Blast_s, sizeof(__pyx_k_Blast_s), 0, 0, 1, 0}, - {&__pyx_n_s_IndexError, __pyx_k_IndexError, sizeof(__pyx_k_IndexError), 0, 0, 1, 1}, - {&__pyx_n_s_OverflowError, __pyx_k_OverflowError, sizeof(__pyx_k_OverflowError), 0, 0, 1, 1}, - {&__pyx_n_s_Pyx_CFunc_b7d994__4jcvi_7forma, __pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma, sizeof(__pyx_k_Pyx_CFunc_b7d994__4jcvi_7forma), 0, 0, 1, 1}, - {&__pyx_n_s_StopIteration, __pyx_k_StopIteration, sizeof(__pyx_k_StopIteration), 0, 0, 1, 1}, - {&__pyx_n_s_TypeError, __pyx_k_TypeError, sizeof(__pyx_k_TypeError), 0, 0, 1, 1}, - {&__pyx_kp_s_UTF_8, __pyx_k_UTF_8, sizeof(__pyx_k_UTF_8), 0, 0, 1, 0}, - {&__pyx_n_s__13, __pyx_k__13, sizeof(__pyx_k__13), 0, 0, 1, 1}, - {&__pyx_kp_s__5, __pyx_k__5, sizeof(__pyx_k__5), 0, 0, 1, 0}, - {&__pyx_n_s__6, __pyx_k__6, sizeof(__pyx_k__6), 0, 0, 1, 1}, - {&__pyx_n_s_args, __pyx_k_args, sizeof(__pyx_k_args), 0, 0, 1, 1}, - {&__pyx_n_s_asyncio_coroutines, __pyx_k_asyncio_coroutines, sizeof(__pyx_k_asyncio_coroutines), 0, 0, 1, 1}, - {&__pyx_n_s_cfunc_to_py, __pyx_k_cfunc_to_py, sizeof(__pyx_k_cfunc_to_py), 0, 0, 1, 1}, - {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, - {&__pyx_n_s_close, __pyx_k_close, sizeof(__pyx_k_close), 0, 0, 1, 1}, - {&__pyx_kp_u_disable, __pyx_k_disable, sizeof(__pyx_k_disable), 0, 1, 0, 0}, - {&__pyx_kp_u_enable, __pyx_k_enable, 
sizeof(__pyx_k_enable), 0, 1, 0, 0}, - {&__pyx_n_s_encode, __pyx_k_encode, sizeof(__pyx_k_encode), 0, 0, 1, 1}, - {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, - {&__pyx_n_s_evalue, __pyx_k_evalue, sizeof(__pyx_k_evalue), 0, 0, 1, 1}, - {&__pyx_n_s_filename, __pyx_k_filename, sizeof(__pyx_k_filename), 0, 0, 1, 1}, - {&__pyx_kp_u_gc, __pyx_k_gc, sizeof(__pyx_k_gc), 0, 1, 0, 0}, - {&__pyx_n_s_genexpr, __pyx_k_genexpr, sizeof(__pyx_k_genexpr), 0, 0, 1, 1}, - {&__pyx_n_s_getstate, __pyx_k_getstate, sizeof(__pyx_k_getstate), 0, 0, 1, 1}, - {&__pyx_n_s_hitlen, __pyx_k_hitlen, sizeof(__pyx_k_hitlen), 0, 0, 1, 1}, - {&__pyx_n_s_id, __pyx_k_id, sizeof(__pyx_k_id), 0, 0, 1, 1}, - {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, - {&__pyx_n_s_initializing, __pyx_k_initializing, sizeof(__pyx_k_initializing), 0, 0, 1, 1}, - {&__pyx_n_s_is_coroutine, __pyx_k_is_coroutine, sizeof(__pyx_k_is_coroutine), 0, 0, 1, 1}, - {&__pyx_kp_u_isenabled, __pyx_k_isenabled, sizeof(__pyx_k_isenabled), 0, 1, 0, 0}, - {&__pyx_n_s_jcvi_formats_cblast, __pyx_k_jcvi_formats_cblast, sizeof(__pyx_k_jcvi_formats_cblast), 0, 0, 1, 1}, - {&__pyx_n_s_join, __pyx_k_join, sizeof(__pyx_k_join), 0, 0, 1, 1}, - {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, - {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, - {&__pyx_n_s_ngaps, __pyx_k_ngaps, sizeof(__pyx_k_ngaps), 0, 0, 1, 1}, - {&__pyx_n_s_nmismatch, __pyx_k_nmismatch, sizeof(__pyx_k_nmismatch), 0, 0, 1, 1}, - {&__pyx_kp_s_no_default___reduce___due_to_non, __pyx_k_no_default___reduce___due_to_non, sizeof(__pyx_k_no_default___reduce___due_to_non), 0, 0, 1, 0}, - {&__pyx_n_s_orientation, __pyx_k_orientation, sizeof(__pyx_k_orientation), 0, 0, 1, 1}, - {&__pyx_n_s_pctid, __pyx_k_pctid, sizeof(__pyx_k_pctid), 0, 0, 1, 1}, - {&__pyx_n_s_pyx_state, __pyx_k_pyx_state, sizeof(__pyx_k_pyx_state), 0, 0, 1, 1}, - {&__pyx_n_s_qi, __pyx_k_qi, sizeof(__pyx_k_qi), 0, 0, 1, 1}, 
- {&__pyx_n_s_qseqid, __pyx_k_qseqid, sizeof(__pyx_k_qseqid), 0, 0, 1, 1}, - {&__pyx_n_s_qstart, __pyx_k_qstart, sizeof(__pyx_k_qstart), 0, 0, 1, 1}, - {&__pyx_n_s_qstop, __pyx_k_qstop, sizeof(__pyx_k_qstop), 0, 0, 1, 1}, - {&__pyx_n_s_query, __pyx_k_query, sizeof(__pyx_k_query), 0, 0, 1, 1}, - {&__pyx_n_s_reduce, __pyx_k_reduce, sizeof(__pyx_k_reduce), 0, 0, 1, 1}, - {&__pyx_n_s_reduce_cython, __pyx_k_reduce_cython, sizeof(__pyx_k_reduce_cython), 0, 0, 1, 1}, - {&__pyx_n_s_reduce_ex, __pyx_k_reduce_ex, sizeof(__pyx_k_reduce_ex), 0, 0, 1, 1}, - {&__pyx_n_s_richcmp, __pyx_k_richcmp, sizeof(__pyx_k_richcmp), 0, 0, 1, 1}, - {&__pyx_n_s_s, __pyx_k_s, sizeof(__pyx_k_s), 0, 0, 1, 1}, - {&__pyx_n_s_score, __pyx_k_score, sizeof(__pyx_k_score), 0, 0, 1, 1}, - {&__pyx_n_s_self, __pyx_k_self, sizeof(__pyx_k_self), 0, 0, 1, 1}, - {&__pyx_n_s_send, __pyx_k_send, sizeof(__pyx_k_send), 0, 0, 1, 1}, - {&__pyx_n_s_setstate, __pyx_k_setstate, sizeof(__pyx_k_setstate), 0, 0, 1, 1}, - {&__pyx_n_s_setstate_cython, __pyx_k_setstate_cython, sizeof(__pyx_k_setstate_cython), 0, 0, 1, 1}, - {&__pyx_n_s_si, __pyx_k_si, sizeof(__pyx_k_si), 0, 0, 1, 1}, - {&__pyx_n_s_slots, __pyx_k_slots, sizeof(__pyx_k_slots), 0, 0, 1, 1}, - {&__pyx_n_s_spec, __pyx_k_spec, sizeof(__pyx_k_spec), 0, 0, 1, 1}, - {&__pyx_kp_s_src_jcvi_formats_cblast_pyx, __pyx_k_src_jcvi_formats_cblast_pyx, sizeof(__pyx_k_src_jcvi_formats_cblast_pyx), 0, 0, 1, 0}, - {&__pyx_n_s_sseqid, __pyx_k_sseqid, sizeof(__pyx_k_sseqid), 0, 0, 1, 1}, - {&__pyx_n_s_sstart, __pyx_k_sstart, sizeof(__pyx_k_sstart), 0, 0, 1, 1}, - {&__pyx_n_s_sstop, __pyx_k_sstop, sizeof(__pyx_k_sstop), 0, 0, 1, 1}, - {&__pyx_kp_s_stringsource, __pyx_k_stringsource, sizeof(__pyx_k_stringsource), 0, 0, 1, 0}, - {&__pyx_n_s_subject, __pyx_k_subject, sizeof(__pyx_k_subject), 0, 0, 1, 1}, - {&__pyx_n_s_sys, __pyx_k_sys, sizeof(__pyx_k_sys), 0, 0, 1, 1}, - {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, - {&__pyx_kp_s_that_comparison_not_implemented, 
__pyx_k_that_comparison_not_implemented, sizeof(__pyx_k_that_comparison_not_implemented), 0, 0, 1, 0}, - {&__pyx_n_s_throw, __pyx_k_throw, sizeof(__pyx_k_throw), 0, 0, 1, 1}, - {&__pyx_n_s_wrap, __pyx_k_wrap, sizeof(__pyx_k_wrap), 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 0, 0} - }; - return __Pyx_InitStrings(__pyx_string_tab); -} -/* #### Code section: cached_builtins ### */ -static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_StopIteration = __Pyx_GetBuiltinName(__pyx_n_s_StopIteration); if (!__pyx_builtin_StopIteration) __PYX_ERR(0, 47, __pyx_L1_error) - __pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) __PYX_ERR(1, 2, __pyx_L1_error) - __pyx_builtin_id = __Pyx_GetBuiltinName(__pyx_n_s_id); if (!__pyx_builtin_id) __PYX_ERR(0, 138, __pyx_L1_error) - __pyx_builtin_OverflowError = __Pyx_GetBuiltinName(__pyx_n_s_OverflowError); if (!__pyx_builtin_OverflowError) __PYX_ERR(1, 83, __pyx_L1_error) - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(1, 86, __pyx_L1_error) - __pyx_builtin_IndexError = __Pyx_GetBuiltinName(__pyx_n_s_IndexError); if (!__pyx_builtin_IndexError) __PYX_ERR(1, 96, __pyx_L1_error) - return 0; - __pyx_L1_error:; - return -1; -} -/* #### Code section: cached_constants ### */ - -static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - - /* "cfunc.to_py":67 - * @cname("__Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc") - * cdef object __Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc(BlastLine (*f)(char *, char *, float, int, int, int, int, int, int, int, float, float) ): - * def wrap(char * query, char * subject, float pctid, int hitlen, int nmismatch, int 
ngaps, int qstart, int qstop, int sstart, int sstop, float evalue, float score): # <<<<<<<<<<<<<< - * """wrap(query: 'char *', subject: 'char *', pctid: 'float', hitlen: 'int', nmismatch: 'int', ngaps: 'int', qstart: 'int', qstop: 'int', sstart: 'int', sstop: 'int', evalue: 'float', score: 'float') -> 'BlastLine'""" - * return f(query, subject, pctid, hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop, evalue, score) - */ - __pyx_tuple_ = PyTuple_Pack(12, __pyx_n_s_query, __pyx_n_s_subject, __pyx_n_s_pctid, __pyx_n_s_hitlen, __pyx_n_s_nmismatch, __pyx_n_s_ngaps, __pyx_n_s_qstart, __pyx_n_s_qstop, __pyx_n_s_sstart, __pyx_n_s_sstop, __pyx_n_s_evalue, __pyx_n_s_score); if (unlikely(!__pyx_tuple_)) __PYX_ERR(1, 67, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple_); - __Pyx_GIVEREF(__pyx_tuple_); - __pyx_codeobj__2 = (PyObject*)__Pyx_PyCode_New(12, 0, 0, 12, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple_, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_wrap, 67, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__2)) __PYX_ERR(1, 67, __pyx_L1_error) - - /* "jcvi/formats/cblast.pyx":135 - * return not self.__richcmp__(other, 2) - * else: - * raise Exception("that comparison not implemented") # <<<<<<<<<<<<<< - * - * def __hash__(self): - */ - __pyx_tuple__3 = PyTuple_Pack(1, __pyx_kp_s_that_comparison_not_implemented); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 135, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__3); - __Pyx_GIVEREF(__pyx_tuple__3); - - /* "jcvi/formats/cblast.pyx":145 - * - * def __str__(self): - * args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] # <<<<<<<<<<<<<< - * if self.orientation == '-': - * args[8], args[9] = args[9], args[8] - */ - __pyx_slice__4 = PySlice_New(Py_None, __pyx_int_12, Py_None); if (unlikely(!__pyx_slice__4)) __PYX_ERR(0, 145, __pyx_L1_error) - __Pyx_GOTREF(__pyx_slice__4); - __Pyx_GIVEREF(__pyx_slice__4); - - /* "(tree fragment)":1 - * def 
__reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - __pyx_tuple__7 = PyTuple_Pack(1, __pyx_n_s_self); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__7); - __Pyx_GIVEREF(__pyx_tuple__7); - __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_reduce_cython, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(1, 1, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __pyx_tuple__9 = PyTuple_Pack(2, __pyx_n_s_self, __pyx_n_s_pyx_state); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__9); - __Pyx_GIVEREF(__pyx_tuple__9); - __pyx_codeobj__10 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__9, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_setstate_cython, 3, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__10)) __PYX_ERR(1, 3, __pyx_L1_error) - - /* "jcvi/formats/cblast.pyx":80 - * """ - * - * __slots__ = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ # <<<<<<<<<<<<<< - * 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score', \ - * 'qseqid', 'sseqid', 'qi', 'si', 'orientation') - */ - __pyx_tuple__11 = PyTuple_Pack(17, __pyx_n_s_query, __pyx_n_s_subject, __pyx_n_s_pctid, __pyx_n_s_hitlen, __pyx_n_s_nmismatch, __pyx_n_s_ngaps, __pyx_n_s_qstart, __pyx_n_s_qstop, __pyx_n_s_sstart, __pyx_n_s_sstop, 
__pyx_n_s_evalue, __pyx_n_s_score, __pyx_n_s_qseqid, __pyx_n_s_sseqid, __pyx_n_s_qi, __pyx_n_s_si, __pyx_n_s_orientation); if (unlikely(!__pyx_tuple__11)) __PYX_ERR(0, 80, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__11); - __Pyx_GIVEREF(__pyx_tuple__11); - - /* "jcvi/formats/cblast.pyx":185 - * return py_str(result) - * - * def __reduce__(self): # <<<<<<<<<<<<<< - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - */ - __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_jcvi_formats_cblast_pyx, __pyx_n_s_reduce, 185, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) __PYX_ERR(0, 185, __pyx_L1_error) - __Pyx_RefNannyFinishContext(); - return 0; - __pyx_L1_error:; - __Pyx_RefNannyFinishContext(); - return -1; -} -/* #### Code section: init_constants ### */ - -static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) { - __pyx_umethod_PyString_Type_encode.type = (PyObject*)&PyString_Type; - __pyx_umethod_PyString_Type_encode.method_name = &__pyx_n_s_encode; - if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error); - __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_int_12 = PyInt_FromLong(12); if (unlikely(!__pyx_int_12)) __PYX_ERR(0, 1, __pyx_L1_error) - return 0; - __pyx_L1_error:; - return -1; -} -/* #### Code section: init_globals ### */ - -static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { - return 0; -} -/* #### Code section: init_module ### */ - -static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ -static 
CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ - -static int __Pyx_modinit_global_init_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); - /*--- Global init code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_variable_export_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); - /*--- Variable export code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_function_export_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); - /*--- Function export code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_type_init_code(void) { - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); - /*--- Type init code ---*/ - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_4jcvi_7formats_6cblast_Blast = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast_Blast_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast_Blast)) __PYX_ERR(0, 21, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast_Blast_spec, __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) - #else - __pyx_ptype_4jcvi_7formats_6cblast_Blast = &__pyx_type_4jcvi_7formats_6cblast_Blast; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - 
__pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_4jcvi_7formats_6cblast_Blast->tp_getattro = __Pyx_PyObject_GenericGetAttr; - } - #endif - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_Blast, (PyObject *) __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_setup_reduce((PyObject *) __pyx_ptype_4jcvi_7formats_6cblast_Blast) < 0) __PYX_ERR(0, 21, __pyx_L1_error) - #endif - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_4jcvi_7formats_6cblast_BlastLine = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast_BlastLine_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast_BlastLine)) __PYX_ERR(0, 66, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast_BlastLine_spec, __pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) - #else - __pyx_ptype_4jcvi_7formats_6cblast_BlastLine = &__pyx_type_4jcvi_7formats_6cblast_BlastLine; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_4jcvi_7formats_6cblast_BlastLine->tp_getattro = __Pyx_PyObject_GenericGetAttr; - } - #endif - if (PyObject_SetAttr(__pyx_m, 
__pyx_n_s_BlastLine, (PyObject *) __pyx_ptype_4jcvi_7formats_6cblast_BlastLine) < 0) __PYX_ERR(0, 66, __pyx_L1_error) - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec, NULL); if (unlikely(!__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr)) __PYX_ERR(0, 172, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr_spec, __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) < 0) __PYX_ERR(0, 172, __pyx_L1_error) - #else - __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr = &__pyx_type_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr) < 0) __PYX_ERR(0, 172, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_dictoffset && __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_4jcvi_7formats_6cblast___pyx_scope_struct__genexpr->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - } - #endif - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, 
&__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec, NULL); if (unlikely(!__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc)) __PYX_ERR(1, 66, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc_spec, __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) < 0) __PYX_ERR(1, 66, __pyx_L1_error) - #else - __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc = &__pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc) < 0) __PYX_ERR(1, 66, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_dictoffset && 
__pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype___pyx_scope_struct____Pyx_CFunc_b7d994__4jcvi_7formats_6cblast_BlastLine__lParenchar____etc_to_py_aa8630__5query_7subject_5pctid_6hitlen_9nmismatch_5nga__etc->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - } - #endif - __Pyx_RefNannyFinishContext(); - return 0; - __pyx_L1_error:; - __Pyx_RefNannyFinishContext(); - return -1; -} - -static int __Pyx_modinit_type_import_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); - /*--- Type import code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_variable_import_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); - /*--- Variable import code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_function_import_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); - /*--- Function import code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - - -#if PY_MAJOR_VERSION >= 3 -#if CYTHON_PEP489_MULTI_PHASE_INIT -static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ -static int __pyx_pymod_exec_cblast(PyObject* module); /*proto*/ -static PyModuleDef_Slot __pyx_moduledef_slots[] = { - {Py_mod_create, (void*)__pyx_pymod_create}, - {Py_mod_exec, (void*)__pyx_pymod_exec_cblast}, - {0, NULL} -}; -#endif - -#ifdef __cplusplus -namespace { - struct PyModuleDef __pyx_moduledef = - #else - static struct PyModuleDef __pyx_moduledef = - #endif - { - PyModuleDef_HEAD_INIT, - "cblast", - __pyx_k_Cythonized_fast_version_of_Blas, /* m_doc */ - #if CYTHON_PEP489_MULTI_PHASE_INIT - 0, /* m_size */ - #elif CYTHON_USE_MODULE_STATE - 
sizeof(__pyx_mstate), /* m_size */ - #else - -1, /* m_size */ - #endif - __pyx_methods /* m_methods */, - #if CYTHON_PEP489_MULTI_PHASE_INIT - __pyx_moduledef_slots, /* m_slots */ - #else - NULL, /* m_reload */ - #endif - #if CYTHON_USE_MODULE_STATE - __pyx_m_traverse, /* m_traverse */ - __pyx_m_clear, /* m_clear */ - NULL /* m_free */ - #else - NULL, /* m_traverse */ - NULL, /* m_clear */ - NULL /* m_free */ - #endif - }; - #ifdef __cplusplus -} /* anonymous namespace */ -#endif -#endif - -#ifndef CYTHON_NO_PYINIT_EXPORT -#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC -#elif PY_MAJOR_VERSION < 3 -#ifdef __cplusplus -#define __Pyx_PyMODINIT_FUNC extern "C" void -#else -#define __Pyx_PyMODINIT_FUNC void -#endif -#else -#ifdef __cplusplus -#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * -#else -#define __Pyx_PyMODINIT_FUNC PyObject * -#endif -#endif - - -#if PY_MAJOR_VERSION < 3 -__Pyx_PyMODINIT_FUNC initcblast(void) CYTHON_SMALL_CODE; /*proto*/ -__Pyx_PyMODINIT_FUNC initcblast(void) -#else -__Pyx_PyMODINIT_FUNC PyInit_cblast(void) CYTHON_SMALL_CODE; /*proto*/ -__Pyx_PyMODINIT_FUNC PyInit_cblast(void) -#if CYTHON_PEP489_MULTI_PHASE_INIT -{ - return PyModuleDef_Init(&__pyx_moduledef); -} -static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) { - #if PY_VERSION_HEX >= 0x030700A1 - static PY_INT64_T main_interpreter_id = -1; - PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); - if (main_interpreter_id == -1) { - main_interpreter_id = current_id; - return (unlikely(current_id == -1)) ? 
-1 : 0; - } else if (unlikely(main_interpreter_id != current_id)) - #else - static PyInterpreterState *main_interpreter = NULL; - PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; - if (!main_interpreter) { - main_interpreter = current_interpreter; - } else if (unlikely(main_interpreter != current_interpreter)) - #endif - { - PyErr_SetString( - PyExc_ImportError, - "Interpreter change detected - this module can only be loaded into one interpreter per process."); - return -1; - } - return 0; -} -#if CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *module, const char* from_name, const char* to_name, int allow_none) -#else -static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) -#endif -{ - PyObject *value = PyObject_GetAttrString(spec, from_name); - int result = 0; - if (likely(value)) { - if (allow_none || value != Py_None) { -#if CYTHON_COMPILING_IN_LIMITED_API - result = PyModule_AddObject(module, to_name, value); -#else - result = PyDict_SetItemString(moddict, to_name, value); -#endif - } - Py_DECREF(value); - } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_Clear(); - } else { - result = -1; - } - return result; -} -static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def) { - PyObject *module = NULL, *moddict, *modname; - CYTHON_UNUSED_VAR(def); - if (__Pyx_check_single_interpreter()) - return NULL; - if (__pyx_m) - return __Pyx_NewRef(__pyx_m); - modname = PyObject_GetAttrString(spec, "name"); - if (unlikely(!modname)) goto bad; - module = PyModule_NewObject(modname); - Py_DECREF(modname); - if (unlikely(!module)) goto bad; -#if CYTHON_COMPILING_IN_LIMITED_API - moddict = module; -#else - moddict = PyModule_GetDict(module); - if (unlikely(!moddict)) goto bad; -#endif - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", 
"__loader__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; - return module; -bad: - Py_XDECREF(module); - return NULL; -} - - -static CYTHON_SMALL_CODE int __pyx_pymod_exec_cblast(PyObject *__pyx_pyinit_module) -#endif -#endif -{ - int stringtab_initialized = 0; - #if CYTHON_USE_MODULE_STATE - int pystate_addmodule_run = 0; - #endif - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannyDeclarations - #if CYTHON_PEP489_MULTI_PHASE_INIT - if (__pyx_m) { - if (__pyx_m == __pyx_pyinit_module) return 0; - PyErr_SetString(PyExc_RuntimeError, "Module 'cblast' has already been imported. Re-initialisation is not supported."); - return -1; - } - #elif PY_MAJOR_VERSION >= 3 - if (__pyx_m) return __Pyx_NewRef(__pyx_m); - #endif - /*--- Module creation code ---*/ - #if CYTHON_PEP489_MULTI_PHASE_INIT - __pyx_m = __pyx_pyinit_module; - Py_INCREF(__pyx_m); - #else - #if PY_MAJOR_VERSION < 3 - __pyx_m = Py_InitModule4("cblast", __pyx_methods, __pyx_k_Cythonized_fast_version_of_Blas, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); - if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) - #elif CYTHON_USE_MODULE_STATE - __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) - { - int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef); - __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to "cblast" pseudovariable */ - if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - pystate_addmodule_run = 1; - } - #else - __pyx_m = PyModule_Create(&__pyx_moduledef); - if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) 
- #endif - #endif - CYTHON_UNUSED_VAR(__pyx_t_1); - __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) - Py_INCREF(__pyx_d); - __pyx_b = __Pyx_PyImport_AddModuleRef(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_cython_runtime = __Pyx_PyImport_AddModuleRef((const char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) - if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #if CYTHON_REFNANNY -__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); -if (!__Pyx_RefNanny) { - PyErr_Clear(); - __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); - if (!__Pyx_RefNanny) - Py_FatalError("failed to import 'refnanny' module"); -} -#endif - __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_cblast(void)", 0); - if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, __Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #ifdef __Pxy_PyFrame_Initialize_Offsets - __Pxy_PyFrame_Initialize_Offsets(); - #endif - __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) - #ifdef __Pyx_CyFunction_USED - if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_FusedFunction_USED - if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_Coroutine_USED - if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_Generator_USED - if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_AsyncGen_USED - if 
(__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_StopAsyncIteration_USED - if (__pyx_StopAsyncIteration_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - /*--- Library function declarations ---*/ - /*--- Threads initialization code ---*/ - #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS - PyEval_InitThreads(); - #endif - /*--- Initialize various global constants etc. ---*/ - if (__Pyx_InitConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - stringtab_initialized = 1; - if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) - if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - if (__pyx_module_is_main_jcvi__formats__cblast) { - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - } - #if PY_MAJOR_VERSION >= 3 - { - PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) - if (!PyDict_GetItemString(modules, "jcvi.formats.cblast")) { - if (unlikely((PyDict_SetItemString(modules, "jcvi.formats.cblast", __pyx_m) < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - } - } - #endif - /*--- Builtin init code ---*/ - if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - /*--- Constants init code ---*/ - if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - /*--- Global type/function init code ---*/ - (void)__Pyx_modinit_global_init_code(); - (void)__Pyx_modinit_variable_export_code(); - (void)__Pyx_modinit_function_export_code(); - if (unlikely((__Pyx_modinit_type_init_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - (void)__Pyx_modinit_type_import_code(); - (void)__Pyx_modinit_variable_import_code(); - (void)__Pyx_modinit_function_import_code(); - /*--- Execution code ---*/ - 
#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) - if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - - /* "jcvi/formats/cblast.pyx":9 - * - * """ - * import sys # <<<<<<<<<<<<<< - * from libc.stdio cimport FILE, EOF, fopen, fscanf, rewind, fclose, sscanf, \ - * fgets, sprintf - */ - __pyx_t_2 = __Pyx_ImportDottedModuleRelFirst(__pyx_n_s_sys, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_sys, __pyx_t_2) < 0) __PYX_ERR(0, 9, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "jcvi/formats/cblast.pyx":15 - * - * - * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" # <<<<<<<<<<<<<< - * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" - * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" - */ - __pyx_v_4jcvi_7formats_6cblast_blast_format = ((char const *)"%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f"); - - /* "jcvi/formats/cblast.pyx":16 - * - * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" - * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" # <<<<<<<<<<<<<< - * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" - * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" - */ - __pyx_v_4jcvi_7formats_6cblast_blast_format_line = ((char const *)"%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n"); - - /* "jcvi/formats/cblast.pyx":17 - * cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" - * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" - * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" # <<<<<<<<<<<<<< - * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" - * - */ - 
__pyx_v_4jcvi_7formats_6cblast_blast_output = ((char const *)"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g"); - - /* "jcvi/formats/cblast.pyx":18 - * cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" - * cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" - * cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" # <<<<<<<<<<<<<< - * - * - */ - __pyx_v_4jcvi_7formats_6cblast_bed_output = ((char const *)"%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c"); - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_5Blast_11__reduce_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Blast___reduce_cython, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__8)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_reduce_cython, __pyx_t_2) < 0) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_5Blast_13__setstate_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Blast___setstate_cython, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__10)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_setstate_cython, __pyx_t_2) < 0) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "jcvi/formats/cblast.pyx":80 - * """ - * - 
* __slots__ = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ # <<<<<<<<<<<<<< - * 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score', \ - * 'qseqid', 'sseqid', 'qi', 'si', 'orientation') - */ - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine, __pyx_n_s_slots, __pyx_tuple__11) < 0) __PYX_ERR(0, 80, __pyx_L1_error) - PyType_Modified(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); - - /* "jcvi/formats/cblast.pyx":185 - * return py_str(result) - * - * def __reduce__(self): # <<<<<<<<<<<<<< - * return create_blast_line, ( - * self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - */ - __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4jcvi_7formats_6cblast_9BlastLine_11__reduce__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_BlastLine___reduce, NULL, __pyx_n_s_jcvi_formats_cblast, __pyx_d, ((PyObject *)__pyx_codeobj__12)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 185, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_4jcvi_7formats_6cblast_BlastLine, __pyx_n_s_reduce, __pyx_t_2) < 0) __PYX_ERR(0, 185, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - PyType_Modified(__pyx_ptype_4jcvi_7formats_6cblast_BlastLine); - - /* "jcvi/formats/cblast.pyx":1 - * # cython: language_level=2, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True # <<<<<<<<<<<<<< - * - * """ - */ - __pyx_t_2 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_2) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /*--- Wrapped vars code ---*/ - - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - if (__pyx_m) { - if (__pyx_d && stringtab_initialized) { - __Pyx_AddTraceback("init jcvi.formats.cblast", __pyx_clineno, __pyx_lineno, __pyx_filename); - } - #if !CYTHON_USE_MODULE_STATE - Py_CLEAR(__pyx_m); - #else - 
Py_DECREF(__pyx_m); - if (pystate_addmodule_run) { - PyObject *tp, *value, *tb; - PyErr_Fetch(&tp, &value, &tb); - PyState_RemoveModule(&__pyx_moduledef); - PyErr_Restore(tp, value, tb); - } - #endif - } else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ImportError, "init jcvi.formats.cblast"); - } - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - #if CYTHON_PEP489_MULTI_PHASE_INIT - return (__pyx_m != NULL) ? 0 : -1; - #elif PY_MAJOR_VERSION >= 3 - return __pyx_m; - #else - return; - #endif -} -/* #### Code section: cleanup_globals ### */ -/* #### Code section: cleanup_module ### */ -/* #### Code section: main_method ### */ -/* #### Code section: utility_code_pragmas ### */ -#ifdef _MSC_VER -#pragma warning( push ) -/* Warning 4127: conditional expression is constant - * Cython uses constant conditional expressions to allow in inline functions to be optimized at - * compile-time, so this warning is not useful - */ -#pragma warning( disable : 4127 ) -#endif - - - -/* #### Code section: utility_code_def ### */ - -/* --- Runtime support code --- */ -/* Refnanny */ -#if CYTHON_REFNANNY -static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { - PyObject *m = NULL, *p = NULL; - void *r = NULL; - m = PyImport_ImportModule(modname); - if (!m) goto end; - p = PyObject_GetAttrString(m, "RefNannyAPI"); - if (!p) goto end; - r = PyLong_AsVoidPtr(p); -end: - Py_XDECREF(p); - Py_XDECREF(m); - return (__Pyx_RefNannyAPIStruct *)r; -} -#endif - -/* PyErrExceptionMatches */ -#if CYTHON_FAST_THREAD_STATE -static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(tuple); -#if PY_MAJOR_VERSION >= 3 - for (i=0; i= 0x030C00A6 - PyObject *current_exception = tstate->current_exception; - if (unlikely(!current_exception)) return 0; - exc_type = (PyObject*) Py_TYPE(current_exception); - if (exc_type == err) return 1; -#else - exc_type = tstate->curexc_type; - if (exc_type == err) return 1; - if 
(unlikely(!exc_type)) return 0; -#endif - #if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(exc_type); - #endif - if (unlikely(PyTuple_Check(err))) { - result = __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); - } else { - result = __Pyx_PyErr_GivenExceptionMatches(exc_type, err); - } - #if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(exc_type); - #endif - return result; -} -#endif - -/* PyErrFetchRestore */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { -#if PY_VERSION_HEX >= 0x030C00A6 - PyObject *tmp_value; - assert(type == NULL || (value != NULL && type == (PyObject*) Py_TYPE(value))); - if (value) { - #if CYTHON_COMPILING_IN_CPYTHON - if (unlikely(((PyBaseExceptionObject*) value)->traceback != tb)) - #endif - PyException_SetTraceback(value, tb); - } - tmp_value = tstate->current_exception; - tstate->current_exception = value; - Py_XDECREF(tmp_value); - Py_XDECREF(type); - Py_XDECREF(tb); -#else - PyObject *tmp_type, *tmp_value, *tmp_tb; - tmp_type = tstate->curexc_type; - tmp_value = tstate->curexc_value; - tmp_tb = tstate->curexc_traceback; - tstate->curexc_type = type; - tstate->curexc_value = value; - tstate->curexc_traceback = tb; - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); -#endif -} -static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { -#if PY_VERSION_HEX >= 0x030C00A6 - PyObject* exc_value; - exc_value = tstate->current_exception; - tstate->current_exception = 0; - *value = exc_value; - *type = NULL; - *tb = NULL; - if (exc_value) { - *type = (PyObject*) Py_TYPE(exc_value); - Py_INCREF(*type); - #if CYTHON_COMPILING_IN_CPYTHON - *tb = ((PyBaseExceptionObject*) exc_value)->traceback; - Py_XINCREF(*tb); - #else - *tb = PyException_GetTraceback(exc_value); - #endif - } -#else - *type = tstate->curexc_type; - *value = tstate->curexc_value; - *tb = 
tstate->curexc_traceback; - tstate->curexc_type = 0; - tstate->curexc_value = 0; - tstate->curexc_traceback = 0; -#endif -} -#endif - -/* PyObjectGetAttrStr */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { - PyTypeObject* tp = Py_TYPE(obj); - if (likely(tp->tp_getattro)) - return tp->tp_getattro(obj, attr_name); -#if PY_MAJOR_VERSION < 3 - if (likely(tp->tp_getattr)) - return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); -#endif - return PyObject_GetAttr(obj, attr_name); -} -#endif - -/* PyObjectGetAttrStrNoError */ -#if __PYX_LIMITED_VERSION_HEX < 0x030d00A1 -static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError))) - __Pyx_PyErr_Clear(); -} -#endif -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) { - PyObject *result; -#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 - (void) PyObject_GetOptionalAttr(obj, attr_name, &result); - return result; -#else -#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1 - PyTypeObject* tp = Py_TYPE(obj); - if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) { - return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1); - } -#endif - result = __Pyx_PyObject_GetAttrStr(obj, attr_name); - if (unlikely(!result)) { - __Pyx_PyObject_GetAttrStr_ClearAttributeError(); - } - return result; -#endif -} - -/* GetBuiltinName */ -static PyObject *__Pyx_GetBuiltinName(PyObject *name) { - PyObject* result = __Pyx_PyObject_GetAttrStrNoError(__pyx_b, name); - if (unlikely(!result) && !PyErr_Occurred()) { - PyErr_Format(PyExc_NameError, -#if PY_MAJOR_VERSION >= 3 - "name '%U' is not defined", name); -#else - "name '%.200s' is not defined", PyString_AS_STRING(name)); -#endif - } - return result; -} - -/* TupleAndListFromArray */ -#if 
CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE void __Pyx_copy_object_array(PyObject *const *CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) { - PyObject *v; - Py_ssize_t i; - for (i = 0; i < length; i++) { - v = dest[i] = src[i]; - Py_INCREF(v); - } -} -static CYTHON_INLINE PyObject * -__Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) -{ - PyObject *res; - if (n <= 0) { - Py_INCREF(__pyx_empty_tuple); - return __pyx_empty_tuple; - } - res = PyTuple_New(n); - if (unlikely(res == NULL)) return NULL; - __Pyx_copy_object_array(src, ((PyTupleObject*)res)->ob_item, n); - return res; -} -static CYTHON_INLINE PyObject * -__Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n) -{ - PyObject *res; - if (n <= 0) { - return PyList_New(0); - } - res = PyList_New(n); - if (unlikely(res == NULL)) return NULL; - __Pyx_copy_object_array(src, ((PyListObject*)res)->ob_item, n); - return res; -} -#endif - -/* BytesEquals */ -static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { -#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API - return PyObject_RichCompareBool(s1, s2, equals); -#else - if (s1 == s2) { - return (equals == Py_EQ); - } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { - const char *ps1, *ps2; - Py_ssize_t length = PyBytes_GET_SIZE(s1); - if (length != PyBytes_GET_SIZE(s2)) - return (equals == Py_NE); - ps1 = PyBytes_AS_STRING(s1); - ps2 = PyBytes_AS_STRING(s2); - if (ps1[0] != ps2[0]) { - return (equals == Py_NE); - } else if (length == 1) { - return (equals == Py_EQ); - } else { - int result; -#if CYTHON_USE_UNICODE_INTERNALS && (PY_VERSION_HEX < 0x030B0000) - Py_hash_t hash1, hash2; - hash1 = ((PyBytesObject*)s1)->ob_shash; - hash2 = ((PyBytesObject*)s2)->ob_shash; - if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { - return (equals == Py_NE); - } -#endif - result = memcmp(ps1, ps2, (size_t)length); - return (equals == Py_EQ) ? 
(result == 0) : (result != 0); - } - } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { - return (equals == Py_NE); - } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { - return (equals == Py_NE); - } else { - int result; - PyObject* py_result = PyObject_RichCompare(s1, s2, equals); - if (!py_result) - return -1; - result = __Pyx_PyObject_IsTrue(py_result); - Py_DECREF(py_result); - return result; - } -#endif -} - -/* UnicodeEquals */ -static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { -#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API - return PyObject_RichCompareBool(s1, s2, equals); -#else -#if PY_MAJOR_VERSION < 3 - PyObject* owned_ref = NULL; -#endif - int s1_is_unicode, s2_is_unicode; - if (s1 == s2) { - goto return_eq; - } - s1_is_unicode = PyUnicode_CheckExact(s1); - s2_is_unicode = PyUnicode_CheckExact(s2); -#if PY_MAJOR_VERSION < 3 - if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { - owned_ref = PyUnicode_FromObject(s2); - if (unlikely(!owned_ref)) - return -1; - s2 = owned_ref; - s2_is_unicode = 1; - } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) { - owned_ref = PyUnicode_FromObject(s1); - if (unlikely(!owned_ref)) - return -1; - s1 = owned_ref; - s1_is_unicode = 1; - } else if (((!s2_is_unicode) & (!s1_is_unicode))) { - return __Pyx_PyBytes_Equals(s1, s2, equals); - } -#endif - if (s1_is_unicode & s2_is_unicode) { - Py_ssize_t length; - int kind; - void *data1, *data2; - if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0)) - return -1; - length = __Pyx_PyUnicode_GET_LENGTH(s1); - if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { - goto return_ne; - } -#if CYTHON_USE_UNICODE_INTERNALS - { - Py_hash_t hash1, hash2; - #if CYTHON_PEP393_ENABLED - hash1 = ((PyASCIIObject*)s1)->hash; - hash2 = ((PyASCIIObject*)s2)->hash; - #else - hash1 = ((PyUnicodeObject*)s1)->hash; - hash2 = ((PyUnicodeObject*)s2)->hash; - #endif - if 
(hash1 != hash2 && hash1 != -1 && hash2 != -1) { - goto return_ne; - } - } -#endif - kind = __Pyx_PyUnicode_KIND(s1); - if (kind != __Pyx_PyUnicode_KIND(s2)) { - goto return_ne; - } - data1 = __Pyx_PyUnicode_DATA(s1); - data2 = __Pyx_PyUnicode_DATA(s2); - if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { - goto return_ne; - } else if (length == 1) { - goto return_eq; - } else { - int result = memcmp(data1, data2, (size_t)(length * kind)); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_EQ) ? (result == 0) : (result != 0); - } - } else if ((s1 == Py_None) & s2_is_unicode) { - goto return_ne; - } else if ((s2 == Py_None) & s1_is_unicode) { - goto return_ne; - } else { - int result; - PyObject* py_result = PyObject_RichCompare(s1, s2, equals); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - if (!py_result) - return -1; - result = __Pyx_PyObject_IsTrue(py_result); - Py_DECREF(py_result); - return result; - } -return_eq: - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_EQ); -return_ne: - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_NE); -#endif -} - -/* fastcall */ -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s) -{ - Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames); - for (i = 0; i < n; i++) - { - if (s == PyTuple_GET_ITEM(kwnames, i)) return kwvalues[i]; - } - for (i = 0; i < n; i++) - { - int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ); - if (unlikely(eq != 0)) { - if (unlikely(eq < 0)) return NULL; - return kwvalues[i]; - } - } - return NULL; -} -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 -CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues) { - Py_ssize_t i, nkwargs = PyTuple_GET_SIZE(kwnames); - PyObject *dict; - dict = 
PyDict_New(); - if (unlikely(!dict)) - return NULL; - for (i=0; i= 3 - "%s() got multiple values for keyword argument '%U'", func_name, kw_name); - #else - "%s() got multiple values for keyword argument '%s'", func_name, - PyString_AsString(kw_name)); - #endif -} - -/* ParseKeywords */ -static int __Pyx_ParseOptionalKeywords( - PyObject *kwds, - PyObject *const *kwvalues, - PyObject **argnames[], - PyObject *kwds2, - PyObject *values[], - Py_ssize_t num_pos_args, - const char* function_name) -{ - PyObject *key = 0, *value = 0; - Py_ssize_t pos = 0; - PyObject*** name; - PyObject*** first_kw_arg = argnames + num_pos_args; - int kwds_is_tuple = CYTHON_METH_FASTCALL && likely(PyTuple_Check(kwds)); - while (1) { - Py_XDECREF(key); key = NULL; - Py_XDECREF(value); value = NULL; - if (kwds_is_tuple) { - Py_ssize_t size; -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(kwds); -#else - size = PyTuple_Size(kwds); - if (size < 0) goto bad; -#endif - if (pos >= size) break; -#if CYTHON_AVOID_BORROWED_REFS - key = __Pyx_PySequence_ITEM(kwds, pos); - if (!key) goto bad; -#elif CYTHON_ASSUME_SAFE_MACROS - key = PyTuple_GET_ITEM(kwds, pos); -#else - key = PyTuple_GetItem(kwds, pos); - if (!key) goto bad; -#endif - value = kwvalues[pos]; - pos++; - } - else - { - if (!PyDict_Next(kwds, &pos, &key, &value)) break; -#if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(key); -#endif - } - name = first_kw_arg; - while (*name && (**name != key)) name++; - if (*name) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(value); - Py_DECREF(key); -#endif - key = NULL; - value = NULL; - continue; - } -#if !CYTHON_AVOID_BORROWED_REFS - Py_INCREF(key); -#endif - Py_INCREF(value); - name = first_kw_arg; - #if PY_MAJOR_VERSION < 3 - if (likely(PyString_Check(key))) { - while (*name) { - if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) - && _PyString_Eq(**name, key)) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - 
value = NULL; -#endif - break; - } - name++; - } - if (*name) continue; - else { - PyObject*** argname = argnames; - while (argname != first_kw_arg) { - if ((**argname == key) || ( - (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) - && _PyString_Eq(**argname, key))) { - goto arg_passed_twice; - } - argname++; - } - } - } else - #endif - if (likely(PyUnicode_Check(key))) { - while (*name) { - int cmp = ( - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 - (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : - #endif - PyUnicode_Compare(**name, key) - ); - if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; - if (cmp == 0) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - value = NULL; -#endif - break; - } - name++; - } - if (*name) continue; - else { - PyObject*** argname = argnames; - while (argname != first_kw_arg) { - int cmp = (**argname == key) ? 0 : - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 - (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 
1 : - #endif - PyUnicode_Compare(**argname, key); - if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; - if (cmp == 0) goto arg_passed_twice; - argname++; - } - } - } else - goto invalid_keyword_type; - if (kwds2) { - if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; - } else { - goto invalid_keyword; - } - } - Py_XDECREF(key); - Py_XDECREF(value); - return 0; -arg_passed_twice: - __Pyx_RaiseDoubleKeywordsError(function_name, key); - goto bad; -invalid_keyword_type: - PyErr_Format(PyExc_TypeError, - "%.200s() keywords must be strings", function_name); - goto bad; -invalid_keyword: - #if PY_MAJOR_VERSION < 3 - PyErr_Format(PyExc_TypeError, - "%.200s() got an unexpected keyword argument '%.200s'", - function_name, PyString_AsString(key)); - #else - PyErr_Format(PyExc_TypeError, - "%s() got an unexpected keyword argument '%U'", - function_name, key); - #endif -bad: - Py_XDECREF(key); - Py_XDECREF(value); - return -1; -} - -/* FixUpExtensionType */ -#if CYTHON_USE_TYPE_SPECS -static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type) { -#if PY_VERSION_HEX > 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - CYTHON_UNUSED_VAR(spec); - CYTHON_UNUSED_VAR(type); -#else - const PyType_Slot *slot = spec->slots; - while (slot && slot->slot && slot->slot != Py_tp_members) - slot++; - if (slot && slot->slot == Py_tp_members) { - int changed = 0; -#if !(PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON) - const -#endif - PyMemberDef *memb = (PyMemberDef*) slot->pfunc; - while (memb && memb->name) { - if (memb->name[0] == '_' && memb->name[1] == '_') { -#if PY_VERSION_HEX < 0x030900b1 - if (strcmp(memb->name, "__weaklistoffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); - type->tp_weaklistoffset = memb->offset; - changed = 1; - } - else if (strcmp(memb->name, "__dictoffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); - type->tp_dictoffset = memb->offset; - 
changed = 1; - } -#if CYTHON_METH_FASTCALL - else if (strcmp(memb->name, "__vectorcalloffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); -#if PY_VERSION_HEX >= 0x030800b4 - type->tp_vectorcall_offset = memb->offset; -#else - type->tp_print = (printfunc) memb->offset; -#endif - changed = 1; - } -#endif -#else - if ((0)); -#endif -#if PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON - else if (strcmp(memb->name, "__module__") == 0) { - PyObject *descr; - assert(memb->type == T_OBJECT); - assert(memb->flags == 0 || memb->flags == READONLY); - descr = PyDescr_NewMember(type, memb); - if (unlikely(!descr)) - return -1; - if (unlikely(PyDict_SetItem(type->tp_dict, PyDescr_NAME(descr), descr) < 0)) { - Py_DECREF(descr); - return -1; - } - Py_DECREF(descr); - changed = 1; - } -#endif - } - memb++; - } - if (changed) - PyType_Modified(type); - } -#endif - return 0; -} -#endif - -/* FetchSharedCythonModule */ -static PyObject *__Pyx_FetchSharedCythonABIModule(void) { - return __Pyx_PyImport_AddModuleRef((char*) __PYX_ABI_MODULE_NAME); -} - -/* FetchCommonType */ -static int __Pyx_VerifyCachedType(PyObject *cached_type, - const char *name, - Py_ssize_t basicsize, - Py_ssize_t expected_basicsize) { - if (!PyType_Check(cached_type)) { - PyErr_Format(PyExc_TypeError, - "Shared Cython type %.200s is not a type object", name); - return -1; - } - if (basicsize != expected_basicsize) { - PyErr_Format(PyExc_TypeError, - "Shared Cython type %.200s has the wrong size, try recompiling", - name); - return -1; - } - return 0; -} -#if !CYTHON_USE_TYPE_SPECS -static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) { - PyObject* abi_module; - const char* object_name; - PyTypeObject *cached_type = NULL; - abi_module = __Pyx_FetchSharedCythonABIModule(); - if (!abi_module) return NULL; - object_name = strrchr(type->tp_name, '.'); - object_name = object_name ? 
object_name+1 : type->tp_name; - cached_type = (PyTypeObject*) PyObject_GetAttrString(abi_module, object_name); - if (cached_type) { - if (__Pyx_VerifyCachedType( - (PyObject *)cached_type, - object_name, - cached_type->tp_basicsize, - type->tp_basicsize) < 0) { - goto bad; - } - goto done; - } - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; - PyErr_Clear(); - if (PyType_Ready(type) < 0) goto bad; - if (PyObject_SetAttrString(abi_module, object_name, (PyObject *)type) < 0) - goto bad; - Py_INCREF(type); - cached_type = type; -done: - Py_DECREF(abi_module); - return cached_type; -bad: - Py_XDECREF(cached_type); - cached_type = NULL; - goto done; -} -#else -static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) { - PyObject *abi_module, *cached_type = NULL; - const char* object_name = strrchr(spec->name, '.'); - object_name = object_name ? object_name+1 : spec->name; - abi_module = __Pyx_FetchSharedCythonABIModule(); - if (!abi_module) return NULL; - cached_type = PyObject_GetAttrString(abi_module, object_name); - if (cached_type) { - Py_ssize_t basicsize; -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject *py_basicsize; - py_basicsize = PyObject_GetAttrString(cached_type, "__basicsize__"); - if (unlikely(!py_basicsize)) goto bad; - basicsize = PyLong_AsSsize_t(py_basicsize); - Py_DECREF(py_basicsize); - py_basicsize = 0; - if (unlikely(basicsize == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; -#else - basicsize = likely(PyType_Check(cached_type)) ? 
((PyTypeObject*) cached_type)->tp_basicsize : -1; -#endif - if (__Pyx_VerifyCachedType( - cached_type, - object_name, - basicsize, - spec->basicsize) < 0) { - goto bad; - } - goto done; - } - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; - PyErr_Clear(); - CYTHON_UNUSED_VAR(module); - cached_type = __Pyx_PyType_FromModuleAndSpec(abi_module, spec, bases); - if (unlikely(!cached_type)) goto bad; - if (unlikely(__Pyx_fix_up_extension_type_from_spec(spec, (PyTypeObject *) cached_type) < 0)) goto bad; - if (PyObject_SetAttrString(abi_module, object_name, cached_type) < 0) goto bad; -done: - Py_DECREF(abi_module); - assert(cached_type == NULL || PyType_Check(cached_type)); - return (PyTypeObject *) cached_type; -bad: - Py_XDECREF(cached_type); - cached_type = NULL; - goto done; -} -#endif - -/* PyVectorcallFastCallDict */ -#if CYTHON_METH_FASTCALL -static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) -{ - PyObject *res = NULL; - PyObject *kwnames; - PyObject **newargs; - PyObject **kwvalues; - Py_ssize_t i, pos; - size_t j; - PyObject *key, *value; - unsigned long keys_are_strings; - Py_ssize_t nkw = PyDict_GET_SIZE(kw); - newargs = (PyObject **)PyMem_Malloc((nargs + (size_t)nkw) * sizeof(args[0])); - if (unlikely(newargs == NULL)) { - PyErr_NoMemory(); - return NULL; - } - for (j = 0; j < nargs; j++) newargs[j] = args[j]; - kwnames = PyTuple_New(nkw); - if (unlikely(kwnames == NULL)) { - PyMem_Free(newargs); - return NULL; - } - kwvalues = newargs + nargs; - pos = i = 0; - keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; - while (PyDict_Next(kw, &pos, &key, &value)) { - keys_are_strings &= Py_TYPE(key)->tp_flags; - Py_INCREF(key); - Py_INCREF(value); - PyTuple_SET_ITEM(kwnames, i, key); - kwvalues[i] = value; - i++; - } - if (unlikely(!keys_are_strings)) { - PyErr_SetString(PyExc_TypeError, "keywords must be strings"); - goto cleanup; - } - res = vc(func, newargs, 
nargs, kwnames); -cleanup: - Py_DECREF(kwnames); - for (i = 0; i < nkw; i++) - Py_DECREF(kwvalues[i]); - PyMem_Free(newargs); - return res; -} -static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) -{ - if (likely(kw == NULL) || PyDict_GET_SIZE(kw) == 0) { - return vc(func, args, nargs, NULL); - } - return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw); -} -#endif - -/* CythonFunctionShared */ -#if CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { - if (__Pyx_CyFunction_Check(func)) { - return PyCFunction_GetFunction(((__pyx_CyFunctionObject*)func)->func) == (PyCFunction) cfunc; - } else if (PyCFunction_Check(func)) { - return PyCFunction_GetFunction(func) == (PyCFunction) cfunc; - } - return 0; -} -#else -static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { - return __Pyx_CyOrPyCFunction_Check(func) && __Pyx_CyOrPyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; -} -#endif -static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj) { -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - __Pyx_Py_XDECREF_SET( - __Pyx_CyFunction_GetClassObj(f), - ((classobj) ? __Pyx_NewRef(classobj) : NULL)); -#else - __Pyx_Py_XDECREF_SET( - ((PyCMethodObject *) (f))->mm_class, - (PyTypeObject*)((classobj) ? 
__Pyx_NewRef(classobj) : NULL)); -#endif -} -static PyObject * -__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, void *closure) -{ - CYTHON_UNUSED_VAR(closure); - if (unlikely(op->func_doc == NULL)) { -#if CYTHON_COMPILING_IN_LIMITED_API - op->func_doc = PyObject_GetAttrString(op->func, "__doc__"); - if (unlikely(!op->func_doc)) return NULL; -#else - if (((PyCFunctionObject*)op)->m_ml->ml_doc) { -#if PY_MAJOR_VERSION >= 3 - op->func_doc = PyUnicode_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); -#else - op->func_doc = PyString_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); -#endif - if (unlikely(op->func_doc == NULL)) - return NULL; - } else { - Py_INCREF(Py_None); - return Py_None; - } -#endif - } - Py_INCREF(op->func_doc); - return op->func_doc; -} -static int -__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (value == NULL) { - value = Py_None; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_doc, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(op->func_name == NULL)) { -#if CYTHON_COMPILING_IN_LIMITED_API - op->func_name = PyObject_GetAttrString(op->func, "__name__"); -#elif PY_MAJOR_VERSION >= 3 - op->func_name = PyUnicode_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); -#else - op->func_name = PyString_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); -#endif - if (unlikely(op->func_name == NULL)) - return NULL; - } - Py_INCREF(op->func_name); - return op->func_name; -} -static int -__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__name__ must be set to a string object"); 
- return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_name, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - Py_INCREF(op->func_qualname); - return op->func_qualname; -} -static int -__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__qualname__ must be set to a string object"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_qualname, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(op->func_dict == NULL)) { - op->func_dict = PyDict_New(); - if (unlikely(op->func_dict == NULL)) - return NULL; - } - Py_INCREF(op->func_dict); - return op->func_dict; -} -static int -__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(value == NULL)) { - PyErr_SetString(PyExc_TypeError, - "function's dictionary may not be deleted"); - return -1; - } - if (unlikely(!PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "setting function's dictionary to a non-dict"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_dict, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - Py_INCREF(op->func_globals); - return op->func_globals; -} -static PyObject * -__Pyx_CyFunction_get_closure(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(op); - CYTHON_UNUSED_VAR(context); - Py_INCREF(Py_None); - return Py_None; -} -static PyObject * 
-__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, void *context) -{ - PyObject* result = (op->func_code) ? op->func_code : Py_None; - CYTHON_UNUSED_VAR(context); - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) { - int result = 0; - PyObject *res = op->defaults_getter((PyObject *) op); - if (unlikely(!res)) - return -1; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - op->defaults_tuple = PyTuple_GET_ITEM(res, 0); - Py_INCREF(op->defaults_tuple); - op->defaults_kwdict = PyTuple_GET_ITEM(res, 1); - Py_INCREF(op->defaults_kwdict); - #else - op->defaults_tuple = __Pyx_PySequence_ITEM(res, 0); - if (unlikely(!op->defaults_tuple)) result = -1; - else { - op->defaults_kwdict = __Pyx_PySequence_ITEM(res, 1); - if (unlikely(!op->defaults_kwdict)) result = -1; - } - #endif - Py_DECREF(res); - return result; -} -static int -__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - if (!value) { - value = Py_None; - } else if (unlikely(value != Py_None && !PyTuple_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__defaults__ must be set to a tuple object"); - return -1; - } - PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__defaults__ will not " - "currently affect the values used in function calls", 1); - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->defaults_tuple, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->defaults_tuple; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - if (op->defaults_getter) { - if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; - result = op->defaults_tuple; - } else { - result = Py_None; - } - } - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - 
CYTHON_UNUSED_VAR(context); - if (!value) { - value = Py_None; - } else if (unlikely(value != Py_None && !PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__kwdefaults__ must be set to a dict object"); - return -1; - } - PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__kwdefaults__ will not " - "currently affect the values used in function calls", 1); - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->defaults_kwdict, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->defaults_kwdict; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - if (op->defaults_getter) { - if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; - result = op->defaults_kwdict; - } else { - result = Py_None; - } - } - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - if (!value || value == Py_None) { - value = NULL; - } else if (unlikely(!PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__annotations__ must be set to a dict object"); - return -1; - } - Py_XINCREF(value); - __Pyx_Py_XDECREF_SET(op->func_annotations, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->func_annotations; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - result = PyDict_New(); - if (unlikely(!result)) return NULL; - op->func_annotations = result; - } - Py_INCREF(result); - return result; -} -static PyObject * -__Pyx_CyFunction_get_is_coroutine(__pyx_CyFunctionObject *op, void *context) { - int is_coroutine; - CYTHON_UNUSED_VAR(context); - if (op->func_is_coroutine) { - return __Pyx_NewRef(op->func_is_coroutine); - } - is_coroutine = op->flags & __Pyx_CYFUNCTION_COROUTINE; -#if PY_VERSION_HEX >= 0x03050000 - if (is_coroutine) { - PyObject 
*module, *fromlist, *marker = __pyx_n_s_is_coroutine; - fromlist = PyList_New(1); - if (unlikely(!fromlist)) return NULL; - Py_INCREF(marker); -#if CYTHON_ASSUME_SAFE_MACROS - PyList_SET_ITEM(fromlist, 0, marker); -#else - if (unlikely(PyList_SetItem(fromlist, 0, marker) < 0)) { - Py_DECREF(marker); - Py_DECREF(fromlist); - return NULL; - } -#endif - module = PyImport_ImportModuleLevelObject(__pyx_n_s_asyncio_coroutines, NULL, NULL, fromlist, 0); - Py_DECREF(fromlist); - if (unlikely(!module)) goto ignore; - op->func_is_coroutine = __Pyx_PyObject_GetAttrStr(module, marker); - Py_DECREF(module); - if (likely(op->func_is_coroutine)) { - return __Pyx_NewRef(op->func_is_coroutine); - } -ignore: - PyErr_Clear(); - } -#endif - op->func_is_coroutine = __Pyx_PyBool_FromLong(is_coroutine); - return __Pyx_NewRef(op->func_is_coroutine); -} -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject * -__Pyx_CyFunction_get_module(__pyx_CyFunctionObject *op, void *context) { - CYTHON_UNUSED_VAR(context); - return PyObject_GetAttrString(op->func, "__module__"); -} -static int -__Pyx_CyFunction_set_module(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - return PyObject_SetAttrString(op->func, "__module__", value); -} -#endif -static PyGetSetDef __pyx_CyFunction_getsets[] = { - {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, - {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, - {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, - {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, - {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0}, - {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, - {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, 
(setter)__Pyx_CyFunction_set_dict, 0, 0}, - {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, - {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, - {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, - {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, - {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, - {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, - {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, - {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, - {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0}, - {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0}, - {(char *) "_is_coroutine", (getter)__Pyx_CyFunction_get_is_coroutine, 0, 0, 0}, -#if CYTHON_COMPILING_IN_LIMITED_API - {"__module__", (getter)__Pyx_CyFunction_get_module, (setter)__Pyx_CyFunction_set_module, 0, 0}, -#endif - {0, 0, 0, 0, 0} -}; -static PyMemberDef __pyx_CyFunction_members[] = { -#if !CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0}, -#endif -#if CYTHON_USE_TYPE_SPECS - {(char *) "__dictoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_dict), READONLY, 0}, -#if CYTHON_METH_FASTCALL -#if CYTHON_BACKPORT_VECTORCALL - {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_vectorcall), READONLY, 0}, -#else -#if !CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0}, -#endif -#endif -#endif -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, 
func_weakreflist), READONLY, 0}, -#else - {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0}, -#endif -#endif - {0, 0, 0, 0, 0} -}; -static PyObject * -__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, PyObject *args) -{ - CYTHON_UNUSED_VAR(args); -#if PY_MAJOR_VERSION >= 3 - Py_INCREF(m->func_qualname); - return m->func_qualname; -#else - return PyString_FromString(((PyCFunctionObject*)m)->m_ml->ml_name); -#endif -} -static PyMethodDef __pyx_CyFunction_methods[] = { - {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0}, - {0, 0, 0, 0} -}; -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API -#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist) -#else -#define __Pyx_CyFunction_weakreflist(cyfunc) (((PyCFunctionObject*)cyfunc)->m_weakreflist) -#endif -static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname, - PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { -#if !CYTHON_COMPILING_IN_LIMITED_API - PyCFunctionObject *cf = (PyCFunctionObject*) op; -#endif - if (unlikely(op == NULL)) - return NULL; -#if CYTHON_COMPILING_IN_LIMITED_API - op->func = PyCFunction_NewEx(ml, (PyObject*)op, module); - if (unlikely(!op->func)) return NULL; -#endif - op->flags = flags; - __Pyx_CyFunction_weakreflist(op) = NULL; -#if !CYTHON_COMPILING_IN_LIMITED_API - cf->m_ml = ml; - cf->m_self = (PyObject *) op; -#endif - Py_XINCREF(closure); - op->func_closure = closure; -#if !CYTHON_COMPILING_IN_LIMITED_API - Py_XINCREF(module); - cf->m_module = module; -#endif - op->func_dict = NULL; - op->func_name = NULL; - Py_INCREF(qualname); - op->func_qualname = qualname; - op->func_doc = NULL; -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - op->func_classobj = NULL; -#else - ((PyCMethodObject*)op)->mm_class = NULL; -#endif - op->func_globals = globals; - Py_INCREF(op->func_globals); - 
Py_XINCREF(code); - op->func_code = code; - op->defaults_pyobjects = 0; - op->defaults_size = 0; - op->defaults = NULL; - op->defaults_tuple = NULL; - op->defaults_kwdict = NULL; - op->defaults_getter = NULL; - op->func_annotations = NULL; - op->func_is_coroutine = NULL; -#if CYTHON_METH_FASTCALL - switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { - case METH_NOARGS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS; - break; - case METH_O: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O; - break; - case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD; - break; - case METH_FASTCALL | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS; - break; - case METH_VARARGS | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = NULL; - break; - default: - PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); - Py_DECREF(op); - return NULL; - } -#endif - return (PyObject *) op; -} -static int -__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m) -{ - Py_CLEAR(m->func_closure); -#if CYTHON_COMPILING_IN_LIMITED_API - Py_CLEAR(m->func); -#else - Py_CLEAR(((PyCFunctionObject*)m)->m_module); -#endif - Py_CLEAR(m->func_dict); - Py_CLEAR(m->func_name); - Py_CLEAR(m->func_qualname); - Py_CLEAR(m->func_doc); - Py_CLEAR(m->func_globals); - Py_CLEAR(m->func_code); -#if !CYTHON_COMPILING_IN_LIMITED_API -#if PY_VERSION_HEX < 0x030900B1 - Py_CLEAR(__Pyx_CyFunction_GetClassObj(m)); -#else - { - PyObject *cls = (PyObject*) ((PyCMethodObject *) (m))->mm_class; - ((PyCMethodObject *) (m))->mm_class = NULL; - Py_XDECREF(cls); - } -#endif -#endif - Py_CLEAR(m->defaults_tuple); - Py_CLEAR(m->defaults_kwdict); - Py_CLEAR(m->func_annotations); - Py_CLEAR(m->func_is_coroutine); - if (m->defaults) { - PyObject **pydefaults = 
__Pyx_CyFunction_Defaults(PyObject *, m); - int i; - for (i = 0; i < m->defaults_pyobjects; i++) - Py_XDECREF(pydefaults[i]); - PyObject_Free(m->defaults); - m->defaults = NULL; - } - return 0; -} -static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m) -{ - if (__Pyx_CyFunction_weakreflist(m) != NULL) - PyObject_ClearWeakRefs((PyObject *) m); - __Pyx_CyFunction_clear(m); - __Pyx_PyHeapTypeObject_GC_Del(m); -} -static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m) -{ - PyObject_GC_UnTrack(m); - __Pyx__CyFunction_dealloc(m); -} -static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg) -{ - Py_VISIT(m->func_closure); -#if CYTHON_COMPILING_IN_LIMITED_API - Py_VISIT(m->func); -#else - Py_VISIT(((PyCFunctionObject*)m)->m_module); -#endif - Py_VISIT(m->func_dict); - Py_VISIT(m->func_name); - Py_VISIT(m->func_qualname); - Py_VISIT(m->func_doc); - Py_VISIT(m->func_globals); - Py_VISIT(m->func_code); -#if !CYTHON_COMPILING_IN_LIMITED_API - Py_VISIT(__Pyx_CyFunction_GetClassObj(m)); -#endif - Py_VISIT(m->defaults_tuple); - Py_VISIT(m->defaults_kwdict); - Py_VISIT(m->func_is_coroutine); - if (m->defaults) { - PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); - int i; - for (i = 0; i < m->defaults_pyobjects; i++) - Py_VISIT(pydefaults[i]); - } - return 0; -} -static PyObject* -__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op) -{ -#if PY_MAJOR_VERSION >= 3 - return PyUnicode_FromFormat("", - op->func_qualname, (void *)op); -#else - return PyString_FromFormat("", - PyString_AsString(op->func_qualname), (void *)op); -#endif -} -static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) { -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject *f = ((__pyx_CyFunctionObject*)func)->func; - PyObject *py_name = NULL; - PyCFunction meth; - int flags; - meth = PyCFunction_GetFunction(f); - if (unlikely(!meth)) return NULL; - flags = PyCFunction_GetFlags(f); - if (unlikely(flags 
< 0)) return NULL; -#else - PyCFunctionObject* f = (PyCFunctionObject*)func; - PyCFunction meth = f->m_ml->ml_meth; - int flags = f->m_ml->ml_flags; -#endif - Py_ssize_t size; - switch (flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) { - case METH_VARARGS: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) - return (*meth)(self, arg); - break; - case METH_VARARGS | METH_KEYWORDS: - return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw); - case METH_NOARGS: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) { -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(arg); -#else - size = PyTuple_Size(arg); - if (unlikely(size < 0)) return NULL; -#endif - if (likely(size == 0)) - return (*meth)(self, NULL); -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, - "%.200S() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - py_name, size); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, - "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - f->m_ml->ml_name, size); -#endif - return NULL; - } - break; - case METH_O: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) { -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(arg); -#else - size = PyTuple_Size(arg); - if (unlikely(size < 0)) return NULL; -#endif - if (likely(size == 1)) { - PyObject *result, *arg0; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - arg0 = PyTuple_GET_ITEM(arg, 0); - #else - arg0 = __Pyx_PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL; - #endif - result = (*meth)(self, arg0); - #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) - Py_DECREF(arg0); - #endif - return result; - } -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, - "%.200S() takes exactly one 
argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - py_name, size); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, - "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - f->m_ml->ml_name, size); -#endif - return NULL; - } - break; - default: - PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); - return NULL; - } -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, "%.200S() takes no keyword arguments", - py_name); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", - f->m_ml->ml_name); -#endif - return NULL; -} -static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) { - PyObject *self, *result; -#if CYTHON_COMPILING_IN_LIMITED_API - self = PyCFunction_GetSelf(((__pyx_CyFunctionObject*)func)->func); - if (unlikely(!self) && PyErr_Occurred()) return NULL; -#else - self = ((PyCFunctionObject*)func)->m_self; -#endif - result = __Pyx_CyFunction_CallMethod(func, self, arg, kw); - return result; -} -static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) { - PyObject *result; - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func; -#if CYTHON_METH_FASTCALL - __pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc); - if (vc) { -#if CYTHON_ASSUME_SAFE_MACROS - return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), (size_t)PyTuple_GET_SIZE(args), kw); -#else - (void) &__Pyx_PyVectorcall_FastCallDict; - return PyVectorcall_Call(func, args, kw); -#endif - } -#endif - if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { - Py_ssize_t argc; - PyObject *new_args; - PyObject *self; -#if CYTHON_ASSUME_SAFE_MACROS - argc = PyTuple_GET_SIZE(args); -#else - argc = PyTuple_Size(args); - if 
(unlikely(!argc) < 0) return NULL; -#endif - new_args = PyTuple_GetSlice(args, 1, argc); - if (unlikely(!new_args)) - return NULL; - self = PyTuple_GetItem(args, 0); - if (unlikely(!self)) { - Py_DECREF(new_args); -#if PY_MAJOR_VERSION > 2 - PyErr_Format(PyExc_TypeError, - "unbound method %.200S() needs an argument", - cyfunc->func_qualname); -#else - PyErr_SetString(PyExc_TypeError, - "unbound method needs an argument"); -#endif - return NULL; - } - result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw); - Py_DECREF(new_args); - } else { - result = __Pyx_CyFunction_Call(func, args, kw); - } - return result; -} -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames) -{ - int ret = 0; - if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { - if (unlikely(nargs < 1)) { - PyErr_Format(PyExc_TypeError, "%.200s() needs an argument", - ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); - return -1; - } - ret = 1; - } - if (unlikely(kwnames) && unlikely(PyTuple_GET_SIZE(kwnames))) { - PyErr_Format(PyExc_TypeError, - "%.200s() takes no keyword arguments", ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); - return -1; - } - return ret; -} -static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - if (unlikely(nargs != 0)) { - 
PyErr_Format(PyExc_TypeError, - "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - def->ml_name, nargs); - return NULL; - } - return def->ml_meth(self, NULL); -} -static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - if (unlikely(nargs != 1)) { - PyErr_Format(PyExc_TypeError, - "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - def->ml_name, nargs); - return NULL; - } - return def->ml_meth(self, args[0]); -} -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - return ((__Pyx_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames); -} -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = 
(__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; - PyTypeObject *cls = (PyTypeObject *) __Pyx_CyFunction_GetClassObj(cyfunc); -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - return ((__Pyx_PyCMethod)(void(*)(void))def->ml_meth)(self, cls, args, (size_t)nargs, kwnames); -} -#endif -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_CyFunctionType_slots[] = { - {Py_tp_dealloc, (void *)__Pyx_CyFunction_dealloc}, - {Py_tp_repr, (void *)__Pyx_CyFunction_repr}, - {Py_tp_call, (void *)__Pyx_CyFunction_CallAsMethod}, - {Py_tp_traverse, (void *)__Pyx_CyFunction_traverse}, - {Py_tp_clear, (void *)__Pyx_CyFunction_clear}, - {Py_tp_methods, (void *)__pyx_CyFunction_methods}, - {Py_tp_members, (void *)__pyx_CyFunction_members}, - {Py_tp_getset, (void *)__pyx_CyFunction_getsets}, - {Py_tp_descr_get, (void *)__Pyx_PyMethod_New}, - {0, 0}, -}; -static PyType_Spec __pyx_CyFunctionType_spec = { - __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", - sizeof(__pyx_CyFunctionObject), - 0, -#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR - Py_TPFLAGS_METHOD_DESCRIPTOR | -#endif -#if (defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL) - _Py_TPFLAGS_HAVE_VECTORCALL | -#endif - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, - __pyx_CyFunctionType_slots -}; -#else -static PyTypeObject __pyx_CyFunctionType_type = { - PyVarObject_HEAD_INIT(0, 0) - __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", - sizeof(__pyx_CyFunctionObject), - 0, - (destructor) __Pyx_CyFunction_dealloc, -#if !CYTHON_METH_FASTCALL - 0, -#elif CYTHON_BACKPORT_VECTORCALL - (printfunc)offsetof(__pyx_CyFunctionObject, 
func_vectorcall), -#else - offsetof(PyCFunctionObject, vectorcall), -#endif - 0, - 0, -#if PY_MAJOR_VERSION < 3 - 0, -#else - 0, -#endif - (reprfunc) __Pyx_CyFunction_repr, - 0, - 0, - 0, - 0, - __Pyx_CyFunction_CallAsMethod, - 0, - 0, - 0, - 0, -#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR - Py_TPFLAGS_METHOD_DESCRIPTOR | -#endif -#if defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL - _Py_TPFLAGS_HAVE_VECTORCALL | -#endif - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, - 0, - (traverseproc) __Pyx_CyFunction_traverse, - (inquiry) __Pyx_CyFunction_clear, - 0, -#if PY_VERSION_HEX < 0x030500A0 - offsetof(__pyx_CyFunctionObject, func_weakreflist), -#else - offsetof(PyCFunctionObject, m_weakreflist), -#endif - 0, - 0, - __pyx_CyFunction_methods, - __pyx_CyFunction_members, - __pyx_CyFunction_getsets, - 0, - 0, - __Pyx_PyMethod_New, - 0, - offsetof(__pyx_CyFunctionObject, func_dict), - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -#if PY_VERSION_HEX >= 0x030400a1 - 0, -#endif -#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, -#endif -#if __PYX_NEED_TP_PRINT_SLOT - 0, -#endif -#if PY_VERSION_HEX >= 0x030C0000 - 0, -#endif -#if PY_VERSION_HEX >= 0x030d00A4 - 0, -#endif -#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, -#endif -}; -#endif -static int __pyx_CyFunction_init(PyObject *module) { -#if CYTHON_USE_TYPE_SPECS - __pyx_CyFunctionType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_CyFunctionType_spec, NULL); -#else - CYTHON_UNUSED_VAR(module); - __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type); -#endif - if (unlikely(__pyx_CyFunctionType == NULL)) { - return -1; - } - return 0; -} -static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults = PyObject_Malloc(size); - if (unlikely(!m->defaults)) 
- return PyErr_NoMemory(); - memset(m->defaults, 0, size); - m->defaults_pyobjects = pyobjects; - m->defaults_size = size; - return m->defaults; -} -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults_tuple = tuple; - Py_INCREF(tuple); -} -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults_kwdict = dict; - Py_INCREF(dict); -} -static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->func_annotations = dict; - Py_INCREF(dict); -} - -/* CythonFunction */ -static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname, - PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { - PyObject *op = __Pyx_CyFunction_Init( - PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType), - ml, flags, qualname, closure, module, globals, code - ); - if (likely(op)) { - PyObject_GC_Track(op); - } - return op; -} - -/* GetTopmostException */ -#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE -static _PyErr_StackItem * -__Pyx_PyErr_GetTopmostException(PyThreadState *tstate) -{ - _PyErr_StackItem *exc_info = tstate->exc_info; - while ((exc_info->exc_value == NULL || exc_info->exc_value == Py_None) && - exc_info->previous_item != NULL) - { - exc_info = exc_info->previous_item; - } - return exc_info; -} -#endif - -/* SaveResetException */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { - #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 - _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); - PyObject *exc_value = exc_info->exc_value; - if (exc_value == NULL || 
exc_value == Py_None) { - *value = NULL; - *type = NULL; - *tb = NULL; - } else { - *value = exc_value; - Py_INCREF(*value); - *type = (PyObject*) Py_TYPE(exc_value); - Py_INCREF(*type); - *tb = PyException_GetTraceback(exc_value); - } - #elif CYTHON_USE_EXC_INFO_STACK - _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); - *type = exc_info->exc_type; - *value = exc_info->exc_value; - *tb = exc_info->exc_traceback; - Py_XINCREF(*type); - Py_XINCREF(*value); - Py_XINCREF(*tb); - #else - *type = tstate->exc_type; - *value = tstate->exc_value; - *tb = tstate->exc_traceback; - Py_XINCREF(*type); - Py_XINCREF(*value); - Py_XINCREF(*tb); - #endif -} -static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { - #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 - _PyErr_StackItem *exc_info = tstate->exc_info; - PyObject *tmp_value = exc_info->exc_value; - exc_info->exc_value = value; - Py_XDECREF(tmp_value); - Py_XDECREF(type); - Py_XDECREF(tb); - #else - PyObject *tmp_type, *tmp_value, *tmp_tb; - #if CYTHON_USE_EXC_INFO_STACK - _PyErr_StackItem *exc_info = tstate->exc_info; - tmp_type = exc_info->exc_type; - tmp_value = exc_info->exc_value; - tmp_tb = exc_info->exc_traceback; - exc_info->exc_type = type; - exc_info->exc_value = value; - exc_info->exc_traceback = tb; - #else - tmp_type = tstate->exc_type; - tmp_value = tstate->exc_value; - tmp_tb = tstate->exc_traceback; - tstate->exc_type = type; - tstate->exc_value = value; - tstate->exc_traceback = tb; - #endif - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); - #endif -} -#endif - -/* FastTypeChecks */ -#if CYTHON_COMPILING_IN_CPYTHON -static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { - while (a) { - a = __Pyx_PyType_GetSlot(a, tp_base, PyTypeObject*); - if (a == b) - return 1; - } - return b == &PyBaseObject_Type; -} -static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { - 
PyObject *mro; - if (a == b) return 1; - mro = a->tp_mro; - if (likely(mro)) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(mro); - for (i = 0; i < n; i++) { - if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) - return 1; - } - return 0; - } - return __Pyx_InBases(a, b); -} -static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b) { - PyObject *mro; - if (cls == a || cls == b) return 1; - mro = cls->tp_mro; - if (likely(mro)) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(mro); - for (i = 0; i < n; i++) { - PyObject *base = PyTuple_GET_ITEM(mro, i); - if (base == (PyObject *)a || base == (PyObject *)b) - return 1; - } - return 0; - } - return __Pyx_InBases(cls, a) || __Pyx_InBases(cls, b); -} -#if PY_MAJOR_VERSION == 2 -static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { - PyObject *exception, *value, *tb; - int res; - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ErrFetch(&exception, &value, &tb); - res = exc_type1 ? 
PyObject_IsSubclass(err, exc_type1) : 0; - if (unlikely(res == -1)) { - PyErr_WriteUnraisable(err); - res = 0; - } - if (!res) { - res = PyObject_IsSubclass(err, exc_type2); - if (unlikely(res == -1)) { - PyErr_WriteUnraisable(err); - res = 0; - } - } - __Pyx_ErrRestore(exception, value, tb); - return res; -} -#else -static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { - if (exc_type1) { - return __Pyx_IsAnySubtype2((PyTypeObject*)err, (PyTypeObject*)exc_type1, (PyTypeObject*)exc_type2); - } else { - return __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); - } -} -#endif -static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { - Py_ssize_t i, n; - assert(PyExceptionClass_Check(exc_type)); - n = PyTuple_GET_SIZE(tuple); -#if PY_MAJOR_VERSION >= 3 - for (i=0; i= 0x030C00A6 - PyException_SetTraceback(value, tb); - #elif CYTHON_FAST_THREAD_STATE - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject* tmp_tb = tstate->curexc_traceback; - if (tb != tmp_tb) { - Py_INCREF(tb); - tstate->curexc_traceback = tb; - Py_XDECREF(tmp_tb); - } -#else - PyObject *tmp_type, *tmp_value, *tmp_tb; - PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); - Py_INCREF(tb); - PyErr_Restore(tmp_type, tmp_value, tb); - Py_XDECREF(tmp_tb); -#endif - } -bad: - Py_XDECREF(owned_instance); - return; -} -#endif - -/* PyObjectCall */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { - PyObject *result; - ternaryfunc call = Py_TYPE(func)->tp_call; - if (unlikely(!call)) - return PyObject_Call(func, arg, kw); - #if PY_MAJOR_VERSION < 3 - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - #else - if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) - return NULL; - #endif - result = (*call)(func, arg, kw); - Py_LeaveRecursiveCall(); - if 
(unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -/* UnpackUnboundCMethod */ -static PyObject *__Pyx_SelflessCall(PyObject *method, PyObject *args, PyObject *kwargs) { - PyObject *result; - PyObject *selfless_args = PyTuple_GetSlice(args, 1, PyTuple_Size(args)); - if (unlikely(!selfless_args)) return NULL; - result = PyObject_Call(method, selfless_args, kwargs); - Py_DECREF(selfless_args); - return result; -} -static PyMethodDef __Pyx_UnboundCMethod_Def = { - "CythonUnboundCMethod", - __PYX_REINTERPRET_FUNCION(PyCFunction, __Pyx_SelflessCall), - METH_VARARGS | METH_KEYWORDS, - NULL -}; -static int __Pyx_TryUnpackUnboundCMethod(__Pyx_CachedCFunction* target) { - PyObject *method; - method = __Pyx_PyObject_GetAttrStr(target->type, *target->method_name); - if (unlikely(!method)) - return -1; - target->method = method; -#if CYTHON_COMPILING_IN_CPYTHON - #if PY_MAJOR_VERSION >= 3 - if (likely(__Pyx_TypeCheck(method, &PyMethodDescr_Type))) - #else - if (likely(!__Pyx_CyOrPyCFunction_Check(method))) - #endif - { - PyMethodDescrObject *descr = (PyMethodDescrObject*) method; - target->func = descr->d_method->ml_meth; - target->flag = descr->d_method->ml_flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_STACKLESS); - } else -#endif -#if CYTHON_COMPILING_IN_PYPY -#else - if (PyCFunction_Check(method)) -#endif - { - PyObject *self; - int self_found; -#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY - self = PyObject_GetAttrString(method, "__self__"); - if (!self) { - PyErr_Clear(); - } -#else - self = PyCFunction_GET_SELF(method); -#endif - self_found = (self && self != Py_None); -#if CYTHON_COMPILING_IN_LIMITED_API || CYTHON_COMPILING_IN_PYPY - Py_XDECREF(self); -#endif - if (self_found) { - PyObject *unbound_method = PyCFunction_New(&__Pyx_UnboundCMethod_Def, method); - if (unlikely(!unbound_method)) return -1; - 
Py_DECREF(method); - target->method = unbound_method; - } - } - return 0; -} - -/* CallUnboundCMethod1 */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg) { - if (likely(cfunc->func)) { - int flag = cfunc->flag; - if (flag == METH_O) { - return (*(cfunc->func))(self, arg); - } else if ((PY_VERSION_HEX >= 0x030600B1) && flag == METH_FASTCALL) { - #if PY_VERSION_HEX >= 0x030700A0 - return (*(__Pyx_PyCFunctionFast)(void*)(PyCFunction)cfunc->func)(self, &arg, 1); - #else - return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL); - #endif - } else if ((PY_VERSION_HEX >= 0x030700A0) && flag == (METH_FASTCALL | METH_KEYWORDS)) { - return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, &arg, 1, NULL); - } - } - return __Pyx__CallUnboundCMethod1(cfunc, self, arg); -} -#endif -static PyObject* __Pyx__CallUnboundCMethod1(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg){ - PyObject *args, *result = NULL; - if (unlikely(!cfunc->func && !cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL; -#if CYTHON_COMPILING_IN_CPYTHON - if (cfunc->func && (cfunc->flag & METH_VARARGS)) { - args = PyTuple_New(1); - if (unlikely(!args)) goto bad; - Py_INCREF(arg); - PyTuple_SET_ITEM(args, 0, arg); - if (cfunc->flag & METH_KEYWORDS) - result = (*(PyCFunctionWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, NULL); - else - result = (*cfunc->func)(self, args); - } else { - args = PyTuple_New(2); - if (unlikely(!args)) goto bad; - Py_INCREF(self); - PyTuple_SET_ITEM(args, 0, self); - Py_INCREF(arg); - PyTuple_SET_ITEM(args, 1, arg); - result = __Pyx_PyObject_Call(cfunc->method, args, NULL); - } -#else - args = PyTuple_Pack(2, self, arg); - if (unlikely(!args)) goto bad; - result = __Pyx_PyObject_Call(cfunc->method, args, NULL); -#endif -bad: - Py_XDECREF(args); - return result; 
-} - -/* RaiseUnexpectedTypeError */ -static int -__Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj) -{ - __Pyx_TypeName obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, "Expected %s, got " __Pyx_FMT_TYPENAME, - expected, obj_type_name); - __Pyx_DECREF_TypeName(obj_type_name); - return 0; -} - -/* decode_c_bytes */ -static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes( - const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop, - const char* encoding, const char* errors, - PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors)) { - if (unlikely((start < 0) | (stop < 0))) { - if (start < 0) { - start += length; - if (start < 0) - start = 0; - } - if (stop < 0) - stop += length; - } - if (stop > length) - stop = length; - if (unlikely(stop <= start)) - return __Pyx_NewRef(__pyx_empty_unicode); - length = stop - start; - cstring += start; - if (decode_func) { - return decode_func(cstring, length, errors); - } else { - return PyUnicode_Decode(cstring, length, encoding, errors); - } -} - -/* ArgTypeTest */ -static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) -{ - __Pyx_TypeName type_name; - __Pyx_TypeName obj_type_name; - if (unlikely(!type)) { - PyErr_SetString(PyExc_SystemError, "Missing type object"); - return 0; - } - else if (exact) { - #if PY_MAJOR_VERSION == 2 - if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; - #endif - } - else { - if (likely(__Pyx_TypeCheck(obj, type))) return 1; - } - type_name = __Pyx_PyType_GetName(type); - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "Argument '%.200s' has incorrect type (expected " __Pyx_FMT_TYPENAME - ", got " __Pyx_FMT_TYPENAME ")", name, type_name, obj_type_name); - __Pyx_DECREF_TypeName(type_name); - __Pyx_DECREF_TypeName(obj_type_name); - return 0; -} - -/* PyFunctionFastCall */ -#if CYTHON_FAST_PYCALL && 
!CYTHON_VECTORCALL -static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, - PyObject *globals) { - PyFrameObject *f; - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject **fastlocals; - Py_ssize_t i; - PyObject *result; - assert(globals != NULL); - /* XXX Perhaps we should create a specialized - PyFrame_New() that doesn't take locals, but does - take builtins without sanity checking them. - */ - assert(tstate != NULL); - f = PyFrame_New(tstate, co, globals, NULL); - if (f == NULL) { - return NULL; - } - fastlocals = __Pyx_PyFrame_GetLocalsplus(f); - for (i = 0; i < na; i++) { - Py_INCREF(*args); - fastlocals[i] = *args++; - } - result = PyEval_EvalFrameEx(f,0); - ++tstate->recursion_depth; - Py_DECREF(f); - --tstate->recursion_depth; - return result; -} -static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { - PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); - PyObject *globals = PyFunction_GET_GLOBALS(func); - PyObject *argdefs = PyFunction_GET_DEFAULTS(func); - PyObject *closure; -#if PY_MAJOR_VERSION >= 3 - PyObject *kwdefs; -#endif - PyObject *kwtuple, **k; - PyObject **d; - Py_ssize_t nd; - Py_ssize_t nk; - PyObject *result; - assert(kwargs == NULL || PyDict_Check(kwargs)); - nk = kwargs ? 
PyDict_Size(kwargs) : 0; - #if PY_MAJOR_VERSION < 3 - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) { - return NULL; - } - #else - if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) { - return NULL; - } - #endif - if ( -#if PY_MAJOR_VERSION >= 3 - co->co_kwonlyargcount == 0 && -#endif - likely(kwargs == NULL || nk == 0) && - co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { - if (argdefs == NULL && co->co_argcount == nargs) { - result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); - goto done; - } - else if (nargs == 0 && argdefs != NULL - && co->co_argcount == Py_SIZE(argdefs)) { - /* function called with no arguments, but all parameters have - a default value: use default values as arguments .*/ - args = &PyTuple_GET_ITEM(argdefs, 0); - result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); - goto done; - } - } - if (kwargs != NULL) { - Py_ssize_t pos, i; - kwtuple = PyTuple_New(2 * nk); - if (kwtuple == NULL) { - result = NULL; - goto done; - } - k = &PyTuple_GET_ITEM(kwtuple, 0); - pos = i = 0; - while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { - Py_INCREF(k[i]); - Py_INCREF(k[i+1]); - i += 2; - } - nk = i / 2; - } - else { - kwtuple = NULL; - k = NULL; - } - closure = PyFunction_GET_CLOSURE(func); -#if PY_MAJOR_VERSION >= 3 - kwdefs = PyFunction_GET_KW_DEFAULTS(func); -#endif - if (argdefs != NULL) { - d = &PyTuple_GET_ITEM(argdefs, 0); - nd = Py_SIZE(argdefs); - } - else { - d = NULL; - nd = 0; - } -#if PY_MAJOR_VERSION >= 3 - result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, - args, (int)nargs, - k, (int)nk, - d, (int)nd, kwdefs, closure); -#else - result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, - args, (int)nargs, - k, (int)nk, - d, (int)nd, closure); -#endif - Py_XDECREF(kwtuple); -done: - Py_LeaveRecursiveCall(); - return result; -} -#endif - -/* PyObjectCallMethO */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE 
PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { - PyObject *self, *result; - PyCFunction cfunc; - cfunc = __Pyx_CyOrPyCFunction_GET_FUNCTION(func); - self = __Pyx_CyOrPyCFunction_GET_SELF(func); - #if PY_MAJOR_VERSION < 3 - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - #else - if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) - return NULL; - #endif - result = cfunc(self, arg); - Py_LeaveRecursiveCall(); - if (unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -/* PyObjectFastCall */ -#if PY_VERSION_HEX < 0x03090000 || CYTHON_COMPILING_IN_LIMITED_API -static PyObject* __Pyx_PyObject_FastCall_fallback(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs) { - PyObject *argstuple; - PyObject *result = 0; - size_t i; - argstuple = PyTuple_New((Py_ssize_t)nargs); - if (unlikely(!argstuple)) return NULL; - for (i = 0; i < nargs; i++) { - Py_INCREF(args[i]); - if (__Pyx_PyTuple_SET_ITEM(argstuple, (Py_ssize_t)i, args[i]) < 0) goto bad; - } - result = __Pyx_PyObject_Call(func, argstuple, kwargs); - bad: - Py_DECREF(argstuple); - return result; -} -#endif -static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t _nargs, PyObject *kwargs) { - Py_ssize_t nargs = __Pyx_PyVectorcall_NARGS(_nargs); -#if CYTHON_COMPILING_IN_CPYTHON - if (nargs == 0 && kwargs == NULL) { - if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_NOARGS)) - return __Pyx_PyObject_CallMethO(func, NULL); - } - else if (nargs == 1 && kwargs == NULL) { - if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_O)) - return __Pyx_PyObject_CallMethO(func, args[0]); - } -#endif - #if PY_VERSION_HEX < 0x030800B1 - #if CYTHON_FAST_PYCCALL - if (PyCFunction_Check(func)) { - 
if (kwargs) { - return _PyCFunction_FastCallDict(func, args, nargs, kwargs); - } else { - return _PyCFunction_FastCallKeywords(func, args, nargs, NULL); - } - } - #if PY_VERSION_HEX >= 0x030700A1 - if (!kwargs && __Pyx_IS_TYPE(func, &PyMethodDescr_Type)) { - return _PyMethodDescr_FastCallKeywords(func, args, nargs, NULL); - } - #endif - #endif - #if CYTHON_FAST_PYCALL - if (PyFunction_Check(func)) { - return __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs); - } - #endif - #endif - if (kwargs == NULL) { - #if CYTHON_VECTORCALL - #if PY_VERSION_HEX < 0x03090000 - vectorcallfunc f = _PyVectorcall_Function(func); - #else - vectorcallfunc f = PyVectorcall_Function(func); - #endif - if (f) { - return f(func, args, (size_t)nargs, NULL); - } - #elif defined(__Pyx_CyFunction_USED) && CYTHON_BACKPORT_VECTORCALL - if (__Pyx_CyFunction_CheckExact(func)) { - __pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func); - if (f) return f(func, args, (size_t)nargs, NULL); - } - #endif - } - if (nargs == 0) { - return __Pyx_PyObject_Call(func, __pyx_empty_tuple, kwargs); - } - #if PY_VERSION_HEX >= 0x03090000 && !CYTHON_COMPILING_IN_LIMITED_API - return PyObject_VectorcallDict(func, args, (size_t)nargs, kwargs); - #else - return __Pyx_PyObject_FastCall_fallback(func, args, (size_t)nargs, kwargs); - #endif -} - -/* PyObjectCallOneArg */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject *args[2] = {NULL, arg}; - return __Pyx_PyObject_FastCall(func, args+1, 1 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); -} - -/* SliceObject */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice(PyObject* obj, - Py_ssize_t cstart, Py_ssize_t cstop, - PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, - int has_cstart, int has_cstop, int wraparound) { - __Pyx_TypeName obj_type_name; -#if CYTHON_USE_TYPE_SLOTS - PyMappingMethods* mp; -#if PY_MAJOR_VERSION < 3 - PySequenceMethods* ms = Py_TYPE(obj)->tp_as_sequence; - if 
(likely(ms && ms->sq_slice)) { - if (!has_cstart) { - if (_py_start && (*_py_start != Py_None)) { - cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); - if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; - } else - cstart = 0; - } - if (!has_cstop) { - if (_py_stop && (*_py_stop != Py_None)) { - cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); - if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; - } else - cstop = PY_SSIZE_T_MAX; - } - if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { - Py_ssize_t l = ms->sq_length(obj); - if (likely(l >= 0)) { - if (cstop < 0) { - cstop += l; - if (cstop < 0) cstop = 0; - } - if (cstart < 0) { - cstart += l; - if (cstart < 0) cstart = 0; - } - } else { - if (!PyErr_ExceptionMatches(PyExc_OverflowError)) - goto bad; - PyErr_Clear(); - } - } - return ms->sq_slice(obj, cstart, cstop); - } -#else - CYTHON_UNUSED_VAR(wraparound); -#endif - mp = Py_TYPE(obj)->tp_as_mapping; - if (likely(mp && mp->mp_subscript)) -#else - CYTHON_UNUSED_VAR(wraparound); -#endif - { - PyObject* result; - PyObject *py_slice, *py_start, *py_stop; - if (_py_slice) { - py_slice = *_py_slice; - } else { - PyObject* owned_start = NULL; - PyObject* owned_stop = NULL; - if (_py_start) { - py_start = *_py_start; - } else { - if (has_cstart) { - owned_start = py_start = PyInt_FromSsize_t(cstart); - if (unlikely(!py_start)) goto bad; - } else - py_start = Py_None; - } - if (_py_stop) { - py_stop = *_py_stop; - } else { - if (has_cstop) { - owned_stop = py_stop = PyInt_FromSsize_t(cstop); - if (unlikely(!py_stop)) { - Py_XDECREF(owned_start); - goto bad; - } - } else - py_stop = Py_None; - } - py_slice = PySlice_New(py_start, py_stop, Py_None); - Py_XDECREF(owned_start); - Py_XDECREF(owned_stop); - if (unlikely(!py_slice)) goto bad; - } -#if CYTHON_USE_TYPE_SLOTS - result = mp->mp_subscript(obj, py_slice); -#else - result = PyObject_GetItem(obj, py_slice); -#endif - if (!_py_slice) { - Py_DECREF(py_slice); - } - return 
result; - } - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "'" __Pyx_FMT_TYPENAME "' object is unsliceable", obj_type_name); - __Pyx_DECREF_TypeName(obj_type_name); -bad: - return NULL; -} - -/* GetAttr */ -static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *o, PyObject *n) { -#if CYTHON_USE_TYPE_SLOTS -#if PY_MAJOR_VERSION >= 3 - if (likely(PyUnicode_Check(n))) -#else - if (likely(PyString_Check(n))) -#endif - return __Pyx_PyObject_GetAttrStr(o, n); -#endif - return PyObject_GetAttr(o, n); -} - -/* SetItemInt */ -static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) { - int r; - if (unlikely(!j)) return -1; - r = PyObject_SetItem(o, j, v); - Py_DECREF(j); - return r; -} -static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list, - CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS - if (is_list || PyList_CheckExact(o)) { - Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? 
i : i + PyList_GET_SIZE(o)); - if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o)))) { - PyObject* old = PyList_GET_ITEM(o, n); - Py_INCREF(v); - PyList_SET_ITEM(o, n, v); - Py_DECREF(old); - return 1; - } - } else { - PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; - PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; - if (mm && mm->mp_ass_subscript) { - int r; - PyObject *key = PyInt_FromSsize_t(i); - if (unlikely(!key)) return -1; - r = mm->mp_ass_subscript(o, key, v); - Py_DECREF(key); - return r; - } - if (likely(sm && sm->sq_ass_item)) { - if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { - Py_ssize_t l = sm->sq_length(o); - if (likely(l >= 0)) { - i += l; - } else { - if (!PyErr_ExceptionMatches(PyExc_OverflowError)) - return -1; - PyErr_Clear(); - } - } - return sm->sq_ass_item(o, i, v); - } - } -#else - if (is_list || !PyMapping_Check(o)) - { - return PySequence_SetItem(o, i, v); - } -#endif - return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v); -} - -/* HasAttr */ -static CYTHON_INLINE int __Pyx_HasAttr(PyObject *o, PyObject *n) { - PyObject *r; - if (unlikely(!__Pyx_PyBaseString_Check(n))) { - PyErr_SetString(PyExc_TypeError, - "hasattr(): attribute name must be string"); - return -1; - } - r = __Pyx_GetAttr(o, n); - if (!r) { - PyErr_Clear(); - return 0; - } else { - Py_DECREF(r); - return 1; - } -} - -/* RaiseUnboundLocalError */ -static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname) { - PyErr_Format(PyExc_UnboundLocalError, "local variable '%s' referenced before assignment", varname); -} - -/* SliceObject */ -static CYTHON_INLINE int __Pyx_PyObject_SetSlice(PyObject* obj, PyObject* value, - Py_ssize_t cstart, Py_ssize_t cstop, - PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, - int has_cstart, int has_cstop, int wraparound) { - __Pyx_TypeName obj_type_name; -#if CYTHON_USE_TYPE_SLOTS - PyMappingMethods* mp; -#if PY_MAJOR_VERSION < 3 - PySequenceMethods* ms = 
Py_TYPE(obj)->tp_as_sequence; - if (likely(ms && ms->sq_ass_slice)) { - if (!has_cstart) { - if (_py_start && (*_py_start != Py_None)) { - cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); - if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; - } else - cstart = 0; - } - if (!has_cstop) { - if (_py_stop && (*_py_stop != Py_None)) { - cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); - if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; - } else - cstop = PY_SSIZE_T_MAX; - } - if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { - Py_ssize_t l = ms->sq_length(obj); - if (likely(l >= 0)) { - if (cstop < 0) { - cstop += l; - if (cstop < 0) cstop = 0; - } - if (cstart < 0) { - cstart += l; - if (cstart < 0) cstart = 0; - } - } else { - if (!PyErr_ExceptionMatches(PyExc_OverflowError)) - goto bad; - PyErr_Clear(); - } - } - return ms->sq_ass_slice(obj, cstart, cstop, value); - } -#else - CYTHON_UNUSED_VAR(wraparound); -#endif - mp = Py_TYPE(obj)->tp_as_mapping; - if (likely(mp && mp->mp_ass_subscript)) -#else - CYTHON_UNUSED_VAR(wraparound); -#endif - { - int result; - PyObject *py_slice, *py_start, *py_stop; - if (_py_slice) { - py_slice = *_py_slice; - } else { - PyObject* owned_start = NULL; - PyObject* owned_stop = NULL; - if (_py_start) { - py_start = *_py_start; - } else { - if (has_cstart) { - owned_start = py_start = PyInt_FromSsize_t(cstart); - if (unlikely(!py_start)) goto bad; - } else - py_start = Py_None; - } - if (_py_stop) { - py_stop = *_py_stop; - } else { - if (has_cstop) { - owned_stop = py_stop = PyInt_FromSsize_t(cstop); - if (unlikely(!py_stop)) { - Py_XDECREF(owned_start); - goto bad; - } - } else - py_stop = Py_None; - } - py_slice = PySlice_New(py_start, py_stop, Py_None); - Py_XDECREF(owned_start); - Py_XDECREF(owned_stop); - if (unlikely(!py_slice)) goto bad; - } -#if CYTHON_USE_TYPE_SLOTS - result = mp->mp_ass_subscript(obj, py_slice, value); -#else - result = value ? 
PyObject_SetItem(obj, py_slice, value) : PyObject_DelItem(obj, py_slice); -#endif - if (!_py_slice) { - Py_DECREF(py_slice); - } - return result; - } - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "'" __Pyx_FMT_TYPENAME "' object does not support slice %.10s", - obj_type_name, value ? "assignment" : "deletion"); - __Pyx_DECREF_TypeName(obj_type_name); -bad: - return -1; -} - -/* PyObjectCall2Args */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2) { - PyObject *args[3] = {NULL, arg1, arg2}; - return __Pyx_PyObject_FastCall(function, args+1, 2 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); -} - -/* PyObjectGetMethod */ -static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) { - PyObject *attr; -#if CYTHON_UNPACK_METHODS && CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_PYTYPE_LOOKUP - __Pyx_TypeName type_name; - PyTypeObject *tp = Py_TYPE(obj); - PyObject *descr; - descrgetfunc f = NULL; - PyObject **dictptr, *dict; - int meth_found = 0; - assert (*method == NULL); - if (unlikely(tp->tp_getattro != PyObject_GenericGetAttr)) { - attr = __Pyx_PyObject_GetAttrStr(obj, name); - goto try_unpack; - } - if (unlikely(tp->tp_dict == NULL) && unlikely(PyType_Ready(tp) < 0)) { - return 0; - } - descr = _PyType_Lookup(tp, name); - if (likely(descr != NULL)) { - Py_INCREF(descr); -#if defined(Py_TPFLAGS_METHOD_DESCRIPTOR) && Py_TPFLAGS_METHOD_DESCRIPTOR - if (__Pyx_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR)) -#elif PY_MAJOR_VERSION >= 3 - #ifdef __Pyx_CyFunction_USED - if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type) || __Pyx_CyFunction_Check(descr))) - #else - if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type))) - #endif -#else - #ifdef __Pyx_CyFunction_USED - if (likely(PyFunction_Check(descr) || __Pyx_CyFunction_Check(descr))) - #else - if (likely(PyFunction_Check(descr))) - 
#endif -#endif - { - meth_found = 1; - } else { - f = Py_TYPE(descr)->tp_descr_get; - if (f != NULL && PyDescr_IsData(descr)) { - attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); - Py_DECREF(descr); - goto try_unpack; - } - } - } - dictptr = _PyObject_GetDictPtr(obj); - if (dictptr != NULL && (dict = *dictptr) != NULL) { - Py_INCREF(dict); - attr = __Pyx_PyDict_GetItemStr(dict, name); - if (attr != NULL) { - Py_INCREF(attr); - Py_DECREF(dict); - Py_XDECREF(descr); - goto try_unpack; - } - Py_DECREF(dict); - } - if (meth_found) { - *method = descr; - return 1; - } - if (f != NULL) { - attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); - Py_DECREF(descr); - goto try_unpack; - } - if (likely(descr != NULL)) { - *method = descr; - return 0; - } - type_name = __Pyx_PyType_GetName(tp); - PyErr_Format(PyExc_AttributeError, -#if PY_MAJOR_VERSION >= 3 - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", - type_name, name); -#else - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", - type_name, PyString_AS_STRING(name)); -#endif - __Pyx_DECREF_TypeName(type_name); - return 0; -#else - attr = __Pyx_PyObject_GetAttrStr(obj, name); - goto try_unpack; -#endif -try_unpack: -#if CYTHON_UNPACK_METHODS - if (likely(attr) && PyMethod_Check(attr) && likely(PyMethod_GET_SELF(attr) == obj)) { - PyObject *function = PyMethod_GET_FUNCTION(attr); - Py_INCREF(function); - Py_DECREF(attr); - *method = function; - return 1; - } -#endif - *method = attr; - return 0; -} - -/* PyObjectCallMethod1 */ -#if !(CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C00A2) -static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) { - PyObject *result = __Pyx_PyObject_CallOneArg(method, arg); - Py_DECREF(method); - return result; -} -#endif -static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) { -#if CYTHON_VECTORCALL && __PYX_LIMITED_VERSION_HEX >= 0x030C00A2 - PyObject *args[2] = {obj, arg}; - (void) 
__Pyx_PyObject_GetMethod; - (void) __Pyx_PyObject_CallOneArg; - (void) __Pyx_PyObject_Call2Args; - return PyObject_VectorcallMethod(method_name, args, 2 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); -#else - PyObject *method = NULL, *result; - int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); - if (likely(is_method)) { - result = __Pyx_PyObject_Call2Args(method, obj, arg); - Py_DECREF(method); - return result; - } - if (unlikely(!method)) return NULL; - return __Pyx__PyObject_CallMethod1(method, arg); -#endif -} - -/* StringJoin */ -static CYTHON_INLINE PyObject* __Pyx_PyBytes_Join(PyObject* sep, PyObject* values) { - (void) __Pyx_PyObject_CallMethod1; -#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION < 3 - return _PyString_Join(sep, values); -#elif CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 - return _PyBytes_Join(sep, values); -#else - return __Pyx_PyObject_CallMethod1(sep, __pyx_n_s_join, values); -#endif -} - -/* PyObjectSetAttrStr */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value) { - PyTypeObject* tp = Py_TYPE(obj); - if (likely(tp->tp_setattro)) - return tp->tp_setattro(obj, attr_name, value); -#if PY_MAJOR_VERSION < 3 - if (likely(tp->tp_setattr)) - return tp->tp_setattr(obj, PyString_AS_STRING(attr_name), value); -#endif - return PyObject_SetAttr(obj, attr_name, value); -} -#endif - -/* PyObjectCallNoArg */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { - PyObject *arg[2] = {NULL, NULL}; - return __Pyx_PyObject_FastCall(func, arg + 1, 0 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); -} - -/* PyObjectCallMethod0 */ -static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) { - PyObject *method = NULL, *result = NULL; - int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); - if (likely(is_method)) { - result = __Pyx_PyObject_CallOneArg(method, obj); - Py_DECREF(method); - return 
result; - } - if (unlikely(!method)) goto bad; - result = __Pyx_PyObject_CallNoArg(method); - Py_DECREF(method); -bad: - return result; -} - -/* ValidateBasesTuple */ -#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS -static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases) { - Py_ssize_t i, n; -#if CYTHON_ASSUME_SAFE_MACROS - n = PyTuple_GET_SIZE(bases); -#else - n = PyTuple_Size(bases); - if (n < 0) return -1; -#endif - for (i = 1; i < n; i++) - { -#if CYTHON_AVOID_BORROWED_REFS - PyObject *b0 = PySequence_GetItem(bases, i); - if (!b0) return -1; -#elif CYTHON_ASSUME_SAFE_MACROS - PyObject *b0 = PyTuple_GET_ITEM(bases, i); -#else - PyObject *b0 = PyTuple_GetItem(bases, i); - if (!b0) return -1; -#endif - PyTypeObject *b; -#if PY_MAJOR_VERSION < 3 - if (PyClass_Check(b0)) - { - PyErr_Format(PyExc_TypeError, "base class '%.200s' is an old-style class", - PyString_AS_STRING(((PyClassObject*)b0)->cl_name)); -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - return -1; - } -#endif - b = (PyTypeObject*) b0; - if (!__Pyx_PyType_HasFeature(b, Py_TPFLAGS_HEAPTYPE)) - { - __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); - PyErr_Format(PyExc_TypeError, - "base class '" __Pyx_FMT_TYPENAME "' is not a heap type", b_name); - __Pyx_DECREF_TypeName(b_name); -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - return -1; - } - if (dictoffset == 0) - { - Py_ssize_t b_dictoffset = 0; -#if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - b_dictoffset = b->tp_dictoffset; -#else - PyObject *py_b_dictoffset = PyObject_GetAttrString((PyObject*)b, "__dictoffset__"); - if (!py_b_dictoffset) goto dictoffset_return; - b_dictoffset = PyLong_AsSsize_t(py_b_dictoffset); - Py_DECREF(py_b_dictoffset); - if (b_dictoffset == -1 && PyErr_Occurred()) goto dictoffset_return; -#endif - if (b_dictoffset) { - { - __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); - PyErr_Format(PyExc_TypeError, - 
"extension type '%.200s' has no __dict__ slot, " - "but base type '" __Pyx_FMT_TYPENAME "' has: " - "either add 'cdef dict __dict__' to the extension type " - "or add '__slots__ = [...]' to the base type", - type_name, b_name); - __Pyx_DECREF_TypeName(b_name); - } -#if !(CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY) - dictoffset_return: -#endif -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - return -1; - } - } -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - } - return 0; -} -#endif - -/* PyType_Ready */ -static int __Pyx_PyType_Ready(PyTypeObject *t) { -#if CYTHON_USE_TYPE_SPECS || !(CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API) || defined(PYSTON_MAJOR_VERSION) - (void)__Pyx_PyObject_CallMethod0; -#if CYTHON_USE_TYPE_SPECS - (void)__Pyx_validate_bases_tuple; -#endif - return PyType_Ready(t); -#else - int r; - PyObject *bases = __Pyx_PyType_GetSlot(t, tp_bases, PyObject*); - if (bases && unlikely(__Pyx_validate_bases_tuple(t->tp_name, t->tp_dictoffset, bases) == -1)) - return -1; -#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) - { - int gc_was_enabled; - #if PY_VERSION_HEX >= 0x030A00b1 - gc_was_enabled = PyGC_Disable(); - (void)__Pyx_PyObject_CallMethod0; - #else - PyObject *ret, *py_status; - PyObject *gc = NULL; - #if PY_VERSION_HEX >= 0x030700a1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM+0 >= 0x07030400) - gc = PyImport_GetModule(__pyx_kp_u_gc); - #endif - if (unlikely(!gc)) gc = PyImport_Import(__pyx_kp_u_gc); - if (unlikely(!gc)) return -1; - py_status = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_isenabled); - if (unlikely(!py_status)) { - Py_DECREF(gc); - return -1; - } - gc_was_enabled = __Pyx_PyObject_IsTrue(py_status); - Py_DECREF(py_status); - if (gc_was_enabled > 0) { - ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_disable); - if (unlikely(!ret)) { - Py_DECREF(gc); - return -1; - } - Py_DECREF(ret); - } else if (unlikely(gc_was_enabled == -1)) { - Py_DECREF(gc); - return -1; - } - 
#endif - t->tp_flags |= Py_TPFLAGS_HEAPTYPE; -#if PY_VERSION_HEX >= 0x030A0000 - t->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; -#endif -#else - (void)__Pyx_PyObject_CallMethod0; -#endif - r = PyType_Ready(t); -#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) - t->tp_flags &= ~Py_TPFLAGS_HEAPTYPE; - #if PY_VERSION_HEX >= 0x030A00b1 - if (gc_was_enabled) - PyGC_Enable(); - #else - if (gc_was_enabled) { - PyObject *tp, *v, *tb; - PyErr_Fetch(&tp, &v, &tb); - ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_enable); - if (likely(ret || r == -1)) { - Py_XDECREF(ret); - PyErr_Restore(tp, v, tb); - } else { - Py_XDECREF(tp); - Py_XDECREF(v); - Py_XDECREF(tb); - r = -1; - } - } - Py_DECREF(gc); - #endif - } -#endif - return r; -#endif -} - -/* PyObject_GenericGetAttrNoDict */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static PyObject *__Pyx_RaiseGenericGetAttributeError(PyTypeObject *tp, PyObject *attr_name) { - __Pyx_TypeName type_name = __Pyx_PyType_GetName(tp); - PyErr_Format(PyExc_AttributeError, -#if PY_MAJOR_VERSION >= 3 - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", - type_name, attr_name); -#else - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", - type_name, PyString_AS_STRING(attr_name)); -#endif - __Pyx_DECREF_TypeName(type_name); - return NULL; -} -static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name) { - PyObject *descr; - PyTypeObject *tp = Py_TYPE(obj); - if (unlikely(!PyString_Check(attr_name))) { - return PyObject_GenericGetAttr(obj, attr_name); - } - assert(!tp->tp_dictoffset); - descr = _PyType_Lookup(tp, attr_name); - if (unlikely(!descr)) { - return __Pyx_RaiseGenericGetAttributeError(tp, attr_name); - } - Py_INCREF(descr); - #if PY_MAJOR_VERSION < 3 - if (likely(PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_HAVE_CLASS))) - #endif - { - descrgetfunc f = Py_TYPE(descr)->tp_descr_get; - if (unlikely(f)) { - PyObject *res = 
f(descr, obj, (PyObject *)tp); - Py_DECREF(descr); - return res; - } - } - return descr; -} -#endif - -/* PyObject_GenericGetAttr */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name) { - if (unlikely(Py_TYPE(obj)->tp_dictoffset)) { - return PyObject_GenericGetAttr(obj, attr_name); - } - return __Pyx_PyObject_GenericGetAttrNoDict(obj, attr_name); -} -#endif - -/* SetupReduce */ -#if !CYTHON_COMPILING_IN_LIMITED_API -static int __Pyx_setup_reduce_is_named(PyObject* meth, PyObject* name) { - int ret; - PyObject *name_attr; - name_attr = __Pyx_PyObject_GetAttrStrNoError(meth, __pyx_n_s_name); - if (likely(name_attr)) { - ret = PyObject_RichCompareBool(name_attr, name, Py_EQ); - } else { - ret = -1; - } - if (unlikely(ret < 0)) { - PyErr_Clear(); - ret = 0; - } - Py_XDECREF(name_attr); - return ret; -} -static int __Pyx_setup_reduce(PyObject* type_obj) { - int ret = 0; - PyObject *object_reduce = NULL; - PyObject *object_getstate = NULL; - PyObject *object_reduce_ex = NULL; - PyObject *reduce = NULL; - PyObject *reduce_ex = NULL; - PyObject *reduce_cython = NULL; - PyObject *setstate = NULL; - PyObject *setstate_cython = NULL; - PyObject *getstate = NULL; -#if CYTHON_USE_PYTYPE_LOOKUP - getstate = _PyType_Lookup((PyTypeObject*)type_obj, __pyx_n_s_getstate); -#else - getstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_getstate); - if (!getstate && PyErr_Occurred()) { - goto __PYX_BAD; - } -#endif - if (getstate) { -#if CYTHON_USE_PYTYPE_LOOKUP - object_getstate = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_getstate); -#else - object_getstate = __Pyx_PyObject_GetAttrStrNoError((PyObject*)&PyBaseObject_Type, __pyx_n_s_getstate); - if (!object_getstate && PyErr_Occurred()) { - goto __PYX_BAD; - } -#endif - if (object_getstate != getstate) { - goto __PYX_GOOD; - } - } -#if CYTHON_USE_PYTYPE_LOOKUP - object_reduce_ex = 
_PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; -#else - object_reduce_ex = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; -#endif - reduce_ex = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce_ex); if (unlikely(!reduce_ex)) goto __PYX_BAD; - if (reduce_ex == object_reduce_ex) { -#if CYTHON_USE_PYTYPE_LOOKUP - object_reduce = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; -#else - object_reduce = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; -#endif - reduce = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce); if (unlikely(!reduce)) goto __PYX_BAD; - if (reduce == object_reduce || __Pyx_setup_reduce_is_named(reduce, __pyx_n_s_reduce_cython)) { - reduce_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_reduce_cython); - if (likely(reduce_cython)) { - ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce, reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - } else if (reduce == object_reduce || PyErr_Occurred()) { - goto __PYX_BAD; - } - setstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate); - if (!setstate) PyErr_Clear(); - if (!setstate || __Pyx_setup_reduce_is_named(setstate, __pyx_n_s_setstate_cython)) { - setstate_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate_cython); - if (likely(setstate_cython)) { - ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate, setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - } else if (!setstate || PyErr_Occurred()) { - goto __PYX_BAD; - } - } - 
PyType_Modified((PyTypeObject*)type_obj); - } - } - goto __PYX_GOOD; -__PYX_BAD: - if (!PyErr_Occurred()) { - __Pyx_TypeName type_obj_name = - __Pyx_PyType_GetName((PyTypeObject*)type_obj); - PyErr_Format(PyExc_RuntimeError, - "Unable to initialize pickling for " __Pyx_FMT_TYPENAME, type_obj_name); - __Pyx_DECREF_TypeName(type_obj_name); - } - ret = -1; -__PYX_GOOD: -#if !CYTHON_USE_PYTYPE_LOOKUP - Py_XDECREF(object_reduce); - Py_XDECREF(object_reduce_ex); - Py_XDECREF(object_getstate); - Py_XDECREF(getstate); -#endif - Py_XDECREF(reduce); - Py_XDECREF(reduce_ex); - Py_XDECREF(reduce_cython); - Py_XDECREF(setstate); - Py_XDECREF(setstate_cython); - return ret; -} -#endif - -/* Import */ -static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { - PyObject *module = 0; - PyObject *empty_dict = 0; - PyObject *empty_list = 0; - #if PY_MAJOR_VERSION < 3 - PyObject *py_import; - py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); - if (unlikely(!py_import)) - goto bad; - if (!from_list) { - empty_list = PyList_New(0); - if (unlikely(!empty_list)) - goto bad; - from_list = empty_list; - } - #endif - empty_dict = PyDict_New(); - if (unlikely(!empty_dict)) - goto bad; - { - #if PY_MAJOR_VERSION >= 3 - if (level == -1) { - if (strchr(__Pyx_MODULE_NAME, '.') != NULL) { - module = PyImport_ImportModuleLevelObject( - name, __pyx_d, empty_dict, from_list, 1); - if (unlikely(!module)) { - if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) - goto bad; - PyErr_Clear(); - } - } - level = 0; - } - #endif - if (!module) { - #if PY_MAJOR_VERSION < 3 - PyObject *py_level = PyInt_FromLong(level); - if (unlikely(!py_level)) - goto bad; - module = PyObject_CallFunctionObjArgs(py_import, - name, __pyx_d, empty_dict, from_list, py_level, (PyObject *)NULL); - Py_DECREF(py_level); - #else - module = PyImport_ImportModuleLevelObject( - name, __pyx_d, empty_dict, from_list, level); - #endif - } - } -bad: - Py_XDECREF(empty_dict); - 
Py_XDECREF(empty_list); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(py_import); - #endif - return module; -} - -/* ImportDottedModule */ -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx__ImportDottedModule_Error(PyObject *name, PyObject *parts_tuple, Py_ssize_t count) { - PyObject *partial_name = NULL, *slice = NULL, *sep = NULL; - if (unlikely(PyErr_Occurred())) { - PyErr_Clear(); - } - if (likely(PyTuple_GET_SIZE(parts_tuple) == count)) { - partial_name = name; - } else { - slice = PySequence_GetSlice(parts_tuple, 0, count); - if (unlikely(!slice)) - goto bad; - sep = PyUnicode_FromStringAndSize(".", 1); - if (unlikely(!sep)) - goto bad; - partial_name = PyUnicode_Join(sep, slice); - } - PyErr_Format( -#if PY_MAJOR_VERSION < 3 - PyExc_ImportError, - "No module named '%s'", PyString_AS_STRING(partial_name)); -#else -#if PY_VERSION_HEX >= 0x030600B1 - PyExc_ModuleNotFoundError, -#else - PyExc_ImportError, -#endif - "No module named '%U'", partial_name); -#endif -bad: - Py_XDECREF(sep); - Py_XDECREF(slice); - Py_XDECREF(partial_name); - return NULL; -} -#endif -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx__ImportDottedModule_Lookup(PyObject *name) { - PyObject *imported_module; -#if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) - PyObject *modules = PyImport_GetModuleDict(); - if (unlikely(!modules)) - return NULL; - imported_module = __Pyx_PyDict_GetItemStr(modules, name); - Py_XINCREF(imported_module); -#else - imported_module = PyImport_GetModule(name); -#endif - return imported_module; -} -#endif -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple) { - Py_ssize_t i, nparts; - nparts = PyTuple_GET_SIZE(parts_tuple); - for (i=1; i < nparts && module; i++) { - PyObject *part, *submodule; -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - part = PyTuple_GET_ITEM(parts_tuple, i); -#else - part = PySequence_ITEM(parts_tuple, 
i); -#endif - submodule = __Pyx_PyObject_GetAttrStrNoError(module, part); -#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) - Py_DECREF(part); -#endif - Py_DECREF(module); - module = submodule; - } - if (unlikely(!module)) { - return __Pyx__ImportDottedModule_Error(name, parts_tuple, i); - } - return module; -} -#endif -static PyObject *__Pyx__ImportDottedModule(PyObject *name, PyObject *parts_tuple) { -#if PY_MAJOR_VERSION < 3 - PyObject *module, *from_list, *star = __pyx_n_s__6; - CYTHON_UNUSED_VAR(parts_tuple); - from_list = PyList_New(1); - if (unlikely(!from_list)) - return NULL; - Py_INCREF(star); - PyList_SET_ITEM(from_list, 0, star); - module = __Pyx_Import(name, from_list, 0); - Py_DECREF(from_list); - return module; -#else - PyObject *imported_module; - PyObject *module = __Pyx_Import(name, NULL, 0); - if (!parts_tuple || unlikely(!module)) - return module; - imported_module = __Pyx__ImportDottedModule_Lookup(name); - if (likely(imported_module)) { - Py_DECREF(module); - return imported_module; - } - PyErr_Clear(); - return __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); -#endif -} -static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple) { -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030400B1 - PyObject *module = __Pyx__ImportDottedModule_Lookup(name); - if (likely(module)) { - PyObject *spec = __Pyx_PyObject_GetAttrStrNoError(module, __pyx_n_s_spec); - if (likely(spec)) { - PyObject *unsafe = __Pyx_PyObject_GetAttrStrNoError(spec, __pyx_n_s_initializing); - if (likely(!unsafe || !__Pyx_PyObject_IsTrue(unsafe))) { - Py_DECREF(spec); - spec = NULL; - } - Py_XDECREF(unsafe); - } - if (likely(!spec)) { - PyErr_Clear(); - return module; - } - Py_DECREF(spec); - Py_DECREF(module); - } else if (PyErr_Occurred()) { - PyErr_Clear(); - } -#endif - return __Pyx__ImportDottedModule(name, parts_tuple); -} - -/* ImportDottedModuleRelFirst */ -static PyObject *__Pyx_ImportDottedModuleRelFirst(PyObject 
*name, PyObject *parts_tuple) { - PyObject *module; - PyObject *from_list = NULL; -#if PY_MAJOR_VERSION < 3 - PyObject *star = __pyx_n_s__6; - from_list = PyList_New(1); - if (unlikely(!from_list)) - return NULL; - Py_INCREF(star); - PyList_SET_ITEM(from_list, 0, star); -#endif - module = __Pyx_Import(name, from_list, -1); - Py_XDECREF(from_list); - if (module) { - #if PY_MAJOR_VERSION >= 3 - if (parts_tuple) { - module = __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); - } - #endif - return module; - } - if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) - return NULL; - PyErr_Clear(); - return __Pyx_ImportDottedModule(name, parts_tuple); -} - -/* PyDictVersioning */ -#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { - PyObject *dict = Py_TYPE(obj)->tp_dict; - return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0; -} -static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { - PyObject **dictptr = NULL; - Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; - if (offset) { -#if CYTHON_COMPILING_IN_CPYTHON - dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); -#else - dictptr = _PyObject_GetDictPtr(obj); -#endif - } - return (dictptr && *dictptr) ? 
__PYX_GET_DICT_VERSION(*dictptr) : 0; -} -static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { - PyObject *dict = Py_TYPE(obj)->tp_dict; - if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) - return 0; - return obj_dict_version == __Pyx_get_object_dict_version(obj); -} -#endif - -/* CLineInTraceback */ -#ifndef CYTHON_CLINE_IN_TRACEBACK -static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) { - PyObject *use_cline; - PyObject *ptype, *pvalue, *ptraceback; -#if CYTHON_COMPILING_IN_CPYTHON - PyObject **cython_runtime_dict; -#endif - CYTHON_MAYBE_UNUSED_VAR(tstate); - if (unlikely(!__pyx_cython_runtime)) { - return c_line; - } - __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); -#if CYTHON_COMPILING_IN_CPYTHON - cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); - if (likely(cython_runtime_dict)) { - __PYX_PY_DICT_LOOKUP_IF_MODIFIED( - use_cline, *cython_runtime_dict, - __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) - } else -#endif - { - PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStrNoError(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); - if (use_cline_obj) { - use_cline = PyObject_Not(use_cline_obj) ? 
Py_False : Py_True; - Py_DECREF(use_cline_obj); - } else { - PyErr_Clear(); - use_cline = NULL; - } - } - if (!use_cline) { - c_line = 0; - (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); - } - else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { - c_line = 0; - } - __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); - return c_line; -} -#endif - -/* CodeObjectCache */ -#if !CYTHON_COMPILING_IN_LIMITED_API -static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { - int start = 0, mid = 0, end = count - 1; - if (end >= 0 && code_line > entries[end].code_line) { - return count; - } - while (start < end) { - mid = start + (end - start) / 2; - if (code_line < entries[mid].code_line) { - end = mid; - } else if (code_line > entries[mid].code_line) { - start = mid + 1; - } else { - return mid; - } - } - if (code_line <= entries[mid].code_line) { - return mid; - } else { - return mid + 1; - } -} -static PyCodeObject *__pyx_find_code_object(int code_line) { - PyCodeObject* code_object; - int pos; - if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { - return NULL; - } - pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); - if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { - return NULL; - } - code_object = __pyx_code_cache.entries[pos].code_object; - Py_INCREF(code_object); - return code_object; -} -static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { - int pos, i; - __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; - if (unlikely(!code_line)) { - return; - } - if (unlikely(!entries)) { - entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); - if (likely(entries)) { - __pyx_code_cache.entries = entries; - __pyx_code_cache.max_count = 64; - 
__pyx_code_cache.count = 1; - entries[0].code_line = code_line; - entries[0].code_object = code_object; - Py_INCREF(code_object); - } - return; - } - pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); - if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { - PyCodeObject* tmp = entries[pos].code_object; - entries[pos].code_object = code_object; - Py_DECREF(tmp); - return; - } - if (__pyx_code_cache.count == __pyx_code_cache.max_count) { - int new_max = __pyx_code_cache.max_count + 64; - entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( - __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); - if (unlikely(!entries)) { - return; - } - __pyx_code_cache.entries = entries; - __pyx_code_cache.max_count = new_max; - } - for (i=__pyx_code_cache.count; i>pos; i--) { - entries[i] = entries[i-1]; - } - entries[pos].code_line = code_line; - entries[pos].code_object = code_object; - __pyx_code_cache.count++; - Py_INCREF(code_object); -} -#endif - -/* AddTraceback */ -#include "compile.h" -#include "frameobject.h" -#include "traceback.h" -#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API - #ifndef Py_BUILD_CORE - #define Py_BUILD_CORE 1 - #endif - #include "internal/pycore_frame.h" -#endif -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject *__Pyx_PyCode_Replace_For_AddTraceback(PyObject *code, PyObject *scratch_dict, - PyObject *firstlineno, PyObject *name) { - PyObject *replace = NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "co_firstlineno", firstlineno))) return NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "co_name", name))) return NULL; - replace = PyObject_GetAttrString(code, "replace"); - if (likely(replace)) { - PyObject *result; - result = PyObject_Call(replace, __pyx_empty_tuple, scratch_dict); - Py_DECREF(replace); - return result; - } - PyErr_Clear(); - #if __PYX_LIMITED_VERSION_HEX < 0x030780000 - { 
- PyObject *compiled = NULL, *result = NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "code", code))) return NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "type", (PyObject*)(&PyType_Type)))) return NULL; - compiled = Py_CompileString( - "out = type(code)(\n" - " code.co_argcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize,\n" - " code.co_flags, code.co_code, code.co_consts, code.co_names,\n" - " code.co_varnames, code.co_filename, co_name, co_firstlineno,\n" - " code.co_lnotab)\n", "", Py_file_input); - if (!compiled) return NULL; - result = PyEval_EvalCode(compiled, scratch_dict, scratch_dict); - Py_DECREF(compiled); - if (!result) PyErr_Print(); - Py_DECREF(result); - result = PyDict_GetItemString(scratch_dict, "out"); - if (result) Py_INCREF(result); - return result; - } - #else - return NULL; - #endif -} -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename) { - PyObject *code_object = NULL, *py_py_line = NULL, *py_funcname = NULL, *dict = NULL; - PyObject *replace = NULL, *getframe = NULL, *frame = NULL; - PyObject *exc_type, *exc_value, *exc_traceback; - int success = 0; - if (c_line) { - (void) __pyx_cfilenm; - (void) __Pyx_CLineForTraceback(__Pyx_PyThreadState_Current, c_line); - } - PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); - code_object = Py_CompileString("_getframe()", filename, Py_eval_input); - if (unlikely(!code_object)) goto bad; - py_py_line = PyLong_FromLong(py_line); - if (unlikely(!py_py_line)) goto bad; - py_funcname = PyUnicode_FromString(funcname); - if (unlikely(!py_funcname)) goto bad; - dict = PyDict_New(); - if (unlikely(!dict)) goto bad; - { - PyObject *old_code_object = code_object; - code_object = __Pyx_PyCode_Replace_For_AddTraceback(code_object, dict, py_py_line, py_funcname); - Py_DECREF(old_code_object); - } - if (unlikely(!code_object)) goto bad; - getframe = PySys_GetObject("_getframe"); - if (unlikely(!getframe)) goto bad; - if 
(unlikely(PyDict_SetItemString(dict, "_getframe", getframe))) goto bad; - frame = PyEval_EvalCode(code_object, dict, dict); - if (unlikely(!frame) || frame == Py_None) goto bad; - success = 1; - bad: - PyErr_Restore(exc_type, exc_value, exc_traceback); - Py_XDECREF(code_object); - Py_XDECREF(py_py_line); - Py_XDECREF(py_funcname); - Py_XDECREF(dict); - Py_XDECREF(replace); - if (success) { - PyTraceBack_Here( - (struct _frame*)frame); - } - Py_XDECREF(frame); -} -#else -static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( - const char *funcname, int c_line, - int py_line, const char *filename) { - PyCodeObject *py_code = NULL; - PyObject *py_funcname = NULL; - #if PY_MAJOR_VERSION < 3 - PyObject *py_srcfile = NULL; - py_srcfile = PyString_FromString(filename); - if (!py_srcfile) goto bad; - #endif - if (c_line) { - #if PY_MAJOR_VERSION < 3 - py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); - if (!py_funcname) goto bad; - #else - py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); - if (!py_funcname) goto bad; - funcname = PyUnicode_AsUTF8(py_funcname); - if (!funcname) goto bad; - #endif - } - else { - #if PY_MAJOR_VERSION < 3 - py_funcname = PyString_FromString(funcname); - if (!py_funcname) goto bad; - #endif - } - #if PY_MAJOR_VERSION < 3 - py_code = __Pyx_PyCode_New( - 0, - 0, - 0, - 0, - 0, - 0, - __pyx_empty_bytes, /*PyObject *code,*/ - __pyx_empty_tuple, /*PyObject *consts,*/ - __pyx_empty_tuple, /*PyObject *names,*/ - __pyx_empty_tuple, /*PyObject *varnames,*/ - __pyx_empty_tuple, /*PyObject *freevars,*/ - __pyx_empty_tuple, /*PyObject *cellvars,*/ - py_srcfile, /*PyObject *filename,*/ - py_funcname, /*PyObject *name,*/ - py_line, - __pyx_empty_bytes /*PyObject *lnotab*/ - ); - Py_DECREF(py_srcfile); - #else - py_code = PyCode_NewEmpty(filename, funcname, py_line); - #endif - Py_XDECREF(py_funcname); - return py_code; -bad: - Py_XDECREF(py_funcname); - #if PY_MAJOR_VERSION < 3 - 
Py_XDECREF(py_srcfile); - #endif - return NULL; -} -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename) { - PyCodeObject *py_code = 0; - PyFrameObject *py_frame = 0; - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject *ptype, *pvalue, *ptraceback; - if (c_line) { - c_line = __Pyx_CLineForTraceback(tstate, c_line); - } - py_code = __pyx_find_code_object(c_line ? -c_line : py_line); - if (!py_code) { - __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); - py_code = __Pyx_CreateCodeObjectForTraceback( - funcname, c_line, py_line, filename); - if (!py_code) { - /* If the code object creation fails, then we should clear the - fetched exception references and propagate the new exception */ - Py_XDECREF(ptype); - Py_XDECREF(pvalue); - Py_XDECREF(ptraceback); - goto bad; - } - __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); - __pyx_insert_code_object(c_line ? -c_line : py_line, py_code); - } - py_frame = PyFrame_New( - tstate, /*PyThreadState *tstate,*/ - py_code, /*PyCodeObject *code,*/ - __pyx_d, /*PyObject *globals,*/ - 0 /*PyObject *locals*/ - ); - if (!py_frame) goto bad; - __Pyx_PyFrame_SetLineNumber(py_frame, py_line); - PyTraceBack_Here(py_frame); -bad: - Py_XDECREF(py_code); - Py_XDECREF(py_frame); -} -#endif - -/* CIntFromPyVerify */ -#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) -#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) -#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ - {\ - func_type value = func_value;\ - if (sizeof(target_type) < sizeof(func_type)) {\ - if (unlikely(value != (func_type) (target_type) value)) {\ - func_type zero = 0;\ - if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ - return (target_type) -1;\ - if (is_unsigned && unlikely(value < 
zero))\ - goto raise_neg_overflow;\ - else\ - goto raise_overflow;\ - }\ - }\ - return (target_type) value;\ - } - -/* CIntFromPy */ -static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const size_t neg_one = (size_t) -1, const_zero = (size_t) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(size_t) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(size_t, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (size_t) val; - } - } -#endif - if (unlikely(!PyLong_Check(x))) { - size_t val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (size_t) -1; - val = __Pyx_PyInt_As_size_t(tmp); - Py_DECREF(tmp); - return val; - } - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(size_t, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(size_t) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) >= 2 * PyLong_SHIFT)) { - return (size_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(size_t) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((unsigned long)digits[2]) 
<< PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) >= 3 * PyLong_SHIFT)) { - return (size_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(size_t) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) >= 4 * PyLong_SHIFT)) { - return (size_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (size_t) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(size_t) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(size_t) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(size_t, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(size_t) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, long, -(long) 
(((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { - return (size_t) (((size_t)-1)*(((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(size_t) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { - return (size_t) ((((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { - return (size_t) (((size_t)-1)*(((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(size_t) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { - return (size_t) ((((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 
* sizeof(size_t) - 1 > 4 * PyLong_SHIFT)) { - return (size_t) (((size_t)-1)*(((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(size_t) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 4 * PyLong_SHIFT)) { - return (size_t) ((((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(size_t) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(size_t) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - size_t val; - int ret = -1; -#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API - Py_ssize_t bytes_copied = PyLong_AsNativeBytes( - x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); - if (unlikely(bytes_copied == -1)) { - } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { - goto raise_overflow; - } else { - ret = 0; - } -#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)x, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *v; - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - int chunk_size = (sizeof(long) < 8) ? 30 : 62; - if (likely(PyLong_CheckExact(x))) { - v = __Pyx_NewRef(x); - } else { - v = PyNumber_Long(x); - if (unlikely(!v)) return (size_t) -1; - assert(PyLong_CheckExact(v)); - } - { - int result = PyObject_RichCompareBool(v, Py_False, Py_LT); - if (unlikely(result < 0)) { - Py_DECREF(v); - return (size_t) -1; - } - is_negative = result == 1; - } - if (is_unsigned && unlikely(is_negative)) { - Py_DECREF(v); - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - Py_DECREF(v); - if (unlikely(!stepval)) - return (size_t) -1; - } else { - stepval = v; - } - v = NULL; - val = (size_t) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(size_t) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - long idigit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - val |= ((size_t) idigit) << bits; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - } - Py_DECREF(shift); shift = NULL; - 
Py_DECREF(mask); mask = NULL; - { - long idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(size_t) * 8) - bits - (is_unsigned ? 0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((size_t) idigit) << bits; - } - if (!is_unsigned) { - if (unlikely(val & (((size_t) 1) << (sizeof(size_t) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - if (unlikely(ret)) - return (size_t) -1; - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to size_t"); - return (size_t) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to size_t"); - return (size_t) -1; -} - -/* CIntToPy */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const long neg_one = (long) -1, const_zero = (long) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(long) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(long) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(long) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 - if (is_unsigned) { - return 
PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); - } else { - return PyLong_FromNativeBytes(bytes, sizeof(value), -1); - } -#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 - int one = 1; int little = (int)*(unsigned char *)&one; - return _PyLong_FromByteArray(bytes, sizeof(long), - little, !is_unsigned); -#else - int one = 1; int little = (int)*(unsigned char *)&one; - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(long)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? "little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - if (!is_unsigned) { - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; - } - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(kwds); - Py_XDECREF(arg_tuple); - Py_XDECREF(order_str); - Py_XDECREF(py_bytes); - Py_XDECREF(from_bytes); - return result; -#endif - } -} - -/* CIntToPy */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const int neg_one = (int) -1, const_zero = (int) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(int) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(int) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { - return 
PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(int) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 - if (is_unsigned) { - return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); - } else { - return PyLong_FromNativeBytes(bytes, sizeof(value), -1); - } -#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 - int one = 1; int little = (int)*(unsigned char *)&one; - return _PyLong_FromByteArray(bytes, sizeof(int), - little, !is_unsigned); -#else - int one = 1; int little = (int)*(unsigned char *)&one; - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(int)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? 
"little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - if (!is_unsigned) { - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; - } - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(kwds); - Py_XDECREF(arg_tuple); - Py_XDECREF(order_str); - Py_XDECREF(py_bytes); - Py_XDECREF(from_bytes); - return result; -#endif - } -} - -/* CIntFromPy */ -static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const int neg_one = (int) -1, const_zero = (int) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(int) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (int) val; - } - } -#endif - if (unlikely(!PyLong_Check(x))) { - int val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (int) -1; - val = __Pyx_PyInt_As_int(tmp); - Py_DECREF(tmp); - return val; - } - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | 
(unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 2 * PyLong_SHIFT)) { - return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 3 * PyLong_SHIFT)) { - return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 4 * PyLong_SHIFT)) { - return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (int) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(int) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(int) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - 
const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(int) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - 
__PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { - return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(int) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(int) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - int val; - int ret = -1; -#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API - Py_ssize_t bytes_copied = PyLong_AsNativeBytes( - x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); - if (unlikely(bytes_copied == -1)) { - } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { - goto raise_overflow; - } else { - ret = 0; - } -#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)x, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *v; - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - int chunk_size = (sizeof(long) < 8) ? 30 : 62; - if (likely(PyLong_CheckExact(x))) { - v = __Pyx_NewRef(x); - } else { - v = PyNumber_Long(x); - if (unlikely(!v)) return (int) -1; - assert(PyLong_CheckExact(v)); - } - { - int result = PyObject_RichCompareBool(v, Py_False, Py_LT); - if (unlikely(result < 0)) { - Py_DECREF(v); - return (int) -1; - } - is_negative = result == 1; - } - if (is_unsigned && unlikely(is_negative)) { - Py_DECREF(v); - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - Py_DECREF(v); - if (unlikely(!stepval)) - return (int) -1; - } else { - stepval = v; - } - v = NULL; - val = (int) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(int) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - long idigit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - val |= ((int) idigit) << bits; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - } - Py_DECREF(shift); shift = NULL; - Py_DECREF(mask); mask = NULL; 
- { - long idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(int) * 8) - bits - (is_unsigned ? 0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((int) idigit) << bits; - } - if (!is_unsigned) { - if (unlikely(val & (((int) 1) << (sizeof(int) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - if (unlikely(ret)) - return (int) -1; - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to int"); - return (int) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to int"); - return (int) -1; -} - -/* CIntFromPy */ -static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const char neg_one = (char) -1, const_zero = (char) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(char) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(char, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (char) val; - } - } -#endif - if (unlikely(!PyLong_Check(x))) { - char val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (char) -1; - val = __Pyx_PyInt_As_char(tmp); - Py_DECREF(tmp); - return val; - } - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(char, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - 
assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(char) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) >= 2 * PyLong_SHIFT)) { - return (char) (((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(char) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) >= 3 * PyLong_SHIFT)) { - return (char) (((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(char) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) >= 4 * PyLong_SHIFT)) { - return (char) (((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (char) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(char) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(char, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(char) <= 
sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(char, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(char, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(char) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { - return (char) (((char)-1)*(((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(char) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { - return (char) ((((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { - return (char) (((char)-1)*(((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(char) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned 
long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { - return (char) ((((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 4 * PyLong_SHIFT)) { - return (char) (((char)-1)*(((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(char) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 4 * PyLong_SHIFT)) { - return (char) ((((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(char) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(char, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(char) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(char, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - char val; - int ret = -1; -#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API - Py_ssize_t bytes_copied = PyLong_AsNativeBytes( - x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? 
Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); - if (unlikely(bytes_copied == -1)) { - } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { - goto raise_overflow; - } else { - ret = 0; - } -#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)x, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *v; - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - int chunk_size = (sizeof(long) < 8) ? 30 : 62; - if (likely(PyLong_CheckExact(x))) { - v = __Pyx_NewRef(x); - } else { - v = PyNumber_Long(x); - if (unlikely(!v)) return (char) -1; - assert(PyLong_CheckExact(v)); - } - { - int result = PyObject_RichCompareBool(v, Py_False, Py_LT); - if (unlikely(result < 0)) { - Py_DECREF(v); - return (char) -1; - } - is_negative = result == 1; - } - if (is_unsigned && unlikely(is_negative)) { - Py_DECREF(v); - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - Py_DECREF(v); - if (unlikely(!stepval)) - return (char) -1; - } else { - stepval = v; - } - v = NULL; - val = (char) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(char) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - long idigit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - val |= ((char) idigit) << bits; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - } - Py_DECREF(shift); shift = NULL; - Py_DECREF(mask); mask = 
NULL; - { - long idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(char) * 8) - bits - (is_unsigned ? 0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((char) idigit) << bits; - } - if (!is_unsigned) { - if (unlikely(val & (((char) 1) << (sizeof(char) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - if (unlikely(ret)) - return (char) -1; - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to char"); - return (char) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to char"); - return (char) -1; -} - -/* CIntToPy */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_char(char value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const char neg_one = (char) -1, const_zero = (char) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(char) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(char) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(char) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(char) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(char) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 - if (is_unsigned) { - return PyLong_FromUnsignedNativeBytes(bytes, 
sizeof(value), -1); - } else { - return PyLong_FromNativeBytes(bytes, sizeof(value), -1); - } -#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 - int one = 1; int little = (int)*(unsigned char *)&one; - return _PyLong_FromByteArray(bytes, sizeof(char), - little, !is_unsigned); -#else - int one = 1; int little = (int)*(unsigned char *)&one; - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(char)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? "little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - if (!is_unsigned) { - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; - } - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(kwds); - Py_XDECREF(arg_tuple); - Py_XDECREF(order_str); - Py_XDECREF(py_bytes); - Py_XDECREF(from_bytes); - return result; -#endif - } -} - -/* FormatTypeName */ -#if CYTHON_COMPILING_IN_LIMITED_API -static __Pyx_TypeName -__Pyx_PyType_GetName(PyTypeObject* tp) -{ - PyObject *name = __Pyx_PyObject_GetAttrStr((PyObject *)tp, - __pyx_n_s_name); - if (unlikely(name == NULL) || unlikely(!PyUnicode_Check(name))) { - PyErr_Clear(); - Py_XDECREF(name); - name = __Pyx_NewRef(__pyx_n_s__13); - } - return name; -} -#endif - -/* CIntFromPy */ -static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const long neg_one = (long) -1, const_zero = (long) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned 
= neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(long) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (long) val; - } - } -#endif - if (unlikely(!PyLong_Check(x))) { - long val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (long) -1; - val = __Pyx_PyInt_As_long(tmp); - Py_DECREF(tmp); - return val; - } - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 2 * PyLong_SHIFT)) { - return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 3 * PyLong_SHIFT)) { - return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned 
long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 4 * PyLong_SHIFT)) { - return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (long) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(long) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(long) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(long) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - return 
(long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 4 * 
PyLong_SHIFT)) { - return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(long) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(long) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - long val; - int ret = -1; -#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API - Py_ssize_t bytes_copied = PyLong_AsNativeBytes( - x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); - if (unlikely(bytes_copied == -1)) { - } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { - goto raise_overflow; - } else { - ret = 0; - } -#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)x, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *v; - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - int chunk_size = (sizeof(long) < 8) ? 
30 : 62; - if (likely(PyLong_CheckExact(x))) { - v = __Pyx_NewRef(x); - } else { - v = PyNumber_Long(x); - if (unlikely(!v)) return (long) -1; - assert(PyLong_CheckExact(v)); - } - { - int result = PyObject_RichCompareBool(v, Py_False, Py_LT); - if (unlikely(result < 0)) { - Py_DECREF(v); - return (long) -1; - } - is_negative = result == 1; - } - if (is_unsigned && unlikely(is_negative)) { - Py_DECREF(v); - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - Py_DECREF(v); - if (unlikely(!stepval)) - return (long) -1; - } else { - stepval = v; - } - v = NULL; - val = (long) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(long) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - long idigit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - val |= ((long) idigit) << bits; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - } - Py_DECREF(shift); shift = NULL; - Py_DECREF(mask); mask = NULL; - { - long idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(long) * 8) - bits - (is_unsigned ? 
0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((long) idigit) << bits; - } - if (!is_unsigned) { - if (unlikely(val & (((long) 1) << (sizeof(long) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - if (unlikely(ret)) - return (long) -1; - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to long"); - return (long) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to long"); - return (long) -1; -} - -/* SwapException */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { - PyObject *tmp_type, *tmp_value, *tmp_tb; - #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 - _PyErr_StackItem *exc_info = tstate->exc_info; - tmp_value = exc_info->exc_value; - exc_info->exc_value = *value; - if (tmp_value == NULL || tmp_value == Py_None) { - Py_XDECREF(tmp_value); - tmp_value = NULL; - tmp_type = NULL; - tmp_tb = NULL; - } else { - tmp_type = (PyObject*) Py_TYPE(tmp_value); - Py_INCREF(tmp_type); - #if CYTHON_COMPILING_IN_CPYTHON - tmp_tb = ((PyBaseExceptionObject*) tmp_value)->traceback; - Py_XINCREF(tmp_tb); - #else - tmp_tb = PyException_GetTraceback(tmp_value); - #endif - } - #elif CYTHON_USE_EXC_INFO_STACK - _PyErr_StackItem *exc_info = tstate->exc_info; - tmp_type = exc_info->exc_type; - tmp_value = exc_info->exc_value; - tmp_tb = exc_info->exc_traceback; - exc_info->exc_type = *type; - exc_info->exc_value = *value; - exc_info->exc_traceback = *tb; - #else - tmp_type = tstate->exc_type; - tmp_value = tstate->exc_value; - tmp_tb = tstate->exc_traceback; - tstate->exc_type = *type; - tstate->exc_value = *value; - tstate->exc_traceback = *tb; - #endif - *type = tmp_type; - *value = tmp_value; - *tb = tmp_tb; -} 
-#else -static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb) { - PyObject *tmp_type, *tmp_value, *tmp_tb; - PyErr_GetExcInfo(&tmp_type, &tmp_value, &tmp_tb); - PyErr_SetExcInfo(*type, *value, *tb); - *type = tmp_type; - *value = tmp_value; - *tb = tmp_tb; -} -#endif - -/* CoroutineBase */ -#include -#if PY_VERSION_HEX >= 0x030b00a6 - #ifndef Py_BUILD_CORE - #define Py_BUILD_CORE 1 - #endif - #include "internal/pycore_frame.h" -#endif -#define __Pyx_Coroutine_Undelegate(gen) Py_CLEAR((gen)->yieldfrom) -static int __Pyx_PyGen__FetchStopIterationValue(PyThreadState *__pyx_tstate, PyObject **pvalue) { - PyObject *et, *ev, *tb; - PyObject *value = NULL; - CYTHON_UNUSED_VAR(__pyx_tstate); - __Pyx_ErrFetch(&et, &ev, &tb); - if (!et) { - Py_XDECREF(tb); - Py_XDECREF(ev); - Py_INCREF(Py_None); - *pvalue = Py_None; - return 0; - } - if (likely(et == PyExc_StopIteration)) { - if (!ev) { - Py_INCREF(Py_None); - value = Py_None; - } -#if PY_VERSION_HEX >= 0x030300A0 - else if (likely(__Pyx_IS_TYPE(ev, (PyTypeObject*)PyExc_StopIteration))) { - value = ((PyStopIterationObject *)ev)->value; - Py_INCREF(value); - Py_DECREF(ev); - } -#endif - else if (unlikely(PyTuple_Check(ev))) { - if (PyTuple_GET_SIZE(ev) >= 1) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - value = PyTuple_GET_ITEM(ev, 0); - Py_INCREF(value); -#else - value = PySequence_ITEM(ev, 0); -#endif - } else { - Py_INCREF(Py_None); - value = Py_None; - } - Py_DECREF(ev); - } - else if (!__Pyx_TypeCheck(ev, (PyTypeObject*)PyExc_StopIteration)) { - value = ev; - } - if (likely(value)) { - Py_XDECREF(tb); - Py_DECREF(et); - *pvalue = value; - return 0; - } - } else if (!__Pyx_PyErr_GivenExceptionMatches(et, PyExc_StopIteration)) { - __Pyx_ErrRestore(et, ev, tb); - return -1; - } - PyErr_NormalizeException(&et, &ev, &tb); - if (unlikely(!PyObject_TypeCheck(ev, (PyTypeObject*)PyExc_StopIteration))) { - __Pyx_ErrRestore(et, ev, tb); - return -1; - } - Py_XDECREF(tb); 
- Py_DECREF(et); -#if PY_VERSION_HEX >= 0x030300A0 - value = ((PyStopIterationObject *)ev)->value; - Py_INCREF(value); - Py_DECREF(ev); -#else - { - PyObject* args = __Pyx_PyObject_GetAttrStr(ev, __pyx_n_s_args); - Py_DECREF(ev); - if (likely(args)) { - value = PySequence_GetItem(args, 0); - Py_DECREF(args); - } - if (unlikely(!value)) { - __Pyx_ErrRestore(NULL, NULL, NULL); - Py_INCREF(Py_None); - value = Py_None; - } - } -#endif - *pvalue = value; - return 0; -} -static CYTHON_INLINE -void __Pyx_Coroutine_ExceptionClear(__Pyx_ExcInfoStruct *exc_state) { -#if PY_VERSION_HEX >= 0x030B00a4 - Py_CLEAR(exc_state->exc_value); -#else - PyObject *t, *v, *tb; - t = exc_state->exc_type; - v = exc_state->exc_value; - tb = exc_state->exc_traceback; - exc_state->exc_type = NULL; - exc_state->exc_value = NULL; - exc_state->exc_traceback = NULL; - Py_XDECREF(t); - Py_XDECREF(v); - Py_XDECREF(tb); -#endif -} -#define __Pyx_Coroutine_AlreadyRunningError(gen) (__Pyx__Coroutine_AlreadyRunningError(gen), (PyObject*)NULL) -static void __Pyx__Coroutine_AlreadyRunningError(__pyx_CoroutineObject *gen) { - const char *msg; - CYTHON_MAYBE_UNUSED_VAR(gen); - if ((0)) { - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_Coroutine_Check((PyObject*)gen)) { - msg = "coroutine already executing"; - #endif - #ifdef __Pyx_AsyncGen_USED - } else if (__Pyx_AsyncGen_CheckExact((PyObject*)gen)) { - msg = "async generator already executing"; - #endif - } else { - msg = "generator already executing"; - } - PyErr_SetString(PyExc_ValueError, msg); -} -#define __Pyx_Coroutine_NotStartedError(gen) (__Pyx__Coroutine_NotStartedError(gen), (PyObject*)NULL) -static void __Pyx__Coroutine_NotStartedError(PyObject *gen) { - const char *msg; - CYTHON_MAYBE_UNUSED_VAR(gen); - if ((0)) { - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_Coroutine_Check(gen)) { - msg = "can't send non-None value to a just-started coroutine"; - #endif - #ifdef __Pyx_AsyncGen_USED - } else if (__Pyx_AsyncGen_CheckExact(gen)) { - msg = 
"can't send non-None value to a just-started async generator"; - #endif - } else { - msg = "can't send non-None value to a just-started generator"; - } - PyErr_SetString(PyExc_TypeError, msg); -} -#define __Pyx_Coroutine_AlreadyTerminatedError(gen, value, closing) (__Pyx__Coroutine_AlreadyTerminatedError(gen, value, closing), (PyObject*)NULL) -static void __Pyx__Coroutine_AlreadyTerminatedError(PyObject *gen, PyObject *value, int closing) { - CYTHON_MAYBE_UNUSED_VAR(gen); - CYTHON_MAYBE_UNUSED_VAR(closing); - #ifdef __Pyx_Coroutine_USED - if (!closing && __Pyx_Coroutine_Check(gen)) { - PyErr_SetString(PyExc_RuntimeError, "cannot reuse already awaited coroutine"); - } else - #endif - if (value) { - #ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(gen)) - PyErr_SetNone(__Pyx_PyExc_StopAsyncIteration); - else - #endif - PyErr_SetNone(PyExc_StopIteration); - } -} -static -PyObject *__Pyx_Coroutine_SendEx(__pyx_CoroutineObject *self, PyObject *value, int closing) { - __Pyx_PyThreadState_declare - PyThreadState *tstate; - __Pyx_ExcInfoStruct *exc_state; - PyObject *retval; - assert(!self->is_running); - if (unlikely(self->resume_label == 0)) { - if (unlikely(value && value != Py_None)) { - return __Pyx_Coroutine_NotStartedError((PyObject*)self); - } - } - if (unlikely(self->resume_label == -1)) { - return __Pyx_Coroutine_AlreadyTerminatedError((PyObject*)self, value, closing); - } -#if CYTHON_FAST_THREAD_STATE - __Pyx_PyThreadState_assign - tstate = __pyx_tstate; -#else - tstate = __Pyx_PyThreadState_Current; -#endif - exc_state = &self->gi_exc_state; - if (exc_state->exc_value) { - #if CYTHON_COMPILING_IN_PYPY - #else - PyObject *exc_tb; - #if PY_VERSION_HEX >= 0x030B00a4 && !CYTHON_COMPILING_IN_CPYTHON - exc_tb = PyException_GetTraceback(exc_state->exc_value); - #elif PY_VERSION_HEX >= 0x030B00a4 - exc_tb = ((PyBaseExceptionObject*) exc_state->exc_value)->traceback; - #else - exc_tb = exc_state->exc_traceback; - #endif - if (exc_tb) { - PyTracebackObject *tb 
= (PyTracebackObject *) exc_tb; - PyFrameObject *f = tb->tb_frame; - assert(f->f_back == NULL); - #if PY_VERSION_HEX >= 0x030B00A1 - f->f_back = PyThreadState_GetFrame(tstate); - #else - Py_XINCREF(tstate->frame); - f->f_back = tstate->frame; - #endif - #if PY_VERSION_HEX >= 0x030B00a4 && !CYTHON_COMPILING_IN_CPYTHON - Py_DECREF(exc_tb); - #endif - } - #endif - } -#if CYTHON_USE_EXC_INFO_STACK - exc_state->previous_item = tstate->exc_info; - tstate->exc_info = exc_state; -#else - if (exc_state->exc_type) { - __Pyx_ExceptionSwap(&exc_state->exc_type, &exc_state->exc_value, &exc_state->exc_traceback); - } else { - __Pyx_Coroutine_ExceptionClear(exc_state); - __Pyx_ExceptionSave(&exc_state->exc_type, &exc_state->exc_value, &exc_state->exc_traceback); - } -#endif - self->is_running = 1; - retval = self->body(self, tstate, value); - self->is_running = 0; -#if CYTHON_USE_EXC_INFO_STACK - exc_state = &self->gi_exc_state; - tstate->exc_info = exc_state->previous_item; - exc_state->previous_item = NULL; - __Pyx_Coroutine_ResetFrameBackpointer(exc_state); -#endif - return retval; -} -static CYTHON_INLINE void __Pyx_Coroutine_ResetFrameBackpointer(__Pyx_ExcInfoStruct *exc_state) { -#if CYTHON_COMPILING_IN_PYPY - CYTHON_UNUSED_VAR(exc_state); -#else - PyObject *exc_tb; - #if PY_VERSION_HEX >= 0x030B00a4 - if (!exc_state->exc_value) return; - exc_tb = PyException_GetTraceback(exc_state->exc_value); - #else - exc_tb = exc_state->exc_traceback; - #endif - if (likely(exc_tb)) { - PyTracebackObject *tb = (PyTracebackObject *) exc_tb; - PyFrameObject *f = tb->tb_frame; - Py_CLEAR(f->f_back); - #if PY_VERSION_HEX >= 0x030B00a4 - Py_DECREF(exc_tb); - #endif - } -#endif -} -static CYTHON_INLINE -PyObject *__Pyx_Coroutine_MethodReturn(PyObject* gen, PyObject *retval) { - CYTHON_MAYBE_UNUSED_VAR(gen); - if (unlikely(!retval)) { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - if (!__Pyx_PyErr_Occurred()) { - PyObject *exc = PyExc_StopIteration; - #ifdef __Pyx_AsyncGen_USED - 
if (__Pyx_AsyncGen_CheckExact(gen)) - exc = __Pyx_PyExc_StopAsyncIteration; - #endif - __Pyx_PyErr_SetNone(exc); - } - } - return retval; -} -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) -static CYTHON_INLINE -PyObject *__Pyx_PyGen_Send(PyGenObject *gen, PyObject *arg) { -#if PY_VERSION_HEX <= 0x030A00A1 - return _PyGen_Send(gen, arg); -#else - PyObject *result; - if (PyIter_Send((PyObject*)gen, arg ? arg : Py_None, &result) == PYGEN_RETURN) { - if (PyAsyncGen_CheckExact(gen)) { - assert(result == Py_None); - PyErr_SetNone(PyExc_StopAsyncIteration); - } - else if (result == Py_None) { - PyErr_SetNone(PyExc_StopIteration); - } - else { -#if PY_VERSION_HEX < 0x030d00A1 - _PyGen_SetStopIterationValue(result); -#else - if (!PyTuple_Check(result) && !PyExceptionInstance_Check(result)) { - PyErr_SetObject(PyExc_StopIteration, result); - } else { - PyObject *exc = __Pyx_PyObject_CallOneArg(PyExc_StopIteration, result); - if (likely(exc != NULL)) { - PyErr_SetObject(PyExc_StopIteration, exc); - Py_DECREF(exc); - } - } -#endif - } - Py_DECREF(result); - result = NULL; - } - return result; -#endif -} -#endif -static CYTHON_INLINE -PyObject *__Pyx_Coroutine_FinishDelegation(__pyx_CoroutineObject *gen) { - PyObject *ret; - PyObject *val = NULL; - __Pyx_Coroutine_Undelegate(gen); - __Pyx_PyGen__FetchStopIterationValue(__Pyx_PyThreadState_Current, &val); - ret = __Pyx_Coroutine_SendEx(gen, val, 0); - Py_XDECREF(val); - return ret; -} -static PyObject *__Pyx_Coroutine_Send(PyObject *self, PyObject *value) { - PyObject *retval; - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject*) self; - PyObject *yf = gen->yieldfrom; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - PyObject *ret; - gen->is_running = 1; - #ifdef __Pyx_Generator_USED - if (__Pyx_Generator_CheckExact(yf)) { - ret = __Pyx_Coroutine_Send(yf, value); - } else - #endif - #ifdef 
__Pyx_Coroutine_USED - if (__Pyx_Coroutine_Check(yf)) { - ret = __Pyx_Coroutine_Send(yf, value); - } else - #endif - #ifdef __Pyx_AsyncGen_USED - if (__pyx_PyAsyncGenASend_CheckExact(yf)) { - ret = __Pyx_async_gen_asend_send(yf, value); - } else - #endif - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) - if (PyGen_CheckExact(yf)) { - ret = __Pyx_PyGen_Send((PyGenObject*)yf, value == Py_None ? NULL : value); - } else - #endif - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03050000 && defined(PyCoro_CheckExact) && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) - if (PyCoro_CheckExact(yf)) { - ret = __Pyx_PyGen_Send((PyGenObject*)yf, value == Py_None ? NULL : value); - } else - #endif - { - if (value == Py_None) - ret = __Pyx_PyObject_GetIterNextFunc(yf)(yf); - else - ret = __Pyx_PyObject_CallMethod1(yf, __pyx_n_s_send, value); - } - gen->is_running = 0; - if (likely(ret)) { - return ret; - } - retval = __Pyx_Coroutine_FinishDelegation(gen); - } else { - retval = __Pyx_Coroutine_SendEx(gen, value, 0); - } - return __Pyx_Coroutine_MethodReturn(self, retval); -} -static int __Pyx_Coroutine_CloseIter(__pyx_CoroutineObject *gen, PyObject *yf) { - PyObject *retval = NULL; - int err = 0; - #ifdef __Pyx_Generator_USED - if (__Pyx_Generator_CheckExact(yf)) { - retval = __Pyx_Coroutine_Close(yf); - if (!retval) - return -1; - } else - #endif - #ifdef __Pyx_Coroutine_USED - if (__Pyx_Coroutine_Check(yf)) { - retval = __Pyx_Coroutine_Close(yf); - if (!retval) - return -1; - } else - if (__Pyx_CoroutineAwait_CheckExact(yf)) { - retval = __Pyx_CoroutineAwait_Close((__pyx_CoroutineAwaitObject*)yf, NULL); - if (!retval) - return -1; - } else - #endif - #ifdef __Pyx_AsyncGen_USED - if (__pyx_PyAsyncGenASend_CheckExact(yf)) { - retval = __Pyx_async_gen_asend_close(yf, NULL); - } else - if (__pyx_PyAsyncGenAThrow_CheckExact(yf)) { - retval = __Pyx_async_gen_athrow_close(yf, NULL); - } else - #endif 
- { - PyObject *meth; - gen->is_running = 1; - meth = __Pyx_PyObject_GetAttrStrNoError(yf, __pyx_n_s_close); - if (unlikely(!meth)) { - if (unlikely(PyErr_Occurred())) { - PyErr_WriteUnraisable(yf); - } - } else { - retval = __Pyx_PyObject_CallNoArg(meth); - Py_DECREF(meth); - if (unlikely(!retval)) - err = -1; - } - gen->is_running = 0; - } - Py_XDECREF(retval); - return err; -} -static PyObject *__Pyx_Generator_Next(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject*) self; - PyObject *yf = gen->yieldfrom; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - PyObject *ret; - gen->is_running = 1; - #ifdef __Pyx_Generator_USED - if (__Pyx_Generator_CheckExact(yf)) { - ret = __Pyx_Generator_Next(yf); - } else - #endif - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) - if (PyGen_CheckExact(yf)) { - ret = __Pyx_PyGen_Send((PyGenObject*)yf, NULL); - } else - #endif - #ifdef __Pyx_Coroutine_USED - if (__Pyx_Coroutine_Check(yf)) { - ret = __Pyx_Coroutine_Send(yf, Py_None); - } else - #endif - ret = __Pyx_PyObject_GetIterNextFunc(yf)(yf); - gen->is_running = 0; - if (likely(ret)) { - return ret; - } - return __Pyx_Coroutine_FinishDelegation(gen); - } - return __Pyx_Coroutine_SendEx(gen, Py_None, 0); -} -static PyObject *__Pyx_Coroutine_Close_Method(PyObject *self, PyObject *arg) { - CYTHON_UNUSED_VAR(arg); - return __Pyx_Coroutine_Close(self); -} -static PyObject *__Pyx_Coroutine_Close(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - PyObject *retval, *raised_exception; - PyObject *yf = gen->yieldfrom; - int err = 0; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - Py_INCREF(yf); - err = __Pyx_Coroutine_CloseIter(gen, yf); - __Pyx_Coroutine_Undelegate(gen); - Py_DECREF(yf); - } - if (err == 0) - PyErr_SetNone(PyExc_GeneratorExit); - retval = 
__Pyx_Coroutine_SendEx(gen, NULL, 1); - if (unlikely(retval)) { - const char *msg; - Py_DECREF(retval); - if ((0)) { - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_Coroutine_Check(self)) { - msg = "coroutine ignored GeneratorExit"; - #endif - #ifdef __Pyx_AsyncGen_USED - } else if (__Pyx_AsyncGen_CheckExact(self)) { -#if PY_VERSION_HEX < 0x03060000 - msg = "async generator ignored GeneratorExit - might require Python 3.6+ finalisation (PEP 525)"; -#else - msg = "async generator ignored GeneratorExit"; -#endif - #endif - } else { - msg = "generator ignored GeneratorExit"; - } - PyErr_SetString(PyExc_RuntimeError, msg); - return NULL; - } - raised_exception = PyErr_Occurred(); - if (likely(!raised_exception || __Pyx_PyErr_GivenExceptionMatches2(raised_exception, PyExc_GeneratorExit, PyExc_StopIteration))) { - if (raised_exception) PyErr_Clear(); - Py_INCREF(Py_None); - return Py_None; - } - return NULL; -} -static PyObject *__Pyx__Coroutine_Throw(PyObject *self, PyObject *typ, PyObject *val, PyObject *tb, - PyObject *args, int close_on_genexit) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - PyObject *yf = gen->yieldfrom; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - PyObject *ret; - Py_INCREF(yf); - if (__Pyx_PyErr_GivenExceptionMatches(typ, PyExc_GeneratorExit) && close_on_genexit) { - int err = __Pyx_Coroutine_CloseIter(gen, yf); - Py_DECREF(yf); - __Pyx_Coroutine_Undelegate(gen); - if (err < 0) - return __Pyx_Coroutine_MethodReturn(self, __Pyx_Coroutine_SendEx(gen, NULL, 0)); - goto throw_here; - } - gen->is_running = 1; - if (0 - #ifdef __Pyx_Generator_USED - || __Pyx_Generator_CheckExact(yf) - #endif - #ifdef __Pyx_Coroutine_USED - || __Pyx_Coroutine_Check(yf) - #endif - ) { - ret = __Pyx__Coroutine_Throw(yf, typ, val, tb, args, close_on_genexit); - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_CoroutineAwait_CheckExact(yf)) { - ret = 
__Pyx__Coroutine_Throw(((__pyx_CoroutineAwaitObject*)yf)->coroutine, typ, val, tb, args, close_on_genexit); - #endif - } else { - PyObject *meth = __Pyx_PyObject_GetAttrStrNoError(yf, __pyx_n_s_throw); - if (unlikely(!meth)) { - Py_DECREF(yf); - if (unlikely(PyErr_Occurred())) { - gen->is_running = 0; - return NULL; - } - __Pyx_Coroutine_Undelegate(gen); - gen->is_running = 0; - goto throw_here; - } - if (likely(args)) { - ret = __Pyx_PyObject_Call(meth, args, NULL); - } else { - PyObject *cargs[4] = {NULL, typ, val, tb}; - ret = __Pyx_PyObject_FastCall(meth, cargs+1, 3 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); - } - Py_DECREF(meth); - } - gen->is_running = 0; - Py_DECREF(yf); - if (!ret) { - ret = __Pyx_Coroutine_FinishDelegation(gen); - } - return __Pyx_Coroutine_MethodReturn(self, ret); - } -throw_here: - __Pyx_Raise(typ, val, tb, NULL); - return __Pyx_Coroutine_MethodReturn(self, __Pyx_Coroutine_SendEx(gen, NULL, 0)); -} -static PyObject *__Pyx_Coroutine_Throw(PyObject *self, PyObject *args) { - PyObject *typ; - PyObject *val = NULL; - PyObject *tb = NULL; - if (unlikely(!PyArg_UnpackTuple(args, (char *)"throw", 1, 3, &typ, &val, &tb))) - return NULL; - return __Pyx__Coroutine_Throw(self, typ, val, tb, args, 1); -} -static CYTHON_INLINE int __Pyx_Coroutine_traverse_excstate(__Pyx_ExcInfoStruct *exc_state, visitproc visit, void *arg) { -#if PY_VERSION_HEX >= 0x030B00a4 - Py_VISIT(exc_state->exc_value); -#else - Py_VISIT(exc_state->exc_type); - Py_VISIT(exc_state->exc_value); - Py_VISIT(exc_state->exc_traceback); -#endif - return 0; -} -static int __Pyx_Coroutine_traverse(__pyx_CoroutineObject *gen, visitproc visit, void *arg) { - Py_VISIT(gen->closure); - Py_VISIT(gen->classobj); - Py_VISIT(gen->yieldfrom); - return __Pyx_Coroutine_traverse_excstate(&gen->gi_exc_state, visit, arg); -} -static int __Pyx_Coroutine_clear(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - Py_CLEAR(gen->closure); - Py_CLEAR(gen->classobj); - 
Py_CLEAR(gen->yieldfrom); - __Pyx_Coroutine_ExceptionClear(&gen->gi_exc_state); -#ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(self)) { - Py_CLEAR(((__pyx_PyAsyncGenObject*)gen)->ag_finalizer); - } -#endif - Py_CLEAR(gen->gi_code); - Py_CLEAR(gen->gi_frame); - Py_CLEAR(gen->gi_name); - Py_CLEAR(gen->gi_qualname); - Py_CLEAR(gen->gi_modulename); - return 0; -} -static void __Pyx_Coroutine_dealloc(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - PyObject_GC_UnTrack(gen); - if (gen->gi_weakreflist != NULL) - PyObject_ClearWeakRefs(self); - if (gen->resume_label >= 0) { - PyObject_GC_Track(self); -#if PY_VERSION_HEX >= 0x030400a1 && CYTHON_USE_TP_FINALIZE - if (unlikely(PyObject_CallFinalizerFromDealloc(self))) -#else - Py_TYPE(gen)->tp_del(self); - if (unlikely(Py_REFCNT(self) > 0)) -#endif - { - return; - } - PyObject_GC_UnTrack(self); - } -#ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(self)) { - /* We have to handle this case for asynchronous generators - right here, because this code has to be between UNTRACK - and GC_Del. 
*/ - Py_CLEAR(((__pyx_PyAsyncGenObject*)self)->ag_finalizer); - } -#endif - __Pyx_Coroutine_clear(self); - __Pyx_PyHeapTypeObject_GC_Del(gen); -} -static void __Pyx_Coroutine_del(PyObject *self) { - PyObject *error_type, *error_value, *error_traceback; - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - __Pyx_PyThreadState_declare - if (gen->resume_label < 0) { - return; - } -#if !CYTHON_USE_TP_FINALIZE - assert(self->ob_refcnt == 0); - __Pyx_SET_REFCNT(self, 1); -#endif - __Pyx_PyThreadState_assign - __Pyx_ErrFetch(&error_type, &error_value, &error_traceback); -#ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(self)) { - __pyx_PyAsyncGenObject *agen = (__pyx_PyAsyncGenObject*)self; - PyObject *finalizer = agen->ag_finalizer; - if (finalizer && !agen->ag_closed) { - PyObject *res = __Pyx_PyObject_CallOneArg(finalizer, self); - if (unlikely(!res)) { - PyErr_WriteUnraisable(self); - } else { - Py_DECREF(res); - } - __Pyx_ErrRestore(error_type, error_value, error_traceback); - return; - } - } -#endif - if (unlikely(gen->resume_label == 0 && !error_value)) { -#ifdef __Pyx_Coroutine_USED -#ifdef __Pyx_Generator_USED - if (!__Pyx_Generator_CheckExact(self)) -#endif - { - PyObject_GC_UnTrack(self); -#if PY_MAJOR_VERSION >= 3 || defined(PyErr_WarnFormat) - if (unlikely(PyErr_WarnFormat(PyExc_RuntimeWarning, 1, "coroutine '%.50S' was never awaited", gen->gi_qualname) < 0)) - PyErr_WriteUnraisable(self); -#else - {PyObject *msg; - char *cmsg; - #if CYTHON_COMPILING_IN_PYPY - msg = NULL; - cmsg = (char*) "coroutine was never awaited"; - #else - char *cname; - PyObject *qualname; - qualname = gen->gi_qualname; - cname = PyString_AS_STRING(qualname); - msg = PyString_FromFormat("coroutine '%.50s' was never awaited", cname); - if (unlikely(!msg)) { - PyErr_Clear(); - cmsg = (char*) "coroutine was never awaited"; - } else { - cmsg = PyString_AS_STRING(msg); - } - #endif - if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, cmsg, 1) < 0)) - 
PyErr_WriteUnraisable(self); - Py_XDECREF(msg);} -#endif - PyObject_GC_Track(self); - } -#endif - } else { - PyObject *res = __Pyx_Coroutine_Close(self); - if (unlikely(!res)) { - if (PyErr_Occurred()) - PyErr_WriteUnraisable(self); - } else { - Py_DECREF(res); - } - } - __Pyx_ErrRestore(error_type, error_value, error_traceback); -#if !CYTHON_USE_TP_FINALIZE - assert(Py_REFCNT(self) > 0); - if (likely(--self->ob_refcnt == 0)) { - return; - } - { - Py_ssize_t refcnt = Py_REFCNT(self); - _Py_NewReference(self); - __Pyx_SET_REFCNT(self, refcnt); - } -#if CYTHON_COMPILING_IN_CPYTHON - assert(PyType_IS_GC(Py_TYPE(self)) && - _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED); - _Py_DEC_REFTOTAL; -#endif -#ifdef COUNT_ALLOCS - --Py_TYPE(self)->tp_frees; - --Py_TYPE(self)->tp_allocs; -#endif -#endif -} -static PyObject * -__Pyx_Coroutine_get_name(__pyx_CoroutineObject *self, void *context) -{ - PyObject *name = self->gi_name; - CYTHON_UNUSED_VAR(context); - if (unlikely(!name)) name = Py_None; - Py_INCREF(name); - return name; -} -static int -__Pyx_Coroutine_set_name(__pyx_CoroutineObject *self, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__name__ must be set to a string object"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(self->gi_name, value); - return 0; -} -static PyObject * -__Pyx_Coroutine_get_qualname(__pyx_CoroutineObject *self, void *context) -{ - PyObject *name = self->gi_qualname; - CYTHON_UNUSED_VAR(context); - if (unlikely(!name)) name = Py_None; - Py_INCREF(name); - return name; -} -static int -__Pyx_Coroutine_set_qualname(__pyx_CoroutineObject *self, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if 
(unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__qualname__ must be set to a string object"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(self->gi_qualname, value); - return 0; -} -static PyObject * -__Pyx_Coroutine_get_frame(__pyx_CoroutineObject *self, void *context) -{ - PyObject *frame = self->gi_frame; - CYTHON_UNUSED_VAR(context); - if (!frame) { - if (unlikely(!self->gi_code)) { - Py_RETURN_NONE; - } - frame = (PyObject *) PyFrame_New( - PyThreadState_Get(), /*PyThreadState *tstate,*/ - (PyCodeObject*) self->gi_code, /*PyCodeObject *code,*/ - __pyx_d, /*PyObject *globals,*/ - 0 /*PyObject *locals*/ - ); - if (unlikely(!frame)) - return NULL; - self->gi_frame = frame; - } - Py_INCREF(frame); - return frame; -} -static __pyx_CoroutineObject *__Pyx__Coroutine_New( - PyTypeObject* type, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name) { - __pyx_CoroutineObject *gen = PyObject_GC_New(__pyx_CoroutineObject, type); - if (unlikely(!gen)) - return NULL; - return __Pyx__Coroutine_NewInit(gen, body, code, closure, name, qualname, module_name); -} -static __pyx_CoroutineObject *__Pyx__Coroutine_NewInit( - __pyx_CoroutineObject *gen, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name) { - gen->body = body; - gen->closure = closure; - Py_XINCREF(closure); - gen->is_running = 0; - gen->resume_label = 0; - gen->classobj = NULL; - gen->yieldfrom = NULL; - #if PY_VERSION_HEX >= 0x030B00a4 - gen->gi_exc_state.exc_value = NULL; - #else - gen->gi_exc_state.exc_type = NULL; - gen->gi_exc_state.exc_value = NULL; - gen->gi_exc_state.exc_traceback = NULL; - #endif -#if CYTHON_USE_EXC_INFO_STACK - gen->gi_exc_state.previous_item = NULL; -#endif - gen->gi_weakreflist = NULL; - Py_XINCREF(qualname); - gen->gi_qualname = qualname; - Py_XINCREF(name); - gen->gi_name = 
name; - Py_XINCREF(module_name); - gen->gi_modulename = module_name; - Py_XINCREF(code); - gen->gi_code = code; - gen->gi_frame = NULL; - PyObject_GC_Track(gen); - return gen; -} - -/* PatchModuleWithCoroutine */ -static PyObject* __Pyx_Coroutine_patch_module(PyObject* module, const char* py_code) { -#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) - int result; - PyObject *globals, *result_obj; - globals = PyDict_New(); if (unlikely(!globals)) goto ignore; - result = PyDict_SetItemString(globals, "_cython_coroutine_type", - #ifdef __Pyx_Coroutine_USED - (PyObject*)__pyx_CoroutineType); - #else - Py_None); - #endif - if (unlikely(result < 0)) goto ignore; - result = PyDict_SetItemString(globals, "_cython_generator_type", - #ifdef __Pyx_Generator_USED - (PyObject*)__pyx_GeneratorType); - #else - Py_None); - #endif - if (unlikely(result < 0)) goto ignore; - if (unlikely(PyDict_SetItemString(globals, "_module", module) < 0)) goto ignore; - if (unlikely(PyDict_SetItemString(globals, "__builtins__", __pyx_b) < 0)) goto ignore; - result_obj = PyRun_String(py_code, Py_file_input, globals, globals); - if (unlikely(!result_obj)) goto ignore; - Py_DECREF(result_obj); - Py_DECREF(globals); - return module; -ignore: - Py_XDECREF(globals); - PyErr_WriteUnraisable(module); - if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, "Cython module failed to patch module with custom type", 1) < 0)) { - Py_DECREF(module); - module = NULL; - } -#else - py_code++; -#endif - return module; -} - -/* PatchGeneratorABC */ -#ifndef CYTHON_REGISTER_ABCS -#define CYTHON_REGISTER_ABCS 1 -#endif -#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) -static PyObject* __Pyx_patch_abc_module(PyObject *module); -static PyObject* __Pyx_patch_abc_module(PyObject *module) { - module = __Pyx_Coroutine_patch_module( - module, "" -"if _cython_generator_type is not None:\n" -" try: Generator = _module.Generator\n" -" except AttributeError: pass\n" -" else: 
Generator.register(_cython_generator_type)\n" -"if _cython_coroutine_type is not None:\n" -" try: Coroutine = _module.Coroutine\n" -" except AttributeError: pass\n" -" else: Coroutine.register(_cython_coroutine_type)\n" - ); - return module; -} -#endif -static int __Pyx_patch_abc(void) { -#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) - static int abc_patched = 0; - if (CYTHON_REGISTER_ABCS && !abc_patched) { - PyObject *module; - module = PyImport_ImportModule((PY_MAJOR_VERSION >= 3) ? "collections.abc" : "collections"); - if (unlikely(!module)) { - PyErr_WriteUnraisable(NULL); - if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, - ((PY_MAJOR_VERSION >= 3) ? - "Cython module failed to register with collections.abc module" : - "Cython module failed to register with collections module"), 1) < 0)) { - return -1; - } - } else { - module = __Pyx_patch_abc_module(module); - abc_patched = 1; - if (unlikely(!module)) - return -1; - Py_DECREF(module); - } - module = PyImport_ImportModule("backports_abc"); - if (module) { - module = __Pyx_patch_abc_module(module); - Py_XDECREF(module); - } - if (!module) { - PyErr_Clear(); - } - } -#else - if ((0)) __Pyx_Coroutine_patch_module(NULL, NULL); -#endif - return 0; -} - -/* Generator */ -static PyMethodDef __pyx_Generator_methods[] = { - {"send", (PyCFunction) __Pyx_Coroutine_Send, METH_O, - (char*) PyDoc_STR("send(arg) -> send 'arg' into generator,\nreturn next yielded value or raise StopIteration.")}, - {"throw", (PyCFunction) __Pyx_Coroutine_Throw, METH_VARARGS, - (char*) PyDoc_STR("throw(typ[,val[,tb]]) -> raise exception in generator,\nreturn next yielded value or raise StopIteration.")}, - {"close", (PyCFunction) __Pyx_Coroutine_Close_Method, METH_NOARGS, - (char*) PyDoc_STR("close() -> raise GeneratorExit inside generator.")}, - {0, 0, 0, 0} -}; -static PyMemberDef __pyx_Generator_memberlist[] = { - {(char *) "gi_running", T_BOOL, offsetof(__pyx_CoroutineObject, is_running), READONLY, NULL}, - {(char*) 
"gi_yieldfrom", T_OBJECT, offsetof(__pyx_CoroutineObject, yieldfrom), READONLY, - (char*) PyDoc_STR("object being iterated by 'yield from', or None")}, - {(char*) "gi_code", T_OBJECT, offsetof(__pyx_CoroutineObject, gi_code), READONLY, NULL}, - {(char *) "__module__", T_OBJECT, offsetof(__pyx_CoroutineObject, gi_modulename), 0, 0}, -#if CYTHON_USE_TYPE_SPECS - {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CoroutineObject, gi_weakreflist), READONLY, 0}, -#endif - {0, 0, 0, 0, 0} -}; -static PyGetSetDef __pyx_Generator_getsets[] = { - {(char *) "__name__", (getter)__Pyx_Coroutine_get_name, (setter)__Pyx_Coroutine_set_name, - (char*) PyDoc_STR("name of the generator"), 0}, - {(char *) "__qualname__", (getter)__Pyx_Coroutine_get_qualname, (setter)__Pyx_Coroutine_set_qualname, - (char*) PyDoc_STR("qualified name of the generator"), 0}, - {(char *) "gi_frame", (getter)__Pyx_Coroutine_get_frame, NULL, - (char*) PyDoc_STR("Frame of the generator"), 0}, - {0, 0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_GeneratorType_slots[] = { - {Py_tp_dealloc, (void *)__Pyx_Coroutine_dealloc}, - {Py_tp_traverse, (void *)__Pyx_Coroutine_traverse}, - {Py_tp_iter, (void *)PyObject_SelfIter}, - {Py_tp_iternext, (void *)__Pyx_Generator_Next}, - {Py_tp_methods, (void *)__pyx_Generator_methods}, - {Py_tp_members, (void *)__pyx_Generator_memberlist}, - {Py_tp_getset, (void *)__pyx_Generator_getsets}, - {Py_tp_getattro, (void *) __Pyx_PyObject_GenericGetAttrNoDict}, -#if CYTHON_USE_TP_FINALIZE - {Py_tp_finalize, (void *)__Pyx_Coroutine_del}, -#endif - {0, 0}, -}; -static PyType_Spec __pyx_GeneratorType_spec = { - __PYX_TYPE_MODULE_PREFIX "generator", - sizeof(__pyx_CoroutineObject), - 0, - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, - __pyx_GeneratorType_slots -}; -#else -static PyTypeObject __pyx_GeneratorType_type = { - PyVarObject_HEAD_INIT(0, 0) - __PYX_TYPE_MODULE_PREFIX "generator", - sizeof(__pyx_CoroutineObject), - 0, - 
(destructor) __Pyx_Coroutine_dealloc, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, - 0, - (traverseproc) __Pyx_Coroutine_traverse, - 0, - 0, - offsetof(__pyx_CoroutineObject, gi_weakreflist), - 0, - (iternextfunc) __Pyx_Generator_Next, - __pyx_Generator_methods, - __pyx_Generator_memberlist, - __pyx_Generator_getsets, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -#if CYTHON_USE_TP_FINALIZE - 0, -#else - __Pyx_Coroutine_del, -#endif - 0, -#if CYTHON_USE_TP_FINALIZE - __Pyx_Coroutine_del, -#elif PY_VERSION_HEX >= 0x030400a1 - 0, -#endif -#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, -#endif -#if __PYX_NEED_TP_PRINT_SLOT - 0, -#endif -#if PY_VERSION_HEX >= 0x030C0000 - 0, -#endif -#if PY_VERSION_HEX >= 0x030d00A4 - 0, -#endif -#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, -#endif -}; -#endif -static int __pyx_Generator_init(PyObject *module) { -#if CYTHON_USE_TYPE_SPECS - __pyx_GeneratorType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_GeneratorType_spec, NULL); -#else - CYTHON_UNUSED_VAR(module); - __pyx_GeneratorType_type.tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - __pyx_GeneratorType_type.tp_iter = PyObject_SelfIter; - __pyx_GeneratorType = __Pyx_FetchCommonType(&__pyx_GeneratorType_type); -#endif - if (unlikely(!__pyx_GeneratorType)) { - return -1; - } - return 0; -} - -/* CheckBinaryVersion */ -static unsigned long __Pyx_get_runtime_version(void) { -#if __PYX_LIMITED_VERSION_HEX >= 0x030B00A4 - return Py_Version & ~0xFFUL; -#else - const char* rt_version = Py_GetVersion(); - unsigned long version = 0; - unsigned long factor = 0x01000000UL; - unsigned int digit = 0; - int i = 0; - while (factor) { - while ('0' <= rt_version[i] && rt_version[i] <= '9') { - digit = digit * 10 + (unsigned int) (rt_version[i] - '0'); - ++i; - } 
- version += factor * digit; - if (rt_version[i] != '.') - break; - digit = 0; - factor >>= 8; - ++i; - } - return version; -#endif -} -static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer) { - const unsigned long MAJOR_MINOR = 0xFFFF0000UL; - if ((rt_version & MAJOR_MINOR) == (ct_version & MAJOR_MINOR)) - return 0; - if (likely(allow_newer && (rt_version & MAJOR_MINOR) > (ct_version & MAJOR_MINOR))) - return 1; - { - char message[200]; - PyOS_snprintf(message, sizeof(message), - "compile time Python version %d.%d " - "of module '%.100s' " - "%s " - "runtime version %d.%d", - (int) (ct_version >> 24), (int) ((ct_version >> 16) & 0xFF), - __Pyx_MODULE_NAME, - (allow_newer) ? "was newer than" : "does not match", - (int) (rt_version >> 24), (int) ((rt_version >> 16) & 0xFF) - ); - return PyErr_WarnEx(NULL, message, 1); - } -} - -/* InitStrings */ -#if PY_MAJOR_VERSION >= 3 -static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) { - if (t.is_unicode | t.is_str) { - if (t.intern) { - *str = PyUnicode_InternFromString(t.s); - } else if (t.encoding) { - *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL); - } else { - *str = PyUnicode_FromStringAndSize(t.s, t.n - 1); - } - } else { - *str = PyBytes_FromStringAndSize(t.s, t.n - 1); - } - if (!*str) - return -1; - if (PyObject_Hash(*str) == -1) - return -1; - return 0; -} -#endif -static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { - while (t->p) { - #if PY_MAJOR_VERSION >= 3 - __Pyx_InitString(*t, t->p); - #else - if (t->is_unicode) { - *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); - } else if (t->intern) { - *t->p = PyString_InternFromString(t->s); - } else { - *t->p = PyString_FromStringAndSize(t->s, t->n - 1); - } - if (!*t->p) - return -1; - if (PyObject_Hash(*t->p) == -1) - return -1; - #endif - ++t; - } - return 0; -} - -#include -static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) { - size_t len = strlen(s); - if 
(unlikely(len > (size_t) PY_SSIZE_T_MAX)) { - PyErr_SetString(PyExc_OverflowError, "byte string is too long"); - return -1; - } - return (Py_ssize_t) len; -} -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { - Py_ssize_t len = __Pyx_ssize_strlen(c_str); - if (unlikely(len < 0)) return NULL; - return __Pyx_PyUnicode_FromStringAndSize(c_str, len); -} -static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char* c_str) { - Py_ssize_t len = __Pyx_ssize_strlen(c_str); - if (unlikely(len < 0)) return NULL; - return PyByteArray_FromStringAndSize(c_str, len); -} -static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { - Py_ssize_t ignore; - return __Pyx_PyObject_AsStringAndSize(o, &ignore); -} -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT -#if !CYTHON_PEP393_ENABLED -static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { - char* defenc_c; - PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); - if (!defenc) return NULL; - defenc_c = PyBytes_AS_STRING(defenc); -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - { - char* end = defenc_c + PyBytes_GET_SIZE(defenc); - char* c; - for (c = defenc_c; c < end; c++) { - if ((unsigned char) (*c) >= 128) { - PyUnicode_AsASCIIString(o); - return NULL; - } - } - } -#endif - *length = PyBytes_GET_SIZE(defenc); - return defenc_c; -} -#else -static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { - if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - if (likely(PyUnicode_IS_ASCII(o))) { - *length = PyUnicode_GET_LENGTH(o); - return PyUnicode_AsUTF8(o); - } else { - PyUnicode_AsASCIIString(o); - return NULL; - } -#else - return PyUnicode_AsUTF8AndSize(o, length); -#endif -} -#endif -#endif -static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { -#if 
__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT - if ( -#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - __Pyx_sys_getdefaultencoding_not_ascii && -#endif - PyUnicode_Check(o)) { - return __Pyx_PyUnicode_AsStringAndSize(o, length); - } else -#endif -#if (!CYTHON_COMPILING_IN_PYPY && !CYTHON_COMPILING_IN_LIMITED_API) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) - if (PyByteArray_Check(o)) { - *length = PyByteArray_GET_SIZE(o); - return PyByteArray_AS_STRING(o); - } else -#endif - { - char* result; - int r = PyBytes_AsStringAndSize(o, &result, length); - if (unlikely(r < 0)) { - return NULL; - } else { - return result; - } - } -} -static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { - int is_true = x == Py_True; - if (is_true | (x == Py_False) | (x == Py_None)) return is_true; - else return PyObject_IsTrue(x); -} -static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { - int retval; - if (unlikely(!x)) return -1; - retval = __Pyx_PyObject_IsTrue(x); - Py_DECREF(x); - return retval; -} -static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { - __Pyx_TypeName result_type_name = __Pyx_PyType_GetName(Py_TYPE(result)); -#if PY_MAJOR_VERSION >= 3 - if (PyLong_Check(result)) { - if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "__int__ returned non-int (type " __Pyx_FMT_TYPENAME "). 
" - "The ability to return an instance of a strict subclass of int is deprecated, " - "and may be removed in a future version of Python.", - result_type_name)) { - __Pyx_DECREF_TypeName(result_type_name); - Py_DECREF(result); - return NULL; - } - __Pyx_DECREF_TypeName(result_type_name); - return result; - } -#endif - PyErr_Format(PyExc_TypeError, - "__%.4s__ returned non-%.4s (type " __Pyx_FMT_TYPENAME ")", - type_name, type_name, result_type_name); - __Pyx_DECREF_TypeName(result_type_name); - Py_DECREF(result); - return NULL; -} -static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { -#if CYTHON_USE_TYPE_SLOTS - PyNumberMethods *m; -#endif - const char *name = NULL; - PyObject *res = NULL; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x) || PyLong_Check(x))) -#else - if (likely(PyLong_Check(x))) -#endif - return __Pyx_NewRef(x); -#if CYTHON_USE_TYPE_SLOTS - m = Py_TYPE(x)->tp_as_number; - #if PY_MAJOR_VERSION < 3 - if (m && m->nb_int) { - name = "int"; - res = m->nb_int(x); - } - else if (m && m->nb_long) { - name = "long"; - res = m->nb_long(x); - } - #else - if (likely(m && m->nb_int)) { - name = "int"; - res = m->nb_int(x); - } - #endif -#else - if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { - res = PyNumber_Int(x); - } -#endif - if (likely(res)) { -#if PY_MAJOR_VERSION < 3 - if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { -#else - if (unlikely(!PyLong_CheckExact(res))) { -#endif - return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); - } - } - else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, - "an integer is required"); - } - return res; -} -static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { - Py_ssize_t ival; - PyObject *x; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_CheckExact(b))) { - if (sizeof(Py_ssize_t) >= sizeof(long)) - return PyInt_AS_LONG(b); - else - return PyInt_AsSsize_t(b); - } -#endif - if (likely(PyLong_CheckExact(b))) { - #if CYTHON_USE_PYLONG_INTERNALS - if 
(likely(__Pyx_PyLong_IsCompact(b))) { - return __Pyx_PyLong_CompactValue(b); - } else { - const digit* digits = __Pyx_PyLong_Digits(b); - const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(b); - switch (size) { - case 2: - if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { - return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -2: - if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case 3: - if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { - return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -3: - if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case 4: - if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { - return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -4: - if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - } - } - #endif - return PyLong_AsSsize_t(b); - } - x = PyNumber_Index(b); - if (!x) return -1; - ival = PyInt_AsSsize_t(x); - Py_DECREF(x); - return ival; -} -static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { - if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { - return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o); -#if PY_MAJOR_VERSION < 3 - } else if (likely(PyInt_CheckExact(o))) { - return PyInt_AS_LONG(o); -#endif - } else { - Py_ssize_t ival; - PyObject *x; - x = PyNumber_Index(o); - if (!x) return -1; - ival = PyInt_AsLong(x); - Py_DECREF(x); - return ival; - } 
-} -static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { - return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); -} -static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { - return PyInt_FromSize_t(ival); -} - - -/* #### Code section: utility_code_pragmas_end ### */ -#ifdef _MSC_VER -#pragma warning( pop ) -#endif - - - -/* #### Code section: end ### */ -#endif /* Py_PYTHON_H */ diff --git a/jcvi/formats/cblast.pyx b/jcvi/formats/cblast.pyx deleted file mode 100644 index 15f89a47..00000000 --- a/jcvi/formats/cblast.pyx +++ /dev/null @@ -1,210 +0,0 @@ -# cython: language_level=2, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True - -""" -Cythonized (fast) version of BlastLine - -Stolen from brentp's biostuff (thanks): - -""" -import sys -from libc.stdio cimport FILE, EOF, fopen, fscanf, rewind, fclose, sscanf, \ - fgets, sprintf -from libc.string cimport strcpy - - -cdef const char *blast_format = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f" -cdef const char *blast_format_line = "%s\t%s\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\t%f\n" -cdef const char *blast_output = "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.2g\t%.3g" -cdef const char *bed_output = "%s\t%d\t%d\t%s:%d-%d\t%.2g\t%c" - - -cdef class Blast: - cdef: - FILE* fh - object filename - - def __cinit__(self, char* filename): - self.fh = fopen(filename, 'r') - self.filename = filename - - def __iter__(self): - rewind(self.fh) - return self - - def __next__(self): - cdef: - float pct = 0.0, evalue = 0.0, bit = 0.0 - char qname[128] - char sname[128] - int hlen, nmiss, ngap, qstart, qstop, sstart, sstop - char *tmp - int success - - success = fscanf(self.fh, blast_format_line, qname, sname, \ - &pct, &hlen, &nmiss, &ngap, &qstart, &qstop,\ - &sstart, &sstop, &evalue, &bit ) - if success == EOF: - raise StopIteration - return create_blast_line(qname, sname, pct, hlen, nmiss, ngap, - qstart, qstop, sstart, sstop, evalue, bit) - - def __dealloc__(self): - 
fclose(self.fh) - - def __repr__(self): - return "Blast('%s')" % (self.filename, ) - -# Python 2 and 3 differ in str and unicode handling -# https://github.com/PySlurm/pyslurm/wiki/Strings-and-bytes-in-Cython -cdef bytes c_str(str s): - return s.encode("UTF-8") - -cdef str py_str(bytes s): - return s.decode("UTF-8", "replace") - - -cdef class BlastLine: - """ - Given a string of tab-delimited (-m 8) blast output, parse it and create - an object with the usual attrs: - - >>> b = BlastLine("Os09g11510 Os08g13650 92.31 39 3 0 2273 2311 3237 3199 0.001 54.0") - >>> b.query - 'Os09g11510' - >>> attrs = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ - ... 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score') - >>> [getattr(b, attr) for attr in attrs] # doctest: +ELLIPSIS - ['Os09g11510', 'Os08g13650', 92.3..., 39, 3, 0, 2273, 2311, 3237, 3199, 0.001..., 54.0] - """ - - __slots__ = ('query', 'subject', 'pctid', 'hitlen', 'nmismatch', 'ngaps', \ - 'qstart', 'qstop', 'sstart', 'sstop', 'evalue', 'score', \ - 'qseqid', 'sseqid', 'qi', 'si', 'orientation') - - cdef public: - char _query[128] - char _subject[128] - int hitlen, nmismatch, ngaps, qstart, qstop, sstart, sstop - float pctid, score - double evalue - object qseqid, sseqid - int qi, si - char orientation - - property query: - def __get__(self): - return py_str(self._query) - def __set__(self, val: str): - strcpy(self._query, c_str(val)) - - property subject: - def __get__(self): - return py_str(self._subject) - def __set__(self, val: str): - strcpy(self._subject, c_str(val)) - - def __init__(self, s): - sline = c_str(s) - sscanf(sline, blast_format, self._query, self._subject, - &self.pctid, &self.hitlen, &self.nmismatch, &self.ngaps, - &self.qstart, &self.qstop, - &self.sstart, &self.sstop, - &self.evalue, &self.score) - - self.orientation = '+' - if self.qstart > self.qstop: - self.qstart, self.qstop = self.qstop, self.qstart - self.orientation = '-' - if self.sstart > self.sstop: - self.sstart, 
self.sstop = self.sstop, self.sstart - self.orientation = '-' - - def __richcmp__(BlastLine self, BlastLine other, size_t op): - if op == 2: # == - if self.query != other.query and self.qstart != other.qstart: - return False - return self.subject == other.subject and \ - self.qstop == other.qstop and \ - self.sstop == other.sstop and \ - self.evalue == other.evalue and \ - self.hitlen == other.hitlen - - elif op == 3: # != - return not self.__richcmp__(other, 2) - else: - raise Exception("that comparison not implemented") - - def __hash__(self): - return id(self) - - def __repr__(self): - return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % \ - (self.query, self.subject, self.evalue, self.score) - - def __str__(self): - args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - if self.orientation == '-': - args[8], args[9] = args[9], args[8] - - cdef char result[512] - sprintf(result, blast_output, self._query, self._subject, - self.pctid, self.hitlen, self.nmismatch, self.ngaps, - self.qstart, self.qstop, - self.sstart, self.sstop, - self.evalue, self.score) - - return py_str(result) - - @property - def has_score(self): - return hasattr(self, "score") - - @property - def swapped(self): - """ - Swap query and subject. 
- """ - args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - args[0:2] = [self.subject, self.query] - args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - if self.orientation == '-': - args[8], args[9] = args[9], args[8] - b = "\t".join(str(x) for x in args) - return BlastLine(b) - - @property - def bedline(self): - cdef char result[512] - sprintf(result, bed_output, - self._subject, self.sstart - 1, self.sstop, - self._query, self.qstart, self.qstop, - self.score, self.orientation) - - return py_str(result) - - def __reduce__(self): - return create_blast_line, ( - self.query, self.subject, self.pctid, self.hitlen, self.nmismatch, - self.ngaps, self.qstart, self.qstop, self.sstart, self.sstop, - self.evalue, self.score) - - -cdef BlastLine create_blast_line(char *query, char *subject, float pctid, int hitlen, - int nmismatch, int ngaps, int qstart, int qstop, - int sstart, int sstop, float evalue, float score): - """ Factory method. - """ - cdef BlastLine b = BlastLine.__new__(BlastLine) - b.query = query - b.subject = subject - b.pctid = pctid - b.hitlen = hitlen - b.nmismatch = nmismatch - b.ngaps = ngaps - b.qstart = qstart - b.qstop = qstop - b.sstart = sstart - b.sstop = sstop - b.evalue = evalue - b.score = score - return b diff --git a/jcvi/formats/cdt.py b/jcvi/formats/cdt.py deleted file mode 100644 index 077b34f4..00000000 --- a/jcvi/formats/cdt.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog data.cdt data.nwk - -Convert the result from Eisen's CLUSTER program: data.gtr and data.cdt into NEWICK format -""" - -import csv -import sys - -from collections import namedtuple -from itertools import groupby - -from ..apps.base import OptionParser, logger - -from .base import BaseFile - - -GTRLine = namedtuple("GTRLine", "parent left_child right_child dist") - - -class CDT(BaseFile): - def __init__(self, filename): - super().__init__(filename) - - pf = filename.rsplit(".", 1)[0] - 
self.gtrfile = pf + ".gtr" - self.atrfile = pf + ".atr" - self.get_names() - - def get_names(self): - cdt_file = self.filename - reader = csv.reader(open(cdt_file), delimiter="\t") - - gid = next(reader) - assert gid[0] == "GID" - aid = next(reader) - if aid[0] == "AID": - eweight = next(reader) - else: - eweight = aid - assert eweight[0] == "EWEIGHT" - - self.gnames = [x[:2] for x in reader] - self.anames = list(zip(aid, gid))[4:] - - def get_gtr_tree(self): - - from ete3 import Tree - - fp = open(self.gtrfile) - reader = csv.reader(fp, delimiter="\t") - nodes = {} - gnames = dict(self.gnames) - for g in map(GTRLine._make, reader): - node = Tree() - parent_name, parent_dist = g.parent, float(g.dist) - for child in (g.left_child, g.right_child): - if child in gnames: - node.add_child(name=gnames[child], dist=1 - parent_dist) - else: - assert child in nodes, child - child_node, child_dist = nodes[child] - node.add_child(child_node, dist=child_dist - parent_dist) - - nodes[parent_name] = (node, parent_dist) - - self.gtr_tree = node - - def print_newick(self, nwk_file): - - self.gtr_tree.write(format=5, outfile=nwk_file) - logger.debug("Newick tree written to `%s`", nwk_file) - - def iter_partitions(self, cutoff=0.3, gtr=True): - from jcvi.utils.grouper import Grouper - - if gtr: - names = self.gnames - fp = open(self.gtrfile) - else: - names = self.anames - fp = open(self.atrfile) - - reader = csv.reader(fp, delimiter="\t") - grouper = Grouper() - for g in map(GTRLine._make, reader): - d = float(g.dist) - if d < cutoff: - continue - - grouper.join(g.parent, g.left_child, g.right_child) - - parents = {} - for i, group in enumerate(grouper): - for g in group: - parents[g] = i - - partitions = [[parents.get(a, x), x] for a, x in names] - for key, parts in groupby(partitions, key=lambda x: x[0]): - yield list(x[1] for x in parts) - - -def main(args): - - cdt_file, nwk_file = args - cdt = CDT(cdt_file) - cdt.get_gtr_tree() - cdt.print_newick(nwk_file) - - -if __name__ == 
"__main__": - - p = OptionParser(__doc__) - opts, args = p.parse_args() - - if len(args) != 2: - sys.exit(not p.print_help()) - - main(args) diff --git a/jcvi/formats/chain.py b/jcvi/formats/chain.py deleted file mode 100644 index 4f4f0d96..00000000 --- a/jcvi/formats/chain.py +++ /dev/null @@ -1,311 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Create the UCSC chain file which is needed to lift over from one coordinate -system to another. - -File format: - - -chain 4900 chrY 58368225 + 25985403 25985638 chr5 151006098 - 43257292 43257528 1 - 9 1 0 - 10 0 5 - 48 - -Header Line: - chain score tName tSize tStrand tStart tEnd qName qSize qStrand qStart qEnd id -Alignment Data Lines - size dt dq - -NOTE: The last line of the alignment section contains only one number: the ungapped -alignment size of the last block. -""" - -import os.path as op -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger, need_update, sh, which - -from .base import BaseFile, read_block -from .sizes import Sizes - - -class ChainLine(object): - def __init__(self, chain, lines): - self.chain = chain - self.blocks = [] - for line in lines: - atoms = line.split() - if len(atoms) == 1: - atoms += [0, 0] - if len(atoms) == 0: - continue - - self.blocks.append([int(x) for x in atoms]) - - self.ungapped, self.dt, self.dq = zip(*self.blocks) - self.ungapped = sum(self.ungapped) - self.dt = sum(self.dt) - self.dq = sum(self.dq) - - -class Chain(BaseFile): - def __init__(self, filename): - super().__init__(filename) - self.chains = list(self.iter_chain()) - - self.ungapped = sum(x.ungapped for x in self.chains) - self.dt = sum(x.dt for x in self.chains) - self.dq = sum(x.dq for x in self.chains) - - def __len__(self): - return len(self.chains) - - def iter_chain(self): - fp = open(self.filename) - for row in fp: - if row[0] != "#": - break - - for chain, lines in read_block(fp, "chain"): - lines = list(lines) - yield ChainLine(chain, lines) - - -def main(): - - 
actions = ( - ("blat", "generate PSL file using BLAT"), - ("frompsl", "generate chain file from PSL format"), - ("fromagp", "generate chain file from AGP format"), - ("summary", "provide stats of the chain file"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def summary(args): - """ - %prog summary old.new.chain old.fasta new.fasta - - Provide stats of the chain file. - """ - from jcvi.formats.fasta import summary as fsummary - from jcvi.utils.cbook import percentage, human_size - - p = OptionParser(summary.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - chainfile, oldfasta, newfasta = args - chain = Chain(chainfile) - ungapped, dt, dq = chain.ungapped, chain.dt, chain.dq - print( - "File `{0}` contains {1} chains.".format(chainfile, len(chain)), file=sys.stderr - ) - print( - "ungapped={0} dt={1} dq={2}".format( - human_size(ungapped), human_size(dt), human_size(dq) - ), - file=sys.stderr, - ) - - oldreal, oldnn, oldlen = fsummary([oldfasta, "--outfile=/dev/null"]) - print( - "Old fasta (`{0}`) mapped: {1}".format(oldfasta, percentage(ungapped, oldreal)), - file=sys.stderr, - ) - - newreal, newnn, newlen = fsummary([newfasta, "--outfile=/dev/null"]) - print( - "New fasta (`{0}`) mapped: {1}".format(newfasta, percentage(ungapped, newreal)), - file=sys.stderr, - ) - - -def fromagp(args): - """ - %prog fromagp agpfile componentfasta objectfasta - - Generate chain file from AGP format. The components represent the old - genome (target) and the objects represent new genome (query). 
- """ - from jcvi.formats.agp import AGP - - p = OptionParser(fromagp.__doc__) - p.add_argument( - "--novalidate", default=False, action="store_true", help="Do not validate AGP" - ) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - agpfile, componentfasta, objectfasta = args - chainfile = agpfile.rsplit(".", 1)[0] + ".chain" - fw = open(chainfile, "w") - agp = AGP(agpfile, validate=(not opts.novalidate)) - componentsizes = Sizes(componentfasta).mapping - objectsizes = Sizes(objectfasta).mapping - chain = "chain" - score = 1000 - tStrand = "+" - id = 0 - for a in agp: - if a.is_gap: - continue - - tName = a.component_id - tSize = componentsizes[tName] - tStart = a.component_beg - tEnd = a.component_end - tStart -= 1 - - qName = a.object - qSize = objectsizes[qName] - qStrand = "-" if a.orientation == "-" else "+" - qStart = a.object_beg - qEnd = a.object_end - if qStrand == "-": - _qStart = qSize - qEnd + 1 - _qEnd = qSize - qStart + 1 - qStart, qEnd = _qStart, _qEnd - qStart -= 1 - - id += 1 - size = a.object_span - headerline = "\t".join( - str(x) - for x in ( - chain, - score, - tName, - tSize, - tStrand, - tStart, - tEnd, - qName, - qSize, - qStrand, - qStart, - qEnd, - id, - ) - ) - alignmentline = size - print(headerline, file=fw) - print(alignmentline, file=fw) - print(file=fw) - - fw.close() - logger.debug("File written to `%s`.", chainfile) - - -def faToTwoBit(fastafile): - twobitfile = fastafile.rsplit(".", 1)[0] + ".2bit" - cmd = "faToTwoBit {0} {1}".format(fastafile, twobitfile) - if need_update(fastafile, twobitfile): - sh(cmd) - return twobitfile - - -def blat(args): - """ - %prog blat old.fasta new.fasta - - Generate psl file using blat. 
- """ - p = OptionParser(blat.__doc__) - p.add_argument( - "--minscore", - default=100, - type=int, - help="Matches minus mismatches gap penalty", - ) - p.add_argument( - "--minid", - default=98, - type=int, - help="Minimum sequence identity", - ) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - oldfasta, newfasta = args - twobitfiles = [] - for fastafile in args: - tbfile = faToTwoBit(fastafile) - twobitfiles.append(tbfile) - - oldtwobit, newtwobit = twobitfiles - cmd = "pblat -threads={0}".format(opts.cpus) if which("pblat") else "blat" - cmd += " {0} {1}".format(oldtwobit, newfasta) - cmd += " -tileSize=12 -minScore={0} -minIdentity={1} ".format( - opts.minscore, opts.minid - ) - pslfile = "{0}.{1}.psl".format( - *(op.basename(x).split(".")[0] for x in (newfasta, oldfasta)) - ) - cmd += pslfile - sh(cmd) - - -def frompsl(args): - """ - %prog frompsl old.new.psl old.fasta new.fasta - - Generate chain file from psl file. The pipeline is describe in: - - """ - p = OptionParser(frompsl.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - pslfile, oldfasta, newfasta = args - pf = oldfasta.split(".")[0] - - # Chain together alignments from using axtChain - chainfile = pf + ".chain" - twobitfiles = [] - for fastafile in (oldfasta, newfasta): - tbfile = faToTwoBit(fastafile) - twobitfiles.append(tbfile) - oldtwobit, newtwobit = twobitfiles - - if need_update(pslfile, chainfile): - cmd = "axtChain -linearGap=medium -psl {0}".format(pslfile) - cmd += " {0} {1} {2}".format(oldtwobit, newtwobit, chainfile) - sh(cmd) - - # Sort chain files - sortedchain = chainfile.rsplit(".", 1)[0] + ".sorted.chain" - if need_update(chainfile, sortedchain): - cmd = "chainSort {0} {1}".format(chainfile, sortedchain) - sh(cmd) - - # Make alignment nets from chains - netfile = pf + ".net" - oldsizes = Sizes(oldfasta).filename - newsizes = Sizes(newfasta).filename - if 
need_update((sortedchain, oldsizes, newsizes), netfile): - cmd = "chainNet {0} {1} {2}".format(sortedchain, oldsizes, newsizes) - cmd += " {0} /dev/null".format(netfile) - sh(cmd) - - # Create liftOver chain file - liftoverfile = pf + ".liftover.chain" - if need_update((netfile, sortedchain), liftoverfile): - cmd = "netChainSubset {0} {1} {2}".format(netfile, sortedchain, liftoverfile) - sh(cmd) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/contig.py b/jcvi/formats/contig.py deleted file mode 100644 index 48882e0d..00000000 --- a/jcvi/formats/contig.py +++ /dev/null @@ -1,182 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -TIGR contig format, see spec: - - -""" - -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger - -from .base import BaseFile, read_block - - -class ReadLine(object): - def __init__(self, row, contig): - # '#16(0) [RC] 3046 bases, 00000000 checksum. {3046 1} <1 3046>' - assert row[0] == "#" - self.id = row.strip("#").split("(")[0] - coords = row.split("<")[1].split(">")[0] - start, end = coords.split() - self.contig = contig - self.start = int(start) - self.end = int(end) - if self.start > self.end: - self.start, self.end = self.end, self.start - - self.orientation = "-" if "[RC]" in row else "+" - - def __str__(self): - return self.id - - @property - def bedline(self): - return "\t".join( - str(x) - for x in ( - self.contig, - self.start - 1, - self.end, - self.id, - "0", - self.orientation, - ) - ) - - __repr__ = __str__ - - -class ContigLine(object): - def __init__(self, row): - # '##1 6 8914 bases, 00000000 checksum.' 
- assert row[:2] == "##" - self.id = row.strip("#").split()[0] - self.reads = [] - - def __str__(self): - return ":".join((self.id, str(self.reads))) - - __repr__ = __str__ - - -class ContigFile(BaseFile): - def __init__(self, filename): - super().__init__(filename) - self.fp = open(filename) - - def iter_records(self): - c = None - for a, b in read_block(self.fp, "#"): - if a[:2] == "##": - if c: - yield c - c = ContigLine(a) - else: - c.reads.append(ReadLine(a, c.id)) - if c: # last one - yield c - - -def main(): - - actions = ( - ("bed", "convert read membership to bed format"), - ("frombed", "convert read placement to contig format"), - ) - - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def frombed(args): - """ - %prog frombed bedfile contigfasta readfasta - - Convert read placement to contig format. This is useful before running BAMBUS. - """ - from jcvi.formats.fasta import Fasta - from jcvi.formats.bed import Bed - from jcvi.utils.cbook import fill - - p = OptionParser(frombed.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - bedfile, contigfasta, readfasta = args - prefix = bedfile.rsplit(".", 1)[0] - contigfile = prefix + ".contig" - idsfile = prefix + ".ids" - - contigfasta = Fasta(contigfasta) - readfasta = Fasta(readfasta) - - bed = Bed(bedfile) - checksum = "00000000 checksum." 
- fw_ids = open(idsfile, "w") - fw = open(contigfile, "w") - - for ctg, reads in bed.sub_beds(): - ctgseq = contigfasta[ctg] - ctgline = "##{0} {1} {2} bases, {3}".format( - ctg, len(reads), len(ctgseq), checksum - ) - - print(ctg, file=fw_ids) - print(ctgline, file=fw) - print(fill(ctgseq.seq), file=fw) - - for b in reads: - read = b.accn - strand = b.strand - readseq = readfasta[read] - rc = " [RC]" if strand == "-" else "" - readlen = len(readseq) - rstart, rend = 1, readlen - if strand == "-": - rstart, rend = rend, rstart - - readrange = "{{{0} {1}}}".format(rstart, rend) - conrange = "<{0} {1}>".format(b.start, b.end) - readline = "#{0}(0){1} {2} bases, {3} {4} {5}".format( - read, rc, readlen, checksum, readrange, conrange - ) - print(readline, file=fw) - print(fill(readseq.seq), file=fw) - - logger.debug("Mapped contigs written to `{0}`.".format(contigfile)) - logger.debug("Contig IDs written to `{0}`.".format(idsfile)) - - -def bed(args): - """ - %prog bed contigfile - - Prints out the contigs and their associated reads. - """ - p = OptionParser(main.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (contigfile,) = args - bedfile = contigfile.rsplit(".", 1)[0] + ".bed" - fw = open(bedfile, "w") - c = ContigFile(contigfile) - - for rec in c.iter_records(): - for r in rec.reads: - print(r.bedline, file=fw) - - logger.debug("File written to `%s`.", bedfile) - - return bedfile - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/coords.py b/jcvi/formats/coords.py deleted file mode 100644 index 03596620..00000000 --- a/jcvi/formats/coords.py +++ /dev/null @@ -1,612 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -parses JCVI software NUCMER (http://mummer.sourceforge.net/manual/) -output - mostly as *.coords file. 
-""" -import os.path as op -import sys - -from itertools import groupby -from math import exp - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - get_abs_path, - logger, - need_update, - sh, -) -from ..assembly.base import calculate_A50 - -from .base import LineFile, must_open -from .blast import AlignStats - - -Overlap_types = ("none", "a ~ b", "b ~ a", "a in b", "b in a") - - -class CoordsLine(object): - """ - The coords line looks like (in one line): - 2953 4450 | 525 2023 | 1498 1499 | 98.07 | - 8046 2023 | 18.62 74.10 | AC182814.30 contig_100476 - - the coords file needs to be generated by `show-coords -rcl` - """ - - def __init__(self, row): - - row = row.replace(" | ", "") - atoms = row.split() - assert len(atoms) in (13, 17), "expecting 13 or 17 columns" - - self.start1 = int(atoms[0]) - self.end1 = int(atoms[1]) - - self.start2 = int(atoms[2]) - self.end2 = int(atoms[3]) - - if self.start2 > self.end2: - self.start2, self.end2 = self.end2, self.start2 - self.orientation = "-" - else: - self.orientation = "+" - - self.len1 = int(atoms[4]) - self.len2 = int(atoms[5]) - - self.identity = float(atoms[6]) - - self.reflen = int(atoms[7]) - self.querylen = int(atoms[8]) - - self.refcov = float(atoms[9]) / 100.0 - self.querycov = float(atoms[10]) / 100.0 - - self.ref = atoms[11] - self.query = atoms[12] - - # this is taken from CoGeBlast: - # the coverage of the hit muliplied by percent seq identity - # range from 0-100 - self.quality = self.identity * self.querycov - self.score = int(self.identity * self.len1 / 100) - - def __str__(self): - slots = "ref start1 end1 reflen " + "query start2 end2 querylen orientation" - return "\t".join( - str(x) for x in [getattr(self, attr) for attr in slots.split()] - ) - - def bedline(self, pctid=False): - score = self.identity if pctid else self.score - return "\t".join( - str(x) - for x in ( - self.ref, - self.start1 - 1, - self.end1, - self.query, - score, - self.orientation, - ) - ) - - def qbedline(self, 
pctid=False): - score = self.identity if pctid else self.score - return "\t".join( - str(x) - for x in ( - self.query, - self.start2 - 1, - self.end2, - self.ref, - score, - self.orientation, - ) - ) - - @property - def blastline(self): - hitlen = max(self.len1, self.len2) - score = self.score - mismatch = int(self.len1 * (1 - self.identity / 100)) - log_prob = -score * 0.693147181 - evalue = 3.0e9 * exp(log_prob) - evalue = "{0:.1g}".format(evalue) - return "\t".join( - str(x) - for x in ( - self.query, - self.ref, - self.identity, - hitlen, - mismatch, - 0, - self.start2, - self.end2, - self.start1, - self.end1, - evalue, - score, - ) - ) - - def overlap(self, max_hang=100): - r""" - Determine the type of overlap given query, ref alignment coordinates - Consider the following alignment between sequence a and b: - - aLhang \ / aRhang - \------------/ - /------------\ - bLhang / \ bRhang - - Terminal overlap: a before b, b before a - Contain overlap: a in b, b in a - """ - aL, aR = 1, self.reflen - bL, bR = 1, self.querylen - aLhang, aRhang = self.start1 - aL, aR - self.end1 - bLhang, bRhang = self.start2 - bL, bR - self.end2 - if self.orientation == "-": - bLhang, bRhang = bRhang, bLhang - - s1 = aLhang + bRhang - s2 = aRhang + bLhang - s3 = aLhang + aRhang - s4 = bLhang + bRhang - - # Dovetail (terminal) overlap - if s1 < max_hang: - type = 2 # b ~ a - elif s2 < max_hang: - type = 1 # a ~ b - # Containment overlap - elif s3 < max_hang: - type = 3 # a in b - elif s4 < max_hang: - type = 4 # b in a - else: - type = 0 - - return type - - -class Coords(LineFile): - """ - when parsing the .coords file, first skip first 5 lines - [S1] [E1] | [S2] [E2] | [LEN 1] [LEN 2] | [% IDY] | [TAGS] - - then each row would be composed as this - """ - - def __init__(self, filename, sorted=False, header=False): - - if filename.endswith(".delta"): - coordsfile = filename.rsplit(".", 1)[0] + ".coords" - if need_update(filename, coordsfile): - fromdelta([filename]) - filename = 
coordsfile - - super().__init__(filename) - - fp = open(filename) - if header: - self.cmd = next(fp) - - for row in fp: - try: - self.append(CoordsLine(row)) - except AssertionError: - pass - - if sorted: - self.ref_sort() - - def ref_sort(self): - # sort by reference positions - self.sort(key=lambda x: (x.ref, x.start1)) - - def quality_sort(self): - # sort descending with score = identity * coverage - self.sort(key=lambda x: (x.query, -x.quality)) - - @property - def hits(self): - """ - returns a dict with query => blastline - """ - self.quality_sort() - - hits = dict( - (query, list(blines)) - for (query, blines) in groupby(self, lambda x: x.query) - ) - - self.ref_sort() - - return hits - - @property - def best_hits(self): - """ - returns a dict with query => best mapped position - """ - self.quality_sort() - - best_hits = dict( - (query, next(blines)) - for (query, blines) in groupby(self, lambda x: x.query) - ) - - self.ref_sort() - - return best_hits - - -def get_stats(coordsfile): - - from jcvi.utils.range import range_union - - logger.debug("Report stats on `%s`", coordsfile) - coords = Coords(coordsfile) - ref_ivs = [] - qry_ivs = [] - identicals = 0 - alignlen = 0 - alignlens = [] - - for c in coords: - - qstart, qstop = c.start2, c.end2 - if qstart > qstop: - qstart, qstop = qstop, qstart - qry_ivs.append((c.query, qstart, qstop)) - - sstart, sstop = c.start1, c.end1 - if sstart > sstop: - sstart, sstop = sstop, sstart - ref_ivs.append((c.ref, sstart, sstop)) - - alen = sstop - sstart - alignlen += alen - identicals += c.identity / 100.0 * alen - alignlens.append(alen) - - qrycovered = range_union(qry_ivs) - refcovered = range_union(ref_ivs) - _, AL50, _ = calculate_A50(alignlens) - filename = op.basename(coordsfile) - alignstats = AlignStats( - filename, qrycovered, refcovered, None, None, identicals, AL50 - ) - - return alignstats - - -def main(): - - actions = ( - ("annotate", "annotate overlap types in coordsfile"), - ("blast", "convert to blast 
tabular output"), - ("filter", "filter based on id% and cov%, write a new coords file"), - ("fromdelta", "convert deltafile to coordsfile"), - ("merge", "merge deltafiles"), - ("sort", "sort coords file based on query or subject"), - ("summary", "provide summary on id% and cov%"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def merge(args): - """ - %prog merge ref.fasta query.fasta *.delta - - Merge delta files into a single delta. - """ - p = OptionParser(merge.__doc__) - p.set_outfile(outfile="merged_results.delta") - opts, args = p.parse_args(args) - - if len(args) < 3: - sys.exit(not p.print_help()) - - ref, query = args[:2] - deltafiles = args[2:] - outfile = opts.outfile - - ref = get_abs_path(ref) - query = get_abs_path(query) - fw = must_open(outfile, "w") - print(" ".join((ref, query)), file=fw) - print("NUCMER", file=fw) - fw.close() - - for d in deltafiles: - cmd = "awk 'NR > 2 {{print $0}}' {0}".format(d) - sh(cmd, outfile=outfile, append=True) - - -def blast(args): - """ - %prog blast - - Covert delta or coordsfile to BLAST tabular output. - """ - p = OptionParser(blast.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (deltafile,) = args - blastfile = deltafile.rsplit(".", 1)[0] + ".blast" - - if need_update(deltafile, blastfile): - coords = Coords(deltafile) - fw = open(blastfile, "w") - for c in coords: - print(c.blastline, file=fw) - - -def fromdelta(args): - """ - %prog fromdelta deltafile - - Convert deltafile to coordsfile. - """ - p = OptionParser(fromdelta.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (deltafile,) = args - coordsfile = deltafile.rsplit(".", 1)[0] + ".coords" - cmd = "show-coords -rclH {0}".format(deltafile) - sh(cmd, outfile=coordsfile) - - return coordsfile - - -def sort(args): - """ - %prog sort coordsfile - - Sort coordsfile based on query or ref. 
- """ - import jcvi.formats.blast - - return jcvi.formats.blast.sort(args + ["--coords"]) - - -def coverage(args): - """ - %prog coverage coordsfile - - Report the coverage per query record, useful to see which query matches - reference. The coords file MUST be filtered with supermap:: - - jcvi.algorithms.supermap --filter query - """ - p = OptionParser(coverage.__doc__) - p.add_argument( - "-c", - dest="cutoff", - default=0.5, - type=float, - help="only report query with coverage greater than", - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (coordsfile,) = args - fp = open(coordsfile) - - coords = [] - for row in fp: - try: - c = CoordsLine(row) - except AssertionError: - continue - coords.append(c) - - coords.sort(key=lambda x: x.query) - - coverages = [] - for query, lines in groupby(coords, key=lambda x: x.query): - cumulative_cutoff = sum(x.querycov for x in lines) - coverages.append((query, cumulative_cutoff)) - - coverages.sort(key=lambda x: (-x[1], x[0])) - for query, cumulative_cutoff in coverages: - if cumulative_cutoff < opts.cutoff: - break - print("{0}\t{1:.2f}".format(query, cumulative_cutoff)) - - -def annotate(args): - """ - %prog annotate coordsfile - - Annotate coordsfile to append an additional column, with the following - overlaps: {0}. 
- """ - p = OptionParser(annotate.__doc__.format(", ".join(Overlap_types))) - p.add_argument( - "--maxhang", - default=100, - type=int, - help="Max hang to call dovetail overlap", - ) - p.add_argument( - "--all", - default=False, - action="store_true", - help="Output all lines [default: terminal/containment]", - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (coordsfile,) = args - fp = open(coordsfile) - - for row in fp: - try: - c = CoordsLine(row) - except AssertionError: - continue - - ov = c.overlap(opts.maxhang) - if not opts.all and ov == 0: - continue - - print("{0}\t{1}".format(row.strip(), Overlap_types[ov])) - - -def summary(args): - """ - %prog summary coordsfile - - provide summary on id% and cov%, for both query and reference - """ - - p = OptionParser(summary.__doc__) - p.add_argument( - "-s", - dest="single", - default=False, - action="store_true", - help="provide stats per reference seq", - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - (coordsfile,) = args - alignstats = get_stats(coordsfile) - alignstats.print_stats() - - -def filter(args): - """ - %prog filter - - Produce a new delta/coords file and filter based on id% or cov%. - Use `delta-filter` for .delta file. - """ - p = OptionParser(filter.__doc__) - p.set_align(pctid=0, hitlen=0) - p.add_argument( - "--overlap", - default=False, - action="store_true", - help="Print overlap status (e.g. 
terminal, contained)", - ) - - opts, args = p.parse_args(args) - if len(args) != 1: - sys.exit(not p.print_help()) - - pctid = opts.pctid - hitlen = opts.hitlen - - (filename,) = args - if pctid == 0 and hitlen == 0: - return filename - - pf, suffix = filename.rsplit(".", 1) - outfile = "".join((pf, ".P{0}L{1}.".format(int(pctid), int(hitlen)), suffix)) - if not need_update(filename, outfile): - return outfile - - if suffix == "delta": - cmd = "delta-filter -i {0} -l {1} {2}".format(pctid, hitlen, filename) - sh(cmd, outfile=outfile) - return outfile - - fp = open(filename) - fw = must_open(outfile, "w") - for row in fp: - try: - c = CoordsLine(row) - except AssertionError: - continue - - if c.identity < pctid: - continue - if c.len2 < hitlen: - continue - if opts.overlap and not c.overlap: - continue - - outrow = row.rstrip() - if opts.overlap: - ov = Overlap_types[c.overlap] - outrow += "\t" + ov - print(outrow, file=fw) - - return outfile - - -def bed(args): - """ - %prog bed coordsfile - - will produce a bed list of mapped position and orientation (needs to - be beyond quality cutoff, say 50) in bed format - """ - p = OptionParser(bed.__doc__) - p.add_argument( - "--query", - default=False, - action="store_true", - help="print out query intervals rather than ref", - ) - p.add_argument( - "--pctid", - default=False, - action="store_true", - help="use pctid in score", - ) - p.add_argument( - "--cutoff", - dest="cutoff", - default=0, - type=float, - help="get all the alignments with quality above threshold", - ) - - opts, args = p.parse_args(args) - if len(args) != 1: - sys.exit(p.print_help()) - - (coordsfile,) = args - query = opts.query - pctid = opts.pctid - quality_cutoff = opts.cutoff - - coords = Coords(coordsfile) - - for c in coords: - if c.quality < quality_cutoff: - continue - line = c.qbedline(pctid=pctid) if query else c.bedline(pctid=pctid) - print(line) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/excel.py 
b/jcvi/formats/excel.py deleted file mode 100644 index 64dfff7f..00000000 --- a/jcvi/formats/excel.py +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Read and write EXCEL file. - -http://www.simplistix.co.uk/presentations/python-excel.pdf - -Library dependency: xlutils -""" -import os.path as op -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger - - -class ColorMatcher(object): - def __init__(self): - self.reset() - - def reset(self): - self.unused_colors = set(self.xlwt_colors) - # Never use black. - self.unused_colors.discard((0, 0, 0)) - - # Culled from a table at http://www.mvps.org/dmcritchie/excel/colors.htm - xlwt_colors = [ - (0, 0, 0), - (255, 255, 255), - (255, 0, 0), - (0, 255, 0), - (0, 0, 255), - (255, 255, 0), - (255, 0, 255), - (0, 255, 255), - (0, 0, 0), - (255, 255, 255), - (255, 0, 0), - (0, 255, 0), - (0, 0, 255), - (255, 255, 0), - (255, 0, 255), - (0, 255, 255), - (128, 0, 0), - (0, 128, 0), - (0, 0, 128), - (128, 128, 0), - (128, 0, 128), - (0, 128, 128), - (192, 192, 192), - (128, 128, 128), - (153, 153, 255), - (153, 51, 102), - (255, 255, 204), - (204, 255, 255), - (102, 0, 102), - (255, 128, 128), - (0, 102, 204), - (204, 204, 255), - (0, 0, 128), - (255, 0, 255), - (255, 255, 0), - (0, 255, 255), - (128, 0, 128), - (128, 0, 0), - (0, 128, 128), - (0, 0, 255), - (0, 204, 255), - (204, 255, 255), - (204, 255, 204), - (255, 255, 153), - (153, 204, 255), - (255, 153, 204), - (204, 153, 255), - (255, 204, 153), - (51, 102, 255), - (51, 204, 204), - (153, 204, 0), - (255, 204, 0), - (255, 153, 0), - (255, 102, 0), - (102, 102, 153), - (150, 150, 150), - (0, 51, 102), - (51, 153, 102), - (0, 51, 0), - (51, 51, 0), - (153, 51, 0), - (153, 51, 102), - (51, 51, 153), - (51, 51, 51), - ] - - @staticmethod - def color_distance(rgb1, rgb2): - # Adapted from Colour metric by Thiadmer Riemersma, - # http://www.compuphase.com/cmetric.htm - rmean = (rgb1[0] + rgb2[0]) / 2 - r = rgb1[0] - 
rgb2[0] - g = rgb1[1] - rgb2[1] - b = rgb1[2] - rgb2[2] - return ( - (((512 + rmean) * r * r) / 256) - + 4 * g * g - + (((767 - rmean) * b * b) / 256) - ) - - def match_color_index(self, color): - """Takes an "R,G,B" string or wx.Color and returns a matching xlwt - color. - """ - from jcvi.utils.webcolors import color_diff - - if isinstance(color, int): - return color - if color: - if isinstance(color, str): - rgb = map(int, color.split(",")) - else: - rgb = color.Get() - logger.disable(logger.DEBUG) - distances = [color_diff(rgb, x) for x in self.xlwt_colors] - logger.disable(logger.NOTSET) - result = distances.index(min(distances)) - self.unused_colors.discard(self.xlwt_colors[result]) - return result - - def get_unused_color(self): - """Returns an xlwt color index that has not been previously returned by - this instance. Attempts to maximize the distance between the color and - all previously used colors. - """ - if not self.unused_colors: - # If we somehow run out of colors, reset the color matcher. - self.reset() - used_colors = [c for c in self.xlwt_colors if c not in self.unused_colors] - result_color = max( - self.unused_colors, - key=lambda c: min(self.color_distance(c, c2) for c2 in used_colors), - ) - result_index = self.xlwt_colors.index(result_color) - self.unused_colors.discard(result_color) - return result_index - - -def main(): - - actions = ( - ("csv", "Convert EXCEL to csv file"), - ("fromcsv", "Convert csv file to EXCEL"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def fromcsv(args): - """ - %prog fromcsv csvfile - - Convert csv file to EXCEL. 
- """ - from csv import reader - from xlwt import Workbook, easyxf - from jcvi.formats.base import flexible_cast - - p = OptionParser(fromcsv.__doc__) - p.add_argument( - "--noheader", - default=False, - action="store_true", - help="Do not treat the first row as header", - ) - p.add_argument("--rgb", default=-1, type=int, help="Show RGB color box") - p.set_sep() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (csvfile,) = args - header = not opts.noheader - rgb = opts.rgb - excelfile = csvfile.rsplit(".", 1)[0] + ".xls" - - data = [] - for row in reader(open(csvfile), delimiter=opts.sep): - data.append(row) - - w = Workbook() - s = w.add_sheet(op.basename(csvfile)) - - header_style = easyxf("font: bold on") - if header: - s.panes_frozen = True - s.horz_split_pos = 1 - - cm = ColorMatcher() - for i, row in enumerate(data): - for j, cell in enumerate(row): - cell = flexible_cast(cell) - if header and i == 0: - s.write(i, j, cell, header_style) - else: - if j == rgb: - cix = cm.match_color_index(cell) - color_style = easyxf("font: color_index {0}".format(cix)) - s.write(i, j, cell, color_style) - else: - s.write(i, j, cell) - - w.save(excelfile) - logger.debug("File written to `%s`.", excelfile) - return excelfile - - -def csv(args): - """ - %prog csv excelfile - - Convert EXCEL to csv file. 
- """ - from xlrd import open_workbook - - p = OptionParser(csv.__doc__) - p.set_sep(sep=",") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (excelfile,) = args - sep = opts.sep - csvfile = excelfile.rsplit(".", 1)[0] + ".csv" - wb = open_workbook(excelfile) - fw = open(csvfile, "w") - for s in wb.sheets(): - print("Sheet:", s.name, file=sys.stderr) - for row in range(s.nrows): - values = [] - for col in range(s.ncols): - values.append(s.cell(row, col).value) - print(sep.join(str(x) for x in values), file=fw) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/fasta.py b/jcvi/formats/fasta.py deleted file mode 100644 index 5ca4a516..00000000 --- a/jcvi/formats/fasta.py +++ /dev/null @@ -1,2642 +0,0 @@ -""" -Wrapper for biopython Fasta, add option to parse sequence headers -""" - -import hashlib -import os.path as op -import re -import shutil -import string -import sys - -from itertools import groupby, zip_longest -from random import choice - -from Bio import SeqIO -from Bio.Seq import Seq -from Bio.SeqRecord import SeqRecord -from Bio.SeqUtils.CheckSum import seguid -from more_itertools import grouper, pairwise - -from ..apps.base import ActionDispatcher, OptionParser, cleanup, logger, need_update -from ..utils.cbook import percentage -from ..utils.console import printf -from ..utils.table import write_csv - -from .base import BaseFile, DictFile, must_open -from .bed import Bed - - -class Fasta(BaseFile, dict): - def __init__(self, filename, index=False, key_function=None, lazy=False): - super().__init__(filename) - self.key_function = key_function - - if lazy: # do not incur the overhead - return - - if index: - self.index = SeqIO.index(filename, "fasta", key_function=key_function) - else: - # SeqIO.to_dict expects a different key_function that operates on - # the SeqRecord instead of the raw string - _key_function = ( - (lambda rec: key_function(rec.description)) if key_function else None - ) - 
self.index = SeqIO.to_dict( - SeqIO.parse(must_open(filename), "fasta"), key_function=_key_function - ) - - def _key_function(self, key): - return self.key_function(key) if self.key_function else key - - def __len__(self): - return len(self.index) - - def __contains__(self, key): - key = self._key_function(key) - return key in self.index - - def __getitem__(self, key): - key = self._key_function(key) - rec = self.index[key] - return rec - - def keys(self): - return self.index.keys() - - def iterkeys(self): - for k in self.index.keys(): - yield k - - def iteritems(self): - for k in self.iterkeys(): - yield k, self[k] - - def itersizes(self): - for k in self.iterkeys(): - yield k, len(self[k]) - - def iteritems_ordered(self): - for rec in SeqIO.parse(must_open(self.filename), "fasta"): - yield rec.name, rec - - def iterdescriptions_ordered(self): - for k, rec in self.iteritems_ordered(): - yield rec.description, rec - - def iterkeys_ordered(self): - for k, rec in self.iteritems_ordered(): - yield k - - def itersizes_ordered(self): - for k, rec in self.iteritems_ordered(): - yield k, len(rec) - - def tostring(self): - d = {} - for k, rec in self.iteritems(): - d[k] = str(rec.seq) - return d - - @property - def totalsize(self): - return sum(size for k, size in self.itersizes()) - - @classmethod - def subseq(cls, fasta, start=None, stop=None, strand=None): - """ - Take Bio.SeqRecord and slice "start:stop" from it, does proper - index and error handling - """ - start = start - 1 if start is not None else 0 - stop = stop if stop is not None else len(fasta) - - if start < 0: - msg = "start ({0}) must > 0 of `{1}`. Reset to 1".format( - start + 1, fasta.id - ) - logger.error(msg) - start = 0 - - if stop > len(fasta): - msg = "stop ({0}) must be <= length of `{1}` ({2}). 
Reset to {2}.".format( - stop, fasta.id, len(fasta) - ) - logger.error(msg) - stop = len(fasta) - - seq = fasta.seq[start:stop] - - if strand in (-1, "-1", "-"): - seq = seq.reverse_complement() - - return seq - - def sequence(self, f, asstring=True): - """ - Emulate brentp's pyfasta/fasta.py sequence() methods - - take a feature and use the start/stop or exon_keys to return - the sequence from the assocatied fasta file: - - f: a feature - asstring: if true, return the sequence as a string - : if false, return as a biopython Seq - - >>> f = Fasta('tests/data/three_chrs.fasta') - >>> f.sequence({'start':1, 'stop':2, 'strand':1, 'chr': 'chr1'}) - 'AC' - >>> f.sequence({'start':1, 'stop':2, 'strand': -1, 'chr': 'chr1'}) - 'GT' - """ - - assert "chr" in f, "`chr` field required" - name = f["chr"] - - assert name in self, "feature: %s not in `%s`" % (f, self.filename) - - fasta = self[f["chr"]] - - seq = Fasta.subseq(fasta, f.get("start"), f.get("stop"), f.get("strand")) - - if asstring: - return str(seq) - - return seq - - -class ORFFinder(object): - """ - Class derived from https://gist.github.com/933737 - Original code written by David Winter (https://github.com/dwinter) - - Code writted to answer this challenge at Biostar: - http://biostar.stackexchange.com/questions/5902/ - - (Code includes improvements from Brad Chapman) - - Find the longest ORF in a given sequence - "seq" is a string, if "start" is not provided any codon can be the start of - and ORF. 
If muliple ORFs have the longest length the first one encountered - is printed - """ - - def __init__(self, seq, start=[], stop=["TAG", "TAA", "TGA"]): - self.seq = str(seq).upper() - self.start = start - self.stop = stop - # strand, frame, start, end, length; coordinates are 1-based - self.result = ["+", 0, 0, 0, 0] - self.longest = 0 - self.size = len(seq) - - def __str__(self): - # Format similar to getorf - strand, frame, start, end, length = self.result - start += 1 # 1-based coordinates - if strand == "-": - start, end = end, start - return "[{0} - {1}]".format(start, end) - - @property - def info(self): - strand, frame, start, end, length = self.result - return "\t".join(str(x) for x in (strand, frame, start, end)) - - def codons(self, frame): - """A generator that yields DNA in one codon blocks - "frame" counts for 0. This function yields a tuple (triplet, index) with - index relative to the original DNA sequence - """ - start = frame - while start + 3 <= self.size: - yield self.sequence[start : start + 3], start - start += 3 - - def scan_sequence(self, frame, direction): - """Search in one reading frame""" - orf_start = None - for c, index in self.codons(frame): - if ( - c not in self.stop - and (c in self.start or not self.start) - and orf_start is None - ): - orf_start = index - elif c in self.stop and orf_start is not None: - self._update_longest(orf_start, index + 3, direction, frame) - orf_start = None - - if orf_start is not None: - self._update_longest(orf_start, index + 3, direction, frame) - - def _update_longest(self, orf_start, index, direction, frame): - orf_end = index - L = orf_end - orf_start - if L > self.longest: - self.longest = L - self.result = [direction, frame, orf_start, orf_end, L] - - def get_longest_orf(self): - dirs = ("+", "-") - for direction in dirs: - self.sequence = self.seq - if direction == "-": - self.sequence = rc(self.sequence) - for frame in range(3): - self.scan_sequence(frame, direction) - - strand, frame, start, 
end, length = self.result - size = self.size - if strand == "-": - start, end = size - end, size - start - self.result[2:4] = start, end - - assert start <= end, self.result - if start == end: - return "N" - - orf = self.seq[start:end] - if strand == "-": - orf = rc(orf) - - assert len(orf) % 3 == 0 - - return orf - - -class SequenceInfo(object): - """ - Emulate output from `sequence_info`: - - File SUBAC32.contigs.fasta - - Number of sequences 80 - - Residue counts: - Number of A's 66266 31.36 % - Number of C's 40032 18.95 % - Number of G's 39145 18.53 % - Number of T's 65799 31.14 % - Number of N's 58 0.03 % - Total 211300 - - Sequence lengths: - Minimum 242 - Maximum 8398 - Average 2641.25 - N50 4791 - """ - - def __init__(self, filename, gapstats=False): - from jcvi.utils.cbook import SummaryStats - from jcvi.assembly.base import calculate_A50 - - f = Fasta(filename) - self.filename = filename - self.header = "File|#_seqs|#_reals|#_Ns|Total|Min|Max|N50".split("|") - if gapstats: - self.header += ["Gaps"] - self.nseqs = len(f) - sizes = [] - gaps = [] - na = nc = ng = nt = 0 - for k, s in f.iteritems(): - s = str(s.seq).upper() - sizes.append(len(s)) - na += s.count("A") - nc += s.count("C") - ng += s.count("G") - nt += s.count("T") - if gapstats: - gaps += list(self.iter_gap_len(s)) - self.real = real = na + nc + ng + nt - s = SummaryStats(sizes) - self.sum = s.sum - if gapstats: - self.gaps = len(gaps) - self.nn = self.sum - real - a50, l50, nn50 = calculate_A50(sizes) - self.min = s.min - self.max = s.max - self.mean = int(s.mean) - self.n50 = l50 - self.data = [ - self.filename, - self.nseqs, - self.real, - self.nn, - self.sum, - self.min, - self.max, - self.n50, - ] - if gapstats: - self.data += [self.gaps] - assert len(self.header) == len(self.data) - - def iter_gap_len(self, seq, mingap=10): - for gap, seq in groupby(seq, lambda x: x == "N"): - if not gap: - continue - gap_len = len(list(seq)) - if gap_len >= mingap: - yield len(list(seq)) - - -def rc(s): 
- _complement = str.maketrans("ATCGatcgNnXx", "TAGCtagcNnXx") - cs = s.translate(_complement) - return cs[::-1] - - -def main(): - - actions = ( - ("clean", "remove irregular chars in FASTA seqs"), - ("diff", "check if two fasta records contain same information"), - ( - "extract", - "given fasta file and seq id, retrieve the sequence in fasta format", - ), - ("fastq", "combine fasta and qual to create fastq file"), - ( - "format", - "trim accession id to the first space or switch id based on 2-column mapping file", - ), - ("filter", "filter the records by size"), - ("fromtab", "convert 2-column sequence file to FASTA format"), - ("gaps", "print out a list of gap sizes within sequences"), - ("gc", "plot G+C content distribution"), - ("identical", "given 2 fasta files, find all exactly identical records"), - ("ids", "generate a list of headers"), - ("info", "run `sequence_info` on fasta files"), - ("ispcr", "reformat paired primers into isPcr query format"), - ("join", "concatenate a list of seqs and add gaps in between"), - ("longestorf", "find longest orf for CDS fasta"), - ("pair", "sort paired reads to .pairs, rest to .fragments"), - ( - "pairinplace", - "starting from fragment.fasta, find if adjacent records can form pairs", - ), - ("pool", "pool a bunch of fastafiles together and add prefix"), - ("qual", "generate dummy .qual file based on FASTA file"), - ("random", "randomly take some records"), - ("sequin", "generate a gapped fasta file for sequin submission"), - ("simulate", "simulate random fasta file for testing"), - ( - "some", - "include or exclude a list of records (also performs on .qual file if available)", - ), - ("sort", "sort the records by IDs, sizes, etc."), - ("summary", "report the real no of bases and N's in fasta files"), - ("tidy", "normalize gap sizes and remove small components in fasta"), - ("translate", "translate CDS to proteins"), - ("trim", "given a cross_match screened fasta, trim the sequence"), - ("trimsplit", "split sequences at 
lower-cased letters"), - ("uniq", "remove records that are the same"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def simulate_one(fw, name, size): - """ - Simulate a random sequence with name and size - """ - seq = Seq("".join(choice("ACGT") for _ in range(size))) - s = SeqRecord(seq, id=name, description="Fake sequence") - SeqIO.write([s], fw, "fasta") - - -def simulate(args): - """ - %prog simulate idsfile - - Simulate random FASTA file based on idsfile, which is a two-column - tab-separated file with sequence name and size. - """ - p = OptionParser(simulate.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (idsfile,) = args - fp = open(idsfile) - fw = must_open(opts.outfile, "w") - for row in fp: - name, size = row.split() - size = int(size) - simulate_one(fw, name, size) - fp.close() - - -def gc(args): - """ - %prog gc fastafile - - Plot G+C content distribution. - """ - p = OptionParser(gc.__doc__) - p.add_argument("--binsize", default=500, type=int, help="Bin size to use") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - binsize = opts.binsize - allbins = [] - for name, seq in parse_fasta(fastafile): - for i in range(len(seq) / binsize): - atcnt = gccnt = 0 - for c in seq[i * binsize : (i + 1) * binsize].upper(): - if c in "AT": - atcnt += 1 - elif c in "GC": - gccnt += 1 - totalcnt = atcnt + gccnt - if totalcnt == 0: - continue - gcpct = gccnt * 100 / totalcnt - allbins.append(gcpct) - - from jcvi.graphics.base import asciiplot - from collections import Counter - - title = "Total number of bins={}".format(len(allbins)) - c = Counter(allbins) - x, y = zip(*sorted(c.items())) - asciiplot(x, y, title=title) - - -def trimsplit(args): - """ - %prog trimsplit fastafile - - Split sequences at lower-cased letters and stretch of Ns. This is useful - at cleaning up the low quality bases for the QUIVER output. 
- """ - from jcvi.utils.cbook import SummaryStats - - p = OptionParser(trimsplit.__doc__) - p.add_argument( - "--minlength", default=1000, type=int, help="Min length of contigs to keep" - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - minlength = opts.minlength - - fw = must_open(fastafile.rsplit(".", 1)[0] + ".split.fasta", "w") - ntotal = 0 - removed = [] - Ns = [] - for name, seq in parse_fasta(fastafile): - stretches = [] - ntotal += len(seq) - for lower, stretch in groupby(seq, key=lambda x: x.islower()): - stretch = "".join(stretch) - if lower or len(stretch) < minlength: - removed.append(len(stretch)) - continue - for isN, s in groupby(stretch, key=lambda x: x in "Nn"): - s = "".join(s) - if isN or len(s) < minlength: - Ns.append(len(s)) - continue - stretches.append(s) - for i, seq in enumerate(stretches): - id = "{0}_{1}".format(name.split("|")[0], i) - s = SeqRecord(Seq(seq), id=id, description="") - SeqIO.write([s], fw, "fasta") - fw.close() - - # Reporting - if removed: - logger.debug( - "Total bases removed: {0}".format(percentage(sum(removed), ntotal)) - ) - print(SummaryStats(removed), file=sys.stderr) - if Ns: - logger.debug("Total Ns removed: {0}".format(percentage(sum(Ns), ntotal))) - print(SummaryStats(Ns), file=sys.stderr) - - -def qual(args): - """ - %prog qual fastafile - - Generate dummy .qual file based on FASTA file. 
- """ - from jcvi.formats.sizes import Sizes - - p = OptionParser(qual.__doc__) - p.add_argument( - "--qv", default=31, type=int, help="Dummy qv score for extended bases" - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - sizes = Sizes(fastafile) - qvchar = str(opts.qv) - fw = must_open(opts.outfile, "w") - total = 0 - for s, slen in sizes.iter_sizes(): - print(">" + s, file=fw) - print(" ".join([qvchar] * slen), file=fw) - total += 1 - fw.close() - logger.debug("Written {0} records in `{1}`.".format(total, opts.outfile)) - - -def info(args): - """ - %prog info *.fasta - - Run `sequence_info` on FASTA files. Generate a report per file. - """ - p = OptionParser(info.__doc__) - p.add_argument( - "--gaps", default=False, action="store_true", help="Count number of gaps" - ) - p.set_table() - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) == 0: - sys.exit(not p.print_help()) - - fastafiles = args - data = [] - for f in fastafiles: - s = SequenceInfo(f, gapstats=opts.gaps) - data.append(s.data) - write_csv(s.header, data, sep=opts.sep, filename=opts.outfile, align=opts.align) - - -def fromtab(args): - """ - %prog fromtab tabfile fastafile - - Convert 2-column sequence file to FASTA format. One usage for this is to - generatea `adapters.fasta` for TRIMMOMATIC. 
- """ - p = OptionParser(fromtab.__doc__) - p.set_sep(sep=None) - p.add_argument( - "--noheader", default=False, action="store_true", help="Ignore first line" - ) - p.add_argument("--replace", help="Replace spaces in name to char") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - tabfile, fastafile = args - sep = opts.sep - replace = opts.replace - fp = must_open(tabfile) - fw = must_open(fastafile, "w") - nseq = 0 - if opts.noheader: - next(fp) - for row in fp: - row = row.strip() - if not row or row[0] == "#": - continue - - name, seq = row.rsplit(sep, 1) - if replace: - name = name.replace(" ", replace) - print(">{0}\n{1}".format(name, seq), file=fw) - nseq += 1 - fw.close() - - logger.debug("A total of {0} sequences written to `{1}`.".format(nseq, fastafile)) - - -def longestorf(args): - """ - %prog longestorf fastafile - - Find longest ORF for each sequence in fastafile. - """ - p = OptionParser(longestorf.__doc__) - p.add_argument("--ids", action="store_true", help="Generate table with ORF info") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - pf = fastafile.rsplit(".", 1)[0] - orffile = pf + ".orf.fasta" - idsfile = None - if opts.ids: - idsfile = pf + ".orf.ids" - fwids = open(idsfile, "w") - - f = Fasta(fastafile, lazy=True) - fw = must_open(orffile, "w") - before, after = 0, 0 - for name, rec in f.iteritems_ordered(): - cds = rec.seq - before += len(cds) - # Try all six frames - orf = ORFFinder(cds) - lorf = orf.get_longest_orf() - newcds = Seq(lorf) - after += len(newcds) - newrec = SeqRecord(newcds, id=name, description=rec.description) - SeqIO.write([newrec], fw, "fasta") - if idsfile: - print("\t".join((name, orf.info)), file=fwids) - - fw.close() - if idsfile: - fwids.close() - - logger.debug( - "Longest ORFs written to `{0}` ({1}).".format( - orffile, percentage(after, before) - ) - ) - - return orffile - - -def ispcr(args): - """ - %prog 
ispcr fastafile - - Reformat paired primers into isPcr query format, which is three column - format: name, forward, reverse - """ - p = OptionParser(ispcr.__doc__) - p.add_argument( - "-r", - dest="rclip", - default=1, - type=int, - help="pair ID is derived from rstrip N chars", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - ispcrfile = fastafile + ".isPcr" - fw = open(ispcrfile, "w") - - N = opts.rclip - strip_name = lambda x: x[:-N] if N else str - - npairs = 0 - fastaiter = SeqIO.parse(fastafile, "fasta") - for a, b in grouper(fastaiter, 2): - - aid, bid = [strip_name(x) for x in (a.id, b.id)] - assert aid == bid, "Name mismatch {0}".format((aid, bid)) - - print("\t".join((aid, str(a.seq), str(b.seq))), file=fw) - npairs += 1 - - fw.close() - logger.debug("A total of {0} pairs written to `{1}`.".format(npairs, ispcrfile)) - - -def parse_fasta(infile, upper=False): - """ - parse a fasta-formatted file and returns header - can be a fasta file that contains multiple records. 
- """ - try: - fp = must_open(infile) - except: - fp = infile - # keep header - fa_iter = (x[1] for x in groupby(fp, lambda row: row[0] == ">")) - for header in fa_iter: - header = next(header) - if header[0] != ">": - continue - # drop '>' - header = header.strip()[1:] - # stitch the sequence lines together and make into upper case - seq = "".join(s.strip() for s in next(fa_iter)) - if upper: - seq = seq.upper() - yield header, seq - - -def iter_clean_fasta(fastafile): - for header, seq in parse_fasta(fastafile): - seq = "".join(x for x in seq if x in string.ascii_letters or x == "*") - yield header, seq - - -def iter_canonical_fasta(fastafile): - canonical = "ACGTN" - totalbad = 0 - for header, seq in parse_fasta(fastafile): - badcounts = sum(1 for x in seq if x not in canonical) - seq = "".join((x if x in canonical else "N") for x in seq) - totalbad += badcounts - yield header, seq - - logger.debug("Total bad char: {0}".format(totalbad)) - - -def fancyprint(fw, seq, width=60, chunk=10): - assert width % chunk == 0 - nchunks = width / chunk - seqlen = len(seq) - maxchar = len(str(seqlen)) - - s = ["".join(x) for x in grouper(seq, chunk, fillvalue="")] - s = [" ".join(x) for x in grouper(s, nchunks, fillvalue="")] - for a, b in zip(range(1, len(seq), width), s): - b = b.rstrip() - a = str(a).rjust(maxchar, " ") - print(" ".join((a, b)), file=fw) - - -def clean(args): - """ - %prog clean fastafile - - Remove irregular chars in FASTA seqs. 
- """ - p = OptionParser(clean.__doc__) - p.add_argument( - "--fancy", default=False, action="store_true", help="Pretty print the sequence" - ) - p.add_argument( - "--canonical", default=False, action="store_true", help="Use only acgtnACGTN" - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - fw = must_open(opts.outfile, "w") - if opts.fancy: - for header, seq in iter_clean_fasta(fastafile): - print(">" + header, file=fw) - fancyprint(fw, seq) - - return 0 - - iterator = iter_canonical_fasta if opts.canonical else iter_clean_fasta - - for header, seq in iterator(fastafile): - seq = Seq(seq) - s = SeqRecord(seq, id=header, description="") - SeqIO.write([s], fw, "fasta") - - -def translate(args): - """ - %prog translate cdsfasta - - Translate CDS to proteins. The tricky thing is that sometimes the CDS - represents a partial gene, therefore disrupting the frame of the protein. - Check all three frames to get a valid translation. - """ - from jcvi.utils.cbook import gene_name - - transl_tables = [str(x) for x in range(1, 25)] - p = OptionParser(translate.__doc__) - p.add_argument( - "--ids", - default=False, - action="store_true", - help="Create .ids file with the complete/partial/gaps label", - ) - p.add_argument( - "--longest", - default=False, - action="store_true", - help="Find the longest ORF from each input CDS", - ) - p.add_argument( - "--table", - default=1, - choices=transl_tables, - help="Specify translation table to use", - ) - p.add_argument( - "--strip_names", - default=False, - action="store_true", - help="Strip alternative splicing (e.g. 
At5g06540.1 -> At5g06540)", - ) - p.add_argument( - "--unique", - default=False, - action="store_true", - help="Ensure the output FASTA contains unique identifiers", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - strip_names = opts.strip_names - unique = opts.unique - - if len(args) != 1: - sys.exit(not p.print_help()) - - (cdsfasta,) = args - if opts.longest: - cdsfasta = longestorf([cdsfasta]) - - f = Fasta(cdsfasta, lazy=True) - outfile = opts.outfile - fw = must_open(outfile, "w") - - if opts.ids: - idsfile = cdsfasta.rsplit(".", 1)[0] + ".ids" - ids = open(idsfile, "w") - else: - ids = None - - five_prime_missing = three_prime_missing = 0 - contain_ns = complete = cannot_translate = total = 0 - - seen = set() - grand_total = 0 - for name, rec in f.iteritems_ordered(): - grand_total += 1 - - if strip_names: - name = gene_name(name) - - if unique and name in seen: - continue - - cds = rec.seq - cdslen = len(cds) - peplen = cdslen // 3 - total += 1 - - # Try all three frames - pep = "" - for i in range(3): - newcds = cds[i : i + peplen * 3] - newpep = newcds.translate(table=opts.table) - if len(newpep.split("*")[0]) > len(pep.split("*")[0]): - pep = newpep - - labels = [] - if "*" in pep.rstrip("*"): - logger.error("{0} cannot translate".format(name)) - cannot_translate += 1 - labels.append("cannot_translate") - - contains_start = pep.startswith("M") - contains_stop = pep.endswith("*") - contains_ns = "X" in pep - start_ns = pep.startswith("X") - end_ns = pep.endswith("X") - - if not contains_start: - five_prime_missing += 1 - labels.append("five_prime_missing") - if not contains_stop: - three_prime_missing += 1 - labels.append("three_prime_missing") - if contains_ns: - contain_ns += 1 - labels.append("contain_ns") - if contains_start and contains_stop: - complete += 1 - labels.append("complete") - if start_ns: - labels.append("start_ns") - if end_ns: - labels.append("end_ns") - - if ids: - print("\t".join((name, ",".join(labels))), file=ids) - - 
peprec = SeqRecord(pep, id=name, description=rec.description) - SeqIO.write([peprec], fw, "fasta") - fw.flush() - seen.add(name) - - print( - "Complete gene models: {0}".format(percentage(complete, total)), file=sys.stderr - ) - print( - "Missing 5`-end: {0}".format(percentage(five_prime_missing, total)), - file=sys.stderr, - ) - print( - "Missing 3`-end: {0}".format(percentage(three_prime_missing, total)), - file=sys.stderr, - ) - print("Contain Ns: {0}".format(percentage(contain_ns, total)), file=sys.stderr) - - if cannot_translate: - print( - "Cannot translate: {0}".format(percentage(cannot_translate, total)), - file=sys.stderr, - ) - - fw.close() - - logger.debug( - "Total records: {}, Unique records (strip_names={}): {}".format( - grand_total, strip_names, len(seen) - ) - ) - - return cdsfasta, outfile - - -def filter(args): - """ - %prog filter fastafile 100 - - Filter the FASTA file to contain records with size >= or <= certain cutoff. - """ - p = OptionParser(filter.__doc__) - p.add_argument( - "--less", - default=False, - action="store_true", - help="filter the sizes < certain cutoff [default: >=]", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, cutoff = args - try: - cutoff = int(cutoff) - except ValueError: - sys.exit(not p.print_help()) - - f = Fasta(fastafile, lazy=True) - - fw = must_open(opts.outfile, "w") - for name, rec in f.iteritems_ordered(): - - if opts.less and len(rec) >= cutoff: - continue - - if (not opts.less) and len(rec) < cutoff: - continue - - SeqIO.write([rec], fw, "fasta") - fw.flush() - - return fw.name - - -def pool(args): - """ - %prog pool fastafiles > pool.fasta - - Pool a bunch of FASTA files, and add prefix to each record based on - filenames. File names are simplified to longest unique prefix to avoid - collisions after getting shortened. 
- """ - from jcvi.formats.base import longest_unique_prefix - - p = OptionParser(pool.__doc__) - p.add_argument("--sep", default=".", help="Separator between prefix and name") - p.add_argument( - "--sequential", default=False, action="store_true", help="Add sequential IDs" - ) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - for fastafile in args: - pf = longest_unique_prefix(fastafile, args) - print(fastafile, "=>", pf, file=sys.stderr) - prefixopt = "--prefix={0}{1}".format(pf, opts.sep) - format_args = [fastafile, "stdout", prefixopt] - if opts.sequential: - format_args += ["--sequential=replace"] - format(format_args) - - -def ids(args): - """ - %prog ids fastafiles - - Generate the FASTA headers without the '>'. - """ - p = OptionParser(ids.__doc__) - p.add_argument( - "--until", default=None, help="Truncate the name and description at words" - ) - p.add_argument( - "--description", - default=False, - action="store_true", - help="Generate a second column with description", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - until = opts.until - fw = must_open(opts.outfile, "w") - for row in must_open(args): - if row[0] == ">": - row = row[1:].rstrip() - if until: - row = row.split(until)[0] - - atoms = row.split(None, 1) - if opts.description: - outrow = "\t".join(atoms) - else: - outrow = atoms[0] - print(outrow, file=fw) - - fw.close() - - -def sort(args): - """ - %prog sort fastafile - - Sort a list of sequences and output with sorted IDs, etc. 
- """ - p = OptionParser(sort.__doc__) - p.add_argument( - "--sizes", default=False, action="store_true", help="Sort by decreasing size" - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - (fastafile,) = args - sortedfastafile = fastafile.rsplit(".", 1)[0] + ".sorted.fasta" - - f = Fasta(fastafile, index=False) - fw = must_open(sortedfastafile, "w") - if opts.sizes: - # Sort by decreasing size - sortlist = sorted(f.itersizes(), key=lambda x: (-x[1], x[0])) - logger.debug( - "Sort by size: max: {0}, min: {1}".format(sortlist[0], sortlist[-1]) - ) - sortlist = [x for x, s in sortlist] - else: - sortlist = sorted(f.iterkeys()) - - for key in sortlist: - rec = f[key] - SeqIO.write([rec], fw, "fasta") - - logger.debug("Sorted file written to `{0}`.".format(sortedfastafile)) - fw.close() - - return sortedfastafile - - -def join(args): - """ - %prog join fastafile [phasefile] - - Make AGP file for a bunch of sequences, and add gaps between, and then build - the joined fastafile. This is useful by itself, but with --oo option this - can convert the .oo (BAMBUS output) into AGP and a joined fasta. - - Phasefile is optional, but must contain two columns - BAC and phase (0, 1, 2, 3). 
- """ - from jcvi.formats.agp import OO, Phases, build - from jcvi.formats.sizes import Sizes - - p = OptionParser(join.__doc__) - p.add_argument("--newid", default=None, help="New sequence ID") - p.add_argument( - "--gapsize", - default=100, - type=int, - help="Number of N's in between the sequences", - ) - p.add_argument( - "--gaptype", default="contig", help="Gap type to use in the AGP file" - ) - p.add_argument( - "--evidence", default="", help="Linkage evidence to report in the AGP file" - ) - p.add_argument("--oo", help="Use .oo file generated by bambus") - opts, args = p.parse_args(args) - - nargs = len(args) - if nargs not in (1, 2): - sys.exit(not p.print_help()) - - if nargs == 2: - fastafile, phasefile = args - phases = DictFile(phasefile) - phases = dict((a, Phases[int(b)]) for a, b in phases.items()) - else: - (fastafile,) = args - phases = {} - - sizes = Sizes(fastafile) - prefix = fastafile.rsplit(".", 1)[0] - agpfile = prefix + ".agp" - newid = opts.newid - oo = opts.oo - - o = OO(oo, sizes.mapping) - - if oo: - seen = o.contigs - # The leftover contigs not in the oo file - logger.debug( - "A total of {0} contigs ({1} in `{2}`)".format(len(sizes), len(seen), oo) - ) - - for ctg, size in sizes.iter_sizes(): - if ctg in seen: - continue - o.add(ctg, ctg, size) - - else: - if newid: - for ctg, size in sizes.iter_sizes(): - o.add(newid, ctg, size) - else: - for scaffold_number, (ctg, size) in enumerate(sizes.iter_sizes()): - object_id = "scaffold{0:03d}".format(scaffold_number + 1) - o.add(object_id, ctg, size) - - fw = open(agpfile, "w") - o.write_AGP( - fw, - gapsize=opts.gapsize, - gaptype=opts.gaptype, - evidence=opts.evidence, - phases=phases, - ) - fw.close() - - joinedfastafile = prefix + ".joined.fasta" - build([agpfile, fastafile, joinedfastafile]) - - return joinedfastafile - - -def summary(args): - """ - %prog summary *.fasta - - Report real bases and N's in fastafiles in a tabular report - """ - from natsort import natsort_key - - p = 
OptionParser(summary.__doc__) - p.add_argument( - "--suffix", default="Mb", help="make the base pair counts human readable" - ) - p.add_argument("--ids", help="write the ids that have >= 50%% N's") - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) == 0: - sys.exit(not p.print_help()) - - idsfile = opts.ids - header = "Seqid Real N's Total %_real".split() - if idsfile: - idsfile = open(idsfile, "w") - nids = 0 - - data = [] - for fastafile in args: - for rec in SeqIO.parse(must_open(fastafile), "fasta"): - seqlen = len(rec) - nns = rec.seq.count("n") + rec.seq.count("N") - reals = seqlen - nns - pct = reals * 100.0 / seqlen - pctreal = "{0:.1f}%".format(pct) - if idsfile and pct < 50: - nids += 1 - print(rec.id, file=idsfile) - - data.append((rec.id, reals, nns, seqlen, pctreal)) - - data.sort(key=natsort_key) - ids, reals, nns, seqlen, pctreal = zip(*data) - reals = sum(reals) - nns = sum(nns) - seqlen = sum(seqlen) - pctreal = "{0:.1f}%".format(reals * 100.0 / seqlen) - data.append(("Total", reals, nns, seqlen, pctreal)) - - write_csv(header, data, sep=" ", filename=opts.outfile, thousands=True) - if idsfile: - logger.debug( - "A total of {0} ids >= 50% N's written to {1}.".format(nids, idsfile.name) - ) - idsfile.close() - - return reals, nns, seqlen - - -def format(args): - """ - %prog format infasta outfasta - - Reformat FASTA file and also clean up names. 
- """ - sequential_choices = ("replace", "prefix", "suffix") - p = OptionParser(format.__doc__) - p.add_argument( - "--pairs", - default=False, - action="store_true", - help="Add trailing /1 and /2 for interleaved pairs", - ) - p.add_argument( - "--sequential", - default=None, - choices=sequential_choices, - help="Add sequential IDs", - ) - p.add_argument( - "--sequentialoffset", default=0, type=int, help="Sequential IDs start at" - ) - p.add_argument( - "--pad0", default=0, type=int, help="Pad a few zeros in front of sequential" - ) - p.add_argument( - "--gb", - default=False, - action="store_true", - help="For Genbank ID, get the accession", - ) - p.add_argument("--sep", default=None, help="Split description by certain symbol") - p.add_argument( - "--index", - default=0, - type=int, - help="Extract i-th field after split with --sep", - ) - p.add_argument( - "--noversion", - default=False, - action="store_true", - help="Remove the gb trailing version", - ) - p.add_argument("--prefix", help="Prepend prefix to sequence ID") - p.add_argument("--suffix", help="Append suffix to sequence ID") - p.add_argument( - "--template", - default=False, - action="store_true", - help="Extract `template=aaa dir=x library=m` to `m-aaa/x`", - ) - p.add_argument("--switch", help="Switch ID from two-column file") - p.add_argument( - "--annotation", - help="Add functional annotation from two-column file ('ID <--> Annotation')", - ) - p.add_argument("--ids", help="Generate ID conversion table") - p.add_argument( - "--upper", - default=False, - action="store_true", - help="Convert sequence to upper case", - ) - p.add_argument( - "--nodesc", - default=False, - action="store_true", - help="Remove description after identifier", - ) - p.add_argument( - "--minlength", default=0, type=int, help="Minimum sequence length to keep" - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - infasta, outfasta = args - gb = opts.gb - pairs = opts.pairs - prefix = 
opts.prefix - suffix = opts.suffix - noversion = opts.noversion - sequential = opts.sequential - sequentialoffset = opts.sequentialoffset - sep = opts.sep - idx = opts.index - mapfile = opts.switch - annotfile = opts.annotation - desc = not opts.nodesc - idsfile = opts.ids - idsfile = open(idsfile, "w") if idsfile else None - upper = opts.upper - minlength = opts.minlength - - if mapfile: - mapping = DictFile(mapfile, delimiter="\t") - if annotfile: - annotation = DictFile(annotfile, delimiter="\t") - - fp = SeqIO.parse(must_open(infasta), "fasta") - fw = must_open(outfasta, "w") - nremoved = 0 - for i, rec in enumerate(fp): - if len(rec) < minlength: - nremoved += 1 - continue - origid = rec.id - description = rec.description.replace(origid, "").strip() - if sep: - rec.id = rec.description.split(sep)[idx].strip() - if gb: - # gi|262233616|gb|GU123895.1| Coffea arabica clone BAC - atoms = rec.id.split("|") - if len(atoms) >= 3: - rec.id = atoms[3] - elif len(atoms) == 2: - rec.id = atoms[1] - if pairs: - id = "/1" if (i % 2 == 0) else "/2" - rec.id += id - if noversion: - rec.id = rec.id.rsplit(".", 1)[0] - if sequential: - rec.id = "{0:0{1}d}".format(sequentialoffset, opts.pad0) - if sequential == "prefix": - rec.id = "{0}-{1}".format(rec.id, origid) - elif sequential == "suffix": - rec.id = "{0}-{1}".format(origid, rec.id) - sequentialoffset += 1 - if opts.template: - template, dir, lib = [ - x.split("=")[-1] for x in rec.description.split()[1:4] - ] - rec.id = "{0}-{1}/{2}".format(lib, template, dir) - if mapfile: - if origid in mapping: - rec.id = mapping[origid] - else: - logger.error( - "{0} not found in `{1}`. 
ID unchanged.".format(origid, mapfile) - ) - if prefix: - rec.id = prefix + rec.id - if suffix: - rec.id += suffix - if annotfile: - rec.description = ( - annotation.get(origid, "") - if not mapfile - else annotation.get(rec.id, "") - ) - else: - rec.description = description if desc else "" - if idsfile: - print("\t".join((origid, rec.id)), file=idsfile) - if upper: - rec.seq = rec.seq.upper() - - SeqIO.write(rec, fw, "fasta") - - if idsfile: - logger.debug("Conversion table written to `{0}`.".format(idsfile.name)) - idsfile.close() - - if nremoved: - logger.debug( - "Removed {} sequences with length < {}".format(nremoved, minlength) - ) - - -def print_first_difference( - arec, brec, ignore_case=False, ignore_N=False, rc=False, report_match=True -): - """ - Returns the first different nucleotide in two sequence comparisons - runs both Plus and Minus strand - """ - plus_match = _print_first_difference( - arec, - brec, - ignore_case=ignore_case, - ignore_N=ignore_N, - report_match=report_match, - ) - if rc and not plus_match: - logger.debug("trying reverse complement of %s" % brec.id) - brec.seq = brec.seq.reverse_complement() - minus_match = _print_first_difference( - arec, - brec, - ignore_case=ignore_case, - ignore_N=ignore_N, - report_match=report_match, - ) - return minus_match - - else: - return plus_match - - -def _print_first_difference( - arec, brec, ignore_case=False, ignore_N=False, report_match=True -): - """ - Returns the first different nucleotide in two sequence comparisons - """ - aseq, bseq = arec.seq, brec.seq - asize, bsize = len(aseq), len(bseq) - - matched = True - for i, (a, b) in enumerate(zip_longest(aseq, bseq)): - if ignore_case and None not in (a, b): - a, b = a.upper(), b.upper() - - if ignore_N and ("N" in (a, b) or "X" in (a, b)): - continue - - if a != b: - matched = False - break - - if i + 1 == asize and matched: - if report_match: - printf("[green]Two sequences match") - match = True - else: - printf("[red]Two sequences do not 
match") - - snippet_size = 20 # show the context of the difference - - printf("[red]Sequence start to differ at position {}:".format(i + 1)) - - begin = max(i - snippet_size, 0) - aend = min(i + snippet_size, asize) - bend = min(i + snippet_size, bsize) - - printf("[red]{}|{}".format(aseq[begin:i], aseq[i:aend])) - printf("[red]{}|{}".format(bseq[begin:i], bseq[i:bend])) - match = False - - return match - - -def diff(args): - """ - %prog diff afasta bfasta - - print out whether the records in two fasta files are the same - """ - from jcvi.utils.table import banner - - p = OptionParser(diff.__doc__) - p.add_argument( - "--ignore_case", - default=False, - action="store_true", - help="ignore case when comparing sequences", - ) - p.add_argument( - "--ignore_N", - default=False, - action="store_true", - help="ignore N and X's when comparing sequences", - ) - p.add_argument( - "--ignore_stop", - default=False, - action="store_true", - help="ignore stop codon when comparing sequences", - ) - p.add_argument( - "--rc", - default=False, - action="store_true", - help="also consider reverse complement", - ) - p.add_argument( - "--quiet", - default=False, - action="store_true", - help="don't output comparison details", - ) - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - afasta, bfasta = args - - afastan = len(Fasta(afasta)) - bfastan = len(Fasta(bfasta)) - - if afastan == bfastan: - printf( - "[green]Two sets contain the same number of sequences ({}, {})".format( - afastan, bfastan - ) - ) - else: - printf( - "[red]Two sets contain different number of sequences ({}, {})".format( - afastan, bfastan - ) - ) - - ah = SeqIO.parse(afasta, "fasta") - bh = SeqIO.parse(bfasta, "fasta") - - problem_ids = [] - for arec, brec in zip(ah, bh): - - if opts.ignore_stop: - arec.seq = arec.seq.rstrip("*") - brec.seq = brec.seq.rstrip("*") - - asize, bsize = len(arec), len(brec) - - if not opts.quiet: - print(banner(str(arec), [str(brec)])) - if 
asize == bsize: - printf("[green]Two sequence size match ({})".format(asize)) - else: - printf( - "[red]Two sequence size do not match ({}, {}})".format(asize, bsize) - ) - - # print out the first place the two sequences diff - fd = print_first_difference( - arec, - brec, - ignore_case=opts.ignore_case, - ignore_N=opts.ignore_N, - rc=opts.rc, - report_match=not opts.quiet, - ) - if not fd: - logger.error("Two sets of sequences differ at `{0}`".format(arec.id)) - problem_ids.append( - "\t".join(str(x) for x in (arec.id, asize, bsize, abs(asize - bsize))) - ) - - if problem_ids: - print("A total of {0} records mismatch.".format(len(problem_ids))) - fw = must_open("Problems.ids", "w") - print("\n".join(problem_ids), file=fw) - - -def hash_fasta( - seq, ignore_case=False, ignore_N=False, ignore_stop=False, checksum="MD5" -): - """ - Generates checksum of input sequence element - """ - if ignore_stop: - seq = seq.rstrip("*") - if ignore_case: - seq = seq.upper() - if ignore_N: - if not all(c.upper() in "ATGCN" for c in seq): - seq = re.sub("X", "", seq) - else: - seq = re.sub("N", "", seq) - - return seguid(seq) if checksum == "GCG" else hashlib.sha256(seq) - - -def identical(args): - """ - %prog identical *.fasta - - Given multiple fasta files, find all the exactly identical records - based on the computed md5 hexdigest or GCG checksum of each sequence. - - Output is an N + 1 column file (where N = number of input fasta files). - If there are duplicates within a given fasta file, they will all be - listed out in the same row separated by a comma. 
- - Example output: - --------------------------- - tta1.fsa tta2.fsa - t0 2131 na - t1 3420 na - t2 3836,3847 852 - t3 148 890 - t4 584 614 - t5 623 684 - t6 1281 470 - t7 3367 na - """ - from jcvi.utils.cbook import AutoVivification - - allowed_checksum = ["MD5", "GCG"] - - p = OptionParser(identical.__doc__) - p.add_argument( - "--ignore_case", - default=False, - action="store_true", - help="ignore case when comparing sequences", - ) - p.add_argument( - "--ignore_N", - default=False, - action="store_true", - help="ignore N and X's when comparing sequences", - ) - p.add_argument( - "--ignore_stop", - default=False, - action="store_true", - help="ignore stop codon when comparing sequences", - ) - p.add_argument( - "--output_uniq", - default=False, - action="store_true", - help="output uniq sequences in FASTA format", - ) - p.add_argument( - "--checksum", - default="MD5", - choices=allowed_checksum, - help="specify checksum method", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) == 0: - sys.exit(not p.print_help()) - - d = AutoVivification() - files = [] - for fastafile in args: - f = Fasta(fastafile) - pf = fastafile.rsplit(".", 1)[0] - files.append(pf) - - logger.debug("Hashing individual elements of {0}".format(fastafile)) - for name, rec in f.iteritems_ordered(): - seq = re.sub(" ", "", str(rec.seq)) - hashed = hash_fasta( - seq, - ignore_case=opts.ignore_case, - ignore_N=opts.ignore_N, - ignore_stop=opts.ignore_stop, - checksum=opts.checksum, - ) - if not d[hashed]: - d[hashed]["seq"] = seq - d[hashed]["count"] = 0 - if not d[hashed]["names"][pf]: - d[hashed]["names"][pf] = set() - d[hashed]["names"][pf].add(name) - - fw = must_open(opts.outfile, "w") - if opts.output_uniq: - uniqfile = "_".join(files) + ".uniq.fasta" - uniqfw = must_open(uniqfile, "w") - - header = "\t".join(str(x) for x in args) - print("\t".join(str(x) for x in ("", header)), file=fw) - for idx, hashed in enumerate(d.keys()): - line = [] - 
line.append("t{0}".format(idx)) - for fastafile in files: - if fastafile in d[hashed]["names"].keys(): - line.append(",".join(d[hashed]["names"][fastafile])) - if opts.output_uniq: - d[hashed]["count"] += len(d[hashed]["names"][fastafile]) - else: - line.append("na") - print("\t".join(line), file=fw) - - if opts.output_uniq: - seqid = "\t".join(str(x) for x in ("t{0}".format(idx), d[hashed]["count"])) - rec = SeqRecord(Seq(d[hashed]["seq"]), id=seqid, description="") - SeqIO.write([rec], uniqfw, "fasta") - - fw.close() - if opts.output_uniq: - logger.debug("Uniq sequences written to `{0}`".format(uniqfile)) - uniqfw.close() - - -QUALSUFFIX = ".qual" - - -def get_qual(fastafile, suffix=QUALSUFFIX, check=True): - """ - Check if current folder contains a qual file associated with the fastafile - """ - qualfile1 = fastafile.rsplit(".", 1)[0] + suffix - qualfile2 = fastafile + suffix - - if check: - if op.exists(qualfile1): - logger.debug("qual file `{0}` found".format(qualfile1)) - return qualfile1 - elif op.exists(qualfile2): - logger.debug("qual file `{0}` found".format(qualfile2)) - return qualfile2 - else: - return None - - return qualfile1 - - -def some(args): - """ - %prog some fastafile listfile outfastafile - - generate a subset of fastafile, based on a list - """ - from jcvi.utils.cbook import gene_name - - p = OptionParser(some.__doc__) - p.add_argument( - "--exclude", - default=False, - action="store_true", - help="Output sequences not in the list file", - ) - p.add_argument( - "--no_strip_names", - default=False, - action="store_true", - help="Do not strip alternative splicing (e.g. 
At5g06540.1 -> At5g06540)", - ) - p.add_argument( - "--uniprot", default=False, action="store_true", help="Header is from uniprot" - ) - - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(p.print_help()) - - strip_names = not opts.no_strip_names - fastafile, listfile, outfastafile = args - outfastahandle = must_open(outfastafile, "w") - qualfile = get_qual(fastafile) - - names = set(open(listfile).read().split()) - if qualfile: - outqualfile = outfastafile + ".qual" - outqualhandle = open(outqualfile, "w") - parser = iter_fasta_qual(fastafile, qualfile) - else: - parser = SeqIO.parse(fastafile, "fasta") - - recs = [] - seen = set() - for rec in parser: - name = rec.id - if strip_names: - name = gene_name(name) - - if name in seen: # Only report one instance - continue - - if opts.uniprot: - name = name.split("|")[-1] - - if opts.exclude: - if name in names: - continue - else: - if name not in names: - continue - - recs.append(rec) - seen.add(name) - - for rec in recs: - SeqIO.write([rec], outfastahandle, "fasta") - if qualfile: - SeqIO.write([rec], outqualhandle, "qual") - - logger.debug("A total of %d records written to `%s`" % (len(recs), outfastafile)) - - -def fastq(args): - """ - %prog fastq fastafile - - Generate fastqfile by combining fastafile and fastafile.qual. - Also check --qv option to use a default qv score. 
- """ - from jcvi.formats.fastq import FastqLite - - p = OptionParser(fastq.__doc__) - p.add_argument("--qv", type=int, help="Use generic qv value") - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - fastqfile = fastafile.rsplit(".", 1)[0] + ".fastq" - fastqhandle = open(fastqfile, "w") - num_records = 0 - - if opts.qv is not None: - qv = chr(ord("!") + opts.qv) - logger.debug("QV char '{0}' ({1})".format(qv, opts.qv)) - else: - qv = None - - if qv: - f = Fasta(fastafile, lazy=True) - for name, rec in f.iteritems_ordered(): - r = FastqLite("@" + name, str(rec.seq).upper(), qv * len(rec.seq)) - print(r, file=fastqhandle) - num_records += 1 - - else: - qualfile = get_qual(fastafile) - for rec in iter_fasta_qual(fastafile, qualfile): - SeqIO.write([rec], fastqhandle, "fastq") - num_records += 1 - - fastqhandle.close() - logger.debug("A total of %d records written to `%s`" % (num_records, fastqfile)) - - -def pair(args): - """ - %prog pair fastafile - - Generate .pairs.fasta and .fragments.fasta by matching records - into the pairs and the rest go to fragments. - """ - p = OptionParser(pair.__doc__) - p.set_sep( - sep=None, - help="Separator in name to reduce to clone id" - + "e.g. 
GFNQ33242/1 use /, BOT01-2453H.b1 use .", - ) - p.add_argument( - "-m", - dest="matepairs", - default=False, - action="store_true", - help="generate .matepairs file [often used for Celera Assembler]", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - (fastafile,) = args - qualfile = get_qual(fastafile) - - prefix = fastafile.rsplit(".", 1)[0] - pairsfile = prefix + ".pairs.fasta" - fragsfile = prefix + ".frags.fasta" - pairsfw = open(pairsfile, "w") - fragsfw = open(fragsfile, "w") - - # TODO: need a class to handle coupled fasta and qual iterating and indexing - if opts.matepairs: - matepairsfile = prefix + ".matepairs" - matepairsfw = open(matepairsfile, "w") - - if qualfile: - pairsqualfile = pairsfile + ".qual" - pairsqualhandle = open(pairsqualfile, "w") - fragsqualfile = fragsfile + ".qual" - fragsqualhandle = open(fragsqualfile, "w") - - f = Fasta(fastafile) - if qualfile: - q = SeqIO.index(qualfile, "qual") - - all_keys = list(f.keys()) - all_keys.sort() - sep = opts.sep - - if sep: - key_fun = lambda x: x.split(sep, 1)[0] - else: - key_fun = lambda x: x[:-1] - - for key, variants in groupby(all_keys, key=key_fun): - variants = list(variants) - paired = len(variants) == 2 - - if paired and opts.matepairs: - print("\t".join(("%s/1" % key, "%s/2" % key)), file=matepairsfw) - - fw = pairsfw if paired else fragsfw - if qualfile: - qualfw = pairsqualhandle if paired else fragsqualhandle - - for i, var in enumerate(variants): - rec = f[var] - if qualfile: - recqual = q[var] - newid = "%s/%d" % (key, i + 1) - - rec.id = newid - rec.description = "" - SeqIO.write([rec], fw, "fasta") - if qualfile: - recqual.id = newid - recqual.description = "" - SeqIO.write([recqual], qualfw, "qual") - - logger.debug("sequences written to `%s` and `%s`" % (pairsfile, fragsfile)) - if opts.matepairs: - logger.debug("mates written to `%s`" % matepairsfile) - - -def pairinplace(args): - """ - %prog pairinplace bulk.fasta - - Pair up the 
records in bulk.fasta by comparing the names for adjacent - records. If they match, print to bulk.pairs.fasta, else print to - bulk.frags.fasta. - """ - p = OptionParser(pairinplace.__doc__) - p.add_argument( - "-r", - dest="rclip", - default=1, - type=int, - help="pair ID is derived from rstrip N chars", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - base = op.basename(fastafile).split(".")[0] - - frags = base + ".frags.fasta" - pairs = base + ".pairs.fasta" - if fastafile.endswith(".gz"): - frags += ".gz" - pairs += ".gz" - - fragsfw = must_open(frags, "w") - pairsfw = must_open(pairs, "w") - - N = opts.rclip - strip_name = lambda x: x[:-N] if N else str - - skipflag = False # controls the iterator skip - fastaiter = SeqIO.parse(fastafile, "fasta") - for a, b in pairwise(fastaiter): - - aid, bid = [strip_name(x) for x in (a.id, b.id)] - - if skipflag: - skipflag = False - continue - - if aid == bid: - SeqIO.write([a, b], pairsfw, "fasta") - skipflag = True - else: - SeqIO.write([a], fragsfw, "fasta") - - # don't forget the last one, when b is None - if not skipflag: - SeqIO.write([a], fragsfw, "fasta") - - logger.debug("Reads paired into `%s` and `%s`" % (pairs, frags)) - - -def extract(args): - """ - %prog extract fasta query - - extract query out of fasta file, query needs to be in the form of - "seqname", or "seqname:start-stop", or "seqname:start-stop:-" - """ - p = OptionParser(extract.__doc__) - p.add_argument("--newname", help="Use this new name instead") - p.add_argument( - "--include", - default=False, - action="store_true", - help="search description line for match", - ) - p.add_argument( - "--exclude", - default=False, - action="store_true", - help="exclude description that matches", - ) - p.add_argument( - "--idonly", default=False, action="store_true", help="Only search identifier" - ) - p.add_argument( - "--bed", - default=None, - help="path to bed file to guide extraction by 
matching seqname", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) == 2: - fastafile, query = args - elif len(args) == 1 and opts.bed: - (fastafile,) = args - bedaccns = Bed(opts.bed).accns - else: - sys.exit(p.print_help()) - - if opts.bed: - fw = must_open(opts.outfile, "w") - f = Fasta(fastafile) - for accn in bedaccns: - try: - rec = f[accn] - except: - logger.error("{0} not found in {1}".format(accn, fastafile)) - continue - SeqIO.write([rec], fw, "fasta") - return fw.name - - atoms = query.split(":") - key = atoms[0] - - assert len(atoms) <= 3, "cannot have more than two ':' in your query" - - pos = "" - if len(atoms) in (2, 3): - pos = atoms[1] - - strand = "+" - if len(atoms) == 3: - strand = atoms[2] - - assert strand in ("+", "-"), "strand must be either '+' or '-'" - - feature = dict(chr=key) - - if "-" in pos: - start, stop = pos.split("-") - try: - start, stop = int(start), int(stop) - except ValueError as e: - logger.error(e) - sys.exit(p.print_help()) - - feature["start"] = start - feature["stop"] = stop - else: - start, stop = None, None - - assert ( - None - in ( - start, - stop, - ) - or start < stop - ), "start must be < stop, you have ({0}, {1})".format(start, stop) - feature["strand"] = strand - - include, exclude = opts.include, opts.exclude - # conflicting options, cannot be true at the same time - assert not ( - include and exclude - ), "--include and --exclude cannot be on at the same time" - fw = must_open(opts.outfile, "w") - - if include or exclude: - f = Fasta(fastafile, lazy=True) - fi = f.iteritems_ordered if opts.idonly else f.iterdescriptions_ordered - for k, rec in fi(): - if include and key not in k: - continue - if exclude and key in k: - continue - - seq = Fasta.subseq(rec, start, stop, strand) - newid = rec.id - if start is not None: - newid += ":{0}-{1}:{2}".format(start, stop, strand) - - rec = SeqRecord(seq, id=newid, description=k) - SeqIO.write([rec], fw, "fasta") - else: - f = Fasta(fastafile) - 
try: - seq = f.sequence(feature, asstring=False) - except AssertionError as e: - logger.error(e) - return - - newid = opts.newname or query - rec = SeqRecord(seq, id=newid, description="") - SeqIO.write([rec], fw, "fasta") - - return fw.name - - -def _uniq_rec(fastafile, seq=False): - """ - Returns unique records - """ - seen = set() - for rec in SeqIO.parse(must_open(fastafile), "fasta"): - name = str(rec.seq) if seq else rec.id - if name in seen: - logger.debug("ignore {0}".format(rec.id)) - continue - seen.add(name) - yield rec - - -def uniq(args): - """ - %prog uniq fasta uniq.fasta - - remove fasta records that are the same - """ - p = OptionParser(uniq.__doc__) - p.add_argument( - "--seq", default=False, action="store_true", help="Uniqify the sequences" - ) - p.add_argument( - "-t", - "--trimname", - dest="trimname", - action="store_true", - default=False, - help="turn on the defline trim to first space", - ) - - opts, args = p.parse_args(args) - if len(args) != 2: - sys.exit(p.print_help()) - - fastafile, uniqfastafile = args - fw = must_open(uniqfastafile, "w") - seq = opts.seq - - for rec in _uniq_rec(fastafile, seq=seq): - if opts.trimname: - rec.description = "" - SeqIO.write([rec], fw, "fasta") - - -def random(args): - """ - %prog random fasta 100 > random100.fasta - - Take number of records randomly from fasta - """ - from random import sample - - p = OptionParser(random.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, N = args - N = int(N) - assert N > 0 - - f = Fasta(fastafile) - fw = must_open("stdout", "w") - - for key in sample(f.keys(), N): - rec = f[key] - SeqIO.write([rec], fw, "fasta") - - fw.close() - - -XQUAL = -1000 # default quality for X -NQUAL = 5 # default quality value for N -QUAL = 10 # default quality value -OKQUAL = 15 - - -def modify_qual(rec): - qv = rec.letter_annotations["phred_quality"] - for i, (s, q) in enumerate(zip(rec.seq, qv)): - if s == "X" or s == "x": - 
qv[i] = XQUAL - if s == "N" or s == "x": - qv[i] = NQUAL - return rec - - -def make_qual(fastafile, score=OKQUAL): - logger.warning("assume qual ({0})".format(score)) - qualfile = fastafile.rsplit(".", 1)[0] + ".qual" - fw = open(qualfile, "w") - fasta = Fasta(fastafile, lazy=True) - score = str(score) + " " - for entry, size in fasta.itersizes_ordered(): - print(">" + entry, file=fw) - print(score * size, file=fw) - fw.close() - return qualfile - - -def iter_fasta_qual(fastafile, qualfile, defaultqual=OKQUAL, modify=False): - """ - used by trim, emits one SeqRecord with quality values in it - """ - from Bio.SeqIO.QualityIO import PairedFastaQualIterator - - if not qualfile: - qualfile = make_qual(fastafile, score=defaultqual) - - rec_iter = PairedFastaQualIterator(open(fastafile), open(qualfile)) - for rec in rec_iter: - yield rec if not modify else modify_qual(rec) - - -def write_fasta_qual(rec, fastahandle, qualhandle): - if fastahandle: - SeqIO.write([rec], fastahandle, "fasta") - if qualhandle: - SeqIO.write([rec], qualhandle, "qual") - - -def trim(args): - """ - %prog trim fasta.screen newfasta - - take the screen output from `cross_match` (against a vector db, for - example), then trim the sequences to remove X's. Will also perform quality - trim if fasta.screen.qual is found. 
The trimming algorithm is based on - finding the subarray that maximize the sum - """ - - from jcvi.algorithms.maxsum import max_sum - - p = OptionParser(trim.__doc__) - p.add_argument( - "-c", - dest="min_length", - type=int, - default=64, - help="minimum sequence length after trimming", - ) - p.add_argument("-s", dest="score", default=QUAL, help="quality trimming cutoff") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(p.print_help()) - - fastafile, newfastafile = args - qualfile = get_qual(fastafile) - newqualfile = get_qual(newfastafile, check=False) - - logger.debug( - "Trim bad sequence from fasta file `%s` to `%s`" % (fastafile, newfastafile) - ) - - fw = must_open(newfastafile, "w") - fw_qual = open(newqualfile, "w") - - dropped = trimmed = 0 - - for rec in iter_fasta_qual(fastafile, qualfile, modify=True): - qv = [x - opts.score for x in rec.letter_annotations["phred_quality"]] - msum, trim_start, trim_end = max_sum(qv) - score = trim_end - trim_start + 1 - - if score < opts.min_length: - dropped += 1 - continue - - if score < len(rec): - trimmed += 1 - rec = rec[trim_start : trim_end + 1] - - write_fasta_qual(rec, fw, fw_qual) - - print("A total of %d sequences modified." % trimmed, file=sys.stderr) - print( - "A total of %d sequences dropped (length < %d)." % (dropped, opts.min_length), - file=sys.stderr, - ) - - fw.close() - fw_qual.close() - - -def sequin(args): - """ - %prog sequin inputfasta - - Generate a gapped fasta format with known gap sizes embedded. suitable for - Sequin submission. - - A gapped sequence represents a newer method for describing non-contiguous - sequences, but only requires a single sequence identifier. A gap is - represented by a line that starts with >? and is immediately followed by - either a length (for gaps of known length) or "unk100" for gaps of unknown - length. For example, ">?200". The next sequence segment continues on the - next line, with no separate definition line or identifier. 
The difference - between a gapped sequence and a segmented sequence is that the gapped - sequence uses a single identifier and can specify known length gaps. - Gapped sequences are preferred over segmented sequences. A sample gapped - sequence file is shown here: - - >m_gagei [organism=Mansonia gagei] Mansonia gagei NADH dehydrogenase ... - ATGGAGCATACATATCAATATTCATGGATCATACCGTTTGTGCCACTTCCAATTCCTATTTTAATAGGAA - TTGGACTCCTACTTTTTCCGACGGCAACAAAAAATCTTCGTCGTATGTGGGCTCTTCCCAATATTTTATT - >?200 - GGTATAATAACAGTATTATTAGGGGCTACTTTAGCTCTTGC - TCAAAAAGATATTAAGAGGGGTTTAGCCTATTCTACAATGTCCCAACTGGGTTATATGATGTTAGCTCTA - >?unk100 - TCAATAAAACTATGGGGTAAAGAAGAACAAAAAATAATTAACAGAAATTTTCGTTTATCTCCTTTATTAA - TATTAACGATGAATAATAATGAGAAGCCATATAGAATTGGTGATAATGTAAAAAAAGGGGCTCTTATTAC - """ - p = OptionParser(sequin.__doc__) - p.add_argument("--unk", default=100, type=int, help="The size for unknown gaps") - p.add_argument("--newid", default=None, help="Use this identifier instead") - p.add_argument( - "--chromosome", default=None, help="Add [chromosome= ] to FASTA header" - ) - p.add_argument("--clone", default=None, help="Add [clone= ] to FASTA header") - p.set_mingap(default=100) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (inputfasta,) = args - unk = opts.unk - - outputfasta = inputfasta.rsplit(".", 1)[0] + ".split" - rec = next(SeqIO.parse(must_open(inputfasta), "fasta")) - seq = "" - unknowns, knowns = 0, 0 - for gap, gap_group in groupby(rec.seq, lambda x: x.upper() == "N"): - subseq = "".join(gap_group) - if gap: - gap_length = len(subseq) - if gap_length == unk: - subseq = "\n>?unk{0}\n".format(unk) - unknowns += 1 - elif gap_length >= opts.mingap: - subseq = "\n>?{0}\n".format(gap_length) - knowns += 1 - seq += subseq - - fw = must_open(outputfasta, "w") - id = opts.newid or rec.id - fastaheader = ">{0}".format(id) - if opts.chromosome: - fastaheader += " [chromosome={0}]".format(opts.chromosome) - if opts.clone: - fastaheader += " 
[clone={0}]".format(opts.clone) - - print(fastaheader, file=fw) - print(seq, file=fw) - fw.close() - logger.debug( - "Sequin FASTA written to `{0}` (gaps: {1} unknowns, {2} knowns).".format( - outputfasta, unknowns, knowns - ) - ) - - return outputfasta, unknowns + knowns - - -def remove_small_components(rec, minlen): - newseq = [] - removed = 0 - for gap, seq in groupby(rec.seq, lambda x: x.upper() == "N"): - seq = "".join(seq) - seqlen = len(seq) - if not gap and seqlen < minlen: - seq = seqlen * "N" # Mask small components - logger.debug("Discard component ({0}) in {1}".format(seqlen, rec.name)) - removed += seqlen - newseq.append(seq) - rec.seq = Seq("".join(newseq)) - return removed - - -def trim_terminal_Ns(rec): - rec.seq = rec.seq.strip("N") - - -def normalize_gaps(rec, gapsize): - newseq = [] - normalized = 0 - NN = gapsize * "N" - for gap, seq in groupby(rec.seq, lambda x: x.upper() == "N"): - seq = "".join(seq) - if gap: - seq = NN - normalized += 1 - newseq.append(seq) - rec.seq = Seq("".join(newseq)) - return normalized - - -def tidy(args): - """ - %prog tidy fastafile - - Trim terminal Ns, normalize gap sizes and remove small components. 
- """ - p = OptionParser(tidy.__doc__) - p.add_argument( - "--gapsize", - dest="gapsize", - default=0, - type=int, - help="Set all gaps to the same size", - ) - p.add_argument( - "--minlen", - dest="minlen", - default=100, - type=int, - help="Minimum component size", - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - gapsize = opts.gapsize - minlen = opts.minlen - - tidyfastafile = fastafile.rsplit(".", 1)[0] + ".tidy.fasta" - fw = must_open(tidyfastafile, "w") - - removed = normalized = 0 - fasta = Fasta(fastafile, lazy=True) - for name, rec in fasta.iteritems_ordered(): - rec.seq = rec.seq.upper() - if minlen: - removed += remove_small_components(rec, minlen) - trim_terminal_Ns(rec) - if gapsize: - normalized += normalize_gaps(rec, gapsize) - - if len(rec) == 0: - logger.debug("Drop seq {0}".format(rec.id)) - continue - SeqIO.write([rec], fw, "fasta") - - # Print statistics - if removed: - logger.debug("Total discarded bases: {0}".format(removed)) - if normalized: - logger.debug("Gaps normalized: {0}".format(normalized)) - - logger.debug("Tidy FASTA written to `{0}`.".format(tidyfastafile)) - fw.close() - - return tidyfastafile - - -def write_gaps_worker(rec): - start = 0 - seq = rec.seq.upper() - output = [] - for gap, seq in groupby(seq, lambda x: x == "N"): - seq = "".join(seq) - current_length = len(seq) - object_beg = start + 1 - object_end = start + current_length - if gap: - s = "\t".join(str(x) for x in (rec.id, object_beg - 1, object_end)) - output.append(s) - start += current_length - - return "\n".join(output) - - -def write_gaps_bed(inputfasta, prefix, mingap, cpus): - from jcvi.apps.grid import WriteJobs - from jcvi.formats.bed import sort - - bedfile = prefix + ".gaps.bed" - f = Fasta(inputfasta) - recs = list(rec for k, rec in f.iteritems()) - pool = WriteJobs(write_gaps_worker, recs, bedfile, cpus=cpus) - pool.run() - - sort([bedfile, "-i"]) - - bed = Bed(bedfile) - nbedfile = 
prefix + ".{0}N.bed".format(mingap) - - gapnum = 0 - fw = open(nbedfile, "w") - for b in bed: - if b.span < mingap: - continue - gapnum += 1 - gapname = "gap.{0:05d}".format(gapnum) - print("\t".join(str(x) for x in (b, gapname, b.span)), file=fw) - - shutil.move(nbedfile, bedfile) - logger.debug("Write gap (>={0}bp) locations to `{1}`.".format(mingap, bedfile)) - - -def gaps(args): - """ - %prog gaps fastafile - - Print out a list of gaps in BED format (.gaps.bed). - """ - from jcvi.formats.sizes import agp - from jcvi.formats.agp import mask, build - - p = OptionParser(gaps.__doc__) - p.add_argument( - "--split", default=False, action="store_true", help="Generate .split.fasta" - ) - p.set_mingap(default=100) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (inputfasta,) = args - mingap = opts.mingap - split = opts.split - prefix = inputfasta.rsplit(".", 1)[0] - bedfile = prefix + ".gaps.bed" - - if need_update(inputfasta, bedfile): - write_gaps_bed(inputfasta, prefix, mingap, opts.cpus) - - if split: - splitfile = prefix + ".split.fasta" - oagpfile = prefix + ".splitobject.agp" - cagpfile = prefix + ".splitcomponent.agp" - - if need_update((inputfasta, bedfile), splitfile): - - sizesagpfile = agp([inputfasta]) - - maskedagpfile = mask([sizesagpfile, bedfile, "--splitobject"]) - shutil.move(maskedagpfile, oagpfile) - logger.debug("AGP file written to `{0}`.".format(oagpfile)) - - maskedagpfile = mask([sizesagpfile, bedfile, "--splitcomponent"]) - shutil.move(maskedagpfile, cagpfile) - logger.debug("AGP file written to `{0}`.".format(cagpfile)) - - build([oagpfile, inputfasta, splitfile]) - cleanup(sizesagpfile) - - return splitfile, oagpfile, cagpfile - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/fastq.py b/jcvi/formats/fastq.py deleted file mode 100644 index e076fa83..00000000 --- a/jcvi/formats/fastq.py +++ /dev/null @@ -1,1104 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - 
-""" -Processing fastq files -""" -import os.path as op -import sys -import re -import json - -from itertools import islice - -from Bio import SeqIO -from Bio.SeqIO.QualityIO import FastqGeneralIterator - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - logger, - mkdir, - need_update, - sh, - which, -) -from ..utils.cbook import percentage - -from .base import DictFile -from .fasta import must_open, rc - - -qual_offset = lambda x: 33 if x == "sanger" else 64 -allowed_dialect_conversions = { - ">=1.8": "<1.8", - "sra": "<1.8", -} - - -class FastqLite(object): - def __init__(self, name, seq, qual): - self.name = name - self.seq = seq - self.qual = qual - - def __str__(self): - return "\n".join((self.name, self.seq, "+", self.qual)) - - def rc(self): - self.seq = rc(self.seq) - self.qual = self.qual[::-1] - - -class FastqRecord(object): - def __init__(self, fh, offset=0, key=None): - self.name = self.header = fh.readline() - if not self.name: - return - self.name = self.name.split()[0] - self.seq = fh.readline().rstrip() - self.l3 = fh.readline().rstrip() - self.qual = fh.readline().rstrip() - if offset != 0: - self.qual = "".join(chr(ord(x) + offset) for x in self.qual) - self.length = len(self.seq) - assert self.length == len( - self.qual - ), "length mismatch: seq(%s) and qual(%s)" % (self.seq, self.qual) - if key: - self.name = key(self.name) - - def __str__(self): - return "\n".join((self.name, self.seq, "+", self.qual)) - - def __len__(self): - return self.length - - @property - def quality(self): - return [ord(x) for x in self.qual] - - -class FastqHeader(object): - def __init__(self, row): - header = row.strip().split(" ") - self.readId, self.readLen, self.readNum = None, None, None - self.multiplexId = 0 - self.paired = False - if len(header) == 3 and "length" in header[2]: - self.dialect = "sra" - self.readId = header[0].lstrip("@") - m = re.search(r"length=(\d+)", header[2]) - if m: - self.readLen = m.group(1) - h = header[1].split(":") - - 
self.instrument = h[0] - if len(h) == 7: - self.runId, self.flowcellId = int(h[1]), h[2] - self.laneNum, self.tileNum = int(h[3]), int(h[4]) - self.xPos, self.yPos = h[5], h[6] - else: - self.runId, self.flowcellId = None, None - self.laneNum, self.tileNum = int(h[1]), int(h[2]) - self.xPos, self.yPos = h[3], h[4] - else: - h = header[0].split(":") - self.instrument = h[0].lstrip("@") - if len(header) == 2 and header[1].find(":"): - self.dialect = ">=1.8" # Illumina Casava 1.8+ format - - self.runId = int(h[1]) - self.flowcellId = h[2] - self.laneNum = int(h[3]) - self.tileNum = int(h[4]) - self.xPos = int(h[5]) - self.yPos = h[6] - if re.search("/", self.yPos): - self.paired = True - self.yPos, self.readNum = self.yPos.split("/") - - a = header[1].split(":") - self.readNum = int(a[0]) - self.isFiltered = a[1] - self.controlNum = int(a[2]) - self.barcode = a[3] - else: - self.dialect = "<1.8" # Old Illumina Casava format (< 1.8) - self.laneNum = int(h[1]) - self.tileNum = int(h[2]) - self.xPos = int(h[3]) - self.yPos = h[4] - m = re.search(r"(\d+)(#\S+)/(\d+)", self.yPos) - if m: - self.paired = True - self.yPos, self.multiplexId, self.readNum = ( - m.group(1), - m.group(2), - m.group(3), - ) - - def __str__(self): - if self.dialect == "sra": - h0 = self.readId - if self.readNum: - h0 += "/{0}".format(self.readNum) - - h1elems = [ - self.instrument, - self.laneNum, - self.tileNum, - self.xPos, - self.yPos, - ] - if self.runId and self.flowcellId: - h1elems[1:1] = [self.runId, self.flowcellId] - h1 = ":".join(str(x) for x in h1elems) - h2 = "length={0}".format(self.readLen) - - return "@{0} {1} {2}".format(h0, h1, h2) - elif self.dialect == ">=1.8": - yPos = ( - "{0}/{1}".format(self.yPos, self.readNum) if self.paired else self.yPos - ) - - h0 = ":".join( - str(x) - for x in ( - self.instrument, - self.runId, - self.flowcellId, - self.laneNum, - self.tileNum, - self.xPos, - yPos, - ) - ) - h1 = ":".join( - str(x) - for x in (self.readNum, self.isFiltered, 
self.controlNum, self.barcode) - ) - - return "@{0} {1}".format(h0, h1) - else: - yPos = ( - "{0}#{1}/{2}".format(self.yPos, self.multiplexId, self.readNum) - if self.paired - else self.yPos - ) - h0 = ":".join( - str(x) - for x in (self.instrument, self.laneNum, self.tileNum, self.xPos, yPos) - ) - - return "@{0}".format(h0) - - def format_header(self, dialect=None, tag=None): - if dialect: - if self.dialect == dialect: - logger.error("Input and output dialect are the same") - elif dialect not in allowed_dialect_conversions[self.dialect]: - logger.error( - "Error: Cannot convert from `{0}` to `{1}` dialect".format( - self.dialect, dialect - ) - ) - logger.error( - "Allowed conversions: {0}".format( - json.dumps(allowed_dialect_conversions, indent=4) - ) - ) - sys.exit() - else: - self.dialect = dialect - - if tag: - readNum = tag.split("/")[1] - self.readNum = readNum - self.paired = True - - return str(self) - - -def pairspf(pp, commonprefix=True): - if commonprefix: - pf = op.commonprefix(pp).rstrip("._-") - else: - pf = min(pp) - pf = op.basename(pf) - if not pf.strip(): - pf = op.basename(pp[0]) - return pf - - -def iter_fastq(filename, offset=0, key=None): - if isinstance(filename, str): - logger.debug("Read file `{0}`".format(filename)) - fh = must_open(filename) - else: - fh = filename - - while True: - rec = FastqRecord(fh, offset=offset, key=key) - if not rec.name: - break - yield rec - yield None # sentinel - - -def main(): - - actions = ( - ("size", "total base pairs in the fastq files"), - ("shuffle", "shuffle paired reads into the same file interleaved"), - ("split", "split paired reads into two files"), - ("splitread", "split appended reads (from JGI)"), - ("catread", "cat pairs together (reverse of splitread)"), - ("pairinplace", "collect pairs by checking adjacent ids"), - ("convert", "convert between illumina and sanger offset"), - ("first", "get first N reads from file"), - ("filter", "filter to get high qv reads"), - ("suffix", "filter reads 
based on suffix"), - ("trim", "trim reads using fastx_trimmer"), - ("some", "select a subset of fastq reads"), - ("guessoffset", "guess the quality offset of the fastq records"), - ("readlen", "calculate read length"), - ( - "format", - "format fastq file, convert header from casava 1.8+ to older format", - ), - ("fasta", "convert fastq to fasta and qual file"), - ("fromsra", "convert sra to fastq using `fastq-dump`"), - ("uniq", "retain only first instance of duplicate (by name) reads"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def uniq(args): - """ - %prog uniq fastqfile - - Retain only first instance of duplicate reads. Duplicate is defined as - having the same read name. - """ - p = OptionParser(uniq.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastqfile,) = args - fw = must_open(opts.outfile, "w") - nduplicates = nreads = 0 - seen = set() - for rec in iter_fastq(fastqfile): - nreads += 1 - if rec is None: - break - name = rec.name - if name in seen: - nduplicates += 1 - continue - seen.add(name) - print(rec, file=fw) - logger.debug("Removed duplicate reads: {}".format(percentage(nduplicates, nreads))) - - -def suffix(args): - """ - %prog suffix fastqfile CAG - - Filter reads based on suffix. 
- """ - p = OptionParser(suffix.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastqfile, sf = args - fw = must_open(opts.outfile, "w") - nreads = nselected = 0 - for rec in iter_fastq(fastqfile): - nreads += 1 - if rec is None: - break - if rec.seq.endswith(sf): - print(rec, file=fw) - nselected += 1 - logger.debug( - "Selected reads with suffix {0}: {1}".format(sf, percentage(nselected, nreads)) - ) - - -def calc_readlen(f, first): - from jcvi.utils.cbook import SummaryStats - - L = [] - ai = iter_fastq(f) - rec = next(ai) - while rec: - L.append(rec.length) - if len(L) > first: - break - rec = next(ai) - s = SummaryStats(L) - - return s - - -def is_fastq(f): - fq = f.replace(".gz", "") if f.endswith(".gz") else f - return fq.endswith((".fastq", ".fq")) - - -def readlen(args): - """ - %prog readlen fastqfile - - Calculate read length, will only try the first N reads. Output min, max, and - avg for each file. - """ - p = OptionParser(readlen.__doc__) - p.set_firstN() - p.add_argument( - "--silent", - default=False, - action="store_true", - help="Do not print read length stats", - ) - p.add_argument( - "--nocheck", - default=False, - action="store_true", - help="Do not check file type suffix", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (f,) = args - if (not opts.nocheck) and (not is_fastq(f)): - logger.debug("File `{}` does not endswith .fastq or .fq".format(f)) - return 0 - - s = calc_readlen(f, opts.firstN) - if not opts.silent: - print("\t".join(str(x) for x in (f, s.min, s.max, s.mean, s.median))) - - return int(s.max) - - -def fasta(args): - """ - %prog fasta fastqfiles - - Convert fastq to fasta and qual file. 
- """ - p = OptionParser(fasta.__doc__) - p.add_argument( - "--seqtk", default=False, action="store_true", help="Use seqtk to convert" - ) - p.set_outdir() - p.set_outfile(outfile=None) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - fastqfiles = args - outdir = opts.outdir - if outdir and outdir != ".": - mkdir(outdir) - - fastqfile = fastqfiles[0] - pf = op.basename(fastqfile) - gzinput = pf.endswith(".gz") - if gzinput: - pf = pf.rsplit(".", 1)[0] - - pf, sf = pf.rsplit(".", 1) - if sf not in ("fq", "fastq"): - logger.debug("Assumed FASTA: suffix not `fq` or `fastq`") - return fastqfile, None - - fastafile, qualfile = pf + ".fasta", pf + ".qual" - outfile = opts.outfile or fastafile - outfile = op.join(outdir, outfile) - if opts.seqtk: - if need_update(fastqfiles, outfile): - for i, fastqfile in enumerate(fastqfiles): - cmd = "seqtk seq -A {0} -L 30 -l 70".format(fastqfile) - # First one creates file, following ones append to it - sh(cmd, outfile=outfile, append=i) - else: - logger.debug("Outfile `{0}` already exists.".format(outfile)) - return outfile, None - - for fastqfile in fastqfiles: - SeqIO.convert(fastqfile, "fastq", fastafile, "fasta") - SeqIO.convert(fastqfile, "fastq", qualfile, "qual") - - return fastafile, qualfile - - -def first(args): - """ - %prog first N fastqfile(s) - - Get first N reads from file. - """ - from jcvi.apps.base import need_update - - p = OptionParser(first.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - N = int(args[0]) - nlines = N * 4 - fastqfiles = args[1:] - fastqfile = fastqfiles[0] - outfile = opts.outfile - if not need_update(fastqfiles, outfile): - logger.debug("File `{0}` exists. 
Will not overwrite.".format(outfile)) - return - - gz = fastqfile.endswith(".gz") - for fastqfile in fastqfiles: - if gz: - cmd = "zcat {0} | head -n {1}".format(fastqfile, nlines) - else: - cmd = "head -n {0} {1}".format(nlines, fastqfile) - - sh(cmd, outfile=opts.outfile, append=True) - - -def FastqPairedIterator(read1, read2): - if read1 == read2: - p1fp = p2fp = must_open(read1) - else: - p1fp = must_open(read1) - p2fp = must_open(read2) - - return p1fp, p2fp - - -def isHighQv(qs, qvchar, pct=90): - cutoff = len(qs) * pct / 100 - highs = sum(1 for x in qs if x >= qvchar) - return highs >= cutoff - - -def filter(args): - """ - %prog filter paired.fastq - - Filter to get high qv reads. Use interleaved format (one file) or paired - format (two files) to filter on paired reads. - """ - p = OptionParser(filter.__doc__) - p.add_argument( - "-q", - dest="qv", - default=20, - type=int, - help="Minimum quality score to keep", - ) - p.add_argument( - "-p", - dest="pct", - default=95, - type=int, - help="Minimum percent of bases that have [-q] quality", - ) - - opts, args = p.parse_args(args) - - if len(args) not in (1, 2): - sys.exit(not p.print_help()) - - if len(args) == 1: - r1 = r2 = args[0] - else: - r1, r2 = args - - qv = opts.qv - pct = opts.pct - - offset = guessoffset([r1]) - qvchar = chr(offset + qv) - logger.debug("Call base qv >= {0} as good.".format(qvchar)) - outfile = r1.rsplit(".", 1)[0] + ".q{0}.paired.fastq".format(qv) - fw = open(outfile, "w") - - p1fp, p2fp = FastqPairedIterator(r1, r2) - while True: - a = list(islice(p1fp, 4)) - if not a: - break - - b = list(islice(p2fp, 4)) - q1 = a[-1].rstrip() - q2 = b[-1].rstrip() - - if isHighQv(q1, qvchar, pct=pct) and isHighQv(q2, qvchar, pct=pct): - fw.writelines(a) - fw.writelines(b) - - -def checkShuffleSizes(p1, p2, pairsfastq, extra=0): - from jcvi.apps.base import getfilesize - - pairssize = getfilesize(pairsfastq) - p1size = getfilesize(p1) - p2size = getfilesize(p2) - assert ( - pairssize == p1size + 
p2size + extra - ), "The sizes do not add up: {0} + {1} + {2} != {3}".format( - p1size, p2size, extra, pairssize - ) - - -def shuffle(args): - """ - %prog shuffle p1.fastq p2.fastq - - Shuffle pairs into interleaved format. - """ - p = OptionParser(shuffle.__doc__) - p.set_tag() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - p1, p2 = args - pairsfastq = pairspf((p1, p2)) + ".fastq" - tag = opts.tag - - p1fp = must_open(p1) - p2fp = must_open(p2) - pairsfw = must_open(pairsfastq, "w") - nreads = 0 - while True: - a = list(islice(p1fp, 4)) - if not a: - break - - b = list(islice(p2fp, 4)) - if tag: - name = a[0].rstrip() - a[0] = name + "/1\n" - b[0] = name + "/2\n" - - pairsfw.writelines(a) - pairsfw.writelines(b) - nreads += 2 - - pairsfw.close() - extra = nreads * 2 if tag else 0 - checkShuffleSizes(p1, p2, pairsfastq, extra=extra) - - logger.debug( - "File `{0}` verified after writing {1} reads.".format(pairsfastq, nreads) - ) - return pairsfastq - - -def split(args): - """ - %prog split pairs.fastq - - Split shuffled pairs into `.1.fastq` and `.2.fastq`, using `sed`. Can work - on gzipped file. 
- - - """ - from jcvi.apps.grid import Jobs - - p = OptionParser(split.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (pairsfastq,) = args - gz = pairsfastq.endswith(".gz") - pf = pairsfastq.replace(".gz", "").rsplit(".", 1)[0] - p1 = pf + ".1.fastq" - p2 = pf + ".2.fastq" - - cmd = "zcat" if gz else "cat" - p1cmd = cmd + " {0} | sed -ne '1~8{{N;N;N;p}}'".format(pairsfastq) - p2cmd = cmd + " {0} | sed -ne '5~8{{N;N;N;p}}'".format(pairsfastq) - - if gz: - p1cmd += " | gzip" - p2cmd += " | gzip" - p1 += ".gz" - p2 += ".gz" - - p1cmd += " > " + p1 - p2cmd += " > " + p2 - - args = [(p1cmd,), (p2cmd,)] - m = Jobs(target=sh, args=args) - m.run() - - checkShuffleSizes(p1, p2, pairsfastq) - - -def guessoffset(args): - r""" - %prog guessoffset fastqfile - - Guess the quality offset of the fastqfile, whether 33 or 64. - See encoding schemes: - - SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS............................... - ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII - .................................JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ - LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL............................... 
- !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh - | | | | | - 33 59 64 73 104 - - S - Sanger Phred+33, raw reads typically (0, 40) - X - Solexa Solexa+64, raw reads typically (-5, 40) - I - Illumina 1.3+ Phred+64, raw reads typically (0, 40) - J - Illumina 1.5+ Phred+64, raw reads typically (3, 40) - L - Illumina 1.8+ Phred+33, raw reads typically (0, 40) - with 0=unused, 1=unused, 2=Read Segment Quality Control Indicator (bold) - """ - p = OptionParser(guessoffset.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastqfile,) = args - ai = iter_fastq(fastqfile) - rec = next(ai) - offset = 64 - while rec: - quality = rec.quality - lowcounts = len([x for x in quality if x < 59]) - highcounts = len([x for x in quality if x > 74]) - diff = highcounts - lowcounts - if diff > 10: - break - elif diff < -10: - offset = 33 - break - rec = next(ai) - - if offset == 33: - print("Sanger encoding (offset=33)", file=sys.stderr) - elif offset == 64: - print("Illumina encoding (offset=64)", file=sys.stderr) - - return offset - - -def format(args): - """ - %prog format fastqfile - - Format FASTQ file. Currently provides option to convert FASTQ header from - one dialect to another. 
- """ - p = OptionParser(format.__doc__) - - p.add_argument( - "--convert", - default=None, - choices=[">=1.8", "<1.8", "sra"], - help="Convert fastq header to a different format", - ) - p.set_tag(specify_tag=True) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastqfile,) = args - ai = iter_fastq(fastqfile) - rec = next(ai) - dialect = None - while rec: - h = FastqHeader(rec.header) - if not dialect: - dialect = h.dialect - logger.debug("Input fastq dialect: `{0}`".format(dialect)) - if opts.convert: - logger.debug("Output fastq dialect: `{0}`".format(opts.convert)) - - rec.name = h.format_header(dialect=opts.convert, tag=opts.tag) - - print(rec) - rec = next(ai) - - -def some(args): - """ - %prog some idsfile afastq [bfastq] - - Select a subset of the reads with ids present in the idsfile. - `bfastq` is optional (only if reads are paired) - """ - p = OptionParser(some.__doc__) - opts, args = p.parse_args(args) - - if len(args) not in (2, 3): - sys.exit(not p.print_help()) - - ( - idsfile, - afastq, - ) = args[:2] - bfastq = args[2] if len(args) == 3 else None - - ids = DictFile(idsfile, valuepos=None) - - ai = iter_fastq(open(afastq)) - arec = next(ai) - if bfastq: - bi = iter_fastq(open(bfastq)) - brec = next(bi) - - while arec: - if arec.name[1:] in ids: - print(arec) - if bfastq: - print(brec) - - arec = next(ai) - if bfastq: - brec = next(bi) - - -def trim(args): - """ - %prog trim fastqfile - - Wraps `fastx_trimmer` to trim from begin or end of reads. - """ - p = OptionParser(trim.__doc__) - p.add_argument( - "-f", - dest="first", - default=0, - type=int, - help="First base to keep. Default is 1.", - ) - p.add_argument( - "-l", - dest="last", - default=0, - type=int, - help="Last base to keep. 
Default is entire read.", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastqfile,) = args - obfastqfile = op.basename(fastqfile) - fq = obfastqfile.rsplit(".", 1)[0] + ".ntrimmed.fastq" - if fastqfile.endswith(".gz"): - fq = obfastqfile.rsplit(".", 2)[0] + ".ntrimmed.fastq.gz" - - cmd = "fastx_trimmer -Q33 " - if opts.first: - cmd += "-f {0.first} ".format(opts) - if opts.last: - cmd += "-l {0.last} ".format(opts) - - sh(cmd, infile=fastqfile, outfile=fq) - - -def catread(args): - """ - %prog catread fastqfile1 fastqfile2 - - Concatenate paired end reads into one. Useful for example to do single-end - mapping and perform filtering on the whole read pair level. - """ - p = OptionParser(catread.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - r1, r2 = args - p1fp, p2fp = FastqPairedIterator(r1, r2) - outfile = pairspf((r1, r2)) + ".cat.fastq" - fw = must_open(outfile, "w") - while True: - a = list(islice(p1fp, 4)) - if not a: - break - atitle, aseq, _, aqual = a - btitle, bseq, _, bqual = list(islice(p2fp, 4)) - print( - "\n".join( - ( - atitle.strip(), - aseq.strip() + bseq.strip(), - "+", - aqual.strip() + bqual.strip(), - ) - ), - file=fw, - ) - - -def splitread(args): - """ - %prog splitread fastqfile - - Split fastqfile into two read fastqfiles, cut in the middle. 
- """ - p = OptionParser(splitread.__doc__) - p.add_argument( - "-n", - dest="n", - default=76, - type=int, - help="Split at N-th base position", - ) - p.add_argument( - "--rc", - default=False, - action="store_true", - help="Reverse complement second read", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (pairsfastq,) = args - - base = op.basename(pairsfastq).split(".")[0] - fq1 = base + ".1.fastq" - fq2 = base + ".2.fastq" - fw1 = must_open(fq1, "w") - fw2 = must_open(fq2, "w") - - fp = must_open(pairsfastq) - n = opts.n - minsize = n * 8 / 5 - - for name, seq, qual in FastqGeneralIterator(fp): - if len(seq) < minsize: - logger.error("Skipping read {0}, length={1}".format(name, len(seq))) - continue - - name = "@" + name - rec1 = FastqLite(name, seq[:n], qual[:n]) - rec2 = FastqLite(name, seq[n:], qual[n:]) - if opts.rc: - rec2.rc() - - print(rec1, file=fw1) - print(rec2, file=fw2) - - logger.debug("Reads split into `{0},{1}`".format(fq1, fq2)) - fw1.close() - fw2.close() - - -def size(args): - """ - %prog size fastqfile - - Find the total base pairs in a list of fastq files - """ - p = OptionParser(size.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - total_size = total_numrecords = 0 - for f in args: - cur_size = cur_numrecords = 0 - for rec in iter_fastq(f): - if not rec: - break - cur_numrecords += 1 - cur_size += len(rec) - - print(" ".join(str(x) for x in (op.basename(f), cur_numrecords, cur_size))) - total_numrecords += cur_numrecords - total_size += cur_size - - if len(args) > 1: - print(" ".join(str(x) for x in ("Total", total_numrecords, total_size))) - - -def convert(args): - """ - %prog convert in.fastq - - illumina fastq quality encoding uses offset 64, and sanger uses 33. This - script creates a new file with the correct encoding. Output gzipped file if - input is also gzipped. 
- """ - p = OptionParser(convert.__doc__) - p.set_phred() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (infastq,) = args - phred = opts.phred or str(guessoffset([infastq])) - ophred = {"64": "33", "33": "64"}[phred] - - gz = infastq.endswith(".gz") - outfastq = infastq.rsplit(".", 1)[0] if gz else infastq - pf, sf = outfastq.rsplit(".", 1) - outfastq = "{0}.q{1}.{2}".format(pf, ophred, sf) - if gz: - outfastq += ".gz" - - fin = "illumina" if phred == "64" else "sanger" - fout = "sanger" if phred == "64" else "illumina" - - seqret = "seqret" - if infastq.endswith(".gz"): - cmd = "zcat {0} | ".format(infastq) - cmd += seqret + " fastq-{0}::stdin fastq-{1}::stdout".format(fin, fout) - else: - cmd = seqret + " fastq-{0}::{1} fastq-{2}::stdout".format(fin, infastq, fout) - - sh(cmd, outfile=outfastq) - - return outfastq - - -def pairinplace(args): - """ - %prog pairinplace bulk.fastq - - Pair up the records in bulk.fastq by comparing the names for adjancent - records. If they match, print to bulk.pairs.fastq, else print to - bulk.frags.fastq. 
- """ - from more_itertools import pairwise - - p = OptionParser(pairinplace.__doc__) - p.set_rclip() - p.set_tag() - p.add_argument("--base", help="Base name for the output files") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastqfile,) = args - base = opts.base or op.basename(fastqfile).split(".")[0] - - frags = base + ".frags.fastq" - pairs = base + ".pairs.fastq" - if fastqfile.endswith(".gz"): - frags += ".gz" - pairs += ".gz" - - fragsfw = must_open(frags, "w") - pairsfw = must_open(pairs, "w") - - N = opts.rclip - tag = opts.tag - strip_name = (lambda x: x[:-N]) if N else None - - fh_iter = iter_fastq(fastqfile, key=strip_name) - skipflag = False # controls the iterator skip - for a, b in pairwise(fh_iter): - if b is None: # hit the eof - break - - if skipflag: - skipflag = False - continue - - if a.name == b.name: - if tag: - a.name += "/1" - b.name += "/2" - print(a, file=pairsfw) - print(b, file=pairsfw) - skipflag = True - else: - print(a, file=fragsfw) - - # don't forget the last one, when b is None - if not skipflag: - print(a, file=fragsfw) - - logger.debug("Reads paired into `%s` and `%s`" % (pairs, frags)) - return pairs - - -def fromsra(args): - """ - %prog fromsra srafile - - Convert sra file to fastq using the sratoolkit `fastq-dump` - """ - p = OptionParser(fromsra.__doc__) - p.add_argument( - "--paired", - default=False, - action="store_true", - help="Specify if library layout is paired-end", - ) - p.add_argument( - "--compress", - default=None, - choices=["gzip", "bzip2"], - help="Compress output fastq files", - ) - p.set_outdir() - p.set_grid() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (srafile,) = args - paired = opts.paired - compress = opts.compress - outdir = opts.outdir - - script_path = which("fastq-dump") - if not script_path: - logger.error("Cannot find `fastq-dump` in the PATH") - sys.exit() - - cmd = [script_path] - if compress: - 
cmd.append("--{0}".format(compress)) - if paired: - cmd.append("--split-files") - if outdir: - cmd.append("--outdir {0}".format(outdir)) - cmd.append(srafile) - - outcmd = " ".join(cmd) - sh(outcmd, grid=opts.grid) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/genbank.py b/jcvi/formats/genbank.py deleted file mode 100644 index 28a85e22..00000000 --- a/jcvi/formats/genbank.py +++ /dev/null @@ -1,522 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Genbank record operations based on biopython Bio.SeqIO -https://github.com/biopython/biopython/blob/master/Bio/SeqIO/InsdcIO.py -""" -import os.path as op -import sys - -from collections import defaultdict - -from Bio import SeqIO - -from ..apps.fetch import entrez -from ..apps.base import ActionDispatcher, OptionParser, cleanup, glob, logger, mkdir, sh - -from .base import BaseFile, get_number, must_open -from .gff import GffLine - - -MT = "mol_type" -LT = "locus_tag" - - -class MultiGenBank(BaseFile): - """ - Wrapper for parsing concatenated GenBank records. - """ - - def __init__(self, filename, source="JCVI"): - super().__init__(filename) - assert op.exists(filename) - - pf = filename.rsplit(".", 1)[0] - fastafile, gfffile = pf + ".fasta", pf + ".gff" - fasta_fw = must_open(fastafile, "w") - gff_fw = must_open(gfffile, "w") - - self.source = source - self.counter = defaultdict(list) - - nrecs, nfeats = 0, 0 - for rec in SeqIO.parse(filename, "gb"): - seqid = rec.name - rec.id = seqid - SeqIO.write([rec], fasta_fw, "fasta") - rf = rec.features - for f in rf: - self.print_gffline(gff_fw, f, seqid) - nfeats += 1 - nrecs += 1 - - logger.debug( - "A total of {0} records written to `{1}`.".format(nrecs, fastafile) - ) - fasta_fw.close() - - logger.debug( - "A total of {0} features written to `{1}`.".format(nfeats, gfffile) - ) - gff_fw.close() - - def print_gffline(self, fw, f, seqid, parent=None): - - score = phase = "." 
- type = f.type - if type == "source": - type = "contig" - - attr = "ID=tmp" - source = self.source - - start = get_number(f.location.start) + 1 - end = get_number(f.location.end) - strand = "-" if f.strand < 0 else "+" - g = "\t".join( - str(x) - for x in (seqid, source, type, start, end, score, strand, phase, attr) - ) - g = GffLine(g) - - qual = f.qualifiers - if MT in qual: - id = seqid - elif LT in qual: - (id,) = qual[LT] - else: - qual[LT] = [self.current_id] - (id,) = qual[LT] - - id = id.split()[0] - - if parent: - (id,) = parent.qualifiers[LT] - id = id.split()[0] - - assert id != "tmp", f - oid = id - self.counter[(oid, type)].append((start, end)) - count = len(self.counter[(oid, type)]) - - if type in ("mRNA", "gene"): - if type == "gene" and count > 1: - return - self.start = min(a for a, b in self.counter[(id, type)]) - self.end = max(a for a, b in self.counter[(id, type)]) - self.set_attribute("gene", "Alias", qual, g) - self.set_attribute("product", "Note", qual, g) - else: - suffix = ".{0}.{1}".format(type.lower(), count) - id = id + suffix - g.attributes["Parent"] = [oid] - self.set_attribute("product", "Note", qual, g) - - g.attributes["ID"] = [id] - g.update_attributes() - print(g, file=fw) - - self.current_id = oid - - def set_attribute(self, gb_tag, gff_tag, qual, g): - if gb_tag in qual: - (tag,) = qual[gb_tag] - g.attributes[gff_tag] = [tag] - - -class GenBank(dict): - """ - Wrapper of the GenBank record object in biopython SeqIO. 
- """ - - def __init__(self, filenames=None, accessions=None, idfile=None): - super(GenBank, self).__init__() - self.accessions = accessions - self.idfile = idfile - - if filenames is not None: - self.accessions = [op.basename(f).split(".")[0] for f in filenames] - d = dict( - next(iter(SeqIO.to_dict(SeqIO.parse(f, "gb")).items())) - for f in filenames - ) - for k, v in d.items(): - self[k.split(".")[0]] = v - - elif idfile is not None: - gbdir = self._get_records() - d = dict( - next(iter(SeqIO.to_dict(SeqIO.parse(f, "gb")).items())) - for f in glob(gbdir + "/*.gb") - ) - for k, v in d.items(): - self[k.split(".")[0]] = v - - else: - sys.exit( - "GenBank object is initiated from either gb files or " "accession IDs." - ) - - def __getitem__(self, accession): - rec = self[accession] - return rec - - def __repr__(self): - recs = [] - for accession in self.keys(): - recs.append([accession, self.__getitem__(accession)]) - return recs - - def _get_records(self): - gbdir = "gb" - dirmade = mkdir(gbdir) - if not dirmade: - sh( - "rm -rf {0}_old; mv -f {0} {0}_old".format( - gbdir, - ) - ) - assert mkdir(gbdir) - - entrez( - [ - self.idfile, - "--format=gb", - "--database=nuccore", - "--outdir={0}".format(gbdir), - ] - ) - - logger.debug("GenBank records written to {0}.".format(gbdir)) - return gbdir - - @classmethod - def write_genes_bed(cls, gbrec, outfile): - seqid = gbrec.id.split(".")[0] - if not seqid: - seqid = gbrec.name.split(".")[0] - - genecount = 0 - consecutivecds = 0 - for feature in gbrec.features: - if feature.type == "gene": - genecount += 1 - consecutivecds = 0 - continue - - if feature.type == "CDS": - if consecutivecds: - genecount += 1 - consecutivecds = 1 - start = feature.location.start - stop = feature.location.end - if start > stop: - start, stop = stop, start - if feature.strand < 0: - strand = "-" - else: - strand = "+" - score = "." 
- accn = ( - feature.qualifiers[LT][0] - if LT in feature.qualifiers - else "{}_{}".format(seqid, genecount) - ) - - start = str(start).lstrip("><") - stop = str(stop).lstrip("><") - bedline = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format( - seqid, start, stop, accn, score, strand - ) - outfile.write(bedline) - - @classmethod - def write_genes_fasta(cls, gbrec, fwcds, fwpep): - seqid = gbrec.id.split(".")[0] - if not seqid: - seqid = gbrec.name.split(".")[0] - - genecount = 0 - consecutivecds = 0 - for feature in gbrec.features: - if feature.type == "gene": - genecount += 1 - consecutivecds = 0 - continue - - if feature.type == "CDS": - if consecutivecds: - genecount += 1 - consecutivecds = 1 - accn = ( - feature.qualifiers[LT][0] - if LT in feature.qualifiers - else "{}_{}".format(seqid, genecount) - ) - - seq = feature.extract(gbrec.seq) - - fwcds.write(">{0}\n{1}\n".format(accn, seq)) - fwpep.write(">{0}\n{1}\n".format(accn, seq.translate())) - - def write_genes(self, output="gbout", individual=False, pep=True): - if not individual: - fwbed = must_open(output + ".bed", "w") - fwcds = must_open(output + ".cds", "w") - fwpep = must_open(output + ".pep", "w") - - for recid, rec in self.items(): - if individual: - mkdir(output) - fwbed = must_open(op.join(output, recid + ".bed"), "w") - fwcds = must_open(op.join(output, recid + ".cds"), "w") - fwpep = must_open(op.join(output, recid + ".pep"), "w") - - GenBank.write_genes_bed(rec, fwbed) - GenBank.write_genes_fasta(rec, fwcds, fwpep) - - if not pep: - cleanup(fwpep.name) - - def write_fasta(self, output="gbfasta", individual=False): - if not individual: - fw = must_open(output + ".fasta", "w") - - for recid, rec in self.items(): - if individual: - mkdir(output) - fw = must_open(op.join(output, recid + ".fasta"), "w") - - seqid = rec.id.split(".")[0] - if not seqid: - seqid = rec.name.split(".")[0] - seq = rec.seq - fw.write(">{0}\n{1}\n".format(seqid, seq)) - - -def main(): - - actions = ( - ("tofasta", "generate fasta 
file for multiple gb records"), - ("getgenes", "extract protein coding genes from Genbank file"), - ("getquals", "extract qualifiers from Genbank file"), - ("gff", "convert Genbank file to GFF file"), - ) - - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def gff(args): - """ - %prog gff seq.gbk - - Convert Genbank file to GFF and FASTA file. - The Genbank file can contain multiple records. - """ - p = OptionParser(gff.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gbkfile,) = args - MultiGenBank(gbkfile) - - -def preparegb(p, args): - p.add_argument( - "--gb_dir", default=None, help="path to dir containing GanBank files (.gb)" - ) - p.add_argument( - "--id", - default=None, - help="GenBank accession IDs in a file. One ID per row, or all IDs" - " in one row comma separated.", - ) - p.add_argument( - "--simple", - default=None, - type=str, - help="GenBank accession IDs comma separated " - "(for lots of IDs please use --id instead).", - ) - p.add_argument( - "--individual", - default=False, - action="store_true", - help="parse gb accessions individually", - ) - opts, args = p.parse_args(args) - accessions = opts.id - filenames = opts.gb_dir - - if not (opts.gb_dir or opts.id or opts.simple): - sys.exit(not p.print_help()) - - if opts.gb_dir: - filenames = glob(opts.gb_dir + "/*.gb") - - if opts.id: - rows = open(opts.id).readlines() - accessions = [] - for row in rows: - accessions += map(str.strip, row.strip().split(",")) - - if opts.simple: - accessions = opts.simple.split(",") - - if opts.id or opts.simple: - fw = must_open("GenBank_accession_IDs.txt", "w") - for atom in accessions: - print(atom, file=fw) - fw.close() - idfile = fw.name - else: - idfile = None - - return filenames, accessions, idfile, opts, args - - -def tofasta(args): - """ - %prog tofasta [--options] - - Read GenBank file, or retrieve from web. 
- Output fasta file with one record per file - or all records in one file - """ - p = OptionParser(tofasta.__doc__) - p.add_argument("--prefix", default="gbfasta", help="prefix of output files") - filenames, accessions, idfile, opts, args = preparegb(p, args) - prefix = opts.prefix - - GenBank(filenames=filenames, accessions=accessions, idfile=idfile).write_fasta( - output=prefix, individual=opts.individual - ) - - if opts.individual: - logger.debug("Output written dir {0}".format(prefix)) - else: - logger.debug("Output written to {0}.fasta".format(prefix)) - - -def getgenes(args): - """ - %prog getgenes [--options] - - Read GenBank file, or retrieve from web. - Output bed, cds files, and pep file (can turn off with --nopep). - Either --gb_dir or --id/--simple should be provided. - """ - p = OptionParser(getgenes.__doc__) - p.add_argument("--prefix", default="gbout", help="prefix of output files") - p.add_argument( - "--nopep", - default=False, - action="store_true", - help="Only get cds and bed, no pep", - ) - filenames, accessions, idfile, opts, args = preparegb(p, args) - prefix = opts.prefix - - GenBank(filenames=filenames, accessions=accessions, idfile=idfile).write_genes( - output=prefix, individual=opts.individual, pep=(not opts.nopep) - ) - - if opts.individual: - logger.debug("Output written dir {0}".format(prefix)) - elif opts.nopep: - logger.debug( - "Output written to {0}.bed, {0}.cds".format( - prefix, - ) - ) - else: - logger.debug( - "Output written to {0}.bed, {0}.cds, {0}.pep".format( - prefix, - ) - ) - - -def print_locus_quals(locus_tag, locus, quals_ftypes): - """ - Given a locus_tag and dict of features, print out 3-column output: - locus_tag, qualifier, value - - Replace locus_tag with protein_id if processing an "mRNA" or "CDS" - """ - for ftype in quals_ftypes: - for i, quals in enumerate(locus[locus_tag][ftype]): - for elem in quals: - elem_id = elem[0] - if len(locus[locus_tag]["protein_id"]) > 0 and ftype in ("mRNA", "CDS"): - elem_id = 
locus[locus_tag]["protein_id"][i] - if ftype == "misc_RNA": - ftype = "ncRNA" - print("\t".join(str(x) for x in (elem_id, elem[1], elem[2], ftype))) - - -def getquals(args): - """ - %prog getquals [--options] gbkfile > qualsfile - - Read GenBank file and extract all qualifiers per feature type - into a tab-delimited file - """ - p = OptionParser(getquals.__doc__) - p.add_argument( - "--types", - default="gene,mRNA,CDS", - type=str, - dest="quals_ftypes", - help="Feature types from which to extract qualifiers", - ) - p.add_argument( - "--ignore", - default="locus_tag,product,codon_start,translation", - type=str, - dest="quals_ignore", - help="Qualifiers to exclude from parsing", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gbkfile,) = args - quals_ftypes = opts.quals_ftypes.split(",") - quals_ignore = opts.quals_ignore.split(",") - - locus = dict() - for rec in SeqIO.parse(gbkfile, "gb"): - for f in rec.features: - if f.type in quals_ftypes: - locus_tag = f.qualifiers[LT][0] - if locus_tag not in locus: - locus[locus_tag] = dict() - for ftype in quals_ftypes: - if ftype not in locus[locus_tag]: - locus[locus_tag][ftype] = [] - if ftype == "CDS": # store the CDS protein_id - locus[locus_tag]["protein_id"] = [] - - quals = [] - for qual in f.qualifiers: - if qual in quals_ignore: - continue - for qval in f.qualifiers[qual]: - quals.append((locus_tag, qual, qval)) - if qual == "protein_id": - locus[locus_tag]["protein_id"].append(qval) - if len(quals) > 0: - locus[locus_tag][f.type].append(quals) - - for locus_tag in locus: - print_locus_quals(locus_tag, locus, quals_ftypes) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/gff.py b/jcvi/formats/gff.py deleted file mode 100644 index da13f3b6..00000000 --- a/jcvi/formats/gff.py +++ /dev/null @@ -1,3768 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -import os -import os.path as op -import re -import sys - -from collections import defaultdict 
-from urllib.parse import quote, unquote - -from ..annotation.reformat import atg_name -from ..apps.base import ( - ActionDispatcher, - OptionParser, - cleanup, - flatten, - logger, - mkdir, - need_update, - parse_multi_values, - sh, -) -from ..utils.cbook import AutoVivification -from ..utils.orderedcollections import DefaultOrderedDict, OrderedDict, parse_qs -from ..utils.range import Range, range_minmax - -from .base import DictFile, LineFile, must_open, is_number -from .bed import Bed, BedLine, natsorted -from .fasta import Fasta, SeqIO - - -Valid_strands = ("+", "-", "?", ".") -Valid_phases = ("0", "1", "2", ".") -FastaTag = "##FASTA" -RegionTag = "##sequence-region" -valid_gff_parent_child = { - "match": "match_part", - "cDNA_match": "match_part", - "EST_match": "match_part", - "nucleotide_to_protein_match": "match_part", - "expressed_sequence_match": "match_part", - "protein_match": "match_part", - "transposable_element": "transposon_fragment", - "gene": "mRNA", - "mRNA": "exon,CDS,five_prime_UTR,three_prime_UTR", -} -valid_gff_to_gtf_type = { - "exon": "exon", - "pseudogenic_exon": "exon", - "CDS": "CDS", - "start_codon": "start_codon", - "stop_codon": "stop_codon", - "five_prime_UTR": "5UTR", - "three_prime_UTR": "3UTR", -} -valid_gff_type = tuple(valid_gff_parent_child.keys()) -reserved_gff_attributes = ( - "ID", - "Name", - "Alias", - "Parent", - "Target", - "Gap", - "Derives_from", - "Note", - "Dbxref", - "Ontology_term", - "Is_circular", -) -multiple_gff_attributes = ("Parent", "Alias", "Dbxref", "Ontology_term") -safechars = " /:?~#+!$'@()*[]|" -VALID_HUMAN_CHROMOSMES = set([str(x) for x in range(1, 23)] + ["X", "Y"]) - - -class GffLine(object): - """ - Specification here (http://www.sequenceontology.org/gff3.shtml) - """ - - def __init__( - self, - sline, - key="ID", - parent_key="Parent", - gff3=True, - line_index=None, - strict=True, - append_source=False, - append_ftype=False, - append_attrib=None, - score_attrib=False, - keep_attr_order=True, - 
compute_signature=False, - ): - sline = sline.strip() - args = sline.split("\t") - if len(args) != 9: - args = sline.split() - if strict: - assert len(args) == 9, "Malformed line ({0} columns != 9): {1}".format( - len(args), args - ) - self.seqid = args[0] - self.source = args[1] - self.type = args[2] - self.start = int(args[3]) - self.end = int(args[4]) - self.score = args[5] - self.strand = args[6] - assert self.strand in Valid_strands, "strand must be one of {0}".format( - Valid_strands - ) - self.phase = args[7] - assert self.phase in Valid_phases, "phase must be one of {0}".format( - Valid_phases - ) - self.attributes_text = "" if len(args) <= 8 else args[8].strip() - self.attributes = make_attributes( - self.attributes_text, gff3=gff3, keep_attr_order=keep_attr_order - ) - # key is not in the gff3 field, this indicates the conversion to accn - self.key = key # usually it's `ID=xxxxx;` - self.parent_key = parent_key # usually it's `Parent=xxxxx;` - self.gff3 = gff3 - - if append_ftype and self.key in self.attributes: - # if `append_ftype` is True, append the gff `self.type` - # to `self.key`. use this option to enhance the `self.accn` - # column in bed file - self.attributes[self.key][0] = ":".join( - (self.type, self.attributes[self.key][0]) - ) - - if append_source and self.key in self.attributes: - # if `append_source` is True, append the gff `self.source` - # to `self.key`. use this option to enhance the `self.accn` - # column in bed file - self.attributes[self.key][0] = ":".join( - (self.source, self.attributes[self.key][0]) - ) - - if append_attrib and append_attrib in self.attributes: - self.attributes[self.key][0] = ":".join( - (self.attributes[self.key][0], self.attributes[append_attrib][0]) - ) - - if ( - score_attrib - and score_attrib in self.attributes - and is_number(self.attributes[score_attrib][0]) - ): - # if `score_attrib` is specified, check if it is indeed an - # attribute or not. 
If yes, check if the value of attribute - # is numeric or not. If not, keep original GFF score value - self.score = self.attributes[score_attrib][0] - - if line_index is not None and is_number(line_index): - # if `line_index` in provided, initialize an idx variable - # used to autcompute the ID for a feature - self.idx = line_index - - if compute_signature: - # if `compute_signature` is specified, compute a signature for - # the gff line and store in variable `sign` - self.sign = self.signature - - def __getitem__(self, key): - return getattr(self, key) - - def __str__(self): - return "\t".join( - str(x) - for x in ( - self.seqid, - self.source, - self.type, - self.start, - self.end, - self.score, - self.strand, - self.phase, - self.attributes_text, - ) - ) - - def get_attr(self, key, first=True): - if key in self.attributes: - if first: - return self.attributes[key][0] - return self.attributes[key] - return None - - def set_attr( - self, key, value, update=False, append=False, dbtag=None, urlquote=False - ): - if value is None: - self.attributes.pop(key, None) - else: - if key == "Dbxref" and dbtag: - value = value.split(",") - value = ["{0}:{1}".format(dbtag, x) for x in value] - if type(value) is not list: - value = [value] - if key not in self.attributes or not append: - self.attributes[key] = [] - self.attributes[key].extend(value) - if update: - self.update_attributes(gff3=self.gff3, urlquote=urlquote) - - def update_attributes(self, skipEmpty=True, gff3=True, gtf=None, urlquote=True): - attributes = [] - if gtf: - gff3 = None - elif gff3 is None: - gff3 = self.gff3 - - sep = ";" if gff3 else "; " - for tag, val in self.attributes.items(): - if not val and skipEmpty: - continue - val = ",".join(val) - val = '"{0}"'.format(val) if (" " in val and (not gff3)) or gtf else val - equal = "=" if gff3 else " " - if urlquote: - sc = safechars - if tag in multiple_gff_attributes: - sc += "," - val = quote(val, safe=sc) - attributes.append(equal.join((tag, val))) - - 
self.attributes_text = sep.join(attributes) - if gtf: - self.attributes_text += ";" - - def update_tag(self, old_tag, new_tag): - if old_tag not in self.attributes: - return - self.attributes[new_tag] = self.attributes[old_tag] - del self.attributes[old_tag] - - @property - def accn(self): - if self.key: # GFF3 format - if self.key not in self.attributes: - a = ["{0}_{1}".format(str(self.type).lower(), self.idx)] - else: - a = self.attributes[self.key] - else: # GFF2 format - a = self.attributes_text.split() - return quote(",".join(a), safe=safechars) - - id = accn - - @property - def name(self): - return self.attributes["Name"][0] if "Name" in self.attributes else None - - @property - def parent(self): - return ( - self.attributes[self.parent_key][0] - if self.parent_key in self.attributes - else None - ) - - @property - def span(self): - return self.end - self.start + 1 - - @property - def bedline(self): - score = "0" if self.score == "." else self.score - row = "\t".join( - ( - self.seqid, - str(self.start - 1), - str(self.end), - self.accn, - score, - self.strand, - ) - ) - return BedLine(row) - - @property - def signature(self): - """ - create a unique signature for any GFF line based on joining - columns 1,2,3,4,5,7,8 (into a comma separated string) - """ - sig_elems = [ - self.seqid, - self.source, - self.type, - self.start, - self.end, - self.strand, - self.phase, - ] - if re.search("exon|CDS|UTR", self.type): - parent = self.get_attr("Parent") - if parent: - (locus, iso) = atg_name(parent, retval="locus,iso", trimpad0=False) - if locus: - sig_elems.append(locus) - else: - sig_elems.extend([self.accn]) - - return ",".join(str(elem) for elem in sig_elems) - - -class Gff(LineFile): - def __init__( - self, - filename, - key="ID", - parent_key="Parent", - strict=True, - append_source=False, - append_ftype=False, - append_attrib=None, - score_attrib=False, - keep_attr_order=True, - make_gff_store=False, - compute_signature=False, - ): - 
super().__init__(filename) - self.make_gff_store = make_gff_store - self.gff3 = True - if self.make_gff_store: - self.gffstore = [] - gff = Gff( - self.filename, - key=key, - parent_key=parent_key, - strict=True, - append_source=append_source, - append_ftype=append_ftype, - score_attrib=score_attrib, - keep_attr_order=keep_attr_order, - compute_signature=compute_signature, - ) - for g in gff: - self.gffstore.append(g) - else: - self.key = key - self.parent_key = parent_key - self.strict = strict - self.append_source = append_source - self.append_ftype = append_ftype - self.append_attrib = append_attrib - self.score_attrib = score_attrib - self.keep_attr_order = keep_attr_order - self.compute_signature = compute_signature - if filename in ("-", "stdin") or filename.endswith(".gz"): - if ".gtf" in filename: - self.gff3 = False - logger.debug("File is not gff3 standard.") - return - - self.set_gff_type() - - def set_gff_type(self): - # Determine file type - row = None - for row in self: - break - gff3 = False if not row else "=" in row.attributes_text - if not gff3: - logger.debug("File is not gff3 standard.") - - self.gff3 = gff3 - self.fp.seek(0) - - def __iter__(self): - if self.make_gff_store: - for row in self.gffstore: - yield row - else: - self.fp = must_open(self.filename) - for idx, row in enumerate(self.fp): - row = row.strip() - if row.strip() == "": - continue - if row[0] == "#": - if row == FastaTag: - break - continue - yield GffLine( - row, - key=self.key, - parent_key=self.parent_key, - line_index=idx, - strict=self.strict, - append_source=self.append_source, - append_ftype=self.append_ftype, - append_attrib=self.append_attrib, - score_attrib=self.score_attrib, - keep_attr_order=self.keep_attr_order, - compute_signature=self.compute_signature, - gff3=self.gff3, - ) - - @property - def seqids(self): - return set(x.seqid for x in self) - - -class GffFeatureTracker(object): - def __init__(self): - self.ftype = "exon|CDS|UTR|fragment" - self.tracker = {} - 
self.symbolstore = {} - - def track(self, parent, g): - if re.search(self.ftype, g.type): - if parent not in self.tracker: - self.tracker[parent] = {} - if g.type not in self.tracker[parent]: - self.tracker[parent][g.type] = set() - self.tracker[parent][g.type].add((g.start, g.end, g.sign)) - - def _sort(self, parent, ftype, reverse=False): - if not isinstance(self.tracker[parent][ftype], list): - self.tracker[parent][ftype] = sorted( - list(self.tracker[parent][ftype]), - key=lambda x: (x[0], x[1]), - reverse=reverse, - ) - - def feat_index(self, parent, ftype, strand, feat_tuple): - reverse = True if strand == "-" else False - self._sort(parent, ftype, reverse=reverse) - return self.tracker[parent][ftype].index(feat_tuple) - - def store_symbol(self, g): - for symbol_attr in ("symbol", "ID"): - if symbol_attr in g.attributes: - break - self.symbolstore[g.accn] = g.get_attr(symbol_attr) - - def get_symbol(self, parent): - return self.symbolstore[parent] - - -def make_attributes(s, gff3=True, keep_attr_order=True): - """ - In GFF3, the last column is typically: - ID=cds00002;Parent=mRNA00002; - - In GFF2, the last column is typically: - Gene 22240.t000374; Note "Carbonic anhydrase" - """ - if gff3: - """ - hack: temporarily replace the '+' sign in the attributes column - with the string 'PlusSign' to prevent urlparse.parse_qsl() from - replacing the '+' sign with a space - """ - s = s.replace("+", "PlusSign") - d = parse_qs(s, separator=";", keep_attr_order=keep_attr_order) - for key in d: - d[key][0] = unquote(d[key][0].replace("PlusSign", "+").replace('"', "")) - else: - attributes = s.split(";") - d = DefaultOrderedDict(list) if keep_attr_order else defaultdict(list) - for a in attributes: - a = a.strip() - if " " not in a: - continue - key, val = a.split(" ", 1) - val = unquote(val.replace('"', "").replace("=", " ").strip()) - d[key].append(val) - - for key, val in d.items(): - d[key] = flatten([v.split(",") for v in val]) - - return d - - -def to_range(obj, 
score=None, id=None, strand=None): - """ - Given a gffutils object, convert it to a range object - """ - if score or id: - _score = score if score else obj.score - _id = id if id else obj.id - return Range( - seqid=obj.seqid, start=obj.start, end=obj.end, score=_score, id=_id - ) - elif strand: - return obj.seqid, obj.start, obj.end, obj.strand - - return obj.seqid, obj.start, obj.end - - -def main(): - actions = ( - ("addparent", "merge sister features and infer their parent"), - ("bed", "parse gff and produce bed file for particular feature type"), - ("bed12", "produce bed12 file for coding features"), - ("chain", "fill in parent features by chaining children"), - ("children", "find all children that belongs to the same parent"), - ("cluster", "cluster transcripts based on shared splicing structure"), - ("extract", "extract contig or features from gff file"), - ("filter", "filter the gff file based on Identity and Coverage"), - ( - "fixboundaries", - "fix boundaries of parent features by range chaining child features", - ), - ( - "fixpartials", - "fix 5/3 prime partial transcripts, locate nearest in-frame start/stop", - ), - ("format", "format the gff file, change seqid, etc."), - ("frombed", "convert from bed format to gff3"), - ("fromgtf", "convert gtf to gff3 format"), - ("fromsoap", "convert from soap format to gff3"), - ("gapsplit", "split alignment GFF3 at gaps based on CIGAR string"), - ("gb", "convert gff3 to genbank format"), - ("gtf", "convert gff3 to gtf format"), - ("liftover", "adjust gff coordinates based on tile number"), - ("load", "extract the feature (e.g. 
CDS) sequences and concatenate"), - ("merge", "merge several gff files into one"), - ("note", "extract certain attribute field for each feature"), - ("orient", "orient the coding features based on translation"), - ("parents", "find the parents given a list of IDs"), - ("rename", "change the IDs within the gff3"), - ("sizes", "calculate sizes of features in gff file"), - ("sort", "sort the gff file"), - ("splicecov", "tag gff introns with coverage info from junctions.bed"), - ("split", "split the gff into one contig per file"), - ("summary", "print summary stats for features of different types"), - ("uniq", "remove the redundant gene models"), - ) - - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def addparent(args): - """ - %prog addparent file.gff - - Merge sister features and infer parents. - """ - p = OptionParser(addparent.__doc__) - p.add_argument("--childfeat", default="CDS", help="Type of children feature") - p.add_argument("--parentfeat", default="mRNA", help="Type of merged feature") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gff_file,) = args - gff = Gff(gff_file) - data = defaultdict(list) - for g in gff: - if g.type != opts.childfeat: - continue - data[g.parent].append(g) - - logger.debug("A total of %d %s features clustered", len(data), opts.childfeat) - - parents = [] - for parent, dd in data.items(): - d = dd[0] - start, end = min(x.start for x in dd), max(x.end for x in dd) - gffline = "\t".join( - str(x) - for x in ( - d.seqid, - d.source, - opts.parentfeat, - start, - end, - ".", - d.strand, - ".", - "ID={0};Name={0}".format(parent), - ) - ) - parents.append(GffLine(gffline)) - parents.sort(key=lambda x: (x.seqid, x.start)) - logger.debug("Merged feature sorted") - - fw = must_open(opts.outfile, "w") - for parent in parents: - print(parent, file=fw) - parent_id = parent.id - for d in data[parent_id]: - if d.accn == parent_id: - new_id = "{0}.{1}1".format(parent_id, 
opts.childfeat) - d.set_attr("ID", new_id) - d.set_attr("Name", new_id, update=True) - print(d, file=fw) - fw.close() - - -def _fasta_slice(fasta, seqid, start, stop, strand): - """ - Return slice of fasta, given (seqid, start, stop, strand) - """ - _strand = 1 if strand == "+" else -1 - return fasta.sequence( - {"chr": seqid, "start": start, "stop": stop, "strand": _strand} - ) - - -def is_valid_codon(codon, type="start"): - """ - Given a codon sequence, check if it is a valid start/stop codon - """ - if len(codon) != 3: - return False - - if type == "start": - if codon != "ATG": - return False - elif type == "stop": - if not any(_codon == codon for _codon in ("TGA", "TAG", "TAA")): - return False - else: - logger.error( - "`%s` is not a valid codon type. Should be one of (`start` or `stop`)", type - ) - sys.exit() - - return True - - -def scan_for_valid_codon(codon_span, strand, seqid, genome, type="start"): - """ - Given a codon span, strand and reference seqid, scan upstream/downstream - to find a valid in-frame start/stop codon - """ - s, e = codon_span[0], codon_span[1] - while True: - if (type == "start" and strand == "+") or (type == "stop" and strand == "-"): - s, e = s - 3, e - 3 - else: - s, e = s + 3, e + 3 - - codon = _fasta_slice(genome, seqid, s, e, strand) - is_valid = is_valid_codon(codon, type=type) - if not is_valid: - if type == "start": - # if we are scanning upstream for a valid start codon, - # stop scanning when we encounter a stop - if is_valid_codon(codon, type="stop"): - return None, None - elif type == "stop": - # if we are scanning downstream for a valid stop codon, - # stop scanning when we encounter a start - if is_valid_codon(codon, type="start"): - return None, None - continue - break - - return s, e - - -def fixpartials(args): - """ - %prog fixpartials genes.gff genome.fasta partials.ids - - Given a gff file of features, fix partial (5'/3' incomplete) transcripts - by trying to locate nearest in-frame start/stop codon - """ - p = 
OptionParser(fixpartials.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - ( - gffile, - gfasta, - partials, - ) = args - - gff = make_index(gffile) - genome = Fasta(gfasta, index=True) - partials = LineFile(partials, load=True).lines - - # all_transcripts = [f.id for f in gff.features_of_type("mRNA", \ - # order_by=("seqid", "start"))] - seen = set() - fw = must_open(opts.outfile, "w") - for gene in gff.features_of_type("gene", order_by=("seqid", "start")): - children = AutoVivification() - cflag = False - transcripts = list(gff.children(gene, level=1, order_by="start")) - for transcript in transcripts: - trid, seqid, strand = transcript.id, transcript.seqid, transcript.strand - - for child in gff.children(transcript, order_by="start"): - ftype = child.featuretype - if ftype not in children[trid]: - children[trid][ftype] = [] - children[trid][ftype].append(child) - - five_prime, three_prime = True, True - nstart, nstop = (None, None), (None, None) - cds_span = [children[trid]["CDS"][0].start, children[trid]["CDS"][-1].stop] - new_cds_span = [x for x in cds_span] - - start_codon = (cds_span[0], cds_span[0] + 2) - stop_codon = (cds_span[1] - 2, cds_span[1]) - if strand == "-": - start_codon, stop_codon = stop_codon, start_codon - - if trid in partials: - seen.add(trid) - start_codon_fasta = _fasta_slice( - genome, seqid, start_codon[0], start_codon[1], strand - ) - stop_codon_fasta = _fasta_slice( - genome, seqid, stop_codon[0], stop_codon[1], strand - ) - - if not is_valid_codon(start_codon_fasta, type="start"): - five_prime = False - nstart = scan_for_valid_codon( - start_codon, strand, seqid, genome, type="start" - ) - - if not is_valid_codon(stop_codon_fasta, type="stop"): - three_prime = False - nstop = scan_for_valid_codon( - stop_codon, strand, seqid, genome, type="stop" - ) - - logger.debug( - "feature=%s (%s), 5'=%s, 3'=%s, %d <== %d ==> %d", - trid, - strand, - five_prime, - three_prime, 
- nstart if strand == "+" else nstop, - cds_span, - nstop if strand == "+" else nstart, - ) - - if not five_prime or not three_prime: - if nstart != (None, None) and (start_codon != nstart): - i = 0 if strand == "+" else 1 - new_cds_span[i] = nstart[i] - if nstop != (None, None) and (stop_codon != nstop): - i = 1 if strand == "+" else 0 - new_cds_span[i] = nstop[i] - new_cds_span.sort() - - if set(cds_span) != set(new_cds_span): - cflag = True - # if CDS has been extended, appropriately adjust all relevent - # child feature (CDS, exon, UTR) coordinates - for ftype in children[trid]: - for idx in range(len(children[trid][ftype])): - child_span = ( - children[trid][ftype][idx].start, - children[trid][ftype][idx].stop, - ) - if ftype in ("exon", "CDS"): - # if exons/CDSs, adjust start and stop according to - # new CDS start and stop, respectively - if child_span[0] == cds_span[0]: - children[trid][ftype][idx].start = new_cds_span[0] - if child_span[1] == cds_span[1]: - children[trid][ftype][idx].stop = new_cds_span[1] - elif ftype.endswith("UTR"): - # if *_prime_UTR, adjust stop according to new CDS start and - # adjust start according to new CDS stop - if child_span[1] == cds_span[0]: - children[trid][ftype][idx].stop = new_cds_span[0] - if child_span[0] == cds_span[1]: - children[trid][ftype][idx].start = new_cds_span[1] - - transcript.start, transcript.stop = ( - children[trid]["exon"][0].start, - children[trid]["exon"][-1].stop, - ) - - if cflag: - _gene_span = range_minmax([(tr.start, tr.stop) for tr in transcripts]) - gene.start, gene.stop = _gene_span[0], _gene_span[1] - - # print gff file - print(gene, file=fw) - for transcript in transcripts: - trid = transcript.id - print(transcript, file=fw) - for cftype in children[trid]: - for child in children[trid][cftype]: - print(child, file=fw) - - fw.close() - - -def sizes(args): - """ - %prog sizes gffile - - Given a gff file of features, calculate the sizes of chosen parent feature - based on summation of sizes of 
child features. - - For example, for parent 'mRNA' and child 'CDS' feature types, calcuate sizes of - mRNA by summing the sizes of the disjoint CDS parts. - """ - p = OptionParser(sizes.__doc__) - p.set_outfile() - p.add_argument( - "--parents", - dest="parents", - default="mRNA", - help="parent feature(s) for which size is to be calculated", - ) - p.add_argument( - "--child", - dest="child", - default="CDS", - help="child feature to use for size calculations", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - parents, cftype = set(opts.parents.split(",")), opts.child - - gff = make_index(gffile) - - fw = must_open(opts.outfile, "w") - for parent in parents: - for feat in gff.features_of_type(parent, order_by=("seqid", "start")): - fsize = 0 - fsize = ( - feat.end - feat.start + 1 - if cftype == parent - else gff.children_bp(feat, child_featuretype=cftype) - ) - print("\t".join(str(x) for x in (feat.id, fsize)), file=fw) - fw.close() - - -def cluster(args): - """ - %prog cluster gffile - - Given a gff file of gene structures (multiple transcripts per gene locus), - cluster/consolidate all transcripts based on shared splicing structure. - - If `slop` is enabled, clustering/consolidation will collapse any variation - in terminal UTR lengths, keeping only the longest as representative. 
- """ - from jcvi.utils.grouper import Grouper - from itertools import combinations - - p = OptionParser(cluster.__doc__) - p.add_argument( - "--slop", - default=False, - action="store_true", - help="allow minor variation in terminal 5'/3' UTR" + " start/stop position", - ) - p.add_argument( - "--inferUTR", - default=False, - action="store_true", - help="infer presence of UTRs from exon coordinates", - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - slop = opts.slop - inferUTR = opts.inferUTR - - gff = make_index(gffile) - - fw = must_open(opts.outfile, "w") - print("##gff-version 3", file=fw) - seen = {} - for gene in gff.features_of_type("gene", order_by=("seqid", "start")): - g = Grouper() - mrnas = list( - combinations( - [ - mrna - for mrna in gff.children(gene, featuretype="mRNA", order_by="start") - ], - 2, - ) - ) - if len(mrnas) > 0: - for mrna1, mrna2 in mrnas: - mrna1s, mrna2s = ( - gff.children_bp(mrna1, child_featuretype="exon"), - gff.children_bp(mrna2, child_featuretype="exon"), - ) - g.join((mrna1.id, mrna1s)) - g.join((mrna2.id, mrna2s)) - - if match_subfeats(mrna1, mrna2, gff, gff, featuretype="CDS"): - res = [] - ftypes = ( - ["exon"] if inferUTR else ["five_prime_UTR", "three_prime_UTR"] - ) - for ftype in ftypes: - res.append( - match_subfeats( - mrna1, mrna2, gff, gff, featuretype=ftype, slop=slop - ) - ) - - if all(res): - g.join((mrna1.id, mrna1s), (mrna2.id, mrna2s)) - else: - for mrna1 in gff.children(gene, featuretype="mRNA", order_by="start"): - mrna1s = gff.children_bp(mrna1, child_featuretype="exon") - g.join((mrna1.id, mrna1s)) - - print(gene, file=fw) - for group in sorted(g): - group.sort(key=lambda x: x[1], reverse=True) - mrnas = [el[0] for el in group] - m = mrnas[0] - - _mrnaid = [] - for x in mrnas: - if x not in _mrnaid: - _mrnaid.append(x) - mrnaid = "{0}".format("-".join(_mrnaid)) - if mrnaid not in seen: - seen[mrnaid] = 0 - else: - 
seen[mrnaid] += 1 - mrnaid = "{0}-{1}".format(mrnaid, seen[mrnaid]) - - _mrna = gff[m] - _mrna.attributes["ID"] = [mrnaid] - _mrna.attributes["Parent"] = [gene.id] - children = gff.children(m, order_by="start") - print(_mrna, file=fw) - for child in children: - child.attributes["ID"] = ["{0}".format(child.id)] - child.attributes["Parent"] = [mrnaid] - print(child, file=fw) - - fw.close() - - -def summary(args): - """ - %prog summary gffile - - Print summary stats for features of different types. - """ - from jcvi.formats.base import SetFile - from jcvi.formats.bed import BedSummary - from jcvi.utils.table import tabulate - - p = OptionParser(summary.__doc__) - p.add_argument( - "--isoform", - default=False, - action="store_true", - help="Find longest isoform of each id", - ) - p.add_argument("--ids", help="Only include features from certain IDs") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gff_file,) = args - ids = opts.ids - - if ids: - ids = SetFile(ids) - logger.debug("Total ids loaded: %d", len(ids)) - - if opts.isoform: - pids = set() - gff = Gff(gff_file) - for g in gff: - if g.type != "mRNA": - continue - if g.parent not in ids: - continue - if "longest" not in g.attributes: - pids = set(x + ".1" for x in ids) - break - if g.attributes["longest"][0] == "0": - continue - pids.add(g.id) - ids = pids - logger.debug("After checking longest: %d", len(ids)) - - # Collects aliases - gff = Gff(gff_file) - for g in gff: - if g.name in ids: - ids.add(g.id) - logger.debug("Total ids including aliases: %d", len(ids)) - - gff = Gff(gff_file) - beds = defaultdict(list) - for g in gff: - if ids and not (g.id in ids or g.name in ids or g.parent in ids): - continue - - beds[g.type].append(g.bedline) - - table = {} - for type, bb in sorted(beds.items()): - bs = BedSummary(bb) - table[(type, "Features")] = bs.nfeats - table[(type, "Unique bases")] = bs.unique_bases - table[(type, "Total bases")] = bs.total_bases - - 
print(tabulate(table), file=sys.stdout) - - -def gb(args): - """ - %prog gb gffile fastafile - - Convert GFF3 to Genbank format. Recipe taken from: - - """ - try: - from BCBio import GFF - except ImportError: - print( - "You need to install dep first: $ easy_install bcbio-gff", file=sys.stderr - ) - - p = OptionParser(gb.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - gff_file, fasta_file = args - pf = op.splitext(gff_file)[0] - out_file = pf + ".gb" - fasta_input = SeqIO.to_dict(SeqIO.parse(fasta_file, "fasta")) - gff_iter = GFF.parse(gff_file, fasta_input) - SeqIO.write(gff_iter, out_file, "genbank") - - -def orient(args): - """ - %prog orient in.gff3 features.fasta > out.gff3 - - Change the feature orientations based on translation. This script is often - needed in fixing the strand information after mapping RNA-seq transcripts. - - You can generate the features.fasta similar to this command: - - $ %prog load --parents=EST_match --children=match_part clc.JCVIv4a.gff - JCVI.Medtr.v4.fasta -o features.fasta - """ - from jcvi.formats.fasta import longestorf - - p = OptionParser(orient.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - ingff3, fastafile = args - idsfile = fastafile.rsplit(".", 1)[0] + ".orf.ids" - if need_update(fastafile, idsfile): - longestorf([fastafile, "--ids"]) - - orientations = DictFile(idsfile) - gff = Gff(ingff3) - flipped = 0 - for g in gff: - id = None - for tag in ("ID", "Parent"): - if tag in g.attributes: - (id,) = g.attributes[tag] - break - assert id - - orientation = orientations.get(id, "+") - if orientation == "-": - g.strand = {"+": "-", "-": "+"}[g.strand] - flipped += 1 - - print(g) - - logger.debug("A total of %d features flipped.", flipped) - - -def rename(args): - """ - %prog rename in.gff3 switch.ids > reindexed.gff3 - - Change the IDs within the gff3. 
- """ - p = OptionParser(rename.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - ingff3, switch = args - switch = DictFile(switch) - - gff = Gff(ingff3) - for g in gff: - (id,) = g.attributes["ID"] - newname = switch.get(id, id) - g.attributes["ID"] = [newname] - - if "Parent" in g.attributes: - parents = g.attributes["Parent"] - g.attributes["Parent"] = [switch.get(x, x) for x in parents] - - g.update_attributes() - print(g) - - -def parents(args): - """ - %prog parents gffile models.ids - - Find the parents given a list of IDs in "models.ids". - """ - p = OptionParser(parents.__doc__) - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - gff_file, idsfile = args - g = make_index(gff_file) - fp = open(idsfile) - for row in fp: - cid = row.strip() - b = next(g.parents(cid, 1)) - print("\t".join((cid, b.id))) - - -def filter(args): - """ - %prog filter gffile > filtered.gff - - Filter the gff file based on criteria below: - (1) feature attribute values: [Identity, Coverage]. - You can get this type of gff by using gmap - $ gmap -f 2 .... 
- - (2) Total bp length of child features - """ - p = OptionParser(filter.__doc__) - p.add_argument( - "--type", default="mRNA", help="The feature to scan for the attributes" - ) - g1 = p.add_argument_group("Filter by identity/coverage attribute values") - g1.add_argument("--id", default=95, type=float, help="Minimum identity") - g1.add_argument("--coverage", default=90, type=float, help="Minimum coverage") - g1.add_argument( - "--nocase", - default=False, - action="store_true", - help="Case insensitive lookup of attribute names", - ) - g2 = p.add_argument_group("Filter by child feature bp length") - g2.add_argument( - "--child_ftype", default=None, type=str, help="Child featuretype to consider" - ) - g2.add_argument( - "--child_bp", - default=None, - type=int, - help="Filter by total bp of children of chosen ftype", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - otype, oid, ocov = opts.type, opts.id, opts.coverage - cftype, clenbp = opts.child_ftype, opts.child_bp - - id_attr, cov_attr = "Identity", "Coverage" - if opts.nocase: - id_attr, cov_attr = id_attr.lower(), cov_attr.lower() - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - - gffdb = make_index(gffile) - bad = set() - ptype = None - for g in gffdb.features_of_type(otype, order_by=("seqid", "start")): - if not ptype: - parent = list(gffdb.parents(g)) - ptype = parent[0].featuretype if len(parent) > 0 else otype - if cftype and clenbp: - if gffdb.children_bp(g, child_featuretype=cftype) < clenbp: - bad.add(g.id) - elif oid and ocov: - identity = float(g.attributes[id_attr][0]) - coverage = float(g.attributes[cov_attr][0]) - if identity < oid or coverage < ocov: - bad.add(g.id) - - logger.debug("%d bad accns marked.", len(bad)) - - fw = must_open(opts.outfile, "w") - for g in gffdb.features_of_type(ptype, order_by=("seqid", "start")): - if ptype != otype: - feats = list(gffdb.children(g, featuretype=otype, order_by="start")) - ok_feats = [f for f in feats if f.id not in 
bad] - if len(ok_feats) > 0: - g.keep_order = True - print(g, file=fw) - for feat in ok_feats: - feat.keep_order = True - print(feat, file=fw) - for child in gffdb.children(feat, order_by="start"): - child.keep_order = True - print(child, file=fw) - else: - if g.id not in bad: - print(g, file=fw) - for child in gffdb.children(g, order_by="start"): - print(child, file=fw) - fw.close() - - -def fix_gsac(g, notes): - a = g.attributes - - if g.type == "gene": - note = a["Name"] - elif g.type == "mRNA": - parent = a["Parent"][0] - note = notes[parent] - else: - return - - a["Name"] = a["ID"] - a["Note"] = note - g.update_attributes() - - -def gapsplit(args): - """ - %prog gapsplit gffile > split.gff - - Read in the gff (normally generated by GMAP) and print it out after splitting - each feature into one parent and multiple child features based on alignment - information encoded in CIGAR string. - """ - p = OptionParser(gapsplit.__doc__) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - - gff = Gff(gffile) - for g in gff: - if re.match("EST_match", g.type): - """ - hacky implementation: - since the standard urlparse.parse_qsl() replaces all "+" symbols with spaces - we will write a regex to check either for a "-" or a " " (space) - """ - match = re.search(r"\S+ (\d+) \d+ ([\s{1}\-])", g.attributes["Target"][0]) - if match.group(2) == "-": - strand = match.group(2) - else: - strand = "+" - g.attributes["Target"][0] = " ".join( - str(x) for x in [g.attributes["Target"][0].rstrip(), strand] - ) - - if g.strand == "?": - g.strand = strand - else: - match = re.match(r"\S+ (\d+) \d+", g.attributes["Target"][0]) - target_start = int(match.group(1)) - - re_cigar = re.compile(r"(\D+)(\d+)") - cigar = g.attributes["Gap"][0].split(" ") - g.attributes["Gap"] = None - - parts = [] - if g.strand == "+": - for event in cigar: - match = re_cigar.match(event) - op, count = match.group(1), int(match.group(2)) - if op in "IHS": 
- target_start += count - elif op in "DN": - g.start += count - elif op == "P": - continue - else: - parts.append( - [ - g.start, - g.start + count - 1, - target_start, - target_start + count - 1, - ] - ) - g.start += count - target_start += count - else: - for event in cigar: - match = re_cigar.match(event) - op, count = match.group(1), int(match.group(2)) - if op in "IHS": - target_start += count - elif op in "DN": - g.end -= count - elif op == "P": - continue - else: - parts.append( - [ - g.end - count + 1, - g.end, - target_start, - target_start + count - 1, - ] - ) - g.end -= count - target_start += count - - g.update_attributes() - print(g) - - parent = g.attributes["Name"][0] - g.type = "match_part" - g.attributes.clear() - - for part in parts: - g.start, g.end = part[0], part[1] - g.score, g.strand, g.phase = ".", g.strand, "." - - if re.match("EST", g.type): - target_list = [parent, part[2], part[3], g.strand] - else: - target_list = [parent, part[2], part[3]] - target = " ".join(str(x) for x in target_list) - - g.attributes["Parent"] = [parent] - g.attributes["Target"] = [target] - - g.update_attributes() - print(g) - - -def chain(args): - """ - %prog chain gffile > chained.gff - - Fill in parent features by chaining child features and return extent of the - child coordinates. 
- """ - valid_merge_op = ("sum", "min", "max", "mean", "collapse") - - p = OptionParser(chain.__doc__) - p.add_argument( - "--key", - dest="attrib_key", - default=None, - help="Attribute to use as `key` for chaining operation", - ) - p.add_argument( - "--chain_ftype", - default="cDNA_match", - help="GFF feature type to use for chaining operation", - ) - p.add_argument( - "--parent_ftype", - default=None, - help="GFF feature type to use for the chained coordinates", - ) - p.add_argument( - "--break", - dest="break_chain", - action="store_true", - help="Break long chains which are non-contiguous", - ) - p.add_argument( - "--transfer_attrib", - dest="attrib_list", - help="Attributes to transfer to parent feature; accepts comma" - + " separated list of attribute names", - ) - p.add_argument( - "--transfer_score", - dest="score_merge_op", - choices=valid_merge_op, - help="Transfer value stored in score field to parent feature." - + " Score is reported based on chosen operation", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - attrib_key = opts.attrib_key - attrib_list = opts.attrib_list - score_merge_op = opts.score_merge_op - break_chain = opts.break_chain - - chain_ftype = opts.chain_ftype - parent_ftype = opts.parent_ftype if opts.parent_ftype else chain_ftype - - gffdict = {} - fw = must_open(opts.outfile, "w") - gff = Gff(gffile) - if break_chain: - ctr, prev_gid = dict(), None - for g in gff: - if g.type != chain_ftype: - print(g, file=fw) - continue - - id = g.accn - gid = id - if attrib_key: - assert ( - attrib_key in g.attributes.keys() - ), "Attribute `{0}` not present in GFF3".format(attrib_key) - gid = g.get_attr(attrib_key) - curr_gid = gid - if break_chain: - if prev_gid != curr_gid: - if curr_gid not in ctr: - ctr[curr_gid] = 0 - else: - ctr[curr_gid] += 1 - gid = "{0}:{1}".format(gid, ctr[curr_gid]) - gkey = (g.seqid, gid) - if gkey not in gffdict: - gffdict[gkey] = { 
- "seqid": g.seqid, - "source": g.source, - "strand": g.strand, - "type": parent_ftype, - "coords": [], - "children": [], - "score": [], - "attrs": DefaultOrderedDict(set), - } - gffdict[gkey]["attrs"]["ID"].add(gid) - - if attrib_list: - for a in attrib_list.split(","): - if a in g.attributes: - [gffdict[gkey]["attrs"][a].add(x) for x in g.attributes[a]] - del g.attributes[a] - - if break_chain: - _attrib = "Alias" if attrib_list and ("Name" not in attrib_list) else "Name" - gffdict[gkey]["attrs"][_attrib].add(curr_gid) - - gffdict[gkey]["coords"].append((g.start, g.end)) - if score_merge_op: - if is_number(g.score): - gffdict[gkey]["score"].append(float(g.score)) - g.score = "." - - g.attributes["Parent"] = [gid] - g.attributes["ID"] = ["{0}-{1}".format(gid, len(gffdict[gkey]["children"]) + 1)] - g.type = valid_gff_parent_child[g.type] - g.update_attributes() - gffdict[gkey]["children"].append(g) - if break_chain: - prev_gid = curr_gid - - for gkey, v in sorted(gffdict.items()): - gseqid, key = gkey - seqid = v["seqid"] - source = v["source"] - type = v["type"] - strand = v["strand"] - start, stop = range_minmax(gffdict[gkey]["coords"]) - - score = "." - if score_merge_op: - v["score"].sort() - if score_merge_op == "sum": - score = sum(v["score"]) - elif score_merge_op == "min": - score = min(v["score"]) - elif score_merge_op == "max": - score = max(v["score"]) - elif score_merge_op == "mean": - score = sum(v["score"], 0.0) / len(v["score"]) - elif score_merge_op == "collapse": - score = ",".join((str(x) for x in v["score"])) - - g = GffLine( - "\t".join( - str(x) - for x in [seqid, source, type, start, stop, score, strand, ".", None] - ) - ) - g.attributes = v["attrs"] - g.update_attributes() - - print(g, file=fw) - - for child in gffdict[gkey]["children"]: - print(child, file=fw) - - fw.close() - - -def format(args): - """ - %prog format gffile > formatted.gff - - Read in the gff and print it out, changing seqid, etc. 
- """ - from jcvi.formats.obo import GODag_from_SO, validate_term - - valid_multiparent_ops = ["split", "merge"] - - p = OptionParser(format.__doc__) - - g1 = p.add_argument_group("Parameter(s) used to modify GFF attributes (9th column)") - g1.add_argument("--name", help="Add Name attribute from two-column file") - g1.add_argument("--note", help="Add Note attribute from two-column file") - g1.add_argument( - "--add_attribute", - dest="attrib_files", - help="Add new attribute(s) " - + "from two-column file(s); attribute name comes from filename; " - + "accepts comma-separated list of files", - ) - g1.add_argument( - "--add_dbxref", - dest="dbxref_files", - help="Add new Dbxref value(s) (DBTAG:ID) " - + "from two-column file(s). DBTAG comes from filename, ID comes from 2nd column; " - + "accepts comma-separated list of files", - ) - g1.add_argument( - "--nostrict", - default=False, - action="store_true", - help="Disable strict parsing of GFF file and/or mapping file", - ) - g1.add_argument( - "--remove_attr", - dest="remove_attrs", - help="Specify attributes to remove; " - + "accepts comma-separated list of attribute names", - ) - g1.add_argument( - "--copy_id_attr_to_name", - default=False, - action="store_true", - help="Copy `ID` attribute value to `Name`, when `Name` is not defined", - ) - g1.add_argument( - "--invent_name_attr", - default=False, - action="store_true", - help="Invent `Name` attribute for 2nd level child features; " - + "Formatted like PARENT:FEAT_TYPE:FEAT_INDEX", - ) - g1.add_argument( - "--no_keep_attr_order", - default=False, - action="store_true", - help="Do not maintain attribute order", - ) - - g2 = p.add_argument_group("Parameter(s) used to modify content within columns 1-8") - g2.add_argument( - "--seqid", - help="Switch seqid from two-column file. If not" - + " a file, value will globally replace GFF seqid", - ) - g2.add_argument( - "--source", - help="Switch GFF source from two-column file. 
If not" - + " a file, value will globally replace GFF source", - ) - g2.add_argument( - "--type", - help="Switch GFF feature type from two-column file. If not" - + " a file, value will globally replace GFF type", - ) - g2.add_argument( - "--fixphase", - default=False, - action="store_true", - help="Change phase 1<->2, 2<->1", - ) - - g3 = p.add_argument_group( - "Other parameter(s) to perform manipulations to the GFF file content" - ) - g3.add_argument( - "--unique", default=False, action="store_true", help="Make IDs unique" - ) - g3.add_argument( - "--chaindup", - default=None, - dest="duptype", - help="Chain duplicate features of a particular GFF3 `type`," - + " sharing the same ID attribute", - ) - g3.add_argument( - "--multiparents", - default=None, - choices=valid_multiparent_ops, - help="Split/merge identical features (same `seqid`, `source`, `type`, `coord-range`, `strand`, `phase`) mapping to multiple parents", - ) - g3.add_argument( - "--remove_feats", help="Comma separated list of features to remove by type" - ) - g3.add_argument( - "--remove_feats_by_ID", - help="List of features to remove by ID;" - + " accepts comma-separated list or list file", - ) - g3.add_argument( - "--gsac", - default=False, - action="store_true", - help="Fix GSAC GFF3 file attributes", - ) - g3.add_argument( - "--invent_protein_feat", - default=False, - action="store_true", - help="Invent a protein feature span (chain CDS feats)", - ) - g3.add_argument( - "--process_ftype", - default=None, - type=str, - help="Specify feature types to process; " - "accepts comma-separated list of feature types", - ) - g3.add_argument( - "--gff3", default=False, action="store_true", help="Print output in GFF3 format" - ) - g3.add_argument( - "--make_gff_store", - default=False, - action="store_true", - help="Store entire GFF file in memory during first iteration", - ) - - p.set_outfile() - p.set_SO_opts() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - 
(gffile,) = args - mapfile = opts.seqid - names = opts.name - note = opts.note - source = opts.source - ftype = opts.type - attrib_files = opts.attrib_files.split(",") if opts.attrib_files else None - dbxref_files = opts.dbxref_files.split(",") if opts.dbxref_files else None - remove_attrs = opts.remove_attrs.split(",") if opts.remove_attrs else None - process_ftype = opts.process_ftype.split(",") if opts.process_ftype else None - gsac = opts.gsac - assert not ( - opts.unique and opts.duptype - ), "Cannot use `--unique` and `--chaindup` together" - assert not ( - opts.type and opts.duptype - ), "Cannot use `--type` and `--chaindup` together" - unique = opts.unique - duptype = opts.duptype - fixphase = opts.fixphase - phaseT = {"1": "2", "2": "1"} - remove_feats = opts.remove_feats.split(",") if opts.remove_feats else None - remove_feats_by_ID = None - if opts.remove_feats_by_ID: - remove_feats_by_ID = ( - LineFile(opts.remove_feats_by_ID, load=True).lines - if op.isfile(opts.remove_feats_by_ID) - else opts.remove_feats_by_ID.split(",") - ) - strict = False if opts.nostrict else True - make_gff_store = True if gffile in ("-", "stdin") else opts.make_gff_store - assert not ( - opts.copy_id_attr_to_name and opts.invent_name_attr - ), "Cannot use `--copy_id_attr_to_name` and `--invent_name_attr` together" - copy_id_attr_to_name = opts.copy_id_attr_to_name - invent_name_attr = opts.invent_name_attr - invent_protein_feat = opts.invent_protein_feat - compute_signature = False - - outfile = opts.outfile - - mapping = None - mod_attrs = set() - if mapfile and op.isfile(mapfile): - mapping = DictFile(mapfile, delimiter="\t", strict=strict) - mod_attrs.add("ID") - if note: - note = DictFile(note, delimiter="\t", strict=strict) - mod_attrs.add("Note") - if source and op.isfile(source): - source = DictFile(source, delimiter="\t", strict=strict) - if ftype and op.isfile(ftype): - ftype = DictFile(ftype, delimiter="\t", strict=strict) - if names: - names = DictFile(names, 
delimiter="\t", strict=strict) - mod_attrs.add("Name") - - if attrib_files: - attr_values = {} - for fn in attrib_files: - attr_name = op.basename(fn).rsplit(".", 1)[0] - if attr_name not in reserved_gff_attributes: - attr_name = attr_name.lower() - attr_values[attr_name] = DictFile(fn, delimiter="\t", strict=strict) - mod_attrs.add(attr_name) - if dbxref_files: - dbxref_values = {} - for fn in dbxref_files: - dbtag = op.basename(fn).rsplit(".", 1)[0] - dbxref_values[dbtag] = DictFile(fn, delimiter="\t", strict=strict) - mod_attrs.add("Dbxref") - - if remove_attrs: - mod_remove_attrs = [] - for remove_attr in remove_attrs: - if remove_attr in mod_attrs: - mod_remove_attrs.append(remove_attr) - - if mod_remove_attrs: - logger.error( - "Attributes `%s` cannot be removed and modified", - ",".join(mod_remove_attrs), - ) - sys.exit() - - if gsac: # setting gsac will force IDs to be unique - unique = True - notes = {} - - remove = set() - if ( - unique - or duptype - or remove_feats - or remove_feats_by_ID - or opts.multiparents == "merge" - or invent_name_attr - or make_gff_store - or invent_protein_feat - ): - if unique: - dupcounts = defaultdict(int) - seen = defaultdict(int) - newparentid = {} - elif duptype: - dupranges = AutoVivification() - skip = defaultdict(int) - if opts.multiparents == "merge": - merge_feats = AutoVivification() - if invent_name_attr: - ft = GffFeatureTracker() - elif copy_id_attr_to_name: - pass - if invent_protein_feat: - cds_track = {} - if opts.multiparents == "merge" or invent_name_attr: - make_gff_store = compute_signature = True - gff = Gff( - gffile, - keep_attr_order=(not opts.no_keep_attr_order), - make_gff_store=make_gff_store, - compute_signature=compute_signature, - strict=strict, - ) - for g in gff: - if process_ftype and g.type not in process_ftype: - continue - id = g.accn - if remove_feats and g.type in remove_feats: - remove.add(id) - if remove_feats_by_ID and id in remove_feats_by_ID: - remove.add(id) - if unique: - 
dupcounts[id] += 1 - elif duptype and g.type == duptype: - dupranges[g.seqid][id][g.idx] = (g.start, g.end) - if opts.multiparents == "merge" and g.type != "CDS": # don't merge CDS - pp = g.get_attr("Parent", first=False) - if pp and len(pp) > 0: - for parent in pp: - if parent not in remove: - sig = g.sign - if sig not in merge_feats: - merge_feats[sig]["parents"] = [] - if parent not in merge_feats[sig]["parents"]: - merge_feats[sig]["parents"].append(parent) - if invent_name_attr: - parent, iso = atg_name(g.get_attr("Parent"), retval="locus,iso") - if not parent: - parent = g.get_attr("Parent") - ft.track(parent, g) - if invent_protein_feat: - if g.type == "CDS": - cds_parent = g.get_attr("Parent") - if cds_parent not in cds_track: - cds_track[cds_parent] = [] - cds_track[cds_parent].append((g.start, g.end)) - - if opts.verifySO: - so, _ = GODag_from_SO() - valid_soterm = {} - - fw = must_open(outfile, "w") - if not make_gff_store: - gff = Gff(gffile, keep_attr_order=(not opts.no_keep_attr_order), strict=strict) - for g in gff: - if process_ftype and g.type not in process_ftype: - print(g, file=fw) - continue - - id = g.accn - - if opts.multiparents == "merge" and g.type != "CDS": # don't merge CDS - sig = g.sign - if len(merge_feats[sig]["parents"]) > 1: - if "candidate" not in merge_feats[sig]: - merge_feats[sig]["candidate"] = id - g.set_attr("Parent", merge_feats[sig]["parents"]) - else: - continue - - if remove_feats or remove_feats_by_ID: - if id in remove: - continue - else: - if "Parent" in g.attributes: - keep, parent = [], g.get_attr("Parent", first=False) - for i, pid in enumerate(parent): - if pid not in remove: - keep.append(parent[i]) - else: - remove.add(id) - if len(keep) == 0: - continue - parent = g.set_attr("Parent", keep) - - if remove_attrs: - for remove_attr in remove_attrs: - if remove_attr in g.attributes: - g.set_attr(remove_attr, None) - - if opts.verifySO: - if g.type not in valid_soterm: - valid_soterm[g.type] = validate_term( - 
g.type, so=so, method=opts.verifySO - ) - ntype = valid_soterm[g.type] - if ntype and g.type != ntype: - g.type = ntype - - origid = g.seqid - if fixphase: - phase = g.phase - g.phase = phaseT.get(phase, phase) - - if mapfile: - if isinstance(mapping, dict): - if origid in mapping: - g.seqid = mapping[origid] - else: - logger.error("%s not found in `%s`. ID unchanged.", origid, mapfile) - else: - g.seqid = mapfile - - if source: - if isinstance(source, dict) and g.source in source: - g.source = source[g.source] - else: - g.source = source - - if names: - if id in names: - g.set_attr("Name", names[id]) - - if note: - name = g.get_attr("Name") - tag = None - if id in note: - tag = note[id] - elif name and name in note: - tag = note[name] - - if tag: - g.set_attr("Note", tag, update=False) - - if attrib_files: - for attr_name in attr_values: - name = g.get_attr("Name") - if id in attr_values[attr_name]: - g.set_attr(attr_name, attr_values[attr_name][id]) - elif name and name in attr_values[attr_name]: - g.set_attr(attr_name, attr_values[attr_name][name]) - - if dbxref_files: - for dbtag in dbxref_values: - if id in dbxref_values[dbtag]: - g.set_attr( - "Dbxref", dbxref_values[dbtag][id], dbtag=dbtag, append=True - ) - - if unique: - if dupcounts[id] > 1: - seen[id] += 1 - old_id = id - id = "{0}-{1}".format(old_id, seen[old_id]) - newparentid[old_id] = id - g.set_attr("ID", id) - - if "Parent" in g.attributes: - parent = g.attributes["Parent"][0] - if dupcounts[parent] > 1: - g.set_attr("Parent", newparentid[parent]) - - if duptype: - if duptype == g.type and len(dupranges[g.seqid][id]) > 1: - p = sorted(dupranges[g.seqid][id]) - s, e = dupranges[g.seqid][id][p[0]][ - 0:2 - ] # get coords of first encountered feature - if g.start == s and g.end == e and p[0] == g.idx: - r = [dupranges[g.seqid][id][x] for x in dupranges[g.seqid][id]] - g.start, g.end = range_minmax(r) - else: - skip[(g.seqid, g.idx, id, g.start, g.end)] = 1 - - if gsac and g.type == "gene": - notes[id] 
= g.attributes["Name"] - - if ftype: - if isinstance(ftype, dict) and g.type in ftype: - g.type = ftype[g.type] - else: - g.type = ftype - - if invent_name_attr: - ft.store_symbol(g) - if re.search(ft.ftype, g.type): - parent, iso = atg_name(g.get_attr("Parent"), retval="locus,iso") - if not parent: - parent = g.get_attr("Parent") - if parent in ft.tracker: - fidx = ft.feat_index( - parent, g.type, g.strand, (g.start, g.end, g.sign) - ) - symbol = ft.get_symbol(parent) - attr = "ID" if symbol == parent else "Name" - g.set_attr(attr, "{0}:{1}:{2}".format(symbol, g.type, fidx + 1)) - if opts.multiparents == "merge" and attr == "Name": - g.set_attr("ID", "{0}:{1}:{2}".format(parent, g.type, fidx + 1)) - elif copy_id_attr_to_name: - if "Name" not in g.attributes.keys(): - g.set_attr("Name", g.get_attr("ID")) - - protein_feat = None - if invent_protein_feat: - if g.type == "mRNA": - if id in cds_track: - pstart, pstop = range_minmax(cds_track[id]) - protein_feat = GffLine( - "\t".join( - str(x) - for x in [ - g.seqid, - g.source, - "protein", - pstart, - pstop, - ".", - g.strand, - ".", - "ID={0}-Protein;Name={0};Derives_from={0}".format(id), - ] - ) - ) - elif g.type == "CDS": - parent = g.get_attr("Parent") - if parent in cds_track: - _parent = [parent, "{0}-Protein".format(parent)] - g.set_attr("Parent", _parent) - - pp = g.get_attr("Parent", first=False) - if ( - opts.multiparents == "split" and (pp and len(pp) > 1) and g.type != "CDS" - ): # separate features with multiple parents - id = g.get_attr("ID") - for i, parent in enumerate(pp): - if id: - g.set_attr("ID", "{0}-{1}".format(id, i + 1)) - g.set_attr("Parent", parent, update=True, urlquote=True) - if gsac: - fix_gsac(g, notes) - print(g, file=fw) - else: - if g.gff3 and not opts.gff3: - opts.gff3 = True - g.update_attributes(gff3=opts.gff3) - if gsac: - fix_gsac(g, notes) - if duptype == g.type and skip[(g.seqid, g.idx, id, g.start, g.end)] == 1: - continue - print(g, file=fw) - if g.type == "mRNA" and 
invent_protein_feat: - print(protein_feat, file=fw) - - fw.close() - - -def fixboundaries(args): - """ - %prog fixboundaries gffile --type="gene" --child_ftype="mRNA" > gffile.fixed - - Adjust the boundary coordinates of parents features based on - range chained child features, extracting their min and max values - """ - p = OptionParser(fixboundaries.__doc__) - p.add_argument( - "--type", - default="gene", - type=str, - help="Feature type for which to adjust boundaries", - ) - p.add_argument( - "--child_ftype", - default="mRNA", - type=str, - help="Child featuretype(s) to use for identifying boundaries", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - gffdb = make_index(gffile) - - fw = must_open(opts.outfile, "w") - for f in gffdb.all_features(order_by=("seqid", "start")): - if f.featuretype == opts.type: - child_coords = [] - for cftype in opts.child_ftype.split(","): - for c in gffdb.children(f, featuretype=cftype, order_by="start"): - child_coords.append((c.start, c.stop)) - f.start, f.stop = range_minmax(child_coords) - - print(f, file=fw) - - fw.close() - - -def liftover(args): - """ - %prog liftover gffile > liftover.gff - - Adjust gff coordinates based on tile number. For example, - "gannotation.asmbl.000095.7" is the 8-th tile on asmbl.000095. - """ - p = OptionParser(liftover.__doc__) - p.add_argument("--tilesize", default=50000, type=int, help="The size for each tile") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - gff = Gff(gffile) - for g in gff: - seqid = g.seqid - seqid, tilenum = seqid.rsplit(".", 1) - tilenum = int(tilenum) - g.seqid = seqid - offset = tilenum * opts.tilesize - g.start += offset - g.end += offset - print(g) - - -def get_piles(allgenes): - """ - Before running uniq, we need to compute all the piles. The piles are a set - of redundant features we want to get rid of. 
Input are a list of GffLines - features. Output are list of list of features distinct "piles". - """ - from jcvi.utils.range import Range, range_piles - - ranges = [Range(a.seqid, a.start, a.end, 0, i) for i, a in enumerate(allgenes)] - - for pile in range_piles(ranges): - yield [allgenes[x] for x in pile] - - -def match_span(f1, f2): - return (f1.start == f2.start) and (f1.stop == f2.stop) - - -def match_ftype(f1, f2): - return f1.featuretype == f2.featuretype - - -def match_nchildren(f1c, f2c): - return len(f1c) == len(f2c) - - -def match_child_ftype(f1c, f2c): - from collections import Counter - - return len( - set(Counter(i.featuretype for i in f1c).keys()) - ^ set(Counter(i.featuretype for i in f2c).keys()) - ) - - -def match_Nth_child(f1c, f2c, N=1, slop=False): - i = N - 1 - f1, f2 = f1c[i], f2c[i] - - if slop: - if 1 == len(f1c): - if f1.featuretype.endswith("UTR"): - if f1.strand == "+": - Npos = "F" if f1.featuretype.startswith("five_prime") else "L" - elif f1.strand == "-": - Npos = "L" if f1.featuretype.startswith("five_prime") else "F" - elif f1.featuretype == "exon": - return not match_span(f1, f2) - elif N == 1: - Npos = "F" - elif N == len(f1c): - Npos = "L" - - if Npos == "F": - return f1.stop == f2.stop - elif Npos == "L": - return f1.start == f2.start - - return match_span(f1, f2) - - -def match_subfeats(f1, f2, dbx1, dbx2, featuretype=None, slop=False): - """ - Given 2 gffutils features located in 2 separate gffutils databases, - iterate through all subfeatures of a certain type and check whether - they are identical or not - - The `slop` parameter allows for variation in the terminal UTR region - """ - f1c, f2c = ( - list(dbx1.children(f1, featuretype=featuretype, order_by="start")), - list(dbx2.children(f2, featuretype=featuretype, order_by="start")), - ) - - lf1c, lf2c = len(f1c), len(f2c) - if match_nchildren(f1c, f2c): - if lf1c > 0 and lf2c > 0: - exclN = set() - if featuretype.endswith("UTR") or featuretype == "exon": - N = [] - if 
featuretype.startswith("five_prime"): - N = [1] if f1.strand == "+" else [lf1c] - elif featuretype.startswith("three_prime"): - N = [lf1c] if f1.strand == "+" else [1] - else: # infer UTR from exon collection - N = [1] if 1 == lf1c else [1, lf1c] - - for n in N: - if match_Nth_child(f1c, f2c, N=n, slop=slop): - exclN.add(n - 1) - else: - return False - - for i, (cf1, cf2) in enumerate(zip(f1c, f2c)): - if i in exclN: - continue - if not match_span(cf1, cf2): - return False - else: - if (lf1c, lf2c) in [(0, 1), (1, 0)] and slop and featuretype.endswith("UTR"): - return True - - return False - - return True - - -def import_feats(gffile, type="gene"): - gff = Gff(gffile) - allgenes = [] - for g in gff: - if g.type != type: - continue - allgenes.append(g) - - logger.debug("A total of %d %s features imported.", len(allgenes), type) - allgenes.sort(key=lambda x: (x.seqid, x.start)) - return allgenes - - -def uniq(args): - """ - %prog uniq gffile > uniq.gff - - Remove redundant gene models. For overlapping gene models, take the longest - gene. A second scan takes only the genes selected. - - --mode controls whether you want larger feature, or higher scoring feature. - --best controls how many redundant features to keep, e.g. 10 for est2genome. 
- """ - supported_modes = ("span", "score") - p = OptionParser(uniq.__doc__) - p.add_argument("--type", default="gene", help="Types of features to non-redundify") - p.add_argument("--mode", default="span", choices=supported_modes, help="Pile mode") - p.add_argument("--best", default=1, type=int, help="Use best N features") - p.add_argument( - "--name", - default=False, - action="store_true", - help="Non-redundify Name attribute", - ) - p.add_argument( - "--iter", - default="2", - choices=("1", "2"), - help="Number of iterations to grab children", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - mode = opts.mode - bestn = opts.best - - allgenes = import_feats(gffile, opts.type) - g = get_piles(allgenes) - - bestids = set() - for group in g: - if mode == "span": - sort_key = lambda x: -x.span - else: - sort_key = lambda x: -float(x.score) - - group.sort(key=sort_key) - seen = set() - for x in group: - if len(seen) >= bestn: - break - - name = x.attributes["Name"][0] if opts.name else x.accn - if name in seen: - continue - - seen.add(name) - bestids.add(x.accn) - - populate_children(opts.outfile, bestids, gffile, iter=opts.iter) - - -def populate_children(outfile, ids, gffile, iter="2", types=None): - ids = set(ids) - fw = must_open(outfile, "w") - logger.debug("A total of %d features selected.", len(ids)) - logger.debug("Populate children. Iteration 1..") - gff = Gff(gffile) - children = set() - for g in gff: - if types and g.type in types: - ids.add(g.accn) - if "Parent" not in g.attributes: - continue - for parent in g.attributes["Parent"]: - if parent in ids: - children.add(g.accn) - - if iter == "2": - logger.debug("Populate grand children. 
Iteration 2..") - gff = Gff(gffile) - for g in gff: - if "Parent" not in g.attributes: - continue - for parent in g.attributes["Parent"]: - if parent in children: - children.add(g.accn) - - logger.debug("Populate parents..") - gff = Gff(gffile) - parents = set() - for g in gff: - if g.accn not in ids: - continue - if "Parent" not in g.attributes: - continue - for parent in g.attributes["Parent"]: - parents.add(parent) - - combined = ids | children | parents - logger.debug("Original: %d", len(ids)) - logger.debug("Children: %d", len(children)) - logger.debug("Parents: %d", len(parents)) - logger.debug("Combined: %d", len(combined)) - - logger.debug("Filter gff file..") - gff = Gff(gffile) - seen = set() - for g in gff: - accn = g.accn - if accn in seen: - continue - if accn in combined: - seen.add(accn) - print(g, file=fw) - fw.close() - - -def sort(args): - """ - %prog sort gffile - - Sort gff file using plain old unix sort based on [chromosome, start coordinate]. - or topologically based on hierarchy of features using the gt (genometools) toolkit - """ - valid_sort_methods = ("unix", "topo") - - p = OptionParser(sort.__doc__) - p.add_argument( - "--method", - default="unix", - choices=valid_sort_methods, - help="Specify sort method", - ) - p.add_argument( - "-i", - dest="inplace", - default=False, - action="store_true", - help="If doing a unix sort, perform sort inplace", - ) - p.set_tmpdir() - p.set_outfile() - p.set_home("gt") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - sortedgff = opts.outfile - if opts.inplace: - if opts.method == "topo" or ( - opts.method == "unix" and gffile in ("-", "stdin") - ): - logger.error( - "Cannot perform inplace sort when method is `topo`" - + " or method is `unix` and input is `stdin` stream" - ) - sys.exit() - - if opts.method == "unix": - cmd = "sort" - cmd += " -k1,1 -k4,4n {0}".format(gffile) - if opts.tmpdir: - cmd += " -T {0}".format(opts.tmpdir) - if 
opts.inplace: - cmd += " -o {0}".gffile - sortedgff = None - sh(cmd, outfile=sortedgff) - elif opts.method == "topo": - GT_HOME = opts.gt_home - if not op.isdir(GT_HOME): - logger.error("GT_HOME=%s directory does not exist", GT_HOME) - sys.exit() - cmd = "{0}".format(op.join(GT_HOME, "bin", "gt")) - cmd += " gff3 -sort -tidy -retainids -addids no {0}".format(gffile) - sh(cmd, outfile=sortedgff) - - -def fromgtf(args): - """ - %prog fromgtf gtffile - - Convert gtf to gff file. In gtf, the "transcript_id" will convert to "ID=", - the "transcript_id" in exon/CDS feature will be converted to "Parent=". - """ - p = OptionParser(fromgtf.__doc__) - p.add_argument( - "--transcript_id", default="transcript_id", help="Field name for transcript" - ) - p.add_argument("--gene_id", default="gene_id", help="Field name for gene") - p.add_argument( - "--augustus", default=False, action="store_true", help="Input is AUGUSTUS gtf" - ) - p.set_home("augustus") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gtffile,) = args - outfile = opts.outfile - if opts.augustus: - ahome = opts.augustus_home - s = op.join(ahome, "scripts/gtf2gff.pl") - cmd = "{0} --gff3 < {1} --out={2}".format(s, gtffile, outfile) - sh(cmd) - return - - gff = Gff(gtffile) - fw = must_open(outfile, "w") - transcript_id = opts.transcript_id - gene_id = opts.gene_id - nfeats = 0 - for g in gff: - if g.type in ("transcript", "mRNA"): - g.type = "mRNA" - g.update_tag(transcript_id, "ID") - g.update_tag("mRNA", "ID") - g.update_tag(gene_id, "Parent") - g.update_tag("Gene", "Parent") - elif g.type in ("exon", "CDS") or "UTR" in g.type: - g.update_tag("transcript_id", "Parent") - g.update_tag(g.type, "Parent") - elif g.type == "gene": - g.update_tag(gene_id, "ID") - g.update_tag("Gene", "ID") - else: - assert 0, "Don't know how to deal with {0}".format(g.type) - - g.update_attributes() - print(g, file=fw) - nfeats += 1 - - logger.debug("A total of %d features 
written.", nfeats) - - -def frombed(args): - """ - %prog frombed bed_file [--options] > gff_file - - Convert bed to gff file. In bed, the accn will convert to key='ID' - Default type will be `match` and default source will be `source` - """ - p = OptionParser(frombed.__doc__) - p.add_argument("--type", default="match", help="GFF feature type") - p.add_argument("--source", default="default", help="GFF source qualifier") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - bed = Bed(bedfile) - - for b in bed: - print(b.gffline(type=opts.type, source=opts.source)) - - -def fromsoap(args): - """ - %prog fromsoap soapfile > gff_file - - """ - p = OptionParser(fromsoap.__doc__) - p.add_argument("--type", default="nucleotide_match", help="GFF feature type") - p.add_argument("--source", default="soap", help="GFF source qualifier") - p.set_fixchrnames(orgn="maize") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (soapfile,) = args - pad0 = len(str(sum(1 for line in open(soapfile)))) - - fw = must_open(opts.outfile, "w") - fp = must_open(soapfile) - for idx, line in enumerate(fp): - if opts.fix_chr_name: - from jcvi.utils.cbook import fixChromName - - line = fixChromName(line, orgn=opts.fix_chr_name) - - atoms = line.strip().split("\t") - attributes = "ID=match{0};Name={1}".format(str(idx).zfill(pad0), atoms[0]) - start, end = int(atoms[8]), int(atoms[5]) + int(atoms[8]) - 1 - seqid = atoms[7] - - print( - "\t".join( - str(x) - for x in ( - seqid, - opts.source, - opts.type, - start, - end, - ".", - atoms[6], - ".", - attributes, - ) - ), - file=fw, - ) - - -def gtf(args): - """ - %prog gtf gffile - - Convert gff to gtf file. In gtf, only exon/CDS features are important. The - first 8 columns are the same as gff, but in the attributes field, we need to - specify "gene_id" and "transcript_id". 
- """ - p = OptionParser(gtf.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - gff = Gff(gffile) - transcript_info = AutoVivification() - for g in gff: - if g.type.endswith(("RNA", "transcript")): - if "ID" in g.attributes and "Parent" in g.attributes: - transcript_id = g.get_attr("ID") - gene_id = g.get_attr("Parent") - elif "mRNA" in g.attributes and "Gene" in g.attributes: - transcript_id = g.get_attr("mRNA") - gene_id = g.get_attr("Gene") - else: - transcript_id = g.get_attr("ID") - gene_id = transcript_id - transcript_info[transcript_id]["gene_id"] = gene_id - transcript_info[transcript_id]["gene_type"] = g.type - continue - - if g.type not in valid_gff_to_gtf_type.keys(): - continue - - try: - transcript_id = g.get_attr("Parent", first=False) - except IndexError: - transcript_id = g.get_attr("mRNA", first=False) - - g.type = valid_gff_to_gtf_type[g.type] - for tid in transcript_id: - if tid not in transcript_info: - continue - gene_type = transcript_info[tid]["gene_type"] - if not gene_type.endswith("RNA") and not gene_type.endswith("transcript"): - continue - gene_id = transcript_info[tid]["gene_id"] - g.attributes = OrderedDict( - [("gene_id", [gene_id]), ("transcript_id", [tid])] - ) - g.update_attributes(gtf=True, urlquote=False) - - print(g) - - -def merge(args): - """ - %prog merge gffiles - - Merge several gff files into one. When only one file is given, it is assumed - to be a file with a list of gff files. 
- """ - p = OptionParser(merge.__doc__) - p.add_argument( - "--seq", - default=False, - action="store_true", - help="Print FASTA sequences at the end", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - nargs = len(args) - if nargs < 1: - sys.exit(not p.print_help()) - - if nargs == 1: - (listfile,) = args - fp = open(listfile) - gffiles = [x.strip() for x in fp] - else: - gffiles = args - - outfile = opts.outfile - - deflines = set() - fw = must_open(outfile, "w") - fastarecs = {} - for gffile in natsorted(gffiles, key=lambda x: op.basename(x)): - logger.debug(gffile) - fp = open(gffile) - for row in fp: - row = row.rstrip() - if not row or row[0] == "#": - if row == FastaTag: - break - if row in deflines: - continue - else: - deflines.add(row) - - print(row, file=fw) - - if not opts.seq: - continue - - f = Fasta(gffile, lazy=True) - for key, rec in f.iteritems_ordered(): - if key in fastarecs: - continue - fastarecs[key] = rec - - if opts.seq: - print(FastaTag, file=fw) - SeqIO.write(fastarecs.values(), fw, "fasta") - - fw.close() - - -def extract(args): - """ - %prog extract gffile - - --contigs: Extract particular contig(s) from the gff file. If multiple contigs are - involved, use "," to separate, e.g. "contig_12,contig_150"; or provide a file - with multiple contig IDs, one per line - --names: Process particular ID(s) from the gff file. 
If multiple IDs are - involved, use "," to separate; or provide a file with multiple IDs, one per line - """ - p = OptionParser(extract.__doc__) - p.add_argument("--contigs", help="Extract features from certain contigs") - p.add_argument("--names", help="Extract features with certain names") - p.add_argument( - "--types", - type=str, - default=None, - help="Extract features of certain feature types", - ) - p.add_argument( - "--children", - default=0, - choices=["1", "2"], - help="Specify number of iterations: `1` grabs children, " - + "`2` grabs grand-children", - ) - p.add_argument("--tag", default="ID", help="Scan the tags for the names") - p.add_argument( - "--fasta", default=False, action="store_true", help="Write FASTA if available" - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - contigfile = opts.contigs - namesfile = opts.names - typesfile = opts.types - nametag = opts.tag - - contigID = parse_multi_values(contigfile) - names = parse_multi_values(namesfile) - types = parse_multi_values(typesfile) - outfile = opts.outfile - - if opts.children: - assert types is not None or names is not None, "Must set --names or --types" - if names is None: - names = list() - populate_children(outfile, names, gffile, iter=opts.children, types=types) - return - - fp = must_open(gffile) - fw = must_open(opts.outfile, "w") - for row in fp: - atoms = row.split() - if len(atoms) == 0: - continue - tag = atoms[0] - if row[0] == "#": - if row.strip() == "###": - continue - if not (tag == RegionTag and contigID and atoms[1] not in contigID): - print(row.rstrip(), file=fw) - if tag == FastaTag: - break - continue - - b = GffLine(row) - attrib = b.attributes - if contigID and tag not in contigID: - continue - if types and b.type in types: - _id = b.accn - if _id not in names: - names.append(_id) - if names is not None: - if nametag not in attrib: - continue - if attrib[nametag][0] not in names: - 
continue - - print(row.rstrip(), file=fw) - - if not opts.fasta: - return - - f = Fasta(gffile) - for s in contigID: - if s in f: - SeqIO.write([f[s]], fw, "fasta") - - -def split(args): - """ - %prog split gffile outdir - - Split the gff into one contig per file. Will also take sequences if the file - contains FASTA sequences. - """ - p = OptionParser(split.__doc__) - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - gffile, outdir = args - mkdir(outdir) - - g = Gff(gffile) - seqids = g.seqids - - for s in seqids: - outfile = op.join(outdir, s + ".gff") - extract([gffile, "--contigs=" + s, "--outfile=" + outfile]) - - -def note(args): - """ - %prog note gffile > tabfile - - Extract certain attribute field for each feature. - """ - p = OptionParser(note.__doc__) - p.add_argument( - "--type", - default=None, - help="Only process certain types, multiple types allowed with comma", - ) - p.add_argument( - "--attribute", - default="Parent,Note", - help="Attribute field to extract, multiple fields allowd with comma", - ) - p.add_argument("--AED", type=float, help="Only extract lines with AED score <=") - p.add_argument( - "--exoncount", - default=False, - action="store_true", - help="Get the exon count for each mRNA feat", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - type = opts.type - if type: - type = type.split(",") - - exoncounts = {} - if opts.exoncount: - g = make_index(gffile) - for feat in g.features_of_type("mRNA"): - nexons = 0 - for c in g.children(feat.id, 1): - if c.featuretype != "exon": - continue - nexons += 1 - exoncounts[feat.id] = nexons - - attrib = opts.attribute.split(",") - - gff = Gff(gffile) - seen = set() - AED = opts.AED - for g in gff: - if type and g.type not in type: - continue - if AED is not None and float(g.attributes["_AED"][0]) > AED: - continue - keyval = [g.accn] + [ - ",".join(g.attributes.get(x, ["nan"])) for x in attrib - 
] - if exoncounts: - nexons = exoncounts.get(g.accn, 0) - keyval.append(str(nexons)) - keyval = tuple(keyval) - if keyval not in seen: - print("\t".join(keyval)) - seen.add(keyval) - - -def splicecov(args): - """ - %prog splicecov annotation.gff3 junctions.bed - - Given an annotation GFF file (containing introns) and a - TopHat junctions.bed file (preprocessed using formats.bed.juncs(), - each intron gets tagged with the JUNC identifier and read coverage. - - Output is a summary table listing for each gene locus, the isoform number, - number of splice junctions and {average, median, min & max} read coverage - across the junctions. - """ - from tempfile import mkstemp - from pybedtools import BedTool - from jcvi.utils.cbook import SummaryStats - - p = OptionParser(splicecov.__doc__) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - ( - gfffile, - juncsbed, - ) = args - tagged = "{0}.{1}.gff3".format(gfffile.rsplit(".", 1)[0], "tag_introns") - - gff3, junc = BedTool(gfffile), BedTool(juncsbed) - ab = gff3.intersect(junc, wao=True, f=1.0, s=True) - abfh = must_open(ab.fn) - - seen = set() - scov = AutoVivification() - - fh, tmpgff = mkstemp(suffix=".gff3") - fw = must_open(tmpgff, "w") - for line in abfh: - args = line.strip().split("\t") - g = GffLine("\t".join(str(x) for x in args[:9])) - if g.type == "intron" and args[10] != -1: - ispan, jspan = g.span, int(args[11]) - int(args[10]) - if ispan == jspan: - g.set_attr("ID", args[12], update=True) - g.score = int(args[13]) - - pparts = g.get_attr("Parent").split(".") - locus, iso = pparts[0], ".".join(pparts[1:]) - seen.add(iso) - if not scov[locus][iso]: - scov[locus][iso] = [] - scov[locus][iso].append(g.score) - else: - continue - print(g, file=fw) - fw.close() - - format([tmpgff, "--unique", "-o", tagged]) - os.unlink(tmpgff) - - isos = sorted(list(seen)) - fw = must_open(opts.outfile, "w") - h1, h2, stats = ["#"], ["#locus"], ["N", "mean", "median", 
"min", "max"] - for iso in isos: - h1.extend([str(iso)] + [""] * (len(stats) - 1)) - h2.extend(stats) - print("\t".join(str(x) for x in h1), file=fw) - print("\t".join(str(x) for x in h2), file=fw) - for locus in scov.keys(): - out = [locus] - for iso in isos: - if iso in scov[locus].keys(): - juncs = scov[locus][iso] - jstats = SummaryStats(juncs, dtype=int) - out.extend( - [jstats.size, jstats.mean, jstats.median, jstats.min, jstats.max] - ) - else: - out.extend(["-"] * len(stats)) - print("\t".join(str(x) for x in out), file=fw) - fw.close() - - -def bed(args): - """ - %prog bed gff_file [--options] - - Parses the start, stop locations of the selected features out of GFF and - generate a bed file - """ - p = OptionParser(bed.__doc__) - p.add_argument( - "--type", - dest="type", - default="gene", - help="Feature type to extract, use comma for multiple, and `all` for all", - ) - p.add_argument("--key", default="ID", help="Key in the attributes to extract") - p.add_argument("--accn", help="Use fixed accn in the 4th column") - p.add_argument("--source", help="Source to extract from, use comma for multiple") - p.add_argument( - "--span", - default=False, - action="store_true", - help="Use feature span in the score column", - ) - p.add_argument( - "--score_attrib", - dest="score_attrib", - default=False, - help="Attribute whose value is to be used as score in `bedline`", - ) - p.add_argument( - "--append_source", - default=False, - action="store_true", - help="Append GFF source name to extracted key value", - ) - p.add_argument( - "--append_ftype", - default=False, - action="store_true", - help="Append GFF feature type to extracted key value", - ) - p.add_argument( - "--append_attrib", - default=None, - help="Append attribute to extracted key value", - ) - p.add_argument( - "--nosort", - default=False, - action="store_true", - help="Do not sort the output bed file", - ) - p.add_argument( - "--primary_only", - default=False, - action="store_true", - help="Only retains 
a single transcript per gene", - ) - p.add_argument( - "--parent_key", - default="Parent", - help="Parent gene key to group with --primary_only", - ) - p.add_argument( - "--human_chr", - default=False, - action="store_true", - help="Only allow 1-22XY, and add `chr` prefix to seqid", - ) - p.add_argument( - "--ensembl_cds", - default=False, - action="store_true", - help="Use transcript_name.exon_number as accn", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - key = opts.key or None - accn = opts.accn - span = opts.span - primary_only = opts.primary_only - parent_key = opts.parent_key - human_chr = opts.human_chr - ensembl_cds = opts.ensembl_cds - if opts.type and opts.type != "all": - type = set(x.strip() for x in opts.type.split(",")) - else: - type = set() - if opts.source: - source = set(x.strip() for x in opts.source.split(",")) - else: - source = set() - if ensembl_cds: - type = {"CDS"} - - gff = Gff( - gffile, - key=key, - parent_key=parent_key, - append_source=opts.append_source, - append_ftype=opts.append_ftype, - append_attrib=opts.append_attrib, - score_attrib=opts.score_attrib, - ) - b = Bed() - seen_parents = set() # used with --primary_only - seen = set() # used with --ensembl_cds - skipped_identical_range = 0 - skipped_non_primary = 0 - - for g in gff: - if type and g.type not in type: - continue - if source and g.source not in source: - continue - if primary_only: - if g.parent in seen_parents: - skipped_non_primary += 1 - continue - elif g.parent: - seen_parents.add(g.parent) - - bl = g.bedline - if accn: - bl.accn = accn - if span: - bl.score = bl.span - if human_chr: - if bl.seqid not in VALID_HUMAN_CHROMOSMES: - continue - bl.seqid = "chr" + bl.seqid - if ensembl_cds: - if g.get_attr("gene_biotype") != "protein_coding": - continue - bl.accn = "{0}.{1}".format( - g.get_attr("transcript_name"), g.get_attr("exon_number") - ) - position = (bl.seqid, bl.start, bl.end) 
- if position in seen: - skipped_identical_range += 1 - continue - seen.add(position) - - b.append(bl) - - sorted = not opts.nosort - b.print_to_file(opts.outfile, sorted=sorted) - logger.debug( - "Extracted %d features (type=%s id=%s parent=%s)", - len(b), - ",".join(type), - key, - parent_key, - ) - if primary_only: - logger.debug("Skipped non-primary: %d", skipped_non_primary) - if ensembl_cds: - logger.debug("Skipped due to identical range: %d", skipped_identical_range) - - -def make_index(gff_file): - """ - Make a sqlite database for fast retrieval of features. - """ - import gffutils - - db_file = gff_file + ".db" - - if need_update(gff_file, db_file): - cleanup(db_file) - logger.debug("Indexing `%s`", gff_file) - gffutils.create_db(gff_file, db_file, merge_strategy="create_unique") - else: - logger.debug("Load index `%s`", gff_file) - - return gffutils.FeatureDB(db_file) - - -def get_parents(gff_file, parents): - gff = Gff(gff_file) - for g in gff: - if g.type not in parents: - continue - yield g - - -def children(args): - """ - %prog children gff_file - - Get the children that have the same parent. - """ - p = OptionParser(children.__doc__) - p.add_argument( - "--parents", - default="gene", - help="list of features to extract, use comma to separate (e.g. 'gene,mRNA')", - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gff_file,) = args - g = make_index(gff_file) - parents = set(opts.parents.split(",")) - - for feat in get_parents(gff_file, parents): - cc = [c.id for c in g.children(feat.id, 1)] - if len(cc) <= 1: - continue - - print("\t".join(str(x) for x in (feat.id, feat.start, feat.stop, "|".join(cc)))) - - -def load(args): - """ - %prog load gff_file fasta_file [--options] - - Parses the selected features out of GFF, with subfeatures concatenated. 
- For example, to get the CDS sequences, do this: - $ %prog load athaliana.gff athaliana.fa --parents mRNA --children CDS - - To get 500bp upstream of a genes Transcription Start Site (TSS), do this: - $ %prog load athaliana.gff athaliana.fa --feature=upstream:TSS:500 - - Switch TSS with TrSS for Translation Start Site. - - To get 500bp downstream of a gene's Transcription End Site (TES), do this: - $ %prog load athaliana.gff athaliana.fa --feature=downstream:TES:500 - - To get up- or downstream sequences of a certain max length not overlapping - with the next feature, use `--avoidFeatures`. Features may be avoided on both - strands or on the strand containing each feature, use either "both_strands" or - "strand_specific" - $ %prog load athaliana.gff athaliana.fa --feature=downstream:TES:500 --avoidFeatures=both_strands - """ - from datetime import datetime as dt - from jcvi.formats.fasta import Seq, SeqRecord - - # can request output fasta sequence id to be picked from following attributes - valid_id_attributes = ["ID", "Name", "Parent", "Alias", "Target", "orig_protein_id"] - - valid_avoid_features = ["both_strands", "strand_specific"] - - p = OptionParser(load.__doc__) - p.add_argument( - "--parents", - dest="parents", - default="mRNA", - help="list of features to extract, use comma to separate (e.g." - + "'gene,mRNA')", - ) - p.add_argument( - "--children", - dest="children", - default="CDS", - help="list of features to extract, use comma to separate (e.g." - + "'five_prime_UTR,CDS,three_prime_UTR')", - ) - p.add_argument( - "--feature", - dest="feature", - help="feature type to extract (e.g. `--feature=CDS`). Extract " - + "up- or downstream using " - + "upstream|downstream:TSS|TrSS|TES|TrES:length " - + "(e.g. 
`--feature=upstream:TSS:500`)", - ) - p.add_argument( - "--avoidFeatures", - default=None, - choices=["both_strands", "strand_specific"], - help="Specify whether or not to avoid up or downstream features", - ) - p.add_argument( - "--id_attribute", - choices=valid_id_attributes, - help="The attribute field to extract and use as FASTA sequence ID", - ) - p.add_argument( - "--desc_attribute", - default="Note", - help="The attribute field to extract and use as FASTA sequence description", - ) - p.add_argument( - "--full_header", - default=None, - choices=["default", "tair"], - help="Specify if full FASTA header (with seqid, coordinates and datestamp) should be generated", - ) - - g1 = p.add_argument_group("Optional parameters (if generating full header)") - g1.add_argument( - "--sep", - dest="sep", - default=" ", - help="Specify separator used to delimiter header elements", - ) - g1.add_argument( - "--datestamp", - dest="datestamp", - help="Specify a datestamp in the format YYYYMMDD or automatically pick `today`", - ) - g1.add_argument( - "--conf_class", - dest="conf_class", - default=False, - action="store_true", - help="Specify if `conf_class` attribute should be parsed and placed in the header", - ) - - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(p.print_help()) - - gff_file, fasta_file = args - - if opts.feature: - ( - opts.feature, - opts.parent, - opts.children, - site, - fLen, - flag, - error_msg, - ) = parse_feature_param(opts.feature) - if flag: - sys.exit(error_msg) - if opts.avoidFeatures: - if opts.avoidFeatures not in valid_avoid_features: - sys.exit("[error] avoidFeatures must be one of {valid_avoid_features}") - - parents = set(opts.parents.split(",")) - children_list = set(opts.children.split(",")) - - """ - In a situation where we want to extract sequence for only the top-level - parent feature, specify feature type of parent == child - """ - skipChildren = ( - True if 
len(parents.symmetric_difference(children_list)) == 0 else False - ) - - id_attr = opts.id_attribute - desc_attr = opts.desc_attribute - sep = opts.sep - - import gffutils - - g = make_index(gff_file) - f = Fasta(fasta_file, index=False) - seqlen = {} - for seqid, size in f.itersizes(): - seqlen[seqid] = size - - fw = must_open(opts.outfile, "w") - - for feat in get_parents(gff_file, parents): - desc = "" - if desc_attr: - fparent = ( - feat.attributes["Parent"][0] if "Parent" in feat.attributes else None - ) - if fparent: - try: - g_fparent = g[fparent] - except gffutils.exceptions.FeatureNotFoundError: - logger.error("%s not found in index .. skipped", fparent) - continue - if desc_attr in g_fparent.attributes: - desc = ",".join(g_fparent.attributes[desc_attr]) - if not desc and desc_attr in feat.attributes: - desc = ",".join(feat.attributes[desc_attr]) - - if opts.full_header: - desc_parts = [] - desc_parts.append(desc) - - if opts.conf_class and "conf_class" in feat.attributes: - desc_parts.append(feat.attributes["conf_class"][0]) - - if opts.full_header == "tair": - orient = "REVERSE" if feat.strand == "-" else "FORWARD" - feat_coords = "{0}:{1}-{2} {3} LENGTH=[LEN]".format( - feat.seqid, feat.start, feat.end, orient - ) - else: - (s, e) = ( - (feat.start, feat.end) - if (feat.strand == "+") - else (feat.end, feat.start) - ) - feat_coords = "{0}:{1}-{2}".format(feat.seqid, s, e) - desc_parts.append(feat_coords) - - datestamp = ( - opts.datestamp - if opts.datestamp - else "{0}{1}{2}".format(dt.now().year, dt.now().month, dt.now().day) - ) - desc_parts.append(datestamp) - - desc = sep.join(str(x) for x in desc_parts) - desc = "".join(str(x) for x in (sep, desc)).strip() - - if opts.feature == "upstream" or opts.feature == "downstream": - start, stop = get_coords( - opts.feature, site, fLen, seqlen[feat.seqid], feat, children_list, g - ) - - overlap = None - if opts.avoidFeatures: - stranded = opts.avoidFeatures == "strand_specific" - start, stop, overlap = 
update_coords_avoidFeatures( - stranded, opts.feature, site, fLen, start, stop, feat, g - ) - - if not start or not stop or overlap: - continue - - feat_seq = f.sequence( - dict( - chr=feat.seqid, - start=start, - stop=stop, - strand=feat.strand, - ) - ) - - (s, e) = (start, stop) if feat.strand == "+" else (stop, start) - seq_loc = str(feat.seqid) + ":" + str(s) + "-" + str(e) - desc = sep.join( - str(x) - for x in (desc, seq_loc, "FLANKLEN=" + str(abs(stop - start) + 1)) - ) - else: - children = [] - if not skipChildren: - for c in g.children(feat.id, 1): - if c.featuretype not in children_list: - continue - child = f.sequence( - dict(chr=c.chrom, start=c.start, stop=c.stop, strand=c.strand) - ) - children.append((child, c)) - - if not children: - print( - "[warning] %s has no children with type %s" - % (feat.id, ",".join(children_list)), - file=sys.stderr, - ) - continue - else: - child = f.sequence( - dict( - chr=feat.seqid, - start=feat.start, - stop=feat.end, - strand=feat.strand, - ) - ) - children.append((child, feat)) - - # sort children in incremental position - children.sort(key=lambda x: x[1].start) - # reverse children if negative strand - if feat.strand == "-": - children.reverse() - feat_seq = "".join(x[0] for x in children) - - desc = desc.replace('"', "") - - id = ( - ",".join(feat.attributes[id_attr]) - if id_attr and feat.attributes[id_attr] - else feat.id - ) - - if opts.full_header == "tair": - desc = desc.replace("[LEN]", str(len(feat_seq))) - - rec = SeqRecord(Seq(feat_seq), id=id, description=desc) - SeqIO.write([rec], fw, "fasta") - fw.flush() - - -def parse_feature_param(feature): - """ - Take the --feature param (coming from gff.load() and parse it. - Returns feature, parents and children terms. 
- - Also returns length of up or downstream sequence (and start site) requested - - If erroneous, returns a flag and error message to be displayed on exit - """ - # can request up- or downstream sequence only from the following valid sites - valid_sites = ["TSS", "TrSS", "TES", "TrES"] - - site, fLen = None, None - flag, error_msg = None, None - parents, children = None, None - if re.match(r"upstream", feature) or re.match(r"downstream", feature): - parents, children = "mRNA", "CDS" - feature, site, fLen = re.search(r"([A-z]+):([A-z]+):(\S+)", feature).groups() - - if not is_number(fLen): - flag, error_msg = ( - 1, - "Error: len `" + fLen + "` should be an integer", - ) - - fLen = int(fLen) - if fLen < 0: - flag, error_msg = ( - 1, - "Error: len `" + str(fLen) + "` should be > 0", - ) - - if site not in valid_sites: - flag, error_msg = ( - 1, - f"Error: site `{site}` not valid. Please choose from {valid_sites}", - ) - elif feature == "upstream" and site not in ["TSS", "TrSS"]: - flag, error_msg = ( - 1, - f"Error: site `{site}` not valid for upstream. Please choose from `TSS TrSS`", - ) - elif feature == "downstream" and site not in ["TES", "TrES"]: - flag, error_msg = ( - 1, - f"Error: site `{site}` not valid for downstream. 
Please use `TES`", - ) - elif feature == "CDS": - parents, children = "mRNA", "CDS" - else: - flag, error_msg = 1, "Error: unrecognized option --feature=" + feature - - return feature, parents, children, site, fLen, flag, error_msg - - -def get_coords(feature, site, fLen, seqlen, feat, children_list, gffdb): - """ - Subroutine takes feature, site, length, reference sequence length, - parent mRNA feature (GffLine object), list of child feature types - and a GFFutils.GFFDB object as the input - - If upstream of TSS is requested, use the parent feature coords - to extract the upstream sequence - - If upstream of TrSS is requested, iterates through all the - children (CDS features stored in the sqlite GFFDB) and use child - feature coords to extract the upstream sequence - - If downstream of TES is requested, use parent feature coords to - extract the downstream sequence - - If downstream of TrES is requested, iterates through all the - children (CDS features stored in the sqlite GFFDB) and use child - feature coords to extract the downstream sequence - - If success, returns the start and stop coordinates - else, returns None - """ - if site in ["TSS", "TES"]: - if feature == "upstream" and site == "TSS": - (start, stop) = ( - (feat.start - fLen, feat.start - 1) - if feat.strand == "+" - else (feat.end + 1, feat.end + fLen) - ) - if feature == "downstream" and site == "TES": - (start, stop) = ( - (feat.end + 1, feat.end + fLen) - if feat.strand == "+" - else (feat.start - fLen, feat.start - 1) - ) - elif site in ["TrSS", "TrES"]: - children = [] - for c in gffdb.children(feat.id, 1): - if c.featuretype not in children_list: - continue - children.append((c.start, c.stop)) - - if not children: - print( - "[warning] %s has no children with type %s" - % (feat.id, ",".join(children_list)), - file=sys.stderr, - ) - return None, None - - cds_start, cds_stop = range_minmax(children) - if feature == "upstream" and site == "TrSS": - (start, stop) = ( - (cds_start - fLen, 
cds_start - 1) - if feat.strand == "+" - else (cds_stop + 1, cds_stop + fLen) - ) - elif feature == "downstream" and site == "TrES": - (start, stop) = ( - (cds_stop + 1, cds_stop + fLen) - if feat.strand == "+" - else (cds_start - fLen, cds_start - 1) - ) - - if feat.strand == "+" and start < 1: - start = 1 - elif feat.strand == "-" and stop > seqlen: - stop = seqlen - - actual_len = stop - start + 1 - - if actual_len < fLen: - print( - "[warning] sequence upstream of {0} ({1} bp) is less than upstream length {2}".format( - feat.id, actual_len, fLen - ), - file=sys.stderr, - ) - return None, None - - return start, stop - - -def update_coords_avoidFeatures( - stranded, feature, site, fLen, start, stop, feat, gffdb -): - """ - Subroutine takes start and stop coordinates for a given feature and updates the - coordinates to avoid overlapping with unrelated up- or downstream features. - - This is done on a strand-dependent or -independent manner based on the value of - --avoidFeatures. - - Returns, updated start and stop coordinates for loading sequences. - - Genes with overlapping neighbor features raise a flag and the feature is skipped. 
- """ - flag = None - collisions = [] - s = feat.strand if stranded else (None) - - allChildren = [] - for c in gffdb.children(feat.parent): - allChildren.append(c.id) - - for r in gffdb.region(seqid=feat.seqid, start=start, end=stop, strand=s): - if r.id in allChildren or r.id == feat.parent: - continue - - if feature == "upstream" and feat.strand == "+": - collisions.append(r.end) - elif feature == "upstream" and feat.strand == "-": - collisions.append(r.start) - elif feature == "downstream" and feat.strand == "+": - collisions.append(r.start) - elif feature == "downstream" and feat.strand == "-": - collisions.append(r.end) - - if site in ["TrSS", "TrES"]: - children = [] - for c in gffdb.children(feat.id, 1): - if c.featuretype != "CDS": - continue - children.append((c.start, c.stop)) - - if not children: - feat_start = feat.start - feat_end = feat.end - else: - feat_start, feat_end = range_minmax(children) - else: - feat_start = feat.start - feat_end = feat.end - - # Identify up- or downstream features that overlap with the current feature. Skip these... - if len(collisions) > 0: - if feature == "upstream" and feat.strand == "+": - start = max(collisions) - if start > feat_start: - flag = 1 - elif feature == "upstream" and feat.strand == "-": - stop = min(collisions) - if stop < feat_end: - flag = 1 - elif feature == "downstream" and feat.strand == "+": - stop = min(collisions) - if stop < feat_end: - flag = 1 - elif feature == "downstream" and feat.strand == "-": - start = max(collisions) - if start > feat_start: - flag = 1 - - if flag: - print( - "Overlap detected while searching {0}. Skipping {1}:{2} strand:{3}".format( - feature, feat.parent, feat.id, feat.strand - ), - file=sys.stderr, - ) - else: - print( - "[avoidFeatures] a feature {0} of {1} is within {2} bp. 
Using {0} length of {3} bp".format( - feature, feat.id, fLen, abs(start - stop) + 1 - ), - file=sys.stderr, - ) - - return start, stop, flag - - -def bed12(args): - """ - %prog bed12 gffile > bedfile - - Produce bed12 file for coding features. The exons will be converted to blocks. - The CDS range will be shown between thickStart to thickEnd. For reference, - bed format consists of the following fields: - - 1. chrom - 2. chromStart - 3. chromEnd - 4. name - 5. score - 6. strand - 7. thickStart - 8. thickEnd - 9. itemRgb - 10. blockCount - 11. blockSizes - 12. blockStarts - """ - p = OptionParser(bed12.__doc__) - p.add_argument("--parent", default="mRNA", help="Top feature type") - p.add_argument("--block", default="exon", help="Feature type for regular blocks") - p.add_argument("--thick", default="CDS", help="Feature type for thick blocks") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gffile,) = args - parent, block, thick = opts.parent, opts.block, opts.thick - outfile = opts.outfile - - g = make_index(gffile) - fw = must_open(outfile, "w") - - for f in g.features_of_type(parent): - chrom = f.chrom - chromStart = f.start - 1 - chromEnd = f.stop - name = f.id - score = 0 - strand = f.strand - # When there is no thick part, thickStart and thickEnd are usually set - # to the chromStart position - # - thickStart = chromStart - thickEnd = chromStart - blocks = [] - - for c in g.children(name, 1): - cstart, cend = c.start - 1, c.stop - - if c.featuretype == block: - blockStart = cstart - chromStart - blockSize = cend - cstart - blocks.append((blockStart, blockSize)) - - elif c.featuretype == thick: - thickStart = min(thickStart, cstart) - thickEnd = max(thickEnd, cend) - - blocks.sort() - blockStarts, blockSizes = zip(*blocks) - blockCount = len(blocks) - blockSizes = ",".join(str(x) for x in blockSizes) + "," - blockStarts = ",".join(str(x) for x in blockStarts) + "," - itemRgb = 0 - - print( - 
"\t".join( - str(x) - for x in ( - chrom, - chromStart, - chromEnd, - name, - score, - strand, - thickStart, - thickEnd, - itemRgb, - blockCount, - blockSizes, - blockStarts, - ) - ), - file=fw, - ) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/html.py b/jcvi/formats/html.py deleted file mode 100644 index 657b7fca..00000000 --- a/jcvi/formats/html.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Parse html pages. -""" -import os.path as op -import sys - -from urllib.parse import urljoin - -from BeautifulSoup import BeautifulSoup - -from ..apps.base import ActionDispatcher, OptionParser, download, logger - - -def main(): - - actions = ( - ("table", "convert HTML tables to csv"), - ("links", "extract all links from web page"), - ("gallery", "convert a folder of figures to a HTML table"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def gallery(args): - """ - %prog gallery folder link_prefix - - Convert a folder of figures to a HTML table. For example: - - $ python -m jcvi.formats.html gallery Paper-figures/ - https://dl.dropboxusercontent.com/u/15937715/Data/Paper-figures/ - - Maps the images from local to remote. - """ - from more_itertools import grouper - from jcvi.apps.base import iglob - - p = OptionParser(gallery.__doc__) - p.add_argument("--columns", default=3, type=int, help="How many cells per row") - p.add_argument("--width", default=200, type=int, help="Image width") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - folder, link_prefix = args - width = opts.width - images = iglob(folder, "*.jpg,*.JPG,*.png") - td = '{0}
' - print("") - for ims in grouper(images, opts.columns): - print(''.format(width + 5)) - for im in ims: - if not im: - continue - im = op.basename(im) - pf = im.split(".")[0].replace("_", "-") - link = link_prefix.rstrip("/") + "/" + im - print(td.format(pf, link, width)) - print("") - print("
") - - -def links(args): - """ - %prog links url - - Extract all the links "" from web page. - """ - p = OptionParser(links.__doc__) - p.add_argument( - "--img", - default=False, - action="store_true", - help="Extract tags", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (url,) = args - img = opts.img - - htmlfile = download(url) - page = open(htmlfile).read() - soup = BeautifulSoup(page) - - tag = "img" if img else "a" - src = "src" if img else "href" - aa = soup.findAll(tag) - for a in aa: - link = a.get(src) - link = urljoin(url, link) - print(link) - - -def unescape(s, unicode_action="replace"): - """ - Unescape HTML strings, and convert & etc. - """ - from html.parser import HTMLParser - - hp = HTMLParser.HTMLParser() - s = hp.unescape(s) - s = s.encode("ascii", unicode_action) - s = s.replace("\n", "").strip() - return s - - -def table(args): - """ - %prog table page.html - - Convert HTML tables to csv. - """ - import csv - - p = OptionParser(table.__doc__) - p.set_sep(sep=",") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (htmlfile,) = args - page = open(htmlfile).read() - soup = BeautifulSoup(page) - - for i, tabl in enumerate(soup.findAll("table")): - nrows = 0 - csvfile = htmlfile.rsplit(".", 1)[0] + ".{0}.csv".format(i) - writer = csv.writer(open(csvfile, "w"), delimiter=opts.sep) - rows = tabl.findAll("tr") - for tr in rows: - cols = tr.findAll("td") - if not cols: - cols = tr.findAll("th") - - row = [] - for td in cols: - try: - cell = "".join(td.find(text=True)) - cell = unescape(cell) - except TypeError: - cell = "" - row.append(cell) - writer.writerow(row) - nrows += 1 - logger.debug("Table with %d rows written to `%s`.", nrows, csvfile) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/maf.py b/jcvi/formats/maf.py deleted file mode 100644 index dd63b1d8..00000000 --- a/jcvi/formats/maf.py +++ /dev/null @@ -1,286 +0,0 @@ -#!/usr/bin/env 
python -# -*- coding: UTF-8 -*- - -""" -MAF format specification: - -""" -import sys - -from bisect import bisect -from dataclasses import dataclass - -from Bio import AlignIO -from Bio import SeqIO -from bx import interval_index_file -from bx.align import maf - -from ..apps.base import ActionDispatcher, OptionParser, need_update -from ..apps.lastz import blastz_score_to_ncbi_expectation, blastz_score_to_ncbi_bits - -from .base import BaseFile, logger - - -FLANK = 60 - - -class Maf(BaseFile, dict): - def __init__(self, filename, index=False): - super().__init__(filename) - - indexfile = filename + ".idx" - if index: - if need_update(filename, indexfile): - self.build_index(filename, indexfile) - - self.index = maf.Index(filename, indexfile) - - fp = open(filename) - self.reader = maf.Reader(fp) - - def build_index(self, filename, indexfile): - """ - Recipe from Brad Chapman's blog - - """ - indexes = interval_index_file.Indexes() - in_handle = open(filename) - - reader = maf.Reader(in_handle) - while True: - pos = reader.file.tell() - rec = next(reader) - if rec is None: - break - for c in rec.components: - indexes.add( - c.src, - c.forward_strand_start, - c.forward_strand_end, - pos, - max=c.src_size, - ) - - index_handle = open(indexfile, "w") - indexes.write(index_handle) - index_handle.close() - - -@dataclass -class Breakpoint: - arec: str - astart: int - brec: str - bstart: int - - def __str__(self): - return f"{self.arec}:{self.astart}-{self.brec}:{self.bstart}" - - -def main(): - - actions = ( - ("bed", "convert MAF to BED format"), - ("blast", "convert MAF to BLAST tabular format"), - ("breakpoints", "find breakpoints in MAF and 'simulate' chimeric contigs"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def breakpoints(args): - """ - %prog breakpoints A.B.maf A.fa B.fa AB 1000000 2000000 - - Find breakpoints in MAF and 'simulate' chimeric contigs in `AB.fa`. - Breakpoints are 'roughly' around the user defined positions. 
The idea is - to simulate chimeric contigs, which are useful for testing algorithms, - e.g. klassify. - """ - p = OptionParser(breakpoints.__doc__) - p.add_argument( - "--minsize", - default=10000, - type=int, - help="Minimum size of alignment to consider", - ) - opts, args = p.parse_args(args) - - if len(args) not in (5, 6): - sys.exit(not p.print_help()) - - maf_file, a_fasta, b_fasta, ab = args[:4] - bps = sorted(int(x) for x in args[4:]) - minsize = opts.minsize - - filtered_msa = [] - for msa in AlignIO.parse(maf_file, "maf"): - arec, brec = msa - if brec.annotations["size"] < minsize: - continue - filtered_msa.append((brec.annotations["start"], arec, brec)) - logger.info("Total alignments: %d", len(filtered_msa)) - - final = [] - # Load the sequences - ar = next(SeqIO.parse(a_fasta, "fasta")) - br = next(SeqIO.parse(b_fasta, "fasta")) - for bp in bps: - i = bisect(filtered_msa, (bp,)) - _, arec, brec = filtered_msa[i] - logger.info("%s", arec) - logger.info("%s", brec) - assert len(arec) == len(brec) - # Find the midpoint, safe to crossover there - midpoint = len(arec) // 2 - aseq = arec.seq[:midpoint] - astart = arec.annotations["start"] + len(aseq) - aseq.count("-") - logger.info("%s|%s", aseq[-FLANK:], arec.seq[midpoint:][:FLANK]) - bseq = brec.seq[:midpoint] - bstart = brec.annotations["start"] + len(bseq) - bseq.count("-") - logger.info("%s|%s", bseq[-FLANK:], brec.seq[midpoint:][:FLANK]) - bpt = Breakpoint(arec.id, astart, brec.id, bstart) - logger.info("-" * FLANK * 2 + ">") - logger.info("%s|%s", ar.seq[:astart][-FLANK:], br.seq[bstart:][:FLANK]) - final.append(bpt) - - logger.info("Breakpoints found: %s", final) - if len(final) == 2: - bp1, bp2 = final[:2] - # ====-------======= - # bp1 bp2 - abseq = ( - ar.seq[: bp1.astart] - + br.seq[bp1.bstart : bp2.bstart] - + ar.seq[bp2.astart :] - ) - elif len(final) == 1: - bp = final[0] - abseq = ar.seq[: bp.astart] + br.seq[bp.bstart :] - abrec = SeqIO.SeqRecord(abseq, id=ab, description="") - ab_fasta = 
f"{ab}.fa" - SeqIO.write([abrec], ab_fasta, "fasta") - logger.info("Writing to %s", ab_fasta) - - -def bed(args): - """ - %prog bed maffiles > out.bed - - Convert a folder of maf alignments to the bed features - then useful to check coverage, etc. - """ - p = OptionParser(bed.__doc__) - _, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - flist = args - prefix = flist[0].split(".")[0] - - j = 0 - for f in flist: - reader = Maf(f).reader - for rec in reader: - a, b = rec.components - - for a, tag in zip((a, b), "ab"): - name = "{0}_{1:07d}{2}".format(prefix, j, tag) - print( - "\t".join( - str(x) - for x in ( - a.src, - a.forward_strand_start, - a.forward_strand_end, - name, - ) - ) - ) - - j += 1 - - -def alignment_details(a, b): - nmatch = 0 - nmismatch = 0 - ngaps = 0 - - assert len(a) == len(b) - l = len(a) - - for i in range(l): - if a[i] == b[i]: - nmatch += 1 - elif a[i] == "-" or b[i] == "-": - ngaps += 1 - else: - nmismatch += 1 - - pctid = 100.0 * nmatch / l - return pctid, nmismatch, ngaps - - -def maf_to_blast8(f): - """ - Convert a MAF file to BLAST tabular format. - """ - reader = Maf(f).reader - for rec in reader: - a, b = rec.components - query = a.src - subject = b.src - qstart = a.forward_strand_start - qstop = a.forward_strand_end - sstart = b.forward_strand_start - sstop = b.forward_strand_end - score = rec.score - - evalue = blastz_score_to_ncbi_expectation(score) - score = blastz_score_to_ncbi_bits(score) - evalue, score = "{0:.2g}".format(evalue), "{0:.1f}".format(score) - hitlen = len(a.text) - - pctid, nmismatch, ngaps = alignment_details(a.text, b.text) - print( - "\t".join( - str(x) - for x in ( - query, - subject, - pctid, - hitlen, - nmismatch, - ngaps, - qstart, - qstop, - sstart, - sstop, - evalue, - score, - ) - ) - ) - - -def blast(args): - """ - %prog blast maffiles > out.blast - - From a folder of .maf files, generate .blast file with tabular format. 
- """ - p = OptionParser(blast.__doc__) - _, args = p.parse_args(args) - - if len(args) == 0: - sys.exit(p.print_help()) - - flist = args - - for f in flist: - maf_to_blast8(f) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/obo.py b/jcvi/formats/obo.py deleted file mode 100755 index 001df4e6..00000000 --- a/jcvi/formats/obo.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog obo_file - -Parses obo_file and plot GO lineage -""" -import sys - -from collections import deque -from functools import partial -from typing import IO, Optional - -from goatools.obo_parser import GODag - -from ..apps.base import OptionParser, logger - -GO_URL = "http://purl.obolibrary.org/obo/go/go-basic.obo" -SO_URL = ( - "http://obo.cvs.sourceforge.net/viewvc/obo/obo/ontology/genomic-proteomic/so.obo" -) - - -def load_GODag(obo_url: str, prt: Optional[IO] = None) -> (GODag, str): - """ - Load given obo url and returns GODag object. - - Args: - obo_url (str): URL to the remote OBO file. - prt (Optional[IO]): IO stream to print verbose information. - - Returns: - (GODag, str): GODag object that contains the dict, and path to the downloaded OBO file. 
- """ - - from jcvi.apps.base import download - - so_file = download(obo_url, debug=False) - - return GODag(so_file, prt=prt), so_file - - -GODag_from_GO = partial(load_GODag, obo_url=GO_URL) -GODag_from_SO = partial(load_GODag, obo_url=SO_URL) - - -def validate_term(term, so=None, method="verify"): - """ - Validate an SO term against so.obo - """ - if so is None: - so, _ = GODag_from_SO() - - oterm = term - valid_names = set(x.name for x in so.values()) - if term not in valid_names: - if "resolve" in method: - if "_" in term: - tparts = deque(term.split("_")) - tparts.pop() if "prefix" in method else tparts.popleft() - nterm = "_".join(tparts).strip() - term = validate_term(nterm, so=so, method=method) - if term is None: - return None - else: - logger.error("Term `%s` does not exist", term) - sys.exit(1) - - if oterm != term: - logger.debug("Resolved term `%s` to `%s`", oterm, term) - return term - - -if __name__ == "__main__": - p = OptionParser(__doc__) - p.add_argument( - "--term", - help="Write the parents and children of this query term", - ) - - opts, args = p.parse_args() - - if len(args) != 1: - sys.exit(p.print_help()) - - (obo_file,) = args - - def description(record): - level = "level-{:>02}".format(record.level) - desc = "{} [{}]".format(record.name, record.namespace) - if record.is_obsolete: - desc += " obsolete" - alt_ids = ",".join(record.alt_ids) - return "\t".join((record.item_id, level, desc, alt_ids)) - - g = GODag(obo_file, prt=None) - header = "\t".join(("#id", "level", "name", "alt_ids")) - print(header) - for rec in sorted(set(g.values()), key=lambda x: x.item_id): - print(description(rec)) - - # run a test case - if opts.term: - rec = g.query_term(opts.term, verbose=True) - g.draw_lineage([rec]) diff --git a/jcvi/formats/paf.py b/jcvi/formats/paf.py deleted file mode 100644 index aefc93b5..00000000 --- a/jcvi/formats/paf.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- -# -# paf.py -# formats -# -# Created 
by Haibao Tang on 09/03/20 -# Copyright © 2020 Haibao Tang. All rights reserved. -# - -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger - -from .base import must_open - - -class PAFLine: - """ - PAF specification - https://github.com/lh3/miniasm/blob/master/PAF.md - """ - - __slots__ = ( - "query", - "qsize", - "qstart", - "qstop", - "orientation", - "subject", - "ssize", - "sstart", - "sstop", - "nmatch", - "hitlen", - "mapq", - ) - - def __init__(self, row): - args = row.split() - self.query = args[0] - self.qsize = int(args[1]) - self.qstart = int(args[2]) + 1 - self.qstop = int(args[3]) - self.orientation = args[4] - self.subject = args[5] - self.ssize = int(args[6]) - self.sstart = int(args[7]) + 1 - self.sstop = int(args[8]) - self.nmatch = int(args[9]) - self.hitlen = int(args[10]) - self.mapq = int(args[11]) - - @property - def sbedline(self): - return "\t".join( - str(x) - for x in ( - self.subject, - self.sstart - 1, - self.sstop, - self.query, - self.hitlen, - self.orientation, - ) - ) - - @property - def qbedline(self): - return "\t".join( - str(x) - for x in ( - self.query, - self.qstart - 1, - self.qstop, - self.subject, - self.hitlen, - self.orientation, - ) - ) - - -def bed(args): - """ - %prog bed paffile - - Print out BED file based on coordinates in BLAST PAF results. By default, - write out subject positions. Use --swap to write query positions. 
- """ - from jcvi.formats.bed import sort as sort_bed - - p = OptionParser(bed.__doc__) - p.add_argument( - "--swap", default=False, action="store_true", help="Write query positions" - ) - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (paffile,) = args - write_qbed = opts.swap - bedfile = "{}.{}.bed".format( - paffile.rsplit(".", 1)[0], "query" if write_qbed else "subject" - ) - with must_open(paffile) as fp, open(bedfile, "w") as fw: - for row in fp: - b = PAFLine(row) - if write_qbed: - print(b.qbedline, file=fw) - else: - print(b.sbedline, file=fw) - - logger.debug("File written to `%s`.", bedfile) - sort_bed([bedfile, "-i"]) - return bedfile - - -def main(): - actions = (("bed", "get BED file from PAF"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/pdf.py b/jcvi/formats/pdf.py deleted file mode 100644 index abe4a010..00000000 --- a/jcvi/formats/pdf.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Manipulate PDF files, using PyPDF2 library. -""" -import sys - -from natsort import natsorted - -from pypdf import PdfMerger, parse_filename_page_ranges -from pypdf.pagerange import PageRange - -from ..apps.base import ActionDispatcher, OptionParser, cleanup, logger - -from .base import must_open - -PAGE_RANGE_HELP = PageRange.__init__.__doc__ - - -def main(): - - actions = (("cat", "concatenate pages from pdf files into a single pdf file"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def cat(args): - """ - %prog cat *.pdf -o output.pdf - - Concatenate pages from pdf files into a single pdf file. - - Page ranges refer to the previously-named file. - A file not followed by a page range means all the pages of the file. - - PAGE RANGES are like Python slices. 
- {page_range_help} - EXAMPLES - pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1 - Concatenate all of head.pdf, all but page seven of content.pdf, - and the last page of tail.pdf, producing output.pdf. - - pdfcat chapter*.pdf >book.pdf - You can specify the output file by redirection. - - pdfcat chapter?.pdf chapter10.pdf >book.pdf - In case you don't want chapter 10 before chapter 2. - """ - p = OptionParser(cat.__doc__.format(page_range_help=PAGE_RANGE_HELP)) - p.add_argument( - "--nosort", default=False, action="store_true", help="Do not sort file names" - ) - p.add_argument( - "--cleanup", - default=False, - action="store_true", - help="Remove individual pdfs after merging", - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - outfile = opts.outfile - if outfile in args: - args.remove(outfile) - - should_sort = not opts.nosort - if not all(x.endswith(".pdf") for x in args): - should_sort = False - logger.debug("Not sorting filenames because non-pdf args") - - if should_sort: - args = natsorted(args) - - filename_page_ranges = parse_filename_page_ranges(args) - nfiles = len(filename_page_ranges) - merger = PdfMerger() - with must_open(outfile, "wb") as fw: - in_fs = {} - try: - for filename, page_range in filename_page_ranges: - logger.debug("%s: %s", filename, page_range) - if filename not in in_fs: - in_fs[filename] = open(filename, "rb") - merger.append(in_fs[filename], pages=page_range) - except Exception as e: - logger.error("Error while reading %s: %s", filename, e) - sys.exit(1) - merger.write(fw) - logger.info("Extracted %d files into `%s`", nfiles, outfile) - - if opts.cleanup: - logger.debug("Cleaning up %d files", nfiles) - cleanup(args) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/psl.py b/jcvi/formats/psl.py deleted file mode 100755 index df1774c8..00000000 --- a/jcvi/formats/psl.py +++ /dev/null @@ -1,395 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 
-*- - -""" -Classes to handle the .psl files -""" -import math -import re -import sys - -from ..apps.base import ActionDispatcher, OptionParser - -from .base import LineFile, must_open - - -class PslLine(object): - def __init__(self, sline): - args = sline.strip().split() - self.nargs = len(args) - self.matches = int(args[0]) - self.misMatches = int(args[1]) - self.repMatches = int(args[2]) - self.nCount = int(args[3]) - self.qNumInsert = int(args[4]) - self.qBaseInsert = int(args[5]) - self.tNumInsert = int(args[6]) - self.tBaseInsert = int(args[7]) - self.qstrand, self.strand = args[8], None - m = re.match(r"(?P[+-]?)(?P[+-])", self.qstrand) - if m: - self.qstrand, self.strand = m.group("qs"), m.group("gs") - self.qName = args[9] - self.qSize = int(args[10]) - self.qStart = int(args[11]) - self.qEnd = int(args[12]) - self.tName = args[13] - self.tSize = int(args[14]) - self.tStart = int(args[15]) - self.tEnd = int(args[16]) - self.blockCount = int(args[17]) - self.blockSizes = [int(x) for x in args[18].strip().split(",")[:-1]] - self.qStarts = [int(x) for x in args[19].strip().split(",")[:-1]] - self.tStarts = [int(x) for x in args[20].strip().split(",")[:-1]] - - def __str__(self): - args = [ - self.matches, - self.misMatches, - self.repMatches, - self.nCount, - self.qNumInsert, - self.qBaseInsert, - self.tNumInsert, - self.tBaseInsert, - self.strand, - self.qName, - self.qSize, - self.qStart, - self.qEnd, - self.tName, - self.tSize, - self.tStart, - self.tEnd, - self.blockCount, - self.blockSizes, - self.qStarts, - self.tStarts, - ] - - s = "\t".join(str(x) for x in args) - return s - - def __getitem__(self, key): - return getattr(self, key) - - @property - def qspan(self): - return self.qEnd - self.qStart - - @property - def tspan(self): - return self.tEnd - self.tStart - - @property - def score(self): - sizeMult = self._sizeMult - - return ( - sizeMult * (self.matches + (self.repMatches >> 1)) - - sizeMult * self.misMatches - - self.qNumInsert - - 
self.tNumInsert - ) - - @property - def coverage(self): - return ( - 100 - * (self.matches + self.misMatches + self.repMatches + self.nCount) - / self.qSize - ) - - def swap(self): - self.qName, self.qSize, self.tName, self.tSize = ( - self.tName, - self.tSize, - self.qName, - self.qSize, - ) - - self.qStart, self.qEnd, self.tStart, self.tEnd = ( - self.tStart, - self.tEnd, - self.qStart, - self.qEnd, - ) - - self.qStarts, self.tStarts = self.tStarts, self.qStarts - - @property - def _sizeMult(self): - """ - decide the size multiplier based on sequence space (protein/nucleotide) - """ - return 3 if self._isProtein else 1 - - @property - def _isProtein(self): - """ - check if blockSizes and scores are in the protein space or not - """ - last = self.blockCount - 1 - return ( - (self.tEnd == self.tStarts[last] + 3 * self.blockSizes[last]) - and self.strand == "+" - ) or ( - ( - self.tStart - == self.tSize - (self.tStarts[last] + 3 * self.blockSizes[last]) - and self.strand == "-" - ) - ) - - def _milliBad(self, ismRNA=False): - """ - calculate badness in parts per thousand - i.e. 
number of non-identical matches - """ - sizeMult = self._sizeMult - - qAlnSize, tAlnSize = self.qspan * sizeMult, self.tspan - alnSize = min(qAlnSize, tAlnSize) - if alnSize <= 0: - return 0 - - sizeDiff = qAlnSize - tAlnSize - if sizeDiff < 0: - sizeDiff = 0 if ismRNA else -sizeDiff - - insertFactor = self.qNumInsert - if not ismRNA: - insertFactor += self.tNumInsert - - total = (self.matches + self.repMatches + self.misMatches) * sizeMult - - return ( - ( - 1000 - * ( - self.misMatches * sizeMult - + insertFactor - + round(3 * math.log(1 + sizeDiff)) - ) - ) - / total - if total != 0 - else 0 - ) - - def pct_id(self, simple=None): - return ( - 100.00 - self._milliBad(ismRNA=True) * 0.1 - if not simple - else 100.00 * self.matches / (self.matches + self.misMatches) - ) - # else 100.00 * self.score / self.qSize - - def gffline( - self, - source="GMAP", - type="match_part", - primary_tag="Parent", - alt_score=None, - suffix=".match", - count=0, - ): - - score = "." if type == "match_part" else "{0:.2f}".format(self.score) - - target = " ".join(str(x) for x in [self.qName, self.qStart, self.qEnd]) - - attributes = [ - primary_tag + "=" + self.qName + suffix + str(count), - "Target=" + target, - ] - if primary_tag == "ID": - attributes.extend( - [ - "identity={0:.2f}".format(self.pct_id(simple=alt_score)), - "coverage={0:.2f}".format(self.coverage), - ] - ) - attrs = ";".join(str(x) for x in attributes) - - line = "\t".join( - str(x) - for x in [ - self.tName, - source, - type, - self.tStart, - self.tEnd, - score, - self.strand, - ".", - attrs, - ] - ) - return line - - @property - def bed12line(self): - color = "255,0,0" - self.blockStarts = ",".join([str(x - self.tStart) for x in self.tStarts]) - line = "\t".join( - str(x) - for x in ( - self.tName, - self.tStart, - self.tEnd, - self.qName, - "{0:.2f}".format(self.pct_id()), - self.strand, - self.tStart, - self.tEnd, - color, - self.blockCount, - ",".join(str(bs) for bs in self.blockSizes), - self.blockStarts, - ) - 
) - return line - - -class Psl(LineFile): - def __init__(self, filename=None): - super().__init__(filename) - self.mCounts = {} # dict to hold match counts - if not filename: - return - - for line in must_open(filename): - if not re.match(r"\d+", line[0]): - continue - self.append(PslLine(line)) - - def trackMatches(self, id): - self.mCounts[id] = self.mCounts.get(id, 0) + 1 - - def getMatchCount(self, id): - return self.mCounts[id] - - -def main(): - - actions = ( - ("gff", "convert psl to gff3 format"), - ("bed", "convert psl to bed12 format"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def bed(args): - """ - %prog bed pslfile - - Convert to bed format. - """ - p = OptionParser(bed.__doc__) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (pslfile,) = args - fw = must_open(opts.outfile, "w") - - psl = Psl(pslfile) - for p in psl: - print(p.bed12line, file=fw) - - -def gff(args): - """ - %prog gff pslfile - - Convert to gff format. 
- """ - p = OptionParser(gff.__doc__) - p.add_argument("--source", default="GMAP", help="specify GFF source") - p.add_argument( - "--type", - default="EST_match", - help="specify GFF feature type", - ) - p.add_argument("--suffix", default=".match", help="match ID suffix") - p.add_argument( - "--swap", - default=False, - action="store_true", - help="swap query and target features", - ) - p.add_argument( - "--simple_score", - default=False, - action="store_true", - help="calculate a simple percent score", - ) - p.set_outfile() - - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (pslfile,) = args - fw = must_open(opts.outfile, "w") - - print("##gff-version 3", file=fw) - psl = Psl(pslfile) - for p in psl: - if opts.swap: - p.swap() - - psl.trackMatches(p.qName) - # switch from 0-origin to 1-origin - p.qStart += 1 - p.tStart += 1 - - print( - p.gffline( - source=opts.source, - type=opts.type, - suffix=opts.suffix, - primary_tag="ID", - alt_score=opts.simple_score, - count=psl.getMatchCount(p.qName), - ), - file=fw, - ) - - # create an empty PslLine() object and load only - # the targetName, queryName and strand info - part = PslLine("\t".join(str(x) for x in [0] * p.nargs)) - part.tName, part.qName, part.strand = p.tName, p.qName, p.strand - - nparts = len(p.qStarts) - for n in range(nparts): - part.qStart, part.tStart, aLen = ( - p.qStarts[n] + 1, - p.tStarts[n] + 1, - p.blockSizes[n], - ) - part.qEnd = part.qStart + aLen - 1 - part.tEnd = part.tStart + aLen - 1 - - if part.strand == "-": - part.qStart = p.qSize - (p.qStarts[n] + p.blockSizes[n]) + 1 - part.qEnd = p.qSize - p.qStarts[n] - - print( - part.gffline( - source=opts.source, - suffix=opts.suffix, - count=psl.getMatchCount(part.qName), - ), - file=fw, - ) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/pyblast.py b/jcvi/formats/pyblast.py deleted file mode 100644 index 23926f52..00000000 --- a/jcvi/formats/pyblast.py +++ /dev/null @@ -1,101 +0,0 @@ 
-#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Python implementation of BlastLine, an alternative Cython implementation is -available in .cblast.BlastLine, which may be up to 2x faster -""" - - -class BlastLine(object): - __slots__ = ( - "query", - "subject", - "pctid", - "hitlen", - "nmismatch", - "ngaps", - "qstart", - "qstop", - "sstart", - "sstop", - "evalue", - "score", - "qseqid", - "sseqid", - "qi", - "si", - "orientation", - ) - - def __init__(self, sline): - args = sline.split("\t") - self.query = args[0] - self.subject = args[1] - self.pctid = float(args[2]) - self.hitlen = int(args[3]) - self.nmismatch = int(args[4]) - self.ngaps = int(args[5]) - self.qstart = int(args[6]) - self.qstop = int(args[7]) - self.sstart = int(args[8]) - self.sstop = int(args[9]) - if len(args) > 10: - self.evalue = float(args[10]) - self.score = float(args[11]) - - self.orientation = "+" - if self.qstart > self.qstop: - self.qstart, self.qstop = self.qstop, self.qstart - self.orientation = "-" - if self.sstart > self.sstop: - self.sstart, self.sstop = self.sstop, self.sstart - self.orientation = "-" - - @property - def has_score(self): - return hasattr(self, "score") - - def __repr__(self): - return "BlastLine('%s' to '%s', eval=%.3f, score=%.1f)" % ( - self.query, - self.subject, - self.evalue, - self.score, - ) - - def __str__(self): - if self.has_score: - args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - else: - args = [getattr(self, attr) for attr in BlastLine.__slots__[:10]] - if self.orientation == "-": - args[8], args[9] = args[9], args[8] - return "\t".join(str(x) for x in args) - - @property - def swapped(self): - """ - Swap query and subject. 
- """ - args = [getattr(self, attr) for attr in BlastLine.__slots__[:12]] - args[0:2] = [self.subject, self.query] - args[6:10] = [self.sstart, self.sstop, self.qstart, self.qstop] - if self.orientation == "-": - args[8], args[9] = args[9], args[8] - b = "\t".join(str(x) for x in args) - return BlastLine(b) - - @property - def bedline(self): - return "\t".join( - str(x) - for x in ( - self.subject, - self.sstart - 1, - self.sstop, - self.query, - self.score, - self.orientation, - ) - ) diff --git a/jcvi/formats/sam.py b/jcvi/formats/sam.py deleted file mode 100644 index 01b4d904..00000000 --- a/jcvi/formats/sam.py +++ /dev/null @@ -1,1025 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -SAM alignment format. There are other tools that handles better SAM and BAM. -This script simply parses the lines in SAM into human readable fields. - -http://samtools.sourceforge.net/SAM1.pdf -""" -import os -import os.path as op -import sys - -from collections import defaultdict -from itertools import groupby - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - PIPE, - Popen, - cleanup, - get_abs_path, - glob, - logger, - mkdir, - need_update, - popen, - sh, -) -from ..utils.cbook import fill -from ..assembly.base import Astat - -from .base import LineFile, must_open -from .fasta import Fasta -from .sizes import Sizes - - -class SamLine(object): - def __init__(self, row): - - args = row.strip().split("\t") - self.qname = args[0] - self.flag = int(args[1]) - self.rname = args[2] - self.pos = args[3] - self.mapq = args[4] - self.cigar = args[5] - self.mrnm = args[6] - self.mpos = args[7] - self.isize = args[8] - self.seq = args[9] - self.qual = args[10] - self.extra = args[11:] - - def __str__(self): - return "\t".join( - str(x) - for x in ( - self.qname, - self.flag, - self.rname, - self.pos, - self.mapq, - self.cigar, - self.mrnm, - self.mpos, - self.isize, - self.seq, - self.qual, - "\t".join(self.extra), - ) - ) - - @property - def orientation(self): 
- return "-" if self.flag & 0x10 == 0 else "+" - - def update_readname(self): - if self.flag & 0x40 == 0: - tag = "/1" - elif self.flag & 0x80 == 0: - tag = "/2" - else: - tag = "" - self.qname += tag - - @property - def pairline(self): - qpos = self.cigar.split("H", 1)[0] - return "%s:%s\t%s:%s" % (self.qname, qpos, self.rname, self.pos) - - -class Sam(LineFile): - def __init__(self, filename, callback=None): - super().__init__(filename) - fp = open(filename) - for row in fp: - if row[0] == "@": - continue - s = SamLine(row) - if callback: - callback(s) - - -def output_bam(cmd, outfile, cpus=8): - bam = outfile.endswith(".bam") - if not bam: - return cmd + " > {0}".format(outfile) - - outcmd, mflag = ("samtools view -bS", "-@ {0}".format(cpus)) - cmd += " | {0} {1} - > {2}".format(outcmd, mflag, outfile) - - return cmd - - -class GenomeCoverageLine(object): - def __init__(self, row): - args = row.split() - self.seqid = args[0] - self.depth = int(args[1]) - self.positions = int(args[2]) - self.length = int(args[3]) - self.freq = float(args[4]) - - -class GenomeCoverageFile(LineFile): - def __init__(self, filename): - super().__init__(filename) - fp = open(filename) - for row in fp: - self.append(GenomeCoverageLine(row)) - - def iter_coverage_seqid(self): - for seqid, lines in groupby(self, key=lambda x: x.seqid): - lines = list(lines) - length = lines[0].length - counts = 0 - for r in lines: - counts += r.depth * r.positions - yield seqid, counts * 1.0 / length - - -def get_prefix(readfile, dbfile): - rdpf = op.basename(readfile).replace(".gz", "").rsplit(".", 1)[0] - dbpf = op.basename(dbfile).split(".")[0] - return ".".join((rdpf, dbpf)) - - -def get_samfile( - readfile, dbfile, bam=False, mapped=False, unmapped=False, bowtie=False -): - prefix = get_prefix(readfile, dbfile) - ext = ".bam" if bam else ".sam" - samfile = prefix + ext - ext = ".fastq" if bowtie else ext - mapped = (prefix + ".mapped" + ext) if mapped else None - unmapped = (prefix + ".unmapped" + 
ext) if unmapped else None - return samfile, mapped, unmapped - - -def get_minibam(bamfile, region, overwrite=True): - xregion = region.replace(":", "_").replace("-", "_").replace(",", "") - minibamfile = op.basename(bamfile).replace(".bam", ".{}.bam".format(xregion)) - baifile = minibamfile + ".bai" - if op.exists(baifile): - sh("rm {}".format(baifile)) - - if not overwrite and op.exists(minibamfile): - logger.error("Output name exists: `{}`".format(minibamfile)) - return - - cmd = "samtools view {} {} -b".format(bamfile, region) - cmd += " -o {0}".format(minibamfile) - - sh(cmd) - sh("samtools index {0}".format(minibamfile)) - - return minibamfile - - -def get_minibam_bed(bamfile, bedfile, minibam=None): - """samtools view -L could do the work, but it is NOT random access. Here we - are processing multiple regions sequentially. See also: - - https://www.biostars.org/p/49306/ - """ - pf = op.basename(bedfile).split(".")[0] - minibamfile = minibam or op.basename(bamfile).replace(".bam", ".{}.bam".format(pf)) - minisamfile = minibam.replace(".bam", ".sam") - baifile = minibamfile + ".bai" - if op.exists(baifile): - sh("rm {}".format(baifile)) - - cmd = "samtools view -H {} > {}".format(bamfile, minisamfile) - sh(cmd) - - cmd = "cat {}".format(bedfile) - cmd += " | perl -lane 'print \"$F[0]:$F[1]-$F[2]\"'" - cmd += " | xargs -n1 -t -I \{\}" - cmd += " samtools view {}".format(bamfile) - cmd += " \{\} >> " + minisamfile - sh(cmd) - - cmd = "samtools view {} -b".format(minisamfile) - cmd += " | samtools sort -" - cmd += " -o {0}".format(minibamfile) - - sh(cmd) - sh("samtools index {0}".format(minibamfile)) - return minibamfile - - -def main(): - - actions = ( - # Alter read names - ("append", "append or prepend string to read names"), - # Extract info - ("bed", "convert bam files to bed"), - ("fastq", "convert bam files to paired fastq"), - ("pair", "parse sam file and get pairs"), - ("pairs", "print paired-end reads from BAM file"), - ("chimera", "parse sam file from 
`bwasw` and list multi-hit reads"), - ("noclip", "remove clipped reads from bam"), - ("ace", "convert sam file to ace"), - ("consensus", "convert bam alignments to consensus FASTA"), - ("fpkm", "calculate FPKM values from BAM file"), - ("coverage", "calculate depth for BAM file"), - ("vcf", "call SNPs on a set of bam files"), - ("mapped", "extract mapped/unmapped reads from samfile"), - ("count", "count the number of reads mapped using htseq"), - ("merge", "merge bam files"), - # Convenience function - ("index", "convert to bam, sort and then index"), - ("mini", "extract mini-bam for a single region"), - ) - - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def fastq(args): - """ - %prog fastq bamfile prefix - - Convert BAM files to paired FASTQ files. - """ - p = OptionParser(fastq.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bamfile, pf = args - singletons = pf + ".se.fastq" - a = pf + ".read1.fastq" - b = pf + ".read2.fastq" - - cmd = "samtools collate -uOn 128 {} tmp-prefix".format(bamfile) - cmd += " | samtools fastq -s {} -1 {} -2 {} -".format(singletons, a, b) - sh(cmd) - - if os.stat(singletons).st_size == 0: # singleton file is empty - cleanup(singletons) - return a, b - - -def mini(args): - """ - %prog mini bamfile region - - Extract mini-bam for a single region. - """ - p = OptionParser(mini.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bamfile, region = args - get_minibam(bamfile, region) - - -def noclip(args): - """ - %prog noclip bamfile - - Remove clipped reads from BAM. 
- """ - p = OptionParser(noclip.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bamfile,) = args - noclipbam = bamfile.replace(".bam", ".noclip.bam") - cmd = "samtools view -h {} | awk -F '\t' '($6 !~ /H|S/)'".format(bamfile) - cmd += " | samtools view -@ 4 -b -o {}".format(noclipbam) - sh(cmd) - - sh("samtools index {}".format(noclipbam)) - - -def append(args): - """ - %prog append bamfile - - Append /1 or /2 to read names. Useful for using the Tophat2 bam file for - training AUGUSTUS gene models. - """ - p = OptionParser(append.__doc__) - p.add_argument("--prepend", help="Prepend string to read names") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bamfile,) = args - prepend = opts.prepend - - icmd = "samtools view -h {0}".format(bamfile) - bamfile = bamfile.rsplit(".", 1)[0] + ".append.bam" - ocmd = "samtools view -b -@ 64 - -o {0}".format(bamfile) - p = Popen(ocmd, stdin=PIPE) - for row in popen(icmd): - if row[0] == "@": - print(row.strip(), file=p.stdin) - else: - s = SamLine(row) - if prepend: - s.qname = prepend + "_" + s.qname - else: - s.update_readname() - print(s, file=p.stdin) - - -def bed(args): - """ - %prog bed bedfile bamfiles - - Convert bam files to bed. - """ - p = OptionParser(bed.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - bedfile = args[0] - bamfiles = args[1:] - for bamfile in bamfiles: - cmd = "bamToBed -i {0}".format(bamfile) - sh(cmd, outfile=bedfile, append=True) - - -def merge(args): - """ - %prog merge merged_bams bams1_dir bams2_dir ... - - Merge BAM files. Treat the bams with the same prefix as a set. - Output the commands first. 
- """ - from jcvi.apps.grid import MakeManager - - p = OptionParser(merge.__doc__) - p.set_sep(sep="_", help="Separator to group per prefix") - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - merged_bams = args[0] - bamdirs = args[1:] - - mkdir(merged_bams) - bams = [] - for x in bamdirs: - bams += glob(op.join(x, "*.bam")) - bams = [x for x in bams if "nsorted" not in x] - - logger.debug("Found a total of {0} BAM files.".format(len(bams))) - - sep = opts.sep - key = lambda x: op.basename(x).split(sep)[0] - bams.sort(key=key) - mm = MakeManager() - for prefix, files in groupby(bams, key=key): - files = sorted(list(files)) - nfiles = len(files) - source = " ".join(files) - target = op.join(merged_bams, op.basename(files[0])) - if nfiles == 1: - source = get_abs_path(source) - cmd = "ln -s {0} {1}".format(source, target) - mm.add("", target, cmd) - else: - cmd = "samtools merge -@ 8 {0} {1}".format(target, source) - mm.add(files, target, cmd, remove=True) - mm.write() - - -def count(args): - """ - %prog count bamfile gtf - - Count the number of reads mapped using `htseq-count`. 
- """ - p = OptionParser(count.__doc__) - p.add_argument("--type", default="exon", help="Only count feature type") - p.set_cpus(cpus=8) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bamfile, gtf = args - cpus = opts.cpus - pf = bamfile.split(".")[0] - countfile = pf + ".count" - if not need_update(bamfile, countfile): - return - - nsorted = pf + "_nsorted" - nsortedbam, nsortedsam = nsorted + ".bam", nsorted + ".sam" - if need_update(bamfile, nsortedsam): - cmd = "samtools sort -@ {0} -n {1} {2}".format(cpus, bamfile, nsorted) - sh(cmd) - cmd = "samtools view -@ {0} -h {1}".format(cpus, nsortedbam) - sh(cmd, outfile=nsortedsam) - - if need_update(nsortedsam, countfile): - cmd = "htseq-count --stranded=no --minaqual=10" - cmd += " -t {0}".format(opts.type) - cmd += " {0} {1}".format(nsortedsam, gtf) - sh(cmd, outfile=countfile) - - -def coverage(args): - """ - %prog coverage fastafile bamfile - - Calculate coverage for BAM file. BAM file will be sorted unless with - --nosort. 
- """ - p = OptionParser(coverage.__doc__) - p.add_argument( - "--format", - default="bigwig", - choices=("bedgraph", "bigwig", "coverage"), - help="Output format", - ) - p.add_argument( - "--nosort", default=False, action="store_true", help="Do not sort BAM" - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, bamfile = args - format = opts.format - if opts.nosort: - logger.debug("BAM sorting skipped") - else: - bamfile = index([bamfile, "--fasta={0}".format(fastafile)]) - - pf = bamfile.rsplit(".", 2)[0] - sizesfile = Sizes(fastafile).filename - cmd = "genomeCoverageBed -ibam {0} -g {1}".format(bamfile, sizesfile) - if format in ("bedgraph", "bigwig"): - cmd += " -bg" - bedgraphfile = pf + ".bedgraph" - sh(cmd, outfile=bedgraphfile) - - if format == "bedgraph": - return bedgraphfile - - bigwigfile = pf + ".bigwig" - cmd = "bedGraphToBigWig {0} {1} {2}".format(bedgraphfile, sizesfile, bigwigfile) - sh(cmd) - return bigwigfile - - coveragefile = pf + ".coverage" - if need_update(fastafile, coveragefile): - sh(cmd, outfile=coveragefile) - - gcf = GenomeCoverageFile(coveragefile) - fw = must_open(opts.outfile, "w") - for seqid, cov in gcf.iter_coverage_seqid(): - print("\t".join((seqid, "{0:.1f}".format(cov))), file=fw) - fw.close() - - -def fpkm(args): - """ - %prog fpkm fastafile *.bam - - Calculate FPKM values from BAM file. 
- """ - p = OptionParser(fpkm.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - fastafile = args[0] - bamfiles = args[1:] - # Create a DUMMY gff file for cuffdiff - gffile = fastafile.rsplit(".", 1)[0] + ".gff" - if need_update(fastafile, gffile): - fw = open(gffile, "w") - f = Fasta(fastafile, lazy=True) - for key, size in f.itersizes_ordered(): - print( - "\t".join( - str(x) - for x in ( - key, - "dummy", - "transcript", - 1, - size, - ".", - ".", - ".", - "ID=" + key, - ) - ), - file=fw, - ) - fw.close() - logger.debug("Dummy GFF created: {0}".format(gffile)) - - cmd = "cuffdiff {0} {1}".format(gffile, " ".join(bamfiles)) - sh(cmd) - - -def pairs(args): - """ - See __doc__ for OptionParser.set_pairs(). - """ - import jcvi.formats.bed - - p = OptionParser(pairs.__doc__) - p.set_pairs() - opts, targs = p.parse_args(args) - - if len(targs) != 1: - sys.exit(not p.print_help()) - - (samfile,) = targs - bedfile = samfile.rsplit(".", 1)[0] + ".bed" - if need_update(samfile, bedfile): - cmd = "bamToBed -i {0}".format(samfile) - sh(cmd, outfile=bedfile) - - args[args.index(samfile)] = bedfile - - return jcvi.formats.bed.pairs(args) - - -def consensus(args): - """ - %prog consensus fastafile bamfile - - Convert bam alignments to consensus FASTQ/FASTA. 
See also: - https://cbc.brown.edu/blog/consensus-vcf/ - """ - valid_callers = ("bcftools", "gatk4") - p = OptionParser(consensus.__doc__) - p.add_argument( - "--nosort", default=False, action="store_true", help="Do not sort the BAM files" - ) - p.add_argument( - "--caller", - default="bcftools", - choices=valid_callers, - help="Use consensus caller", - ) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - fastafile, bamfile = args - pf = bamfile.rsplit(".", 1)[0] - cnsfile = pf + ".cns.fasta" - vcfgzfile = pf + ".vcf.gz" - vcf_args = [fastafile, bamfile, "-o", vcfgzfile] - if opts.nosort: - vcf_args += ["--nosort"] - vcf(vcf_args) - if opts.caller == "bcftools": - cmd = "bcftools consensus -f {} -o {} {}".format(fastafile, cnsfile, vcfgzfile) - else: - cmd = "gatk4 FastaAlternateReferenceMaker -R {} -O {} -V {}".format( - fastafile, cnsfile, vcfgzfile - ) - sh(cmd) - - -def vcf(args): - """ - %prog vcf fastafile bamfiles > out.vcf.gz - - Call SNPs on bam files. - """ - from jcvi.apps.grid import Jobs - - valid_callers = ("mpileup", "freebayes") - p = OptionParser(vcf.__doc__) - p.set_outfile(outfile="out.vcf.gz") - p.add_argument( - "--nosort", default=False, action="store_true", help="Do not sort the BAM files" - ) - p.add_argument( - "--caller", default="mpileup", choices=valid_callers, help="Use variant caller" - ) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - fastafile = args[0] - bamfiles = args[1:] - caller = opts.caller - - unsorted = [x for x in bamfiles if ".sorted." 
not in x] - if opts.nosort: - bamfiles = unsorted - else: - jargs = [[[x, "--unique"]] for x in unsorted] - jobs = Jobs(index, args=jargs) - jobs.run() - bamfiles = [x.replace(".sorted.bam", ".bam") for x in bamfiles] - bamfiles = [x.replace(".bam", ".sorted.bam") for x in bamfiles] - - if caller == "mpileup": - cmd = "bcftools mpileup -Ou -f" - cmd += " {} {}".format(fastafile, " ".join(bamfiles)) - cmd += " | bcftools call -mv -Oz -o {}".format(opts.outfile) - elif caller == "freebayes": - cmd = "freebayes -f" - cmd += " {} {} > {}".format(fastafile, " ".join(bamfiles), opts.outfile) - sh(cmd) - - cmd = "bcftools index {}".format(opts.outfile) - sh(cmd) - - -def breakpoint(r): - op_prev = None - cum_length = 0 - is_clip = lambda x: x in (4, 5) - rl = sum(l for o, l in r.cigartuples) - for op, length in r.cigartuples: - if is_clip(op) != is_clip(op_prev) and op_prev is not None: - yield rl - cum_length if r.is_reverse else cum_length - op_prev = op - cum_length += length - - -def chimera(args): - """ - %prog chimera bamfile - - Parse BAM file from `bwasw` and list multi-hit reads and breakpoints. 
- """ - import pysam - from natsort import natsorted - - p = OptionParser(chimera.__doc__) - p.set_verbose() - opts, args = p.parse_args(args) - if len(args) != 1: - sys.exit(not p.print_help()) - - (samfile,) = args - samfile = pysam.AlignmentFile(samfile) - rstore = defaultdict(list) - hstore = defaultdict(int) - for r in samfile.fetch(): - rstore[r.query_name] += list(breakpoint(r)) - hstore[r.query_name] += 1 - if opts.verbose: - print( - r.query_name, - "+-"[r.is_reverse], - sum(l for o, l in r.cigartuples), - r.cigarstring, - list(breakpoint(r)), - file=sys.stderr, - ) - - for rn, bps in natsorted(rstore.items()): - bps = "|".join(str(x) for x in sorted(bps)) if bps else "na" - print("\t".join((rn, str(hstore[rn]), bps))) - - -def index(args): - """ - %prog index samfile/bamfile - - If SAM file, convert to BAM, sort and then index, using SAMTOOLS - """ - p = OptionParser(index.__doc__) - p.add_argument( - "--fasta", dest="fasta", default=None, help="add @SQ header to the BAM file" - ) - p.add_argument( - "--unique", - default=False, - action="store_true", - help="only retain uniquely mapped reads", - ) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - (samfile,) = args - cpus = opts.cpus - fastafile = opts.fasta - if fastafile: - assert op.exists(fastafile) - - bamfile = samfile.replace(".sam", ".bam") - if fastafile: - faifile = fastafile + ".fai" - if need_update(fastafile, faifile): - sh("samtools faidx {0}".format(fastafile)) - cmd = "samtools view -bt {0} {1} -o {2}".format(faifile, samfile, bamfile) - else: - cmd = "samtools view -bS {0} -o {1}".format(samfile, bamfile) - - cmd += " -@ {0}".format(cpus) - if opts.unique: - cmd += " -q 1" - - if samfile.endswith(".sam") and need_update(samfile, bamfile): - sh(cmd) - - # Already sorted? 
- if bamfile.endswith(".sorted.bam"): - sortedbamfile = bamfile - else: - prefix = bamfile.replace(".bam", "") - sortedbamfile = prefix + ".sorted.bam" - - if need_update(bamfile, sortedbamfile): - cmd = "samtools sort {0} -o {1}".format(bamfile, sortedbamfile) - cmd += " -@ {0}".format(cpus) - sh(cmd) - - baifile = sortedbamfile + ".bai" - if need_update(sortedbamfile, baifile): - sh("samtools index {0}".format(sortedbamfile)) - - return sortedbamfile - - -def mapped(args): - """ - %prog mapped sam/bamfile - - Given an input sam/bam file, output a sam/bam file containing only the mapped reads. - Optionally, extract the unmapped reads into a separate file - """ - import pysam - from jcvi.apps.grid import Jobs - - p = OptionParser(mapped.__doc__) - p.set_sam_options(extra=False) - - opts, args = p.parse_args(args) - if len(args) != 1: - sys.exit(p.print_help()) - - (samfile,) = args - - view_opts = [] - oext, mopts = (".sam", ["-S"]) if samfile.endswith(".sam") else (".bam", []) - - flag, ext = ("-b", ".bam") if opts.bam else ("-h", ".sam") - mopts.append(flag) - - if opts.uniq: - mopts.append("-q1") - ext = ".uniq{0}".format(ext) - - if opts.unmapped: - uopts = [x for x in mopts] - uoutfile = samfile.replace(oext, ".unmapped{0}".format(ext)) - uopts.extend(["-f4", samfile, "-o{0}".format(uoutfile)]) - view_opts.append(uopts) - - outfile = samfile.replace(oext, ".mapped{0}".format(ext)) - mopts.extend(["-F4", samfile, "-o{0}".format(outfile)]) - view_opts.append(mopts) - - for vo in view_opts: - logger.debug("samtools view {0}".format(" ".join(vo))) - - jobs = Jobs(pysam.view, [(z for z in x) for x in view_opts]) - jobs.run() - - -def pair(args): - """ - %prog pair samfile - - Parses the sam file and retrieve in pairs format, - query:pos ref:pos - """ - p = OptionParser(pair.__doc__) - - opts, args = p.parse_args(args) - if len(args) != 1: - sys.exit(p.print_help()) - - def callback(s): - print(s.pairline) - - Sam(args[0], callback=callback) - - -def cigar_to_seq(a, 
gap="*"): - """ - Accepts a pysam row. - - cigar alignment is presented as a list of tuples (operation,length). For - example, the tuple [ (0,3), (1,5), (0,2) ] refers to an alignment with 3 - matches, 5 insertions and another 2 matches. - - Op BAM Description - M 0 alignment match (can be a sequence match or mismatch) - I 1 insertion to the reference - D 2 deletion from the reference - N 3 skipped region from the reference - S 4 soft clipping (clipped sequences present in SEQ) - H 5 hard clipping (clipped sequences NOT present in SEQ) - P 6 padding (silent deletion from padded reference) - = 7 sequence match - X 8 sequence mismatch - - convert the sequence based on the cigar string. For example: - """ - seq, cigar = a.seq, a.cigar - start = 0 - subseqs = [] - npadded = 0 - if cigar is None: - return None, npadded - - for operation, length in cigar: - end = start if operation == 2 else start + length - - if operation == 0: # match - subseq = seq[start:end] - elif operation == 1: # insertion - subseq = "" - elif operation == 2: # deletion - subseq = gap * length - npadded += length - elif operation == 3: # skipped - subseq = "N" * length - elif operation in (4, 5): # clip - subseq = "" - else: - raise NotImplementedError - - subseqs.append(subseq) - start = end - - return "".join(subseqs), npadded - - -def ace(args): - """ - %prog ace bamfile fastafile - - convert bam format to ace format. This often allows the remapping to be - assessed as a denovo assembly format. bam file needs to be indexed. also - creates a .mates file to be used in amos/bambus, and .astat file to mark - whether the contig is unique or repetitive based on A-statistics in Celera - assembler. 
- """ - p = OptionParser(ace.__doc__) - p.add_argument( - "--splitdir", - dest="splitdir", - default="outRoot", - help="split the ace per contig to dir", - ) - p.add_argument( - "--unpaired", - dest="unpaired", - default=False, - help="remove read pairs on the same contig", - ) - p.add_argument( - "--minreadno", - dest="minreadno", - default=3, - type=int, - help="minimum read numbers per contig", - ) - p.add_argument( - "--minctgsize", - dest="minctgsize", - default=100, - type=int, - help="minimum contig size per contig", - ) - p.add_argument( - "--astat", - default=False, - action="store_true", - help="create .astat to list repetitiveness", - ) - p.add_argument( - "--readids", - default=False, - action="store_true", - help="create file of mapped and unmapped ids", - ) - - from pysam import Samfile - - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bamfile, fastafile = args - astat = opts.astat - readids = opts.readids - - f = Fasta(fastafile) - prefix = bamfile.split(".")[0] - acefile = prefix + ".ace" - readsfile = prefix + ".reads" - astatfile = prefix + ".astat" - - logger.debug("Load {0}".format(bamfile)) - s = Samfile(bamfile, "rb") - - ncontigs = s.nreferences - genomesize = sum(x for a, x in f.itersizes()) - logger.debug("Total {0} contigs with size {1} base".format(ncontigs, genomesize)) - qual = "20" # default qual - - totalreads = sum(s.count(x) for x in s.references) - logger.debug("Total {0} reads mapped".format(totalreads)) - - fw = open(acefile, "w") - if astat: - astatfw = open(astatfile, "w") - if readids: - readsfw = open(readsfile, "w") - - print("AS {0} {1}".format(ncontigs, totalreads), file=fw) - print(file=fw) - - for i, contig in enumerate(s.references): - cseq = f[contig] - nbases = len(cseq) - - mapped_reads = [x for x in s.fetch(contig) if not x.is_unmapped] - nreads = len(mapped_reads) - - nsegments = 0 - print("CO {0} {1} {2} {3} U".format(contig, nbases, nreads, nsegments), file=fw) - 
print(fill(str(cseq.seq)), file=fw) - print(file=fw) - - if astat: - astat = Astat(nbases, nreads, genomesize, totalreads) - print("{0}\t{1:.1f}".format(contig, astat), file=astatfw) - - text = fill([qual] * nbases, delimiter=" ", width=30) - print("BQ\n{0}".format(text), file=fw) - print(file=fw) - - rnames = [] - for a in mapped_reads: - readname = a.qname - rname = readname - - if readids: - print(readname, file=readsfw) - rnames.append(rname) - - strand = "C" if a.is_reverse else "U" - paddedstart = a.pos + 1 # 0-based to 1-based - af = "AF {0} {1} {2}".format(rname, strand, paddedstart) - print(af, file=fw) - - print(file=fw) - - for a, rname in zip(mapped_reads, rnames): - aseq, npadded = cigar_to_seq(a) - if aseq is None: - continue - - ninfos = 0 - ntags = 0 - alen = len(aseq) - rd = "RD {0} {1} {2} {3}\n{4}".format( - rname, alen, ninfos, ntags, fill(aseq) - ) - qs = "QA 1 {0} 1 {0}".format(alen) - - print(rd, file=fw) - print(file=fw) - print(qs, file=fw) - print(file=fw) - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/sizes.py b/jcvi/formats/sizes.py deleted file mode 100644 index c2817eaf..00000000 --- a/jcvi/formats/sizes.py +++ /dev/null @@ -1,289 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -import os.path as op -import sys - -import numpy as np - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - cleanup, - get_abs_path, - logger, - need_update, -) -from .base import LineFile - - -class Sizes(LineFile): - """ - Two-column .sizes file, often generated by `faSize -detailed` - contigID size - """ - - def __init__(self, filename, select=None): - assert op.exists(filename), "File `{0}` not found".format(filename) - - # filename can be both .sizes file or FASTA formatted file - sizesname = filename - - if not filename.endswith(".sizes"): - sizesname = filename + ".sizes" - filename = get_abs_path(filename) - if need_update(filename, sizesname): - from jcvi.formats.fasta import Fasta - - f = Fasta(filename) - 
with open(sizesname, "w") as fw: - for k, size in f.itersizes_ordered(): - print("\t".join((k, str(size))), file=fw) - - filename = sizesname - - assert filename.endswith(".sizes") - - super().__init__(filename) - self.fp = open(filename) - self.filename = filename - - # get sizes for individual contigs, both in list and dict - # this is to preserve the input order in the sizes file - sizes = list(self.iter_sizes()) - if select: - assert select > 0 - sizes = [x for x in sizes if x[1] >= select] - self.sizes_mapping = dict(sizes) - - # get cumulative sizes, both in list and dict - ctgs, sizes = zip(*sizes) - self.sizes = sizes - cumsizes = np.cumsum([0] + list(sizes)) - self.ctgs = ctgs - self.cumsizes = cumsizes - self.cumsizes_mapping = dict(zip(ctgs, cumsizes)) - - def __len__(self): - return len(self.sizes) - - def get_size(self, ctg): - return self.sizes_mapping[ctg] - - def get_cumsize(self, ctg): - return self.cumsizes_mapping[ctg] - - def close(self, clean=False): - self.fp.close() - if clean: - cleanup(self.filename) - - @property - def mapping(self): - return self.sizes_mapping - - @property - def totalsize(self): - return sum(self.sizes) - - def iter_sizes(self): - self.fp.seek(0) - for row in self.fp: - ctg, size = row.split()[:2] - yield ctg, int(size) - - def iter_names(self): - self.fp.seek(0) - for row in self.fp: - ctg, size = row.split()[:2] - yield ctg - - def get_position(self, ctg, pos): - if ctg not in self.cumsizes_mapping: - return None - return self.cumsizes_mapping[ctg] + pos - - def get_breaks(self): - for i in range(len(self)): - yield self.ctgs[i], self.cumsizes[i], self.cumsizes[i + 1] - - @property - def summary(self): - from jcvi.assembly.base import calculate_A50 - - ctgsizes = self.sizes - a50, l50, n50 = calculate_A50(ctgsizes) - return sum(ctgsizes), l50, n50 - - -def main(): - - actions = ( - ("agp", "write to AGP format from sizes file"), - ("extract", "extract the lines containing only the given IDs"), - ("histogram", "plot 
read/contig length distribution"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def histogram(args): - """ - %prog histogram [reads.fasta|reads.fastq] - - Plot read length distribution for reads. The plot would be similar to the - one generated by SMRT-portal, for example: - - http://blog.pacificbiosciences.com/2013/10/data-release-long-read-shotgun.html - - Plot has two axes - corresponding to pdf and cdf, respectively. Also adding - number of reads, average/median, N50, and total length. - """ - from jcvi.utils.cbook import human_size, thousands, SUFFIXES - from jcvi.formats.fastq import fasta - from jcvi.graphics.histogram import stem_leaf_plot - from jcvi.graphics.base import ( - plt, - markup, - human_formatter, - human_base_formatter, - savefig, - set2, - set_ticklabels_helvetica, - ) - - p = OptionParser(histogram.__doc__) - p.set_histogram( - vmax=50000, bins=100, xlabel="Read length", title="Read length distribution" - ) - p.add_argument("--ylabel1", default="Counts", help="Label of y-axis on the left") - p.add_argument( - "--color", - default="0", - choices=[str(x) for x in range(8)], - help="Color of bars, which is an index 0-7 in brewer set2", - ) - opts, args, iopts = p.set_image_options(args, figsize="6x6", style="dark") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - fastafile, qualfile = fasta([fastafile, "--seqtk"]) - sizes = Sizes(fastafile) - all_sizes = sorted(sizes.sizes) - xmin, xmax, bins = opts.vmin, opts.vmax, opts.bins - left, height = stem_leaf_plot(all_sizes, xmin, xmax, bins) - - plt.figure(1, (iopts.w, iopts.h)) - ax1 = plt.gca() - - width = (xmax - xmin) * 0.5 / bins - color = set2[int(opts.color)] - ax1.bar(left, height, width=width, linewidth=0, fc=color, align="center") - ax1.set_xlabel(markup(opts.xlabel)) - ax1.set_ylabel(opts.ylabel1) - - ax2 = ax1.twinx() - cur_size = 0 - total_size, l50, n50 = sizes.summary - cdf = {} - hsize = human_size(total_size) - tag = hsize[-2:] - unit 
= 1000 ** SUFFIXES[1000].index(tag) - - for x in all_sizes: - if x not in cdf: - cdf[x] = (total_size - cur_size) * 1.0 / unit - cur_size += x - x, y = zip(*sorted(cdf.items())) - ax2.plot(x, y, "-", color="darkslategray") - ylabel2 = "{0} above read length".format(tag) - ax2.set_ylabel(ylabel2) - - for ax in (ax1, ax2): - set_ticklabels_helvetica(ax) - ax.set_xlim((xmin - width / 2, xmax + width / 2)) - - tc = "gray" - axt = ax1.transAxes - xx, yy = 0.95, 0.95 - ma = "Total bases: {0}".format(hsize) - mb = "Total reads: {0}".format(thousands(len(sizes))) - mc = "Average read length: {0}bp".format(thousands(np.mean(all_sizes))) - md = "Median read length: {0}bp".format(thousands(np.median(all_sizes))) - me = "N50 read length: {0}bp".format(thousands(l50)) - for t in (ma, mb, mc, md, me): - print(t, file=sys.stderr) - ax1.text(xx, yy, t, color=tc, transform=axt, ha="right") - yy -= 0.05 - - ax1.set_title(markup(opts.title)) - # Seaborn removes ticks for all styles except 'ticks'. Now add them back: - ax1.tick_params( - axis="x", - direction="out", - length=3, - left=False, - right=False, - top=False, - bottom=True, - ) - ax1.xaxis.set_major_formatter(human_base_formatter) - ax1.yaxis.set_major_formatter(human_formatter) - figname = sizes.filename + ".pdf" - savefig(figname) - - -def extract(args): - """ - %prog extract idsfile sizesfile - - Extract the lines containing only the given IDs. - """ - p = OptionParser(extract.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - idsfile, sizesfile = args - sizes = Sizes(sizesfile).mapping - fp = open(idsfile) - for row in fp: - name = row.strip() - size = sizes[name] - print("\t".join(str(x) for x in (name, size))) - - -def agp(args): - """ - %prog agp - - Convert the sizes file to a trivial AGP file. 
- """ - from jcvi.formats.agp import OO - - p = OptionParser(agp.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (sizesfile,) = args - sizes = Sizes(sizesfile) - agpfile = sizes.filename.rsplit(".", 1)[0] + ".agp" - fw = open(agpfile, "w") - o = OO() # Without a filename - for ctg, size in sizes.iter_sizes(): - o.add(ctg, ctg, size) - - o.write_AGP(fw) - fw.close() - logger.debug("AGP file written to `%s`.", agpfile) - - return agpfile - - -if __name__ == "__main__": - main() diff --git a/jcvi/formats/vcf.py b/jcvi/formats/vcf.py deleted file mode 100644 index 7b675713..00000000 --- a/jcvi/formats/vcf.py +++ /dev/null @@ -1,849 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Variant call format. -""" -import os.path as op -import sys - -from collections import defaultdict -from itertools import groupby -from pyfaidx import Fasta -from pyliftover import LiftOver - -from ..apps.base import ActionDispatcher, OptionParser, logger, need_update, sh -from ..utils.cbook import percentage - -from .base import must_open -from .sizes import Sizes - - -class VcfLine: - def __init__(self, row): - args = row.strip().split("\t") - self.seqid = args[0] - self.pos = int(args[1]) - self.rsid = args[2] - self.ref = args[3] - self.alt = args[4] - self.qual = args[5] - self.filter = args[6] - self.info = args[7] - self.format = args[8] - self.genotype = args[9] - - def __str__(self): - return "\t".join( - str(x) - for x in ( - self.seqid, - self.pos, - self.rsid, - self.ref, - self.alt, - self.qual, - self.filter, - self.info, - self.format, - self.genotype, - ) - ) - - -class UniqueLiftover(object): - def __init__(self, chainfile): - """ - This object will perform unique single positional liftovers - it will only lift over chromosome positions that - map unique to the new genome and if the strand hasn't changed. 
- Note: You should run a VCF Normalization sweep on all lifted ofer CPRAs to check for variants that need to be - re-normalized, and to remove variants where the REF now doesn't match after a liftover. - The combination of these steps will ensure high quality liftovers. However, it should be noted that this won't - prevent the situation where multiple positions in the old genome pile up uniquely in the new genome, so one - needs to check for this. - It's organised as an object rather than a collection of functions so that the LiftOver chainfile - only gets opened/passed once and not for every position to be lifted over. - :param chainfile: A string containing the path to the local UCSC .gzipped chainfile - :return: - """ - - self.liftover = LiftOver(chainfile) - - def liftover_cpra(self, chromosome, position, verbose=False): - """ - Given chromosome, position in 1-based co-ordinates, - This will use pyliftover to liftover a CPRA, will return a (c,p) tuple or raise NonUniqueLiftover if no unique - and strand maintaining liftover is possible - :param chromosome: string with the chromosome as it's represented in the from_genome - :param position: position on chromosome (will be cast to int) - :param verbose: print verbose information for debugging - :return: ((str) chromosome, (int) position) or None if no liftover - """ - - chromosome = str(chromosome) - position = int(position) - - # Perform the liftover lookup, shift the position by 1 as pyliftover deals in 0-based co-ords - new = self.liftover.convert_coordinate(chromosome, position - 1) - # This has to be here as new will be NoneType when the chromosome doesn't exist in the chainfile - if new: - # If the liftover is unique - if len(new) == 1: - # If the liftover hasn't changed strand - if new[0][2] == "+": - # Set the co-ordinates to the lifted-over ones and write out - new_chromosome = str(new[0][0]) - # Shift the position forward by one to convert back to a 1-based co-ords - new_position = int(new[0][1]) + 1 - 
return new_chromosome, new_position - else: - exception_string = ( - "{},{} has a flipped strand in liftover: {}".format( - chromosome, position, new - ) - ) - else: - exception_string = "{},{} lifts over to multiple positions: {}".format( - chromosome, position, new - ) - elif new is None: - exception_string = "Chromosome '{}' provided not in chain file".format( - chromosome - ) - - if verbose: - logger.error(exception_string) - return None, None - - -CM = dict( - list( - zip([str(x) for x in range(1, 23)], ["chr{0}".format(x) for x in range(1, 23)]) - ) - + [("X", "chrX"), ("Y", "chrY"), ("MT", "chrM")] -) - - -def main(): - - actions = ( - ("from23andme", "convert 23andme file to vcf file"), - ("fromimpute2", "convert impute2 output to vcf file"), - ("liftover", "lift over coordinates in vcf file"), - ("location", "given SNP locations characterize the locations"), - ("mstmap", "convert vcf format to mstmap input"), - ("refallele", "make refAllele file"), - ("sample", "sample subset of vcf file"), - ("summary", "summarize the genotype calls in table"), - ("uniq", "retain only the first entry in vcf file"), - ("validate", "fast validation of vcf file"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def validate(args): - """ - %prog validate input.vcf genome.fasta - - Fasta validation of vcf file. 
- """ - import pyfasta - - p = OptionParser(validate.__doc__) - p.add_argument("--prefix", help="Add prefix to seqid") - opts, args = p.parse_args(args) - - vcffile, fastafile = args - pf = opts.prefix - genome = pyfasta.Fasta(fastafile, record_class=pyfasta.MemoryRecord) - fp = must_open(vcffile) - match_ref = match_alt = total = 0 - for row in fp: - if row[0] == "#": - continue - seqid, pos, id, ref, alt = row.split()[:5] - total += 1 - if pf: - seqid = pf + seqid - pos = int(pos) - if seqid not in genome: - continue - true_ref = genome[seqid][pos - 1] - if total % 100000 == 0: - print(total, "sites parsed", file=sys.stderr) - if ref == true_ref: - match_ref += 1 - elif alt == true_ref: - match_alt += 1 - - logger.debug("Match REF: {}".format(percentage(match_ref, total))) - logger.debug("Match ALT: {}".format(percentage(match_alt, total))) - - -def uniq(args): - """ - %prog uniq vcffile - - Retain only the first entry in vcf file. - """ - from urllib.parse import parse_qs - - p = OptionParser(uniq.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (vcffile,) = args - fp = must_open(vcffile) - data = [] - for row in fp: - if row[0] == "#": - print(row.strip()) - continue - v = VcfLine(row) - data.append(v) - - for pos, vv in groupby(data, lambda x: x.pos): - vv = list(vv) - if len(vv) == 1: - print(vv[0]) - continue - bestv = max(vv, key=lambda x: float(parse_qs(x.info)["R2"][0])) - print(bestv) - - -def sample(args): - """ - %prog sample vcffile 0.9 - - Sample subset of vcf file. 
- """ - from random import random - - p = OptionParser(sample.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - vcffile, ratio = args - ratio = float(ratio) - fp = open(vcffile) - pf = vcffile.rsplit(".", 1)[0] - kept = pf + ".kept.vcf" - withheld = pf + ".withheld.vcf" - fwk = open(kept, "w") - fww = open(withheld, "w") - nkept = nwithheld = 0 - for row in fp: - if row[0] == "#": - print(row.strip(), file=fwk) - continue - if random() < ratio: - nkept += 1 - print(row.strip(), file=fwk) - else: - nwithheld += 1 - print(row.strip(), file=fww) - logger.debug("{0} records kept to `{1}`".format(nkept, kept)) - logger.debug("{0} records withheld to `{1}`".format(nwithheld, withheld)) - - -def get_vcfstanza(fastafile, sampleid="SAMP_001"): - from jcvi.formats.base import timestamp - - # VCF spec - m = "##fileformat=VCFv4.1\n" - m += "##fileDate={0}\n".format(timestamp()) - m += "##source={0}\n".format(__file__) - m += "##reference=file://{0}\n".format(op.abspath(fastafile).strip("/")) - m += '##INFO=\n' - m += '##INFO=\n' - m += '##FORMAT=\n' - m += '##FORMAT=\n' - header = "CHROM POS ID REF ALT QUAL FILTER INFO FORMAT\n".split() + [sampleid] - m += "#" + "\t".join(header) - return m - - -def fromimpute2(args): - """ - %prog fromimpute2 impute2file fastafile 1 - - Convert impute2 output to vcf file. 
Imputed file looks like: - - --- 1:10177:A:AC 10177 A AC 0.451 0.547 0.002 - """ - p = OptionParser(fromimpute2.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - impute2file, fastafile, chr = args - fasta = Fasta(fastafile) - print(get_vcfstanza(fastafile)) - fp = open(impute2file) - seen = set() - for row in fp: - snp_id, rsid, pos, ref, alt, aa, ab, bb = row.split() - pos = int(pos) - if pos in seen: - continue - seen.add(pos) - code = max((float(aa), "0/0"), (float(ab), "0/1"), (float(bb), "1/1"))[-1] - tag = "PR" if snp_id == chr else "IM" - print( - "\t".join( - str(x) - for x in ( - chr, - pos, - rsid, - ref, - alt, - ".", - ".", - tag, - "GT:GP", - code + ":" + ",".join((aa, ab, bb)), - ) - ) - ) - - -def read_rsid(seqid, legend): - if seqid in ["Y", "MT"]: - return {} - # Read rsid - fp = open(legend) - # rs145072688:10352:T:TA - register = {} - for row in fp: - atoms = row.strip().split(":") - if len(atoms) == 4: - rsid, pos, ref, alt = atoms - else: - continue - pos = int(pos) - # Use position for non-rsid - rsids = [pos] if rsid == seqid else [rsid, pos] - for rsid in rsids: - if rsid in register: - pos1, ref1, alt1 = register[rsid] - if alt not in alt1: - register[rsid][-1].append(alt) - else: - register[rsid] = (pos, ref, [alt]) - logger.debug( - "A total of {0} sites imported from `{1}`".format(len(register), legend) - ) - return register - - -def from23andme(args): - """ - %prog from23andme txtfile 1 - - Convert from23andme file to vcf file. 
- - --ref points to the folder that contains chr1.rsids - - $ zcat 1000GP_Phase3/1000GP_Phase3_chr1.legend.gz \\ - | cut -d" " -f1 | grep ":" > chr1.rsids - """ - p = OptionParser(from23andme.__doc__) - p.set_ref() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - txtfile, seqid = args - ref_dir = opts.ref - fastafile = op.join(ref_dir, "hs37d5.fa") - fasta = Fasta(fastafile) - - pf = txtfile.rsplit(".", 1)[0] - px = CM[seqid] - chrvcf = pf + ".{0}.vcf".format(px) - legend = op.join(ref_dir, "1000GP_Phase3/{0}.rsids".format(px)) - register = read_rsid(seqid, legend) - - fw = open(chrvcf, "w") - print(get_vcfstanza(fastafile, txtfile), file=fw) - - fp = open(txtfile) - seen = set() - duplicates = skipped = missing = 0 - for row in fp: - if row[0] == "#": - continue - rsid, chr, pos, genotype = row.split() - if chr != seqid: - continue - pos = int(pos) - if (chr, pos) in seen: - duplicates += 1 - continue - seen.add((chr, pos)) - genotype = list(genotype) - if "-" in genotype: # missing daa - missing += 1 - continue - - # Y or MT - if not register: - assert len(genotype) == 1 - ref = fasta[chr][pos - 1].seq.upper() - if "D" in genotype or "I" in genotype: - skipped += 1 - continue - genotype = genotype[0] - code = "0/0" if ref == genotype else "1/1" - alt = "." 
if ref == genotype else genotype - print( - "\t".join( - str(x) - for x in (chr, pos, rsid, ref, alt, ".", ".", "PR", "GT", code) - ), - file=fw, - ) - continue - - # If rsid is seen in the db, use that - if rsid in register: - pos, ref, alt = register[rsid] - elif pos in register: - pos, ref, alt = register[pos] - else: - skipped += 1 # Not in reference panel - continue - - assert fasta[chr][pos - 1 : pos + len(ref) - 1].seq.upper() == ref - # Keep it bi-allelic - not_seen = [x for x in alt if x not in genotype] - while len(alt) > 1 and not_seen: - alt.remove(not_seen.pop()) - if len(alt) > 1: - alt = [alt[0]] - alleles = [ref] + alt - - if len(genotype) == 1: - genotype = [genotype[0]] * 2 - - alt = ",".join(alt) or "." - if "D" in genotype or "I" in genotype: - max_allele = max((len(x), x) for x in alleles)[1] - alleles = [("I" if x == max_allele else "D") for x in alleles] - assert "I" in alleles and "D" in alleles - a, b = genotype - try: - ia, ib = alleles.index(a), alleles.index(b) - except ValueError: # alleles not seen - logger.error( - "{0}: alleles={1}, genotype={2}".format(rsid, alleles, genotype) - ) - skipped += 1 - continue - code = "/".join(str(x) for x in sorted((ia, ib))) - - print( - "\t".join( - str(x) for x in (chr, pos, rsid, ref, alt, ".", ".", "PR", "GT", code) - ), - file=fw, - ) - - logger.debug( - "duplicates={0} skipped={1} missing={2}".format(duplicates, skipped, missing) - ) - - -def refallele(args): - """ - %prog refallele vcffile > out.refAllele - - Make refAllele file which can be used to convert PLINK file to VCF file. 
- """ - p = OptionParser(refallele.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (vcffile,) = args - fp = open(vcffile) - for row in fp: - if row[0] == "#": - continue - atoms = row.split() - marker = "{0}:{1}".format(*atoms[:2]) - ref = atoms[3] - print("\t".join((marker, ref))) - - -def location(args): - """ - %prog location bedfile fastafile - - Given SNP locations, summarize the locations in the sequences. For example, - find out if there are more 3`-SNPs than 5`-SNPs. - """ - from jcvi.formats.bed import BedLine - from jcvi.graphics.histogram import stem_leaf_plot - - p = OptionParser(location.__doc__) - p.add_argument( - "--dist", - default=100, - type=int, - help="Distance cutoff to call 5` and 3`", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedfile, fastafile = args - dist = opts.dist - sizes = Sizes(fastafile).mapping - fp = open(bedfile) - fiveprime = threeprime = total = 0 - percentages = [] - for row in fp: - b = BedLine(row) - pos = b.start - size = sizes[b.seqid] - if pos < dist: - fiveprime += 1 - if size - pos < dist: - threeprime += 1 - total += 1 - percentages.append(100 * pos / size) - - m = "Five prime (within {0}bp of start codon): {1}\n".format(dist, fiveprime) - m += "Three prime (within {0}bp of stop codon): {1}\n".format(dist, threeprime) - m += "Total: {0}".format(total) - print(m, file=sys.stderr) - - bins = 10 - title = "Locations within the gene [0=Five-prime, 100=Three-prime]" - stem_leaf_plot(percentages, 0, 100, bins, title=title) - - -def summary(args): - """ - %prog summary txtfile fastafile - - The txtfile can be generated by: %prog mstmap --noheader --freq=0 - - Tabulate on all possible combinations of genotypes and provide results - in a nicely-formatted table. Give a fastafile for SNP rate (average - # of SNPs per Kb). 
- - Only three-column file is supported: - locus_id intra- genotype inter- genotype - """ - from jcvi.utils.cbook import thousands - from jcvi.utils.table import tabulate - - p = OptionParser(summary.__doc__) - p.add_argument("--counts", help="Print SNP counts in a txt file") - p.add_argument("--bed", help="Print SNPs locations in a bed file") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - txtfile, fastafile = args - bedfw = open(opts.bed, "w") if opts.bed else None - - fp = open(txtfile) - header = next(fp).split() # Header - snps = defaultdict(list) # contig => list of loci - combinations = defaultdict(int) - intraSNPs = interSNPs = 0 - distinctSet = set() # set of genes that show A-B pattern - ref, alt = header[1:3] - snpcounts, goodsnpcounts = defaultdict(int), defaultdict(int) - for row in fp: - atoms = row.split() - assert len(atoms) == 3, "Only three-column file is supported" - locus, intra, inter = atoms - ctg, pos = locus.rsplit(".", 1) - pos = int(pos) - snps[ctg].append(pos) - snpcounts[ctg] += 1 - - if intra == "X": - intraSNPs += 1 - if inter in ("B", "X"): - interSNPs += 1 - if intra == "A" and inter == "B": - distinctSet.add(ctg) - goodsnpcounts[ctg] += 1 - # Tabulate all possible combinations - intra = ref + "-" + intra - inter = alt + "-" + inter - combinations[(intra, inter)] += 1 - - if bedfw: - print("\t".join(str(x) for x in (ctg, pos - 1, pos, locus)), file=bedfw) - - if bedfw: - logger.debug("SNP locations written to `{0}`.".format(opts.bed)) - bedfw.close() - - nsites = sum(len(x) for x in snps.values()) - sizes = Sizes(fastafile) - bpsize = sizes.totalsize - snprate = lambda a: a * 1000.0 / bpsize - m = "Dataset `{0}` contains {1} contigs ({2} bp).\n".format( - fastafile, len(sizes), thousands(bpsize) - ) - m += "A total of {0} SNPs within {1} contigs ({2} bp).\n".format( - nsites, len(snps), thousands(sum(sizes.mapping[x] for x in snps.keys())) - ) - m += "SNP rate: {0:.1f}/Kb, 
".format(snprate(nsites)) - m += "IntraSNPs: {0} ({1:.1f}/Kb), InterSNPs: {2} ({3:.1f}/Kb)".format( - intraSNPs, snprate(intraSNPs), interSNPs, snprate(interSNPs) - ) - print(m, file=sys.stderr) - print(tabulate(combinations), file=sys.stderr) - - leg = "Legend: A - homozygous same, B - homozygous different, X - heterozygous" - print(leg, file=sys.stderr) - - tag = (ref + "-A", alt + "-B") - distinctSNPs = combinations[tag] - tag = str(tag).replace("'", "") - print( - "A total of {0} disparate {1} SNPs in {2} contigs.".format( - distinctSNPs, tag, len(distinctSet) - ), - file=sys.stderr, - ) - - if not opts.counts: - return - - snpcountsfile = opts.counts - fw = open(snpcountsfile, "w") - header = "\t".join(("Contig", "#_SNPs", "#_AB_SNP")) - print(header, file=fw) - - assert sum(snpcounts.values()) == nsites - assert sum(goodsnpcounts.values()) == distinctSNPs - - for ctg in sorted(snps.keys()): - snpcount = snpcounts[ctg] - goodsnpcount = goodsnpcounts[ctg] - print("\t".join(str(x) for x in (ctg, snpcount, goodsnpcount)), file=fw) - - fw.close() - logger.debug("SNP counts per contig is written to `{0}`.".format(snpcountsfile)) - - -g2x = {"0/0": "A", "0/1": "X", "1/1": "B", "./.": "-", ".": "-"} - - -def encode_genotype(s, mindepth=3, depth_index=2, nohet=False): - """ - >>> encode_genotype("1/1:128,18,0:6:18") # homozygote B - 'B' - >>> encode_genotype("0/1:0,0,0:0:3") # missing data - '-' - >>> encode_genotype("0/1:128,0,26:7:22") # heterozygous A/B - 'X' - """ - atoms = s.split(":") - if len(atoms) < 3: - return g2x[atoms[0]] - - inferred = atoms[0] - depth = int(atoms[depth_index]) - if depth < mindepth: - return "-" - if inferred == "0/0": - return "A" - if inferred == "0/1": - return "-" if nohet else "X" - if inferred == "1/1": - return "B" - return "-" - - -def mstmap(args): - """ - %prog mstmap bcffile/vcffile > matrixfile - - Convert bcf/vcf format to mstmap input. 
- """ - from jcvi.assembly.geneticmap import MSTMatrix - - p = OptionParser(mstmap.__doc__) - p.add_argument( - "--dh", - default=False, - action="store_true", - help="Double haploid population, no het", - ) - p.add_argument( - "--freq", - default=0.2, - type=float, - help="Allele must be above frequency", - ) - p.add_argument( - "--mindepth", - default=3, - type=int, - help="Only trust genotype calls with depth", - ) - p.add_argument( - "--missing_threshold", - default=0.25, - type=float, - help="Fraction missing must be below", - ) - p.add_argument( - "--noheader", - default=False, - action="store_true", - help="Do not print MSTmap run parameters", - ) - p.add_argument( - "--pv4", - default=False, - action="store_true", - help="Enable filtering strand-bias, tail distance bias, etc.", - ) - p.add_argument( - "--freebayes", - default=False, - action="store_true", - help="VCF output from freebayes", - ) - p.set_sep(sep=".", help="Use separator to simplify individual names") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (vcffile,) = args - if vcffile.endswith(".bcf"): - bcffile = vcffile - vcffile = bcffile.rsplit(".", 1)[0] + ".vcf" - cmd = "bcftools view {0}".format(bcffile) - cmd += " | vcfutils.pl varFilter" - if not opts.pv4: - cmd += " -1 0 -2 0 -3 0 -4 0 -e 0" - if need_update(bcffile, vcffile): - sh(cmd, outfile=vcffile) - - freq = opts.freq - sep = opts.sep - depth_index = 1 if opts.freebayes else 2 - - ptype = "DH" if opts.dh else "RIL6" - nohet = ptype == "DH" - fp = open(vcffile) - genotypes = [] - for row in fp: - if row[:2] == "##": - continue - atoms = row.split() - if row[0] == "#": - ind = [x.split(sep)[0] for x in atoms[9:]] - nind = len(ind) - mh = ["locus_name"] + ind - continue - - marker = "{0}.{1}".format(*atoms[:2]) - - geno = atoms[9:] - geno = [ - encode_genotype( - x, mindepth=opts.mindepth, depth_index=depth_index, nohet=nohet - ) - for x in geno - ] - assert len(geno) == nind 
- f = 1.0 / nind - - if geno.count("A") * f < freq: - continue - if geno.count("B") * f < freq: - continue - if geno.count("-") * f > opts.missing_threshold: - continue - - genotype = [marker] + geno - genotypes.append(genotype) - - mm = MSTMatrix(genotypes, mh, ptype, opts.missing_threshold) - mm.write(opts.outfile, header=(not opts.noheader)) - - -def liftover(args): - """ - %prog liftover old.vcf hg19ToHg38.over.chain.gz new.vcf - - Lift over coordinates in vcf file. - """ - p = OptionParser(liftover.__doc__) - p.add_argument( - "--newid", default=False, action="store_true", help="Make new identifiers" - ) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - oldvcf, chainfile, newvcf = args - ul = UniqueLiftover(chainfile) - num_excluded = 0 - fp = open(oldvcf) - fw = open(newvcf, "w") - for row in fp: - row = row.strip() - if row[0] == "#": - if row.startswith("##source="): - row = "##source={0}".format(__file__) - elif row.startswith("##reference="): - row = "##reference=hg38" - elif row.startswith("##contig="): - continue - print(row.strip(), file=fw) - continue - - v = VcfLine(row) - # GRCh37.p2 has the same MT sequence as hg38 (but hg19 is different) - if v.seqid == "MT": - v.seqid = "chrM" - print(v, file=fw) - continue - - try: - new_chrom, new_pos = ul.liftover_cpra(CM[v.seqid], v.pos) - except: - num_excluded += 1 - continue - - if new_chrom is not None and new_pos is not None: - v.seqid, v.pos = new_chrom, new_pos - if opts.newid: - v.rsid = "{0}:{1}".format(new_chrom.replace("chr", ""), new_pos) - print(v, file=fw) - else: - num_excluded += 1 - - logger.debug("Excluded {0}".format(num_excluded)) - - -if __name__ == "__main__": - main() diff --git a/jcvi/graphics/__init__.py b/jcvi/graphics/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/jcvi/graphics/__main__.py b/jcvi/graphics/__main__.py deleted file mode 100644 index 41088d82..00000000 --- a/jcvi/graphics/__main__.py +++ /dev/null @@ 
-1,11 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Suite of visualization tools for dot-plots, histograms, karytotypes, macro-/micro-synteny plots, seed counting using GRABSEEDS, etc. -""" - -from ..apps.base import dmain - - -if __name__ == "__main__": - dmain(__file__) diff --git a/jcvi/graphics/align.py b/jcvi/graphics/align.py deleted file mode 100644 index a8ab05a7..00000000 --- a/jcvi/graphics/align.py +++ /dev/null @@ -1,554 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog demo - -Illustrate three different types of alignments. -- Pairwise sequence alignment, aka, "dot plot" -- Read alignment, similar to the visualization of a BAM file -- Optical map alignment, matchings between restriction fragments -""" - - -import sys - -from bisect import bisect -from random import choice, randint - -from more_itertools import pairwise - -from ..apps.base import OptionParser -from ..utils.range import range_overlap - -from .base import FancyArrow, Rectangle, plt, savefig, normalize_axes -from .chromosome import Chromosome, HorizontalChromosome -from .glyph import BaseGlyph, GeneGlyph - - -class BaseAlign(object): - def __init__(self, fig, xywh, xpad=0, ypad=0, xmax=100): - x, y, w, h = xywh - self.ax = fig.add_axes(xywh) - self.sax = fig.add_axes( - [x + xpad * w, y + ypad * h, (1 - 2 * xpad) * w, (1 - 2 * ypad) * h] - ) - self.amax = self.bmax = xmax - self.a = [(1, xmax)] - self.b = [(1, xmax)] - self.apatch = self.bpatch = None - self.apatchcolor = self.bpatchcolor = "darkslategrey" - self.xpad = xpad - self.ypad = ypad - self.canvas = 1 - 2 * xpad - - def convert(self, pos, xmax): - return self.xpad + pos * self.canvas / xmax - - def invert(self, a, b): - self.a = [(1, a), (a, b), (b, self.amax)] - self.b = [(1, a), (b, a), (b, self.bmax)] - self.apatch = (self.convert(a, self.amax), self.convert(b, self.amax)) - self.bpatch = (self.convert(a, self.bmax), self.convert(b, self.bmax)) - self.bpatchcolor = "y" - - def delete(self, 
a, b): - self.bmax -= b - a - self.a = [(1, a), (b, self.amax)] - self.b = [(1, a), (a, self.bmax)] - self.apatch = (self.convert(a, self.amax), self.convert(b, self.amax)) - - def duplicate(self, a, b, gap=0): - self.bmax += b - a + gap - self.a = [(1, b), (a, self.amax)] - self.b = [(1, b), (b + gap, self.bmax)] - self.apatch = (self.convert(a, self.amax), self.convert(b, self.amax)) - self.bpatch = ( - self.convert(a, self.bmax), - self.convert(b, self.bmax), - self.convert(b + gap, self.bmax), - self.convert(2 * b - a + gap, self.bmax), - ) - self.bpatchcolor = "tomato" - - -class PairwiseAlign(BaseAlign): - def __init__(self, fig, xywh, xpad=0.15, ypad=0.15): - super().__init__(fig, xywh, xpad, ypad) - - def draw(self, width=0.03): - HorizontalChromosome( - self.ax, - self.xpad, - 1 - self.xpad, - self.ypad - 0.05, - height=width * 1.5, - patch=self.apatch, - lw=2, - ) - Chromosome( - self.ax, - self.xpad - 0.05, - self.ypad, - 1 - self.ypad, - width=width, - patch=self.bpatch, - patchcolor=self.bpatchcolor, - lw=2, - ) - for a, b in zip(self.a, self.b): - self.sax.plot(a, b, "-", color="darkslategrey", lw=2) - self.sax.set_xticklabels([]) - self.sax.set_yticklabels([]) - self.sax.set_xlim((1, self.amax)) - self.sax.set_ylim((1, self.bmax)) - normalize_axes(self.ax) - - -class ReadAlign(BaseAlign): - def __init__(self, fig, xywh, xpad=0.05, ypad=0.2, readlen=6, gap=3): - super().__init__(fig, xywh, xpad, ypad) - self.readlen = readlen - self.gap = gap - self.reads = [] - self.ymax = 12 - self.ntracks = 0 - self.layout(1, self.amax) - - def layout(self, start, end, maxtracks=8): - readrange = 2 * self.readlen + self.gap - end -= readrange - assert start < end, "end must be > start + readlen" - reads = [] - for x in range(100): - pos = randint(start, end) - reads.append(PairedRead(pos, readlen=self.readlen, gap=self.gap)) - reads, ntracks = self.arrange(reads, self.ntracks, maxtracks=maxtracks) - self.reads += reads - self.ntracks += ntracks - - def 
arrange(self, reads, ntracks, maxtracks=8): - track_ends = [0] - reads.sort(key=lambda x: x.start) - for r in reads: - m = min(track_ends) - mi = track_ends.index(m) - if r.start > m + 0.005: - track_ends[mi] = r.end - else: - if len(track_ends) >= maxtracks: - continue - track_ends.append(r.end) - mi = len(track_ends) - 1 - r.set_y(ntracks + mi) - ntracks = len(track_ends) - reads = [x for x in reads if x.y is not None] - return reads, ntracks - - def remove(self, a, b, maxtracks=0): - self.reads = [ - r - for r in self.reads - if not (a <= r.start <= b and a <= r.end <= b and r.y >= maxtracks) - ] - - def draw(self, width=0.03): - HorizontalChromosome( - self.ax, - self.xpad, - 1 - self.xpad, - self.ypad - width / 2, - height=width * 1.5, - patch=self.apatch, - lw=2, - ) - for r in self.reads: - r.draw(self.sax) - self.sax.set_xlim((1, self.amax)) - self.sax.set_ylim((-1, self.ymax)) - normalize_axes(self.ax) - self.sax.set_axis_off() - - def highlight(self, a, b): - self.apatch = (self.convert(a, self.amax), self.convert(b, self.amax)) - self.sax.plot((a, a), (-1, self.ntracks), "m-", lw=2) - self.sax.plot((b, b), (-1, self.ntracks), "m-", lw=2) - - def invert(self, a, b): - reads = [] - for r in self.reads: - r.set_y(None) - keep = True - if r.start < a < r.end or r.start < b < r.end: - adist, bdist = abs(a - r.mid), abs(b - r.mid) - flipr = r.r2 if adist > bdist else r.r1 - flipr.x1 = a + b - flipr.x1 - flipr.x2 = a + b - flipr.x2 - flipr.color = "y" - if adist > self.gap and bdist > self.gap: - keep = False - if keep: - reads.append(r) - self.reads, self.ntracks = self.arrange(reads, 0) - self.highlight(a, b) - - def delete(self, a, b): - self.remove(a, b) - for r in self.reads: - r.breakpoint(a, "g", "lightgrey") - r.breakpoint(b, "lightgrey", "g") - self.highlight(a, b) - - def duplicate(self, a, b, gap=0): - self.layout(1, self.amax, maxtracks=4) - self.remove(1, a, maxtracks=6) - self.remove(b, self.amax, maxtracks=6) - for r in self.reads: - r.paint(a, 
b, "tomato") - r.breakpoint(a, "k", "tomato") - r.breakpoint(b, "tomato", "k") - r.breakpoint(a, "lightgrey", "tomato", ystart=6) - r.breakpoint(b, "tomato", "lightgrey", ystart=6) - self.highlight(a, b) - - -class OpticalMapAlign(BaseAlign): - def __init__(self, fig, xywh, xpad=0.05, ypad=0.3): - super().__init__(fig, xywh, xpad, ypad) - om = self.from_silico() - self.om1 = OpticalMapTrack(self.sax, om) - self.om2 = OpticalMapTrack(self.sax, om, ystart=-3, color="orange") - - def from_silico(self, filename="Ecoli.silico", nfrags=25): - fp = open(filename) - next(fp) - ar = [0] + [int(x) for x in next(fp).split()] - sizes = [] # Only retain frags beyond certain size - for a, b in pairwise(ar): - size = b - a - if size < max(ar[:nfrags]) / 100: - continue - sizes.append(size) - - sizes = [choice(sizes) for x in range(nfrags)] - return sizes - - def draw(self): - self.om1.draw() - self.om2.draw() - self.sax.set_xlim(0, self.om1.amax) - self.sax.set_ylim(-8, 8) - normalize_axes(self.ax) - self.sax.set_axis_off() - - def invert(self, a, b): - ai, bi = self.om2.invert(a, b) - self.om1.highlight(ai, bi, "lightslategrey") - self.om2.highlight(ai, bi, "y", arrow_inverse=True) - - def delete(self, a, b): - ai, bi = self.om2.delete(a, b) - self.om1.highlight(ai, bi, "lightslategrey") - self.om2.highlight(ai, bi, None) - - def duplicate(self, a, b, gap=0): - (ai, bi), (ci, di) = self.om1.duplicate(a, b) - (ai, bi), (ci, di) = self.om2.duplicate(a, b) - self.om1.highlight(ai, bi, None) - self.om1.highlight(ci, di, "lightslategrey") - self.om2.highlight(ai, bi, "tomato") - self.om2.highlight(ci, di, "tomato") - - -class OpticalMapTrack(BaseGlyph): - def __init__(self, ax, sizes, ystart=0, color="darkslategrey", height=1, wiggle=3): - - super().__init__(ax) - self.ax = ax - self.sizes = sizes[:] - self.ystart = ystart - self.height = height - self.color = color - self.wiggle = wiggle - self.make_wiggles() - - def draw(self): - ar = self.ar - pad = self.pad - pads = 0 - for (a, 
b), w, color in zip(pairwise(ar), self.wiggles, self.colors): - yf = self.ystart + w * 1.0 / self.wiggle - if color: - p = Rectangle((a + pads, yf), b - a, self.height, color=color) - self.append(p) - pads += pad - self.add_patches() - - def get_endpoints(self, a, b, xmax=100): - ar = self.ar - a, b = max(ar) * a / xmax, max(ar) * b / xmax - return bisect(ar, a) - 1, bisect(ar, b) - - def invert(self, a, b): - ai, bi = self.get_endpoints(a, b) - bb = self.sizes[ai:bi] - self.sizes = self.sizes[:ai] + bb[::-1] + self.sizes[bi:] - return ai, bi - - def delete(self, a, b): - return self.get_endpoints(a, b) - - def duplicate(self, a, b): - ai, bi = self.get_endpoints(a, b) - ai += self.wiggle / 2 - bi += self.wiggle / 2 - ci, di = ai - self.wiggle, ai - bb = self.sizes[ai:bi] - bs = len(bb) - self.sizes = self.sizes[:ci] + bb + self.sizes[ci:] - self.make_wiggles() - return (ci, ci + bs), (di + bs, di + 2 * bs) - - def highlight(self, ai, bi, color, arrow_inverse=False): - self.colors[ai:bi] = [color] * (bi - ai) - ar = self.ar - a, b = ar[ai], ar[bi] - a += self.pad * (ai - 1) - b += self.pad * (bi - 1) - if self.ystart < 0: - yy = self.ystart - 2 - shape = "left" - else: - yy = self.ystart + 4 - shape = "right" - if arrow_inverse: - a, b = b, a - shape = "right" if shape == "left" else "left" - if not color: - return - p = FancyArrow( - a, - yy, - b - a, - 0, - fc=color, - lw=0, - shape=shape, - length_includes_head=True, - width=1, - head_length=abs(b - a) * 0.15, - head_width=3, - ) - self.ax.add_patch(p) - - @property - def amax(self): - return sum(self.sizes) + (self.length - 1) * self.pad - - @property - def length(self): - return len(self.sizes) - - @property - def ar(self): - cumsizes = [0] - for a in self.sizes: - cumsizes.append(cumsizes[-1] + a) - return cumsizes - - def make_wiggles(self): - ar = [self.wiggle / 2 + 1] - while len(ar) <= self.length: - ar += range(self.wiggle, 0, -1) - self.wiggles = ar[: self.length] - self.colors = [self.color] * 
self.length - ar = self.ar - self.pad = max(ar) / 100 - - -class SingleRead(object): - def __init__(self, start, readlen, sign=1): - self.x1 = start - self.x2 = start + sign * readlen - self.y = None - self.color = "k" - self.broken = None - - @property - def sign(self): - return 1 if self.x2 >= self.x1 else -1 - - @property - def start(self): - return min(self.x1, self.x2) - - @property - def end(self): - return max(self.x1, self.x2) - - @property - def span(self): - return self.end - self.start + 1 - - def draw(self, ax, height=0.6): - if self.broken is None: - GeneGlyph( - ax, - self.x1, - self.x2, - self.y, - height, - tip=2, - color=self.color, - gradient=True, - ) - else: - a, lcolor, rcolor = self.broken - if self.sign < 0: - lcolor, rcolor = rcolor, lcolor - GeneGlyph( - ax, self.x1, a, self.y, height, tip=0, color=lcolor, gradient=True - ) - GeneGlyph( - ax, a, self.x2, self.y, height, tip=2, color=rcolor, gradient=True - ) - - def breakpoint(self, a, lcolor, rcolor): - if a > self.end: - self.color = lcolor - elif a < self.start: - self.color = rcolor - else: - self.broken = (a, lcolor, rcolor) - - -class PairedRead(object): - def __init__(self, start, readlen, gap): - self.r1 = SingleRead(start, readlen) - self.r2 = SingleRead(start + gap + 2 * readlen, readlen, sign=-1) - self.color = "k" - self.y = None - - @property - def start(self): - return min(self.r1.start, self.r2.start) - - @property - def end(self): - return max(self.r1.end, self.r2.end) - - @property - def i1(self): - return min(self.r1.end, self.r2.end) - - @property - def i2(self): - return max(self.r1.start, self.r2.start) - - @property - def mid(self): - return (self.start + self.end) * 0.5 - - def set_y(self, y): - self.y = y - self.r1.y = self.r2.y = y - - def draw(self, ax): - self.r1.draw(ax) - self.r2.draw(ax) - ax.plot((self.i1, self.i2), (self.y, self.y), "-", color=self.color) - - def paint(self, a, b, color): - if range_overlap((0, self.start + 1, self.end - 1), (0, a, b)): - 
self.r1.color = self.r2.color = self.color = color - - def breakpoint(self, a, lcolor, rcolor, ystart=0): - if not self.start < a < self.end: - return - if self.y < ystart: - return - self.color = lcolor if a > self.mid else rcolor - self.r1.breakpoint(a, lcolor, rcolor) - self.r2.breakpoint(a, lcolor, rcolor) - - -def main(): - p = OptionParser(__doc__) - opts, args, iopts = p.set_image_options(figsize="9x7") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (mode,) = args - assert mode == "demo" - - a, b = 30, 70 - pad = 0.08 - w = 0.31 - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - # Row separators - yy = 1 - pad - for i in range(3): - root.plot((0, 1), (yy, yy), "-", lw=2, color="lightgray") - yy -= w - - # Row headers - xx = pad * 0.6 - yy = 1 - pad - 0.5 * w - for title in ("Inversion", "Indel", "Duplication"): - root.text(xx, yy, title, ha="center", va="center") - yy -= w - - # Column headers - xx = pad + 0.5 * w - yy = 1 - pad / 2 - for title in ("Assembly alignment", "Read alignment", "Optical map alignment"): - root.text(xx, yy, title, ha="center", va="center") - xx += w - - p = PairwiseAlign(fig, [pad, 2 * w, w, w]) - p.invert(a, b) - p.draw() - - p = PairwiseAlign(fig, [pad, w, w, w]) - p.delete(a, b) - p.draw() - - p = PairwiseAlign(fig, [pad, 0, w, w]) - p.duplicate(a, b, gap=5) - p.draw() - - p = ReadAlign(fig, [pad + w, 2 * w, w, w]) - p.invert(a, b) - p.draw() - - p = ReadAlign(fig, [pad + w, w, w, w]) - p.delete(a, b) - p.draw() - - p = ReadAlign(fig, [pad + w, 0, w, w]) - p.duplicate(a, b) - p.draw() - - p = OpticalMapAlign(fig, [pad + 2 * w, 2 * w, w, w]) - p.invert(a, b) - p.draw() - - p = OpticalMapAlign(fig, [pad + 2 * w, w, w, w]) - p.delete(a, b) - p.draw() - - p = OpticalMapAlign(fig, [pad + 2 * w, 0, w, w]) - p.duplicate(a, b) - p.draw() - - normalize_axes(root) - - image_name = mode + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -if __name__ == "__main__": - main() diff --git a/jcvi/graphics/assembly.py b/jcvi/graphics/assembly.py deleted file mode 100644 index 1ea11a84..00000000 --- a/jcvi/graphics/assembly.py +++ /dev/null @@ -1,516 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Assembly QC plots, including general statistics, base and mate coverages, and -scaffolding consistencies. -""" -import os.path as op -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger, need_update -from ..assembly.base import calculate_A50 -from ..assembly.coverage import Coverage -from ..formats.bed import Bed, BedLine -from ..formats.fasta import Fasta -from ..formats.sizes import Sizes -from ..utils.cbook import thousands - -from .base import plt, Rectangle, set_human_base_axis, savefig - - -def main(): - actions = ( - ("A50", "compare A50 graphics for a set of FASTA files"), - ("coverage", "plot coverage from a set of BED files"), - ("qc", "performs QC graphics on given contig/scaffold"), - ("scaffold", "plot the alignment of the scaffold to other evidences"), - ("covlen", "plot coverage vs length"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def covlen(args): - """ - %prog covlen covfile fastafile - - Plot coverage vs length. `covfile` is two-column listing contig id and - depth of coverage. 
- """ - import numpy as np - import pandas as pd - import seaborn as sns - from jcvi.formats.base import DictFile - - p = OptionParser(covlen.__doc__) - p.add_argument("--maxsize", default=1000000, type=int, help="Max contig size") - p.add_argument("--maxcov", default=100, type=int, help="Max contig size") - p.add_argument("--color", default="m", help="Color of the data points") - p.add_argument( - "--kind", - default="scatter", - choices=("scatter", "reg", "resid", "kde", "hex"), - help="Kind of plot to draw", - ) - opts, args, iopts = p.set_image_options(args, figsize="8x8") - - if len(args) != 2: - sys.exit(not p.print_help()) - - covfile, fastafile = args - cov = DictFile(covfile, cast=float) - s = Sizes(fastafile) - data = [] - maxsize, maxcov = opts.maxsize, opts.maxcov - for ctg, size in s.iter_sizes(): - c = cov.get(ctg, 0) - if size > maxsize: - continue - if c > maxcov: - continue - data.append((size, c)) - - x, y = zip(*data) - x = np.array(x) - y = np.array(y) - logger.debug("X size {0}, Y size {1}".format(x.size, y.size)) - - df = pd.DataFrame() - xlab, ylab = "Length", "Coverage of depth (X)" - df[xlab] = x - df[ylab] = y - sns.jointplot( - xlab, - ylab, - kind=opts.kind, - data=df, - xlim=(0, maxsize), - ylim=(0, maxcov), - stat_func=None, - edgecolor="w", - color=opts.color, - ) - - figname = covfile + ".pdf" - savefig(figname, dpi=iopts.dpi, iopts=iopts) - - -def coverage(args): - """ - %prog coverage fastafile ctg bedfile1 bedfile2 .. - - Plot coverage from a set of BED files that contain the read mappings. The - paired read span will be converted to a new bedfile that contain the happy - mates. ctg is the chr/scf/ctg that you want to plot the histogram on. - - If the bedfiles already contain the clone spans, turn on --spans. 
- """ - from jcvi.formats.bed import mates, bedpe - - p = OptionParser(coverage.__doc__) - p.add_argument("--ymax", default=None, type=int, help="Limit ymax") - p.add_argument( - "--spans", - default=False, - action="store_true", - help="BED files already contain clone spans", - ) - opts, args, iopts = p.set_image_options(args, figsize="8x5") - - if len(args) < 3: - sys.exit(not p.print_help()) - - fastafile, ctg = args[0:2] - bedfiles = args[2:] - - sizes = Sizes(fastafile) - size = sizes.mapping[ctg] - - plt.figure(1, (iopts.w, iopts.h)) - ax = plt.gca() - - bins = 100 # smooth the curve - lines = [] - legends = [] - not_covered = [] - yy = 0.9 - for bedfile, c in zip(bedfiles, "rgbcky"): - if not opts.spans: - pf = bedfile.rsplit(".", 1)[0] - matesfile = pf + ".mates" - if need_update(bedfile, matesfile): - matesfile, matesbedfile = mates([bedfile, "--lib"]) - - bedspanfile = pf + ".spans.bed" - if need_update(matesfile, bedspanfile): - bedpefile, bedspanfile = bedpe( - [bedfile, "--span", "--mates={0}".format(matesfile)] - ) - bedfile = bedspanfile - - bedsum = Bed(bedfile).sum(seqid=ctg) - notcoveredbases = size - bedsum - - legend = bedfile.split(".")[0] - msg = "{0}: {1} bp not covered".format(legend, thousands(notcoveredbases)) - not_covered.append(msg) - print(msg, file=sys.stderr) - ax.text(0.1, yy, msg, color=c, size=9, transform=ax.transAxes) - yy -= 0.08 - - cov = Coverage(bedfile, sizes.filename) - x, y = cov.get_plot_data(ctg, bins=bins) - (line,) = ax.plot(x, y, "-", color=c, lw=2, alpha=0.5) - lines.append(line) - legends.append(legend) - - leg = ax.legend(lines, legends, shadow=True, fancybox=True) - leg.get_frame().set_alpha(0.5) - - ylabel = "Average depth per {0}Kb".format(size / bins / 1000) - ax.set_xlim(0, size) - ax.set_ylim(0, opts.ymax) - ax.set_xlabel(ctg) - ax.set_ylabel(ylabel) - set_human_base_axis(ax) - - figname = "{0}.{1}.pdf".format(fastafile, ctg) - savefig(figname, dpi=iopts.dpi, iopts=iopts) - - -def scaffolding(ax, scaffoldID, 
blastf, qsizes, ssizes, qbed, sbed, highlights=None): - - from jcvi.graphics.blastplot import blastplot - - # qsizes, qbed are properties for the evidences - # ssizes, sbed are properties for the current scaffoldID - blastplot( - ax, - blastf, - qsizes, - ssizes, - qbed, - sbed, - style="circle", - insetLabels=True, - stripNames=True, - highlights=highlights, - ) - - # FPC_scf.bed => FPC - fname = qbed.filename.split(".")[0].split("_")[0] - xtitle = fname - if xtitle == "FPC": - ax.set_xticklabels([""] * len(ax.get_xticklabels())) - ax.set_xlabel(xtitle, color="g") - for x in ax.get_xticklines(): - x.set_visible(False) - - -def plot_one_scaffold( - scaffoldID, ssizes, sbed, trios, imagename, iopts, highlights=None -): - ntrios = len(trios) - fig = plt.figure(1, (14, 8)) - plt.cla() - plt.clf() - root = fig.add_axes([0, 0, 1, 1]) - axes = [fig.add_subplot(1, ntrios, x) for x in range(1, ntrios + 1)] - scafsize = ssizes.get_size(scaffoldID) - - for trio, ax in zip(trios, axes): - blastf, qsizes, qbed = trio - scaffolding( - ax, scaffoldID, blastf, qsizes, ssizes, qbed, sbed, highlights=highlights - ) - - root.text( - 0.5, - 0.95, - "{0} (size={1})".format(scaffoldID, thousands(scafsize)), - size=18, - ha="center", - color="b", - ) - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - savefig(imagename, dpi=iopts.dpi, iopts=iopts) - - -def scaffold(args): - """ - %prog scaffold scaffold.fasta synteny.blast synteny.sizes synteny.bed - physicalmap.blast physicalmap.sizes physicalmap.bed - - As evaluation of scaffolding, visualize external line of evidences: - * Plot synteny to an external genome - * Plot alignments to physical map - * Plot alignments to genetic map (TODO) - - Each trio defines one panel to be plotted. blastfile defines the matchings - between the evidences vs scaffolds. Then the evidence sizes, and evidence - bed to plot dot plots. 
- - This script will plot a dot in the dot plot in the corresponding location - the plots are one contig/scaffold per plot. - """ - from more_itertools import grouper - - p = OptionParser(scaffold.__doc__) - p.add_argument( - "--cutoff", - type=int, - default=1000000, - help="Plot scaffolds with size larger than", - ) - p.add_argument( - "--highlights", - help="A set of regions in BED format to highlight", - ) - opts, args, iopts = p.set_image_options(args, figsize="14x8", dpi=150) - - if len(args) < 4 or len(args) % 3 != 1: - sys.exit(not p.print_help()) - - highlights = opts.highlights - scafsizes = Sizes(args[0]) - trios = list(grouper(args[1:], 3)) - trios = [(a, Sizes(b), Bed(c)) for a, b, c in trios] - if highlights: - hlbed = Bed(highlights) - - for scaffoldID, scafsize in scafsizes.iter_sizes(): - if scafsize < opts.cutoff: - continue - logger.debug("Loading {0} (size={1})".format(scaffoldID, thousands(scafsize))) - - tmpname = scaffoldID + ".sizes" - tmp = open(tmpname, "w") - tmp.write("{0}\t{1}".format(scaffoldID, scafsize)) - tmp.close() - - tmpsizes = Sizes(tmpname) - tmpsizes.close(clean=True) - - if highlights: - subhighlights = list(hlbed.sub_bed(scaffoldID)) - - imagename = ".".join((scaffoldID, opts.format)) - plot_one_scaffold( - scaffoldID, - tmpsizes, - None, - trios, - imagename, - iopts, - highlights=subhighlights, - ) - - -def qc(args): - """ - %prog qc prefix - - Expects data files including: - 1. `prefix.bedpe` draws Bezier curve between paired reads - 2. `prefix.sizes` draws length of the contig/scaffold - 3. `prefix.gaps.bed` mark the position of the gaps in sequence - 4. `prefix.bed.coverage` plots the base coverage - 5. `prefix.pairs.bed.coverage` plots the clone coverage - - See assembly.coverage.posmap() for the generation of these files. 
- """ - from jcvi.graphics.glyph import Bezier - - p = OptionParser(qc.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(p.print_help()) - - (prefix,) = args - scf = prefix - - # All these files *must* be present in the current folder - fastafile = prefix + ".fasta" - sizesfile = prefix + ".sizes" - gapsbedfile = prefix + ".gaps.bed" - bedfile = prefix + ".bed" - bedpefile = prefix + ".bedpe" - pairsbedfile = prefix + ".pairs.bed" - - sizes = Sizes(fastafile).mapping - size = sizes[scf] - - fig = plt.figure(1, (8, 5)) - root = fig.add_axes([0, 0, 1, 1]) - - # the scaffold - root.add_patch(Rectangle((0.1, 0.15), 0.8, 0.03, fc="k")) - - # basecoverage and matecoverage - ax = fig.add_axes([0.1, 0.45, 0.8, 0.45]) - - bins = 200 # Smooth the curve - basecoverage = Coverage(bedfile, sizesfile) - matecoverage = Coverage(pairsbedfile, sizesfile) - - x, y = basecoverage.get_plot_data(scf, bins=bins) - (baseline,) = ax.plot(x, y, "g-") - x, y = matecoverage.get_plot_data(scf, bins=bins) - (mateline,) = ax.plot(x, y, "r-") - legends = ("Base coverage", "Mate coverage") - leg = ax.legend((baseline, mateline), legends, shadow=True, fancybox=True) - leg.get_frame().set_alpha(0.5) - ax.set_xlim(0, size) - - # draw the read pairs - fp = open(bedpefile) - pairs = [] - for row in fp: - scf, astart, aend, scf, bstart, bend, clonename = row.split() - astart, bstart = int(astart), int(bstart) - aend, bend = int(aend), int(bend) - start = min(astart, bstart) + 1 - end = max(aend, bend) - pairs.append((start, end)) - - bpratio = 0.8 / size - cutoff = 1000 # inserts smaller than this are not plotted - # this convert from base => x-coordinate - pos = lambda x: (0.1 + x * bpratio) - ypos = 0.15 + 0.03 - for start, end in pairs: - dist = end - start - - if dist < cutoff: - continue - - dist = min(dist, 10000) - # 10Kb == .25 canvas height - height = 0.25 * dist / 10000 - xstart = pos(start) - xend = pos(end) - p0 = (xstart, ypos) - p1 = (xstart, ypos + height) - p2 
= (xend, ypos + height) - p3 = (xend, ypos) - Bezier(root, p0, p1, p2, p3) - - # gaps on the scaffold - fp = open(gapsbedfile) - for row in fp: - b = BedLine(row) - start, end = b.start, b.end - xstart = pos(start) - xend = pos(end) - root.add_patch(Rectangle((xstart, 0.15), xend - xstart, 0.03, fc="w")) - - root.text(0.5, 0.1, scf, color="b", ha="center") - warn_msg = "Only the inserts > {0}bp are shown".format(cutoff) - root.text(0.5, 0.1, scf, color="b", ha="center") - root.text(0.5, 0.05, warn_msg, color="gray", ha="center") - # clean up and output - set_human_base_axis(ax) - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - figname = prefix + ".pdf" - savefig(figname, dpi=300) - - -def generate_plot(filename, rplot="A50.rplot", rpdf="A50.pdf"): - - from jcvi.apps.r import RTemplate - - rplot_template = """ - library(ggplot2) - - data <- read.table("$rplot", header=T, sep="\t") - g <- ggplot(data, aes(x=index, y=cumsize, group=fasta)) - g + geom_line(aes(colour=fasta)) + - xlab("Contigs") + ylab("Cumulative size (Mb)") + - opts(title="A50 plot", legend.position="top") - - ggsave(file="$rpdf") - """ - - rtemplate = RTemplate(rplot_template, locals()) - rtemplate.run() - - -def A50(args): - """ - %prog A50 contigs_A.fasta contigs_B.fasta ... 
- - Plots A50 graphics, see blog post (http://blog.malde.org/index.php/a50/) - """ - p = OptionParser(A50.__doc__) - p.add_argument( - "--overwrite", - default=False, - action="store_true", - help="overwrite .rplot file if exists", - ) - p.add_argument( - "--cutoff", - default=0, - type=int, - dest="cutoff", - help="use contigs above certain size", - ) - p.add_argument( - "--stepsize", - default=10, - type=int, - dest="stepsize", - help="stepsize for the distribution", - ) - opts, args = p.parse_args(args) - - if not args: - sys.exit(p.print_help()) - - import numpy as np - from jcvi.utils.table import loadtable - - stepsize = opts.stepsize # use stepsize to speed up drawing - rplot = "A50.rplot" - if not op.exists(rplot) or opts.overwrite: - fw = open(rplot, "w") - header = "\t".join(("index", "cumsize", "fasta")) - statsheader = ("Fasta", "L50", "N50", "Min", "Max", "Average", "Sum", "Counts") - statsrows = [] - print(header, file=fw) - for fastafile in args: - f = Fasta(fastafile, index=False) - ctgsizes = [length for k, length in f.itersizes()] - ctgsizes = np.array(ctgsizes) - - a50, l50, n50 = calculate_A50(ctgsizes, cutoff=opts.cutoff) - cmin, cmax, cmean = min(ctgsizes), max(ctgsizes), np.mean(ctgsizes) - csum, counts = np.sum(ctgsizes), len(ctgsizes) - cmean = int(round(cmean)) - statsrows.append((fastafile, l50, n50, cmin, cmax, cmean, csum, counts)) - - logger.debug("`{0}` ctgsizes: {1}".format(fastafile, ctgsizes)) - - tag = "{0} (L50={1})".format(op.basename(fastafile).rsplit(".", 1)[0], l50) - logger.debug(tag) - - for i, s in zip(range(0, len(a50), stepsize), a50[::stepsize]): - print("\t".join((str(i), str(s / 1000000.0), tag)), file=fw) - fw.close() - - table = loadtable(statsheader, statsrows) - print(table, file=sys.stderr) - - generate_plot(rplot) - - -if __name__ == "__main__": - main() diff --git a/jcvi/graphics/base.py b/jcvi/graphics/base.py deleted file mode 100644 index 6b40aa45..00000000 --- a/jcvi/graphics/base.py +++ /dev/null @@ -1,843 
+0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -import copy -import os.path as op -import re -import sys - -from os import remove - -from functools import partial -from typing import Optional, List, Tuple, Union - -import numpy as np -import matplotlib as mpl -import seaborn as sns - -mpl.use("Agg") - -import matplotlib.pyplot as plt -import matplotlib.ticker as ticker - -from brewer2mpl import get_map -from matplotlib import cm, rc, rcParams -from matplotlib.colors import Colormap -from matplotlib.patches import ( - Rectangle, - Polygon, - CirclePolygon, - Ellipse, - PathPatch, - FancyArrow, - FancyArrowPatch, - FancyBboxPatch, -) - -from ..apps.base import datadir, glob, logger, sample_N, which -from ..formats.base import LineFile -from ..utils.cbook import human_size - -Extent = Tuple[float, float, float, float] - -CHARS = { - "&": r"\&", - "%": r"\%", - "$": r"\$", - "#": r"\#", - "_": r"\_", - "{": r"\{", - "}": r"\}", -} - -GRAPHIC_FORMATS = ( - "emf", - "eps", - "pdf", - "png", - "ps", - "raw", - "rgba", - "svg", - "svgz", -) - - -def is_tex_available() -> bool: - """Check if latex command is available""" - return bool(which("latex")) and bool(which("lp")) - - -class ImageOptions(object): - def __init__(self, opts): - self.w, self.h = [int(x) for x in opts.figsize.split("x")] - self.dpi = opts.dpi - self.format = opts.format - self.cmap = mpl.colormaps[opts.cmap] - self.seed = opts.seed - self.usetex = is_tex_available() and not opts.notex - self.opts = opts - - def __str__(self): - return "({0}px x {1}px)".format(self.dpi * self.w, self.dpi * self.h) - - @property - def diverge(self): - colors = get_map(self.opts.diverge, "diverging", 5).mpl_colors - return colors[0], colors[-1] - - -class TextHandler(object): - def __init__(self, fig, usetex: bool = True): - self.heights = [] - try: - self.build_height_array(fig, usetex=usetex) - except ValueError as e: - logger.debug( - "Failed to init heights (error: %s). 
Variable label sizes skipped.", e - ) - - @classmethod - def get_text_width_height(cls, fig, txt="chr01", size=12, usetex: bool = True): - tp = mpl.textpath.TextPath((0, 0), txt, size=size, usetex=usetex) - bb = tp.get_extents() - xmin, ymin = fig.transFigure.inverted().transform((bb.xmin, bb.ymin)) - xmax, ymax = fig.transFigure.inverted().transform((bb.xmax, bb.ymax)) - return xmax - xmin, ymax - ymin - - def build_height_array(self, fig, start=1, stop=36, usetex: bool = True): - for i in range(start, stop + 1): - w, h = TextHandler.get_text_width_height(fig, size=i, usetex=usetex) - self.heights.append((h, i)) - - def select_fontsize(self, height, minsize=1, maxsize=12): - if not self.heights: - return maxsize if height > 0.01 else minsize - - from bisect import bisect_left - - i = bisect_left(self.heights, (height,)) - size = self.heights[i - 1][1] if i else minsize - size = min(size, maxsize) - return size - - -class AbstractLayout(LineFile): - """ - Simple csv layout file for complex plotting settings. Typically, each line - represents a subplot, a track or a panel. 
- """ - - def __init__(self, filename): - super().__init__(filename) - - def assign_array(self, attrib, array): - assert len(array) == len(self) - for x, c in zip(self, array): - if not getattr(x, attrib): - setattr(x, attrib, c) - - def assign_colors(self, seed: Optional[int] = None): - number = len(self) - palette = set2_n if number <= 8 else set3_n - # Restrict palette numbers between [3, 12] - palette_number = max(3, min(number, 12)) - colorset = palette(palette_number) - colorset = sample_N(colorset, number, seed=seed) - self.assign_array("color", colorset) - - def assign_markers(self, seed: Optional[int] = None): - markerset = sample_N(mpl.lines.Line2D.filled_markers, len(self), seed=seed) - self.assign_array("marker", markerset) - - def __str__(self): - return "\n".join(str(x) for x in self) - - -def adjust_extent(extent: Extent, root_extent: Extent) -> Extent: - """ - Adjust the extent of the root axes. - """ - rx, ry, rw, rh = root_extent - ex, ey, ew, eh = extent - return rx + ex * rw, ry + ey * rh, ew * rw, eh * rh - - -def linear_blend(from_color, to_color, fraction=0.5): - """Interpolate a new color between two colors. - - https://github.com/PimpTrizkit/PJs/wiki/12.-Shade,-Blend-and-Convert-a-Web-Color-(pSBC.js) - - Args: - from_color (matplotlib color): starting color - to_color (matplotlib color): ending color - fraction (float, optional): Range is 0 (closer to starting color) to 1 - (closer to ending color). Defaults to 0.5. - """ - from matplotlib.colors import to_rgb - - def lerp(v0, v1, t): - # Precise method, which guarantees v = v1 when t = 1 - return (1 - t) * v0 + t * v1 - - r1, g1, b1 = to_rgb(from_color) - r2, g2, b2 = to_rgb(to_color) - return lerp(r1, r2, fraction), lerp(g1, g2, fraction), lerp(b1, b2, fraction) - - -def linear_shade(from_color, fraction=0.5): - """Interpolate a lighter or darker color. 
- - https://github.com/PimpTrizkit/PJs/wiki/12.-Shade,-Blend-and-Convert-a-Web-Color-(pSBC.js) - - Args: - from_color (matplotlib color): starting color - fraction (float, optional): Range is -1 (darker) to 1 (lighter). Defaults to 0.5. - """ - assert -1 <= fraction <= 1, "Fraction must be between -1 and 1" - if fraction < 0: - return linear_blend("k", from_color, 1 + fraction) - return linear_blend(from_color, "w", fraction) - - -def load_image(filename: str) -> np.ndarray: - """ - Load an image file and return as numpy array. - """ - img = plt.imread(filename) - if len(img.shape) == 2: # Gray-scale image, convert to RGB - # http://www.socouldanyone.com/2013/03/converting-grayscale-to-rgb-with-numpy.html - h, w = img.shape - ret = np.empty((h, w, 3), dtype=np.uint8) - ret[:, :, 2] = ret[:, :, 1] = ret[:, :, 0] = img - img = ret - else: - h, w, _ = img.shape - logger.debug("Image `%s` loaded (%dpx x %dpx).", filename, w, h) - return img - - -def latex(s): - """Latex doesn't work well with certain characters, like '_', in plain text. - These characters would be interpreted as control characters, so we sanitize - these strings. - - Args: - s (str): Input string - - Returns: - str: Output string sanitized - """ - return "".join([CHARS.get(char, char) for char in s]) - - -def shorten(s, maxchar=20, mid="..."): - if len(s) <= maxchar or len(mid) >= maxchar: - return s - pad = (maxchar - len(mid)) // 2 - right_pad = maxchar - len(mid) - pad - return s[:pad] + mid + s[-right_pad:] - - -def set1_n(number=9): - return get_map("Set1", "qualitative", number).hex_colors - - -def set2_n(number=8): - # Get Set2 from ColorBrewer, a set of colors deemed colorblind-safe and - # pleasant to look at by Drs. Cynthia Brewer and Mark Harrower of Pennsylvania - # State University. These colors look lovely together, and are less - # saturated than those colors in Set1. 
- return get_map("Set2", "qualitative", number).hex_colors - - -def set3_n(number=12): - return get_map("Set3", "qualitative", number).hex_colors - - -def paired_n(number=12): - """See also: https://colorbrewer2.org/#type=qualitative&scheme=Paired&n=12""" - return get_map("Paired", "qualitative", number).hex_colors - - -set1, set2, set3, paired = set1_n(), set2_n(), set3_n(), paired_n() - - -def prettyplot(): - reds = copy.copy(mpl.cm.Reds) - reds.set_bad("white") - reds.set_under("white") - - blues_r = copy.copy(mpl.cm.Blues_r) - blues_r.set_bad("white") - blues_r.set_under("white") - - # Need to 'reverse' red to blue so that blue=cold=small numbers, - # and red=hot=large numbers with '_r' suffix - blue_red = get_map("RdBu", "diverging", 11, reverse=True).mpl_colormap - green_purple = get_map("PRGn", "diverging", 11).mpl_colormap - red_purple = get_map("RdPu", "sequential", 9).mpl_colormap - - return blues_r, reds, blue_red, green_purple, red_purple - - -blues_r, reds, blue_red, green_purple, red_purple = prettyplot() - - -def normalize_axes(*axes): - """ - Normalize the axes to have the same scale. - """ - for ax in axes: - ax.set_xlim(0, 1) - ax.set_ylim(0, 1) - ax.set_axis_off() - - -def panel_labels(ax, labels, size: int = 16): - """ - Add panel labels (A, B, ...) to a figure. - """ - for xx, yy, panel_label in labels: - if rcParams["text.usetex"]: - panel_label = r"$\textbf{{{0}}}$".format(panel_label) - ax.text(xx, yy, panel_label, size=size, ha="center", va="center") - - -def update_figname(figname: str, format: str) -> str: - """Update the name of a figure to include the format. 
- - Args: - figname (str): Path to the figure - format (str): Figure format, must be one of GRAPHIC_FORMATS - - Returns: - str: New file path - """ - _, ext = op.splitext(figname) - if ext.strip(".") in GRAPHIC_FORMATS: # User suffix has precedence - return figname - # When the user has not supplied a format in the filename, use the requested format - assert format in GRAPHIC_FORMATS, "Invalid format" - return figname + "." + format - - -def savefig(figname, dpi=150, iopts=None, cleanup=True): - try: - format = figname.rsplit(".", 1)[-1].lower() - except: - format = "pdf" - try: - logger.debug("Matplotlib backend is: %s", mpl.get_backend()) - plt.savefig(figname, dpi=dpi, format=format) - except Exception as e: - logger.error("savefig failed with message:\n%s", e) - logger.info("Try running again with --notex option to disable latex.") - if op.exists(figname): - if op.getsize(figname) < 1000: - logger.debug("Cleaning up empty file: %s", figname) - remove(figname) - sys.exit(1) - - msg = f"Figure saved to `{figname}`" - if iopts: - msg += f" {iopts}" - logger.debug(msg) - - if cleanup: - plt.rcdefaults() - - -# human readable size (Kb, Mb, Gb) -def human_readable(x: Union[str, int], _, base=False): - x = str(int(x)) - if x.endswith("000000000"): - x = x[:-9] + "G" - elif x.endswith("000000"): - x = x[:-6] + "M" - elif x.endswith("000"): - x = x[:-3] + "K" - if base and x[-1] in "MKG": - x += "b" - return x - - -human_readable_base = partial(human_readable, base=True) -human_formatter = ticker.FuncFormatter(human_readable) -human_base_formatter = ticker.FuncFormatter(human_readable_base) -mb_formatter = ticker.FuncFormatter(lambda x, pos: "{0}M".format(int(x / 1000000))) -mb_float_formatter = ticker.FuncFormatter( - lambda x, pos: "{0:.1f}M".format(x / 1000000.0) -) -kb_formatter = ticker.FuncFormatter(lambda x, pos: "{0}K".format(int(x / 1000))) - - -def set_human_axis(ax, formatter=human_formatter): - ax.xaxis.set_major_formatter(formatter) - 
ax.yaxis.set_major_formatter(formatter) - - -set_human_base_axis = partial(set_human_axis, formatter=human_base_formatter) - - -def set_helvetica_axis(ax): - xtick_locs = ax.get_xticks().tolist() - ytick_locs = ax.get_yticks().tolist() - # If we dont do the following, we have - # UserWarning: FixedFormatter should only be used together with FixedLocator - ax.xaxis.set_major_locator(mpl.ticker.FixedLocator(xtick_locs)) - ax.yaxis.set_major_locator(mpl.ticker.FixedLocator(ytick_locs)) - ax.set_xticklabels([int(x) for x in xtick_locs], family="Helvetica") - ax.set_yticklabels([int(x) for x in ytick_locs], family="Helvetica") - - -available_fonts = [op.basename(x) for x in glob(datadir + "/*.ttf")] - - -def fontprop(ax, name, size=12): - assert name in available_fonts, "Font must be one of {0}.".format(available_fonts) - - import matplotlib.font_manager as fm - - fname = op.join(datadir, name) - prop = fm.FontProperties(fname=fname, size=size) - - logger.debug("Set font to `%s` (`%s`)", name, prop.get_file()) - for text in ax.texts: - text.set_fontproperties(prop) - - return prop - - -def markup(s: str): - """ - Change the string to latex format, and italicize the text between *. - """ - if not rcParams["text.usetex"]: - return s - if "$" in s: - return s - s = latex(s) - s = re.sub(r"\*(.*)\*", r"\\textit{\1}", s) - return s - - -def append_percentage(s): - # The percent symbol needs escaping in latex - if rcParams["text.usetex"]: - return s + r"$\%$" - else: - return s + "%" - - -def setup_theme( - context="notebook", - style="darkgrid", - palette="deep", - font="Helvetica", - usetex: bool = True, -): - try: - extra_rc = { - "lines.linewidth": 1, - "lines.markeredgewidth": 1, - "patch.edgecolor": "k", - } - sns.set_theme(context=context, style=style, palette=palette, rc=extra_rc) - except (ImportError, SyntaxError): - pass - - if usetex: - rc("text", usetex=True) - else: - logger.info("Set text.usetex=%s. 
Font styles may be inconsistent.", usetex) - rc("text", usetex=False) - - if font == "Helvetica": - rc("font", **{"family": "sans-serif", "sans-serif": ["Helvetica"]}) - elif font == "Palatino": - rc("font", **{"family": "serif", "serif": ["Palatino"]}) - elif font == "Schoolbook": - rc("font", **{"family": "serif", "serif": ["Century Schoolbook L"]}) - - -def asciiaxis(x, digit=1): - if isinstance(x, int): - x = str(x) - elif isinstance(x, float): - x = "{0:.{1}f}".format(x, digit) - elif isinstance(x, np.int64): - x = str(x) - elif isinstance(x, np.ndarray): - assert len(x) == 2 - x = str(x).replace("]", ")") # upper bound not inclusive - - return x - - -def asciiplot(x, y, digit=1, width=50, title=None, char="="): - """ - Print out a horizontal plot using ASCII chars. - width is the textwidth (height) of the plot. - """ - ax = np.array(x) - ay = np.array(y) - - if title: - print("[bold white]".format(title), file=sys.stderr) - - az = ay * width // ay.max() - tx = [asciiaxis(x, digit=digit) for x in ax] - rjust = max([len(x) for x in tx]) + 1 - - for x, y, z in zip(tx, ay, az): - x = x.rjust(rjust) - y = y or "" - z = "[green]{}".format(char * z) - print("{} | {} {}".format(x, z, y), file=sys.stderr) - - -def print_colors(palette, outfile="Palette.png"): - """ - print color palette (a tuple) to a PNG file for quick check - """ - fig = plt.figure() - ax = fig.add_subplot(111) - - xmax = 20 * (len(palette) + 1) - x1s = np.arange(0, xmax, 20) - xintervals = [10] * len(palette) - xx = zip(x1s, xintervals) - ax.broken_barh(xx, (5, 10), facecolors=palette) - - ax.set_ylim(0, 20) - ax.set_xlim(0, xmax) - ax.set_axis_off() - - savefig(outfile) - - -def plot_heatmap( - ax, - M: np.ndarray, - breaks: List[int], - groups: List[Tuple[int, int, List[Tuple[int, str]], str]] = [], - plot_breaks: bool = False, - cmap: Optional[Union[str, Colormap]] = None, - binsize: Optional[int] = None, -): - """Plot heatmap illustrating the contact probabilities in Hi-C data. 
- - Args: - ax (pyplot.axes): Matplotlib axis - M (np.array): 2D numpy-array - breaks (List[int]): Positions of chromosome starts. Can be None. - iopts (OptionParser options): Graphical options passed in from commandline - groups (List, optional): [(start, end, [(position, seqid)], color)]. Defaults to []. - plot_breaks (bool): Whether to plot white breaks. Defaults to False. - cmap (str | Colormap, optional): Colormap. Defaults to None, which uses cubehelix. - binsize (int, optional): Resolution of the heatmap. - """ - cmap = cmap or sns.cubehelix_palette(rot=0.5, as_cmap=True) - ax.imshow(M, cmap=cmap, interpolation="none") - _, xmax = ax.get_xlim() - xlim = (0, xmax) - if plot_breaks: - for b in breaks[:-1]: - ax.plot([b, b], xlim, "w-") - ax.plot(xlim, [b, b], "w-") - - def simplify_seqid(seqid): - seqid = seqid.replace("_", "") - if seqid[:3].lower() == "chr": - seqid = seqid[3:] - return seqid.lstrip("0") - - for start, end, position_seqids, color in groups: - # Plot a square - ax.plot([start, start], [start, end], "-", color=color) - ax.plot([start, end], [start, start], "-", color=color) - ax.plot([start, end], [end, end], "-", color=color) - ax.plot([end, end], [start, end], "-", color=color) - for position, seqid in position_seqids: - seqid = simplify_seqid(seqid) - ax.text(position, end, seqid, ha="center", va="top") - - ax.set_xlim(xlim) - ax.set_ylim((xlim[1], xlim[0])) # Flip the y-axis so the origin is at the top - ax.set_xticklabels(ax.get_xticks(), family="Helvetica", color="gray") - ax.set_yticklabels(ax.get_yticks(), family="Helvetica", color="gray", rotation=90) - ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True) - if binsize is not None: - formatter = ticker.FuncFormatter( - lambda x, pos: human_readable(int(x) * binsize, pos, base=True) - ) - ax.xaxis.set_major_formatter(formatter) - ax.yaxis.set_major_formatter(formatter) - title = f"Resolution = {human_size(binsize, precision=0)} per bin" - ax.set_xlabel(title) - - 
-def discrete_rainbow(N=7, cmap=cm.Set1, usepreset=True, shuffle=False, plot=False): - """ - Return a discrete colormap and the set of colors. - - modified from - - - cmap: colormap instance, eg. cm.jet. - N: Number of colors. - - Example - >>> x = resize(arange(100), (5,100)) - >>> djet = cmap_discretize(cm.jet, 5) - >>> imshow(x, cmap=djet) - - See available matplotlib colormaps at: - - - If N>20 the sampled colors might not be very distinctive. - If you want to error and try anyway, set usepreset=False - """ - import random - from scipy import interpolate - - if usepreset: - if 0 < N <= 5: - cmap = cm.gist_rainbow - elif N <= 20: - cmap = cm.Set1 - else: - sys.exit(discrete_rainbow.__doc__) - - cdict = cmap._segmentdata.copy() - # N colors - colors_i = np.linspace(0, 1.0, N) - # N+1 indices - indices = np.linspace(0, 1.0, N + 1) - rgbs = [] - for key in ("red", "green", "blue"): - # Find the N colors - D = np.array(cdict[key]) - I = interpolate.interp1d(D[:, 0], D[:, 1]) - colors = I(colors_i) - rgbs.append(colors) - # Place these colors at the correct indices. - A = np.zeros((N + 1, 3), float) - A[:, 0] = indices - A[1:, 1] = colors - A[:-1, 2] = colors - # Create a tuple for the dictionary. 
- L = [] - for l in A: - L.append(tuple(l)) - cdict[key] = tuple(L) - - palette = zip(*rgbs) - - if shuffle: - random.shuffle(palette) - - if plot: - print_colors(palette) - - # Return (colormap object, RGB tuples) - return mpl.colors.LinearSegmentedColormap("colormap", cdict, 1024), palette - - -def get_intensity(octal): - from math import sqrt - - r, g, b = octal[1:3], octal[3:5], octal[5:] - r, g, b = int(r, 16), int(g, 16), int(b, 16) - intensity = sqrt((r * r + g * g + b * b) / 3) - return intensity - - -def adjust_spines(ax, spines, outward=False, color="lightslategray"): - # Modified from - for loc, spine in ax.spines.items(): - if loc in spines: - if outward: - spine.set_position(("outward", 8)) # outward by 10 points - spine.set_color(color) - else: - spine.set_color("none") # don't draw spine - - if "left" in spines: - ax.yaxis.set_ticks_position("left") - else: - ax.yaxis.set_ticks_position("right") - - if "bottom" in spines: - ax.xaxis.set_ticks_position("bottom") - else: - ax.xaxis.set_ticks_position("top") - - # Change tick styles directly - ax.tick_params(color=color) - set_helvetica_axis(ax) - - -def set_ticklabels_helvetica(ax, xcast=int, ycast=int): - xticklabels = [xcast(x) for x in ax.get_xticks()] - yticklabels = [ycast(x) for x in ax.get_yticks()] - ax.set_xticklabels(xticklabels, family="Helvetica") - ax.set_yticklabels(yticklabels, family="Helvetica") - - -def draw_cmap(ax, cmap_text, vmin, vmax, cmap=None, reverse=False): - # Draw a horizontal colormap at bottom-right corder of the canvas - Y = np.outer(np.ones(10), np.arange(0, 1, 0.01)) - if reverse: - Y = Y[::-1] - xmin, xmax = 0.6, 0.9 - ymin, ymax = 0.02, 0.04 - ax.imshow(Y, extent=(xmin, xmax, ymin, ymax), cmap=cmap) - ax.text( - xmin - 0.01, - (ymin + ymax) * 0.5, - markup(cmap_text), - ha="right", - va="center", - size=10, - ) - vmiddle = (vmin + vmax) * 0.5 - xmiddle = (xmin + xmax) * 0.5 - for x, v in zip((xmin, xmiddle, xmax), (vmin, vmiddle, vmax)): - ax.text(x, ymin - 0.005, 
"%.1f" % v, ha="center", va="top", size=10) - - -def write_messages(ax, messages: List[str], ypad: float = 0.04): - """ - Write text on canvas, usually on the top right corner. - """ - tc = "gray" - axt = ax.transAxes - yy = 0.95 - for msg in messages: - ax.text(0.95, yy, markup(msg), color=tc, transform=axt, ha="right") - yy -= ypad - - -def quickplot_ax( - ax, - data, - xmin, - xmax, - xlabel, - title=None, - ylabel="Counts", - counts=True, - percentage=True, - highlight=None, -): - # TODO: redundant with quickplot(), need to be refactored. - if percentage: - total_length = sum(data.values()) - data = dict((k, v * 100.0 / total_length) for (k, v) in data.items()) - - left, height = zip(*sorted(data.items())) - pad = max(height) * 0.01 - c1, c2 = "darkslategray", "tomato" - if counts: - for l, h in zip(left, height): - if xmax and l > xmax: - break - tag = str(int(h)) - rotation = 90 - if percentage: - tag = append_percentage(tag) if int(tag) > 0 else "" - rotation = 0 - color = c1 - if highlight is not None and l in highlight: - color = c2 - ax.text( - l, - h + pad, - tag, - color=color, - size=8, - ha="center", - va="bottom", - rotation=rotation, - ) - if xmax is None: - xmax = max(left) - - ax.bar(left, height, align="center", fc=c1) - if highlight: - for h in highlight: - ax.bar([h], [data[h]], align="center", ec=c2, fc=c2) - - ax.set_xlabel(markup(xlabel)) - if ylabel: - ax.set_ylabel(markup(ylabel)) - if title: - ax.set_title(markup(title)) - ax.set_xlim((xmin - 0.5, xmax + 0.5)) - if percentage: - ax.set_ylim(0, 100) - - -def quickplot( - data, - xmin, - xmax, - xlabel, - title, - ylabel="Counts", - figname="plot.pdf", - counts=True, - print_stats=True, -): - """ - Simple plotting function - given a dictionary of data, produce a bar plot - with the counts shown on the plot. 
- """ - plt.figure(1, (6, 6)) - left, height = zip(*sorted(data.items())) - pad = max(height) * 0.01 - if counts: - for l, h in zip(left, height): - if xmax and l > xmax: - break - plt.text( - l, - h + pad, - str(h), - color="darkslategray", - size=8, - ha="center", - va="bottom", - rotation=90, - ) - if xmax is None: - xmax = max(left) - - plt.bar(left, height, align="center") - plt.xlabel(markup(xlabel)) - plt.ylabel(markup(ylabel)) - plt.title(markup(title)) - plt.xlim((xmin - 0.5, xmax + 0.5)) - - # Basic statistics - messages = [] - counts_over_xmax = sum([v for k, v in data.items() if k > xmax]) - if counts_over_xmax: - messages += ["Counts over xmax({0}): {1}".format(xmax, counts_over_xmax)] - kk = [] - for k, v in data.items(): - kk += [k] * v - messages += ["Total: {0}".format(np.sum(height))] - messages += ["Maximum: {0}".format(np.max(kk))] - messages += ["Minimum: {0}".format(np.min(kk))] - messages += ["Average: {0:.2f}".format(np.mean(kk))] - messages += ["Median: {0}".format(np.median(kk))] - ax = plt.gca() - if print_stats: - write_messages(ax, messages) - - set_human_axis(ax) - set_ticklabels_helvetica(ax) - savefig(figname) diff --git a/jcvi/graphics/blastplot.py b/jcvi/graphics/blastplot.py deleted file mode 100755 index 998278fb..00000000 --- a/jcvi/graphics/blastplot.py +++ /dev/null @@ -1,345 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog blastfile --qsizes query.sizes --ssizes subject.sizes - -Visualize the blastfile in a dotplot. At least one of --qsizes and --qbed must -be specified, also at least one of --ssizes and --sbed. The --sizes options help -to define the molecule border as well as the drawing order. The --bed options -help to position names maker (e.g. genes) onto the dot plot. So depending on -whether you are BLASTing raw sequences or makers, you need to place --sizes or ---bed options. 
-""" - -import os.path as op -import sys - -from random import sample - -import numpy as np - -from ..apps.base import OptionParser, logger -from ..formats.base import is_number -from ..formats.bed import Bed, BedLine -from ..formats.blast import BlastLine -from ..formats.sizes import Sizes - -from .base import Rectangle, plt, savefig, set_human_base_axis - - -DotStyles = ("line", "circle", "dot") - - -def rename_seqid(seqid): - seqid = seqid.split("_")[-1] - seqid = seqid.replace("supercont", "s") - seqid = seqid.replace("contig", "c").replace("scaffold", "s") - return "c{}".format(int(seqid)) if is_number(seqid, int) else seqid - - -def blastplot( - ax, - blastfile, - qsizes, - ssizes, - qbed, - sbed, - style="dot", - sampleN=None, - baseticks=False, - insetLabels=False, - stripNames=False, - highlights=None, -): - - assert style in DotStyles - fp = open(blastfile) - - qorder = qbed.order if qbed else None - sorder = sbed.order if sbed else None - - data = [] - - for row in fp: - b = BlastLine(row) - query, subject = b.query, b.subject - - if stripNames: - query = query.rsplit(".", 1)[0] - subject = subject.rsplit(".", 1)[0] - - if qorder: - if query not in qorder: - continue - qi, q = qorder[query] - query = q.seqid - qstart, qend = q.start, q.end - else: - qstart, qend = b.qstart, b.qstop - - if sorder: - if subject not in sorder: - continue - si, s = sorder[subject] - subject = s.seqid - sstart, send = s.start, s.end - else: - sstart, send = b.sstart, b.sstop - - qi = qsizes.get_position(query, qstart) - qj = qsizes.get_position(query, qend) - si = ssizes.get_position(subject, sstart) - sj = ssizes.get_position(subject, send) - - if None in (qi, si): - continue - data.append(((qi, qj), (si, sj))) - - if sampleN: - if len(data) > sampleN: - data = sample(data, sampleN) - - if not data: - return logger.error("no blast data imported") - - xsize, ysize = qsizes.totalsize, ssizes.totalsize - logger.debug("xsize=%d ysize=%d" % (xsize, ysize)) - - if style == "line": 
- for a, b in data: - ax.plot(a, b, "ro-", mfc="w", mec="r", ms=3) - else: - data = [(x[0], y[0]) for x, y in data] - x, y = zip(*data) - - if style == "circle": - ax.plot(x, y, "mo", mfc="w", mec="m", ms=3) - elif style == "dot": - ax.scatter(x, y, s=3, lw=0) - - xlim = (0, xsize) - ylim = (ysize, 0) # invert the y-axis - - xchr_labels, ychr_labels = [], [] - ignore = True # tag to mark whether to plot chr name (skip small ones) - ignore_size_x = ignore_size_y = 0 - - # plot the chromosome breaks - logger.debug("xbreaks={0} ybreaks={1}".format(len(qsizes), len(ssizes))) - for seqid, beg, end in qsizes.get_breaks(): - ignore = abs(end - beg) < ignore_size_x - if ignore: - continue - seqid = rename_seqid(seqid) - - xchr_labels.append((seqid, (beg + end) / 2, ignore)) - ax.plot([end, end], ylim, "-", lw=1, color="grey") - - for seqid, beg, end in ssizes.get_breaks(): - ignore = abs(end - beg) < ignore_size_y - if ignore: - continue - seqid = rename_seqid(seqid) - - ychr_labels.append((seqid, (beg + end) / 2, ignore)) - ax.plot(xlim, [end, end], "-", lw=1, color="grey") - - # plot the chromosome labels - for label, pos, ignore in xchr_labels: - if not ignore: - if insetLabels: - ax.text(pos, 0, label, size=8, ha="center", va="top", color="grey") - else: - pos = 0.1 + pos * 0.8 / xsize - root.text( - pos, - 0.91, - label, - size=10, - ha="center", - va="bottom", - rotation=45, - color="grey", - ) - - # remember y labels are inverted - for label, pos, ignore in ychr_labels: - if not ignore: - if insetLabels: - continue - pos = 0.9 - pos * 0.8 / ysize - root.text(0.91, pos, label, size=10, va="center", color="grey") - - # Highlight regions based on a list of BedLine - qhighlights = shighlights = None - if highlights: - if isinstance(highlights[0], BedLine): - shighlights = highlights - elif len(highlights) == 2: - qhighlights, shighlights = highlights - - if qhighlights: - for hl in qhighlights: - hls = qsizes.get_position(hl.seqid, hl.start) - 
ax.add_patch(Rectangle((hls, 0), hl.span, ysize, fc="r", alpha=0.2, lw=0)) - if shighlights: - for hl in shighlights: - hls = ssizes.get_position(hl.seqid, hl.start) - ax.add_patch(Rectangle((0, hls), xsize, hl.span, fc="r", alpha=0.2, lw=0)) - - if baseticks: - - def increaseDensity(a, ratio=4): - assert len(a) > 1 - stepsize = a[1] - a[0] - newstepsize = int(stepsize / ratio) - return np.arange(0, a[-1], newstepsize) - - # Increase the density of the ticks - xticks = ax.get_xticks() - yticks = ax.get_yticks() - xticks = increaseDensity(xticks, ratio=2) - yticks = increaseDensity(yticks, ratio=2) - ax.set_xticks(xticks) - - # Plot outward ticklines - for pos in xticks[1:]: - if pos > xsize: - continue - pos = 0.1 + pos * 0.8 / xsize - root.plot((pos, pos), (0.08, 0.1), "-", color="grey", lw=2) - - for pos in yticks[1:]: - if pos > ysize: - continue - pos = 0.9 - pos * 0.8 / ysize - root.plot((0.09, 0.1), (pos, pos), "-", color="grey", lw=2) - - ax.set_xlim(xlim) - ax.set_ylim(ylim) - - # beautify the numeric axis - for tick in ax.get_xticklines() + ax.get_yticklines(): - tick.set_visible(False) - - set_human_base_axis(ax) - - plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="gray", size=10) - plt.setp(ax.get_yticklabels(), rotation=90) - - -if __name__ == "__main__": - - from jcvi.formats.bed import sizes - - p = OptionParser(__doc__) - p.add_argument("--qsizes", help="Path to two column qsizes file") - p.add_argument("--ssizes", help="Path to two column ssizes file") - p.add_argument("--qbed", help="Path to qbed") - p.add_argument("--sbed", help="Path to sbed") - p.add_argument( - "--qselect", - default=0, - type=int, - help="Minimum size of query contigs to select", - ) - p.add_argument( - "--sselect", - default=0, - type=int, - help="Minimum size of subject contigs to select", - ) - p.add_argument("--qh", help="Path to highlight bed for query") - p.add_argument("--sh", help="Path to highlight bed for subject") - p.add_argument( - "--dotstyle", - 
default="dot", - choices=DotStyles, - help="Style of the dots", - ) - p.add_argument( - "--proportional", - default=False, - action="store_true", - help="Make image width:height equal to seq ratio", - ) - p.add_argument( - "--stripNames", - default=False, - action="store_true", - help="Remove trailing .? from gene names", - ) - p.add_argument( - "--nmax", - default=None, - type=int, - help="Only plot maximum of N dots", - ) - opts, args, iopts = p.set_image_options(figsize="8x8", style="dark", dpi=150) - - qsizes, ssizes = opts.qsizes, opts.ssizes - qbed, sbed = opts.qbed, opts.sbed - proportional = opts.proportional - - if len(args) != 1: - sys.exit(not p.print_help()) - - if qbed: - qsizes = qsizes or sizes([qbed]) - qbed = Bed(qbed) - if sbed: - ssizes = ssizes or sizes([sbed]) - sbed = Bed(sbed) - - assert qsizes and ssizes, "You must specify at least one of --sizes of --bed" - - qsizes = Sizes(qsizes, select=opts.qselect) - ssizes = Sizes(ssizes, select=opts.sselect) - - (blastfile,) = args - - image_name = op.splitext(blastfile)[0] + "." 
+ opts.format - plt.rcParams["xtick.major.pad"] = 16 - plt.rcParams["ytick.major.pad"] = 16 - - # Fix the width - xsize, ysize = qsizes.totalsize, ssizes.totalsize - - # get highlight beds - qh, sh = opts.qh, opts.sh - qh = Bed(qh) if qh else None - sh = Bed(sh) if sh else None - highlights = (qh, sh) if qh or sh else None - - ratio = ysize * 1.0 / xsize if proportional else 1 - width = iopts.w - height = iopts.h * ratio - fig = plt.figure(1, (width, height)) - root = fig.add_axes([0, 0, 1, 1]) # the whole canvas - ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) # the dot plot - - blastplot( - ax, - blastfile, - qsizes, - ssizes, - qbed, - sbed, - style=opts.dotstyle, - sampleN=opts.nmax, - baseticks=True, - stripNames=opts.stripNames, - highlights=highlights, - ) - - # add genome names - to_ax_label = lambda fname: op.basename(fname).split(".")[0] - gx, gy = [to_ax_label(x.filename) for x in (qsizes, ssizes)] - ax.set_xlabel(gx, size=16) - ax.set_ylabel(gy, size=16) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - savefig(image_name, dpi=iopts.dpi, iopts=iopts) diff --git a/jcvi/graphics/chromosome.py b/jcvi/graphics/chromosome.py deleted file mode 100644 index 6aa6fada..00000000 --- a/jcvi/graphics/chromosome.py +++ /dev/null @@ -1,730 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Legacy script to plot distribution of certain classes onto chromosomes. Adapted -from the script used in the Tang et al. PNAS 2010 paper, sigma figure. 
-""" -import sys - -from itertools import groupby -from math import ceil -from typing import Optional, Tuple - -import numpy as np - -from natsort import natsorted - -from ..apps.base import OptionParser, datafile, logger, sample_N -from ..formats.base import DictFile, get_number -from ..formats.bed import Bed -from ..formats.sizes import Sizes - -from .base import ( - CirclePolygon, - Polygon, - Rectangle, - latex, - markup, - normalize_axes, - plt, - savefig, - set1_n, - set3_n, -) -from .glyph import BaseGlyph, plot_cap - - -class Chromosome(BaseGlyph): - # Chromosome styles: rect - rectangle, roundrect - rounded rectangle, auto - - # automatically pick the best style - Styles = ("auto", "rect", "roundrect") - - def __init__( - self, - ax, - x, - y1, - y2, - width=0.015, - ec="k", - patch=None, - patchcolor="lightgrey", - lw=1, - zorder=2, - ): - """ - Chromosome with positions given in (x, y1) => (x, y2) - - The chromosome can also be patched, e.g. to show scaffold composition in - alternating shades. Use a list of starting locations to segment. 
- """ - y1, y2 = sorted((y1, y2)) - super().__init__(ax) - pts, r = self.get_pts(x, y1, y2, width) - self.append(Polygon(pts, fill=False, lw=lw, ec=ec, zorder=zorder)) - if patch: - rr = r * 0.9 # Shrink a bit for the patches - # First patch is colored if there is an even number of patches, otherwise not colored - start = len(patch) % 2 - for i in range(start, len(patch), 2): - if i + 1 > len(patch) - 1: - continue - p1, p2 = patch[i], patch[i + 1] - self.append( - Rectangle((x - rr, p1), 2 * rr, p2 - p1, lw=0, fc=patchcolor) - ) - - self.add_patches() - - def get_pts(self, x, y1, y2, width): - w = width / 2 - r = width / (3**0.5) - - pts = [] - pts += plot_cap((x, y1 + r), np.radians(range(210, 330)), r) - pts += [[x + w, y1 + r / 2], [x + w, y2 - r / 2]] - pts += plot_cap((x, y2 - r), np.radians(range(30, 150)), r) - pts += [[x - w, y2 - r / 2], [x - w, y1 + r / 2]] - - return pts, r - - -class HorizontalChromosome(BaseGlyph): - def __init__( - self, - ax, - x1, - x2, - y, - height=0.015, - ec="k", - patch=None, - patchcolor="lightgrey", - lw=1, - fc=None, - zorder=2, - style="auto", - ): - """ - Horizontal version of the Chromosome glyph above. 
- """ - assert style in Chromosome.Styles, f"Unknown style `{style}`" - - x1, x2 = sorted((x1, x2)) - super().__init__(ax) - pts, r = self.get_pts(x1, x2, y, height, style=style) - self.append(Polygon(pts, fill=False, lw=lw, ec=ec, zorder=zorder + 1)) - - if fc: - pts, r = self.get_pts(x1, x2, y, height / 2, style=style) - self.append(Polygon(pts, fc=fc, lw=0, zorder=zorder)) - if patch: - rr = r * 0.9 # Shrink a bit for the patches - # First patch is colored if there is an even number of patches, otherwise not colored - start = len(patch) % 2 - for i in range(start, len(patch), 2): - if i + 1 > len(patch) - 1: - continue - p1, p2 = patch[i], patch[i + 1] - self.append( - Rectangle((p1, y - rr), p2 - p1, 2 * rr, lw=0, fc=patchcolor) - ) - - self.add_patches() - - def get_pts(self, x1, x2, y, height, style="auto") -> Tuple[list, float]: - h = height / 2 - r = height / (3**0.5) - - if style == "rect" or ( - style == "auto" and x2 - x1 < 2 * height - ): # rectangle for small chromosomes - return [[x1, y + h], [x1, y - h], [x2, y - h], [x2, y + h]], r - - pts = [] - pts += plot_cap((x1 + r, y), np.radians(range(120, 240)), r) - pts += [[x1 + r / 2, y - h], [x2 - r / 2, y - h]] - pts += plot_cap((x2 - r, y), np.radians(range(-60, 60)), r) - pts += [[x2 - r / 2, y + h], [x1 + r / 2, y + h]] - - return pts, r - - -class ChromosomeWithCentromere(object): - def __init__(self, ax, x, y1, y2, y3, width=0.015, fc="k", fill=False, zorder=2): - """ - Chromosome with centromeres at y2 position - """ - pts = [] - r = width * 0.5 - pts += plot_cap((x, y1 - r), np.radians(range(180)), r) - pts += [[x - r, y1 - r], [x - r, y2 + r]] - pts += plot_cap((x, y2 + r), np.radians(range(180, 360)), r) - pts += [[x + r, y2 + r], [x + r, y1 - r]] - ax.add_patch(Polygon(pts, fc=fc, fill=fill, zorder=zorder)) - pts = [] - pts += plot_cap((x, y2 - r), np.radians(range(180)), r) - pts += [[x - r, y2 - r], [x - r, y3 + r]] - pts += plot_cap((x, y3 + r), np.radians(range(180, 360)), r) - pts += [[x 
+ r, y3 + r], [x + r, y2 - r]] - ax.add_patch(Polygon(pts, fc=fc, fill=fill, zorder=zorder)) - ax.add_patch( - CirclePolygon((x, y2), radius=r * 0.5, fc="k", ec="k", zorder=zorder) - ) - - -class ChromosomeMap(object): - """ - Line plots along the chromosome. - """ - - def __init__( - self, - fig, - root, - xstart, - xend, - ystart, - yend, - pad, - ymin, - ymax, - bins, - title, - subtitle, - patchstart=None, - ): - width, height = xend - xstart, yend - ystart - - y = ystart - pad - HorizontalChromosome(root, xstart, xend, y, patch=patchstart, height=0.03) - - # Gauge - lsg = "lightslategrey" - root.plot( - [xstart - pad, xstart - pad], [ystart, ystart + height], lw=2, color=lsg - ) - root.plot([xend + pad, xend + pad], [ystart, ystart + height], lw=2, color=lsg) - root.text( - (xstart + xend) / 2, - ystart + height + 2 * pad, - title, - ha="center", - va="center", - color=lsg, - ) - - iv = (ymax - ymin) / bins - iv_height = height / bins - val = ymin - yy = ystart - while val <= ymax: - root.text(xstart - 2 * pad, yy, str(val), ha="right", va="center", size=10) - val += iv - yy += iv_height - - root.text( - (xstart + xend) / 2, y - 0.05, subtitle, ha="center", va="center", color=lsg - ) - - self.axes = fig.add_axes([xstart, ystart, width, height]) - - -class GeneticMap(BaseGlyph): - def __init__( - self, ax, x, y1, y2, markers, unit="cM", tip=0.008, fc="k", flip=False - ): - super().__init__(ax) - # tip = length of the ticks - y1, y2 = sorted((y1, y2)) - ax.plot([x, x], [y1, y2], "-", color=fc, lw=2) - _, max_chr_len = max(markers, key=lambda x: x[-1]) - r = y2 - y1 - ratio = r / max_chr_len - marker_pos = {} - for marker_name, cm in markers: - yy = (y1 + ratio * cm) if flip else (y2 - ratio * cm) - ax.plot((x - tip, x + tip), (yy, yy), "-", color=fc) - marker_pos[marker_name] = yy - self.marker_pos = marker_pos - - t = tip / 2 - end_cm_labels = ( - ((y2 + t, max_chr_len, "bottom"), (y1 - t, 0, "top")) - if flip - else ((y2 + t, 0, "bottom"), (y1 - t, 
max_chr_len, "top")) - ) - for yy, cm, va in end_cm_labels: - label = "{0} {1}".format(int(cm), unit) - ax.text(x, yy, label, color="gray", va=va, ha="center") - - -class Gauge(BaseGlyph): - def __init__( - self, - ax, - x, - y1, - y2, - max_chr_len, - step=1e6, - tip=0.008, - extra=0.006, - fc="lightslategray", - ): - """ - Args: - ax (matplotlib.Axes): axes - x (float): x position - y1 (float): y start position - y2 (float): y end position - max_chr_len (int): maximum chromosome size - step (int): step to show the ticks - tip (float): length of the ticks - extra (float): offset for the unit label - fc (str): face color of the glyph - """ - super().__init__(ax) - ax.plot([x, x], [y1, y2], "-", color=fc, lw=2) - r = y2 - y1 - yy = y2 - gauge = int(ceil(max_chr_len / step)) - ratio = r / max_chr_len - yinterval = 2 * ratio * step - for g in range(0, gauge, 2): - if g % 10: - ax.plot((x, x + tip), (yy, yy), "-", color=fc) - else: - ax.plot((x - tip, x + tip), (yy, yy), "-", color=fc, lw=2) - ax.text(x + tip + extra, yy, g, color="gray", va="center") - yy -= yinterval - ax.text(x, yy - 0.03, "Mb", color="gray", va="center") - - -def canvas2px(coord, dmn, dpi): - """ - Convert matplotlib canvas coordinate to pixels - """ - return int(round(coord * dmn * dpi)) - - -def write_ImageMapLine(tlx, tly, brx, bry, w, h, dpi, chr, segment_start, segment_end): - """ - Write out an image map area line with the coordinates passed to this - function - - """ - tlx, brx = [canvas2px(x, w, dpi) for x in (tlx, brx)] - tly, bry = [canvas2px(y, h, dpi) for y in (tly, bry)] - chr, _ = chr.split(":") - return ( - '" - ) - - -def get_hg38_chromsizes(filename=datafile("hg38.chrom.sizes")): - chromsizes = DictFile(filename) - chromsizes = dict((k, int(v)) for k, v in chromsizes.items()) - return chromsizes - - -def get_color(tag): - if "neg" in tag: - return "w", 1 - if "acen" in tag: - return "r", 1 - try: - alpha = int(tag[4:]) * 1.0 / 100 - except: - return "w", 1 - return "k", alpha - - 
-def draw_cytoband( - ax, chrom, filename=datafile("hg38.band.txt"), ymid=0.5, width=0.99, height=0.11 -): - import pandas as pd - - bands = pd.read_csv(filename, sep="\t") - chrombands = bands[bands["#chrom"] == chrom] - data = [] - for _, (chr, start, end, name, gie) in chrombands.iterrows(): - data.append((chr, start, end, name, gie)) - chromsize = max(x[2] for x in data) - scale = width * 1.0 / chromsize - xstart, ystart = (1 - width) / 2, ymid - height / 2 - bp_to_pos = lambda x: xstart + x * scale - in_acen = False - for chr, start, end, name, gie in data: - color, alpha = get_color(gie) - bplen = end - start - if "acen" in gie: - if in_acen: - xys = [ - (bp_to_pos(start), ymid), - (bp_to_pos(end), ystart), - (bp_to_pos(end), ystart + height), - ] - else: - xys = [ - (bp_to_pos(start), ystart), - (bp_to_pos(start), ystart + height), - (bp_to_pos(end), ymid), - ] - p = Polygon(xys, closed=True, ec="k", fc=color, alpha=alpha) - in_acen = True - else: - p = Rectangle( - (bp_to_pos(start), ystart), - bplen * scale, - height, - ec="k", - fc=color, - alpha=alpha, - ) - # print bp_to_pos(end) - ax.add_patch(p) - ax.text( - bp_to_pos((start + end) / 2), - ymid + height * 0.8, - name, - rotation=40, - color="lightslategray", - ) - - ax.text(0.5, ystart - height, chrom, size=16, ha="center", va="center") - - ax.set_xlim(0, 1) - ax.set_ylim(0, 1) - ax.set_axis_off() - - -def main(): - """ - %prog bedfile id_mappings - - Takes a bedfile that contains the coordinates of features to plot on the - chromosomes, and `id_mappings` file that map the ids to certain class. Each - class will get assigned a unique color. `id_mappings` file is optional (if - omitted, will not paint the chromosome features, except the centromere). - - The extent of the chromosomes are given by --sizes, which contains - chrsize, one per line. If not specified, the extent of the chromosomes - are assumed to be the end for the last feature, which might be an underestimate. 
- """ - - p = OptionParser(main.__doc__) - p.add_argument( - "--sizes", help="FASTA sizes file, which contains chrsize, one per line" - ) - g = p.add_argument_group("Display accessories") - g.add_argument( - "--title", - help="title of the image", - ) - g.add_argument( - "--gauge", - default=False, - action="store_true", - help="draw a gauge with size label", - ) - - g = p.add_argument_group("HTML image map") - g.add_argument( - "--imagemap", - default=False, - action="store_true", - help="generate an HTML image map associated with the image", - ) - g.add_argument( - "--winsize", - default=50000, - type=int, - help="if drawing an imagemap, specify the window size (bases) of each map element ", - ) - - g = p.add_argument_group("Color legend") - g.add_argument( - "--nolegend", - dest="legend", - default=True, - action="store_false", - help="Do not generate color legend", - ) - g.add_argument( - "--mergedist", default=0, type=int, help="Merge regions closer than " - ) - g.add_argument("--empty", help="Write legend for unpainted region") - - opts, args, iopts = p.set_image_options(figsize="6x6", dpi=300) - - if len(args) not in (1, 2): - sys.exit(p.print_help()) - - bedfile = args[0] - mappingfile = None - if len(args) == 2: - mappingfile = args[1] - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - draw_chromosomes( - root, - bedfile, - sizes=opts.sizes, - iopts=iopts, - mergedist=opts.mergedist, - winsize=opts.winsize, - imagemap=opts.imagemap, - mappingfile=mappingfile, - gauge=opts.gauge, - legend=opts.legend, - empty=opts.empty, - title=opts.title, - ) - - normalize_axes(root) - - prefix = bedfile.rsplit(".", 1)[0] - figname = prefix + "." 
+ opts.format - savefig(figname, dpi=iopts.dpi, iopts=iopts) - - -def draw_chromosomes( - root, - bedfile, - sizes, - iopts, - mergedist: int, - winsize: int, - imagemap: bool = False, - mappingfile: Optional[str] = None, - gauge: bool = False, - legend: bool = True, - empty: bool = False, - title: Optional[str] = None, -): - bed = Bed(bedfile) - prefix = bedfile.rsplit(".", 1)[0] - - if imagemap: - imgmapfile = prefix + ".map" - mapfh = open(imgmapfile, "w") - print('', file=mapfh) - - if mappingfile: - mappings = DictFile(mappingfile, delimiter="\t") - classes = sorted(set(mappings.values())) - preset_colors = ( - DictFile(mappingfile, keypos=1, valuepos=2, delimiter="\t") - if DictFile.num_columns(mappingfile) >= 3 - else {} - ) - else: - classes = sorted(set(x.accn for x in bed)) - mappings = dict((x, x) for x in classes) - preset_colors = {} - - logger.debug("A total of %d classes found: %s", len(classes), ",".join(classes)) - - # Assign colors to classes - ncolors = max(3, min(len(classes), 12)) - palette = set1_n if ncolors <= 8 else set3_n - colorset = palette(number=ncolors) - colorset = sample_N(colorset, len(classes), seed=iopts.seed) - class_colors = dict(zip(classes, colorset)) - class_colors.update(preset_colors) - logger.debug("Assigned colors: %s", class_colors) - - chr_lens = {} - centromeres = {} - if sizes: - chr_lens = Sizes(sizes).sizes_mapping - else: - for b, blines in groupby(bed, key=lambda x: x.seqid): - blines = list(blines) - maxlen = max(x.end for x in blines) - chr_lens[b] = maxlen - - for b in bed: - accn = b.accn - if accn == "centromere": - centromeres[b.seqid] = b.start - if accn in mappings: - b.accn = mappings[accn] - else: - b.accn = "-" - - chr_number = len(chr_lens) - if centromeres: - assert chr_number == len( - centromeres - ), "chr_number = {}, centromeres = {}".format(chr_number, centromeres) - - r = 0.7 # width and height of the whole chromosome set - xstart, ystart = 0.15, 0.85 - xinterval = r / chr_number - xwidth = 
xinterval * 0.5 # chromosome width - max_chr_len = max(chr_lens.values()) - ratio = r / max_chr_len # canvas / base - - # first the chromosomes - chr_locations = {} - for a, (chr, clen) in enumerate(natsorted(chr_lens.items())): - xx = xstart + a * xinterval + 0.5 * xwidth - chr_locations[chr] = xx - root.text(xx, ystart + 0.01, str(get_number(chr)), ha="center") - if centromeres: - yy = ystart - centromeres[chr] * ratio - ChromosomeWithCentromere( - root, xx, ystart, yy, ystart - clen * ratio, width=xwidth - ) - else: - Chromosome(root, xx, ystart, ystart - clen * ratio, width=xwidth) - - alpha = 1 - # color the regions - for chr in sorted(chr_lens.keys()): - excess = 0 - bac_list = [] - prev_end, prev_klass = 0, None - xx = chr_locations[chr] - 0.5 * xwidth - for b in bed.sub_bed(chr): - klass = b.accn - if klass == "centromere": - continue - start = b.start - end = b.end - if start < prev_end + mergedist and klass == prev_klass: - start = prev_end - yystart = ystart - end * ratio - yyend = ystart - start * ratio - root.add_patch( - Rectangle( - (xx, yystart), - xwidth, - yyend - yystart, - fc=class_colors.get(klass, "lightslategray"), - lw=0, - alpha=alpha, - ) - ) - prev_end, prev_klass = b.end, klass - - if imagemap: - # `segment` : size of current BAC being investigated + `excess` - # `excess` : left-over bases from the previous BAC, as a result of - # iterating over `winsize` regions of `segment` - if excess == 0: - segment_start = start - segment = (end - start + 1) + excess - while True: - if segment < winsize: - bac_list.append(b.accn) - excess = segment - break - segment_end = segment_start + winsize - 1 - tlx, tly, brx, bry = ( - xx, - (1 - ystart) + segment_start * ratio, - xx + xwidth, - (1 - ystart) + segment_end * ratio, - ) - print( - "\t" - + write_ImageMapLine( - tlx, - tly, - brx, - bry, - iopts.w, - iopts.h, - iopts.dpi, - chr + ":" + ",".join(bac_list), - segment_start, - segment_end, - ), - file=mapfh, - ) - - segment_start += winsize - 
segment -= winsize - bac_list = [] - - if imagemap and excess > 0: - bac_list.append(b.accn) - segment_end = end - tlx, tly, brx, bry = ( - xx, - (1 - ystart) + segment_start * ratio, - xx + xwidth, - (1 - ystart) + segment_end * ratio, - ) - print( - "\t" - + write_ImageMapLine( - tlx, - tly, - brx, - bry, - iopts.w, - iopts.h, - iopts.dpi, - chr + ":" + ",".join(bac_list), - segment_start, - segment_end, - ), - file=mapfh, - ) - - if imagemap: - print("", file=mapfh) - mapfh.close() - logger.debug("Image map written to `%s`", mapfh.name) - - if gauge: - xstart, ystart = 0.9, 0.85 - Gauge(root, xstart, ystart - r, ystart, max_chr_len) - - if "centromere" in class_colors: - del class_colors["centromere"] - - # class legends, four in a row - if legend: - xstart = 0.1 - xinterval = 0.8 / len(class_colors) - xwidth = 0.04 - yy = 0.08 - for klass, cc in sorted(class_colors.items()): - if klass == "-": - continue - root.add_patch( - Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0, alpha=alpha) - ) - root.text(xstart + xwidth + 0.01, yy, latex(klass), fontsize=10) - xstart += xinterval - - if empty: - root.add_patch(Rectangle((xstart, yy), xwidth, xwidth, fill=False, lw=1)) - root.text(xstart + xwidth + 0.01, yy, empty, fontsize=10) - - if title: - root.text(0.5, 0.95, markup(title), ha="center", va="center") - - -if __name__ == "__main__": - main() diff --git a/jcvi/graphics/coverage.py b/jcvi/graphics/coverage.py deleted file mode 100644 index 05179642..00000000 --- a/jcvi/graphics/coverage.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog chrC01 chr.sizes data - -Read coverage histogram, similar to wiggle plot. Data contains all the track -data in the form of tab-delimited (x, y) lists. 
-""" - -import os.path as op -import sys - -import numpy as np - -from ..apps.base import OptionParser, glob, logger -from ..formats.sizes import Sizes - -from .base import ( - Rectangle, - adjust_spines, - get_map, - mb_float_formatter, - mb_formatter, - plt, - savefig, -) - - -class XYtrack(object): - def __init__(self, ax, datafile, color=None, ymax=40): - self.ax = ax - self.xy = [] - fp = open(datafile) - for row in fp: - atoms = row.split() - self.xy.append([int(atoms[0]), float(atoms[1])]) - fp.close() - - self.x, self.y = zip(*self.xy) - logger.debug("File `{0}` imported (records={1}).".format(datafile, len(self.x))) - self.color = color or "k" - self.ymax = ymax - - @property - def mapping(self): - return dict(zip(self.x, self.y)) - - def interpolate(self, maxsize, unit=10000): - maxsize = int(maxsize) - for pos in range(unit, maxsize + unit, unit): - if pos in self.x: - continue - self.xy.append([pos, 0]) - self.xy.sort() - self.x, self.y = zip(*self.xy) - logger.debug("After interpolate: {0}".format(len(self.x))) - - def cap(self, ymax): - self.xy = [[a, 0] if b > ymax else [a, b] for a, b in self.xy] - self.x, self.y = zip(*self.xy) - - def draw(self): - ax = self.ax - color = self.color - ax.plot(self.x, self.y, lw=0) - ax.fill_between(self.x, self.y, color=color, lw=0) - ax.set_ylim(0, self.ymax) - ax.set_axis_off() - - def import_hlfile(self, hlfile, chr, unit=10000, diverge=("r", "g")): - rr, gg = diverge - fp = open(hlfile) - imported = 0 - mapping = self.mapping - for row in fp: - if row.strip() == "": - continue - seqid, start, end, tag = row.split() - if seqid != chr: - continue - start = int(start) - end = int(end) - if tag == "double": - self.highlight(mapping, start, end, color=rr, unit=unit) - else: - self.highlight(mapping, start, end, color=gg, unit=unit) - imported += 1 - logger.debug("Imported {0} regions from file `{1}`.".format(imported, hlfile)) - - def highlight(self, mapping, start, end, color="r", unit=10000, zorder=10): - ax = 
self.ax - x = range(start, end + unit, unit) - y = [mapping[z] for z in x] - # Mask the highlight region so that they don't appear in background - for a in self.xy: - if start <= a[0] <= end: - a[1] = 0 - self.x, self.y = zip(*self.xy) - ax.plot(x, y, lw=0) - ax.fill_between(x, y, color=color, lw=0, zorder=zorder) - - def vlines(self, xs, color="m"): - for x in xs: - self.ax.plot((x, x), (0, self.ymax), "-", color=color, lw=2) - - -class Coverage(object): - def __init__( - self, - fig, - root, - canvas, - chr, - xlim, - datadir, - order=None, - hlsuffix=None, - palette=None, - cap=50, - gauge="bottom", - plot_label=True, - plot_chr_label=True, - gauge_step=5000000, - vlines=None, - labels_dict={}, - diverge=("r", "g"), - ): - x, y, w, h = canvas - p = 0.01 - root.add_patch( - Rectangle( - (x - p, y - p), - w + 2 * p, - h + 2 * p, - lw=1, - fill=False, - ec="darkslategray", - zorder=10, - ) - ) - datafiles = glob(op.join(datadir, chr + "*")) - - if order: - datafiles = [z for z in datafiles if z.split(".")[1] in order] - datafiles.sort(key=lambda x: order.index(x.split(".")[1])) - - ntracks = len(datafiles) - yinterval = h / ntracks - yy = y + h - - if palette is None: - # Get the palette - set2 = get_map("Set2", "qualitative", ntracks).mpl_colors - else: - set2 = [palette] * ntracks - - if gauge == "top": - gauge_ax = fig.add_axes([x, yy + p, w, 0.0001]) - adjust_spines(gauge_ax, ["top"]) - tpos = yy + 0.07 - elif gauge == "bottom": - gauge_ax = fig.add_axes([x, y - p, w, 0.0001]) - adjust_spines(gauge_ax, ["bottom"]) - tpos = y - 0.07 - - start, end = xlim - if gauge: - fs = gauge_step < 1000000 - setup_gauge_ax(gauge_ax, start, end, gauge_step, float_formatter=fs) - - if plot_chr_label: - root.text( - x + w / 2, - tpos, - chr, - ha="center", - va="center", - color="darkslategray", - size=16, - ) - - yys = [] - for label, datafile, c in zip(order, datafiles, set2): - yy -= yinterval - yys.append(yy) - ax = fig.add_axes([x, yy, w, yinterval * 0.9]) - xy = 
XYtrack(ax, datafile, color=c) - xy.interpolate(end) - xy.cap(ymax=cap) - if vlines: - xy.vlines(vlines) - if hlsuffix: - hlfile = op.join(datadir, ".".join((label, hlsuffix))) - xy.import_hlfile(hlfile, chr, diverge=diverge) - if plot_label: - label = labels_dict.get(label, label.capitalize()) - label = r"\textit{{{0}}}".format(label) - root.text(x - 0.015, yy + yinterval / 2, label, ha="right", va="center") - xy.draw() - ax.set_xlim(*xlim) - - self.yys = yys - - -def setup_gauge_ax(gauge_ax, start, end, gauge_step, float_formatter=False): - gauge_ax.set_xlim(start, end) - formatter = mb_float_formatter if float_formatter else mb_formatter - gauge_ax.xaxis.set_major_formatter(formatter) - gauge_ax.xaxis.set_ticks(np.arange(start + gauge_step, end, gauge_step)) - gauge_ax.yaxis.set_ticks([]) - - -def main(): - p = OptionParser(__doc__) - p.add_argument("--order", help="The order to plot the tracks, comma-separated") - opts, args, iopts = p.set_image_options() - - if len(args) != 3: - sys.exit(not p.print_help()) - - chr, sizes, datadir = args - order = opts.order - hlsuffix = opts.hlsuffix - if order: - order = order.split(",") - sizes = Sizes(sizes) - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - canvas = (0.12, 0.35, 0.8, 0.35) - chr_size = sizes.get_size(chr) - Coverage( - fig, root, canvas, chr, (0, chr_size), datadir, order=order, hlsuffix=hlsuffix - ) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - image_name = chr + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -if __name__ == "__main__": - main() diff --git a/jcvi/graphics/dotplot.py b/jcvi/graphics/dotplot.py deleted file mode 100755 index 59dbbf16..00000000 --- a/jcvi/graphics/dotplot.py +++ /dev/null @@ -1,549 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog [anchorfile|ksfile] --qbed query.bed --sbed subject.bed - -visualize the anchorfile in a dotplot. 
anchorfile contains two columns -indicating gene pairs, followed by an optional column (e.g. Ks value). - -The option --colormap specifies the block color to highlight certain blocks in -a file. Block ids are 1-based (non-digit chars will be removed). For example, below -requests that block 1 is class 'sigma' and block 2 is class 'tau'. - -1 sigma -2 tau -3 tau - -These classes will be mapped to auto-assigned colors and figure legend added to -the bottom of the figure. - -*Important* - -Before running this script it is recommended to check/install -TeX Live (http://www.tug.org/texlive/) and -Ghostscript (http://www.ghostscript.com/) -see more here: http://matplotlib.sourceforge.net/users/usetex.html -""" - -import os.path as op -import string -import sys - -from copy import deepcopy -from random import sample -from typing import Optional - -from ..apps.base import OptionParser, logger, need_update -from ..compara.base import AnchorFile -from ..compara.synteny import batch_scan, check_beds, get_orientation -from ..utils.cbook import seqid_parse, thousands - -from .base import ( - Rectangle, - TextHandler, - draw_cmap, - latex, - markup, - normalize_axes, - plt, - savefig, - set_human_axis, - set1, -) - - -class Palette(dict): - def __init__(self, palettedict=None, palettefile=None): - """Instantiate a palette to map from block_id to color - - Args: - palettedict (Dict, optional): Get the mapping from a dict. Defaults to None. - palettefile (str, optional): Get the mapping from a two-column file. Defaults to None. 
- """ - super().__init__() - if palettedict is not None: - self.update(palettedict) - if palettefile is None: - return - - pal = "rbcygmk" - - fp = open(palettefile) - for row in fp: - a, b = row.split() - a = "".join(x for x in a if x in string.digits) - a = int(a) - self[a] = b - - self.categories = sorted(set(self.values())) - self.colors = dict(zip(self.categories, pal)) - - logger.debug( - "Color info ({0} categories) imported for {1} blocks.".format( - len(self.colors), len(self) - ) - ) - logger.debug(str(self.colors)) - - for k, v in self.items(): # Update from categories to colors - self[k] = self.colors[v] - - @classmethod - def from_block_orientation( - cls, anchorfile, qbed, sbed, forward_color="#e7298a", reverse_color="#3690c0" - ): - """Generate a palette which contains mapping from block_id (1-based) to colors. - - Args: - anchorfile (str): Path to the .anchors file - qbed (BedFile): Query BED - sbed (BedFile): Subject BED - forward_color (str, optional): Color of forward block. Defaults to "#e7298a". - reverse_color (str, optional): Color of reverse block. Defaults to "#3690c0". 
- """ - ac = AnchorFile(anchorfile) - blocks = ac.blocks - palette = {} - qorder = qbed.order - sorder = sbed.order - - for i, block in enumerate(blocks): - block_id = i + 1 - - a, b, _ = zip(*block) - a = [qorder[x] for x in a] - b = [sorder[x] for x in b] - ia, _ = zip(*a) - ib, _ = zip(*b) - - orientation = get_orientation(ia, ib) - palette[block_id] = reverse_color if orientation == "-" else forward_color - return cls(palettedict=palette) - - -def draw_box(clusters, ax, color="b"): - for cluster in clusters: - xrect, yrect = zip(*cluster) - xmin, xmax, ymin, ymax = min(xrect), max(xrect), min(yrect), max(yrect) - ax.add_patch( - Rectangle( - (xmin, ymin), xmax - xmin, ymax - ymin, ec=color, fc="y", alpha=0.5 - ) - ) - - -def plot_breaks_and_labels( - fig, - root, - ax, - gx, - gy, - xsize, - ysize, - qbreaks, - sbreaks, - sep=True, - chrlw=0.1, - sepcolor="g", - minfont=5, - stdpf=True, - chpf=True, - usetex: bool = True, -): - xlim = (0, xsize) - ylim = (ysize, 0) # invert the y-axis - - # Tag to mark whether to plot chr name (skip small ones) - xchr_labels, ychr_labels = [], [] - th = TextHandler(fig, usetex=usetex) - - # plot the chromosome breaks - for seqid, beg, end in qbreaks: - xsize_ratio = abs(end - beg) * 0.8 / xsize - fontsize = th.select_fontsize(xsize_ratio) - if chpf: - seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2]) - - xchr_labels.append((seqid, (beg + end) / 2, fontsize)) - if sep: - ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor) - - for seqid, beg, end in sbreaks: - ysize_ratio = abs(end - beg) * 0.8 / ysize - fontsize = th.select_fontsize(ysize_ratio) - if chpf: - seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2]) - - ychr_labels.append((seqid, (beg + end) / 2, fontsize)) - if sep: - ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor) - - # plot the chromosome labels - for label, pos, fontsize in xchr_labels: - pos = 0.1 + pos * 0.8 / xsize - if fontsize >= minfont: - root.text( - pos, - 0.91, - latex(label), - 
size=fontsize, - ha="center", - va="bottom", - rotation=45, - color="grey", - ) - - # remember y labels are inverted - for label, pos, fontsize in ychr_labels: - pos = 0.9 - pos * 0.8 / ysize - if fontsize >= minfont: - root.text(0.91, pos, latex(label), size=fontsize, va="center", color="grey") - - # Plot the frame - ax.plot(xlim, [0, 0], "-", lw=chrlw, color=sepcolor) - ax.plot(xlim, [ysize, ysize], "-", lw=chrlw, color=sepcolor) - ax.plot([0, 0], ylim, "-", lw=chrlw, color=sepcolor) - ax.plot([xsize, xsize], ylim, "-", lw=chrlw, color=sepcolor) - - ax.set_xlim(xlim) - ax.set_ylim(ylim) - - ax.set_xlabel(gx, size=16) - ax.set_ylabel(gy, size=16) - - # beautify the numeric axis - for tick in ax.get_xticklines() + ax.get_yticklines(): - tick.set_visible(False) - - set_human_axis(ax) - - plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="gray", size=10) - - return xlim, ylim - - -def downsample(data, sample_number: int = 10000): - """ - Downsample the data to a manageable size for plotting. - """ - npairs = len(data) - # Only show random subset - if npairs > sample_number: - logger.debug( - "Showing a random subset of %d data points (total %d) for clarity.", - sample_number, - npairs, - ) - data = sample(data, sample_number) - return data - - -def dotplot( - anchorfile: str, - qbed, - sbed, - fig, - root, - ax, - vmin: float = 0, - vmax: float = 1, - is_self: bool = False, - synteny: bool = False, - cmap_text: Optional[str] = None, - cmap="copper", - genomenames=None, - sample_number: int = 10000, - minfont: int = 5, - palette: Optional[Palette] = None, - chrlw: float = 0.1, - title: Optional[str] = None, - sep: bool = True, - sepcolor: str = "g", - stdpf: bool = True, - chpf: bool = True, - usetex: bool = True, -): - """ - Draw a dotplot from an anchor file. 
- """ - fp = open(anchorfile, encoding="utf-8") - # add genome names - if genomenames: - gx, gy = genomenames.split("_") - else: - to_ax_label = lambda fname: op.basename(fname).split(".")[0] - gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)] - - # Stylize the axis labels - gx, gy = markup(gx), markup(gy) - - qorder = qbed.order - sorder = sbed.order - - data = [] - if cmap_text: - logger.debug("Capping values within [%.1f, %.1f]", vmin, vmax) - - block_id = 0 - block_color = None - for row in fp: - atoms = row.split() - if row[0] == "#": - block_id += 1 - block_color = palette.get(block_id, "k") if palette else None - continue - - # first two columns are query and subject, and an optional third column - if len(atoms) < 2: - continue - - query, subject = atoms[:2] - value = atoms[-1] - - if cmap_text: - try: - value = float(value) - except ValueError: - value = vmax - - if value < vmin: - continue - if value > vmax: - continue - else: - value = 0 - - if query not in qorder: - continue - if subject not in sorder: - continue - - qi, q = qorder[query] - si, s = sorder[subject] - - nv = block_color or value - data.append((qi, si, nv)) - if is_self: # Mirror image - data.append((si, qi, nv)) - - npairs = len(data) - data = downsample(data, sample_number=sample_number) - x, y, c = zip(*data) - - if palette: - ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0) - else: - ax.scatter( - x, y, c=c, edgecolors="none", s=2, lw=0, cmap=cmap, vmin=vmin, vmax=vmax - ) - - if synteny: - clusters = batch_scan(data, qbed, sbed) - draw_box(clusters, ax) - - if cmap_text: - draw_cmap(root, cmap_text, vmin, vmax, cmap=cmap) - - xsize, ysize = len(qbed), len(sbed) - logger.debug("xsize=%d ysize=%d", xsize, ysize) - qbreaks = qbed.get_breaks() - sbreaks = sbed.get_breaks() - xlim, _ = plot_breaks_and_labels( - fig, - root, - ax, - gx, - gy, - xsize, - ysize, - qbreaks, - sbreaks, - sep=sep, - chrlw=chrlw, - sepcolor=sepcolor, - minfont=minfont, - stdpf=stdpf, - chpf=chpf, - 
usetex=usetex, - ) - - # create a diagonal to separate mirror image for self comparison - if is_self: - ax.plot(xlim, (0, ysize), "m-", alpha=0.5, lw=2) - - if palette and hasattr( - palette, "colors" - ): # bottom-left has the palette, if available - colors = palette.colors - xstart, ystart = 0.1, 0.05 - for category, c in sorted(colors.items()): - root.add_patch(Rectangle((xstart, ystart), 0.03, 0.02, lw=0, fc=c)) - root.text(xstart + 0.04, ystart, category, color=c) - xstart += 0.1 - - if title is None: - title = f"Inter-genomic comparison: {gx} vs {gy}" - if is_self: - title = f"Intra-genomic comparison within {gx}" - npairs //= 2 - title += f" ({thousands(npairs)} gene pairs)" - root.set_title(title, x=0.5, y=0.96, color="k") - if title: - logger.debug("Dot plot title: %s", title) - normalize_axes(root) - - -def subset_bed(bed, seqids): - - newbed = deepcopy(bed) - del newbed[:] - for b in bed: - if b.seqid not in seqids: - continue - newbed.append(b) - return newbed - - -def dotplot_main(args): - p = OptionParser(__doc__) - p.set_beds() - p.add_argument( - "--synteny", - default=False, - action="store_true", - help="Run a fast synteny scan and display blocks", - ) - p.add_argument("--cmaptext", help="Draw colormap box on the bottom-left corner") - p.add_argument( - "--vmin", - dest="vmin", - type=float, - default=0, - help="Minimum value in the colormap", - ) - p.add_argument( - "--vmax", - dest="vmax", - type=float, - default=2, - help="Maximum value in the colormap", - ) - p.add_argument( - "--nmax", - dest="sample_number", - type=int, - default=10000, - help="Maximum number of data points to plot", - ) - p.add_argument( - "--minfont", - type=int, - default=4, - help="Do not render labels with size smaller than", - ) - p.add_argument("--colormap", help="Two column file, block id to color mapping") - p.add_argument( - "--colororientation", - action="store_true", - default=False, - help="Color the blocks based on orientation, similar to mummerplot", - ) - 
p.add_argument( - "--nosort", - default=False, - action="store_true", - help="Do not sort the seqids along the axes", - ) - p.add_argument( - "--nosep", default=False, action="store_true", help="Do not add contig lines" - ) - p.add_argument("--title", help="Title of the dot plot") - p.set_dotplot_opts() - p.set_outfile(outfile=None) - opts, args, iopts = p.set_image_options( - args, figsize="9x9", style="dark", dpi=90, cmap="copper" - ) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (anchorfile,) = args - qbed, sbed, qorder, sorder, is_self = check_beds( - anchorfile, p, opts, sorted=(not opts.nosort) - ) - - palette = opts.colormap - if palette: - palette = Palette(palettefile=palette) - elif opts.colororientation: - palette = Palette.from_block_orientation(anchorfile, qbed, sbed) - - cmaptext = opts.cmaptext - if anchorfile.endswith(".ks"): - from ..compara.ks import KsFile - - logger.debug("Anchors contain Ks values") - cmaptext = cmaptext or "*Ks* values" - anchorksfile = anchorfile + ".anchors" - if need_update(anchorfile, anchorksfile): - ksfile = KsFile(anchorfile) - ksfile.print_to_anchors(anchorksfile) - anchorfile = anchorksfile - - if opts.skipempty: - ac = AnchorFile(anchorfile) - if is_self: - qseqids = sseqids = set() - else: - qseqids, sseqids = set(), set() - - for pair in ac.iter_pairs(): - q, s = pair[:2] - _, q = qorder[q] - _, s = sorder[s] - qseqids.add(q.seqid) - sseqids.add(s.seqid) - - if is_self: - qbed = sbed = subset_bed(qbed, qseqids) - else: - qbed = subset_bed(qbed, qseqids) - sbed = subset_bed(sbed, sseqids) - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) # the whole canvas - ax = fig.add_axes((0.1, 0.1, 0.8, 0.8)) # the dot plot - - dotplot( - anchorfile, - qbed, - sbed, - fig, - root, - ax, - vmin=opts.vmin, - vmax=opts.vmax, - is_self=is_self, - synteny=opts.synteny, - cmap_text=opts.cmaptext, - cmap=iopts.cmap, - genomenames=opts.genomenames, - sample_number=opts.sample_number, - 
minfont=opts.minfont, - palette=palette, - sep=(not opts.nosep), - sepcolor=set1[int(opts.theme)], - title=opts.title, - stdpf=(not opts.nostdpf), - chpf=(not opts.nochpf), - usetex=iopts.usetex, - ) - - image_name = opts.outfile or (op.splitext(anchorfile)[0] + "." + opts.format) - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - fig.clear() - - -if __name__ == "__main__": - dotplot_main(sys.argv[1:]) diff --git a/jcvi/graphics/glyph.py b/jcvi/graphics/glyph.py deleted file mode 100644 index d6de01a1..00000000 --- a/jcvi/graphics/glyph.py +++ /dev/null @@ -1,761 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Gradient gene features -""" - -import os.path as op -import sys - -from random import choice, shuffle, random, randint - -import numpy as np - -from ..apps.base import OptionParser, ActionDispatcher -from ..utils.grouper import Grouper - -from .base import ( - CirclePolygon, - Ellipse, - FancyArrowPatch, - Polygon, - Rectangle, - get_map, - plt, - savefig, - set3, -) - - -tstep = 0.05 -Timing = np.arange(0, 1 + tstep, tstep) -arrowprops = dict( - arrowstyle="fancy", - fc="lightslategray", - ec="lightslategray", - connectionstyle="arc3,rad=-0.05", -) - - -class Bezier(object): - """ - Cubic bezier curve, see the math: - - p0 : origin, p1, p2 :control, p3: destination - """ - - def __init__(self, ax, p0, p1, p2, p3, color="m", alpha=0.2): - pts = (p0, p1, p2, p3) - px, py = zip(*pts) - xt = self.get_array(px) - yt = self.get_array(py) - - ax.plot(xt, yt, "-", color=color, alpha=alpha) - - def get_array(self, pts, t=Timing): - p0, p1, p2, p3 = pts - - # Get the coeffiencients - c = 3 * (p1 - p0) - b = 3 * (p2 - p1) - c - a = p3 - p0 - c - b - - tsquared = t**2 - tcubic = tsquared * t - return a * tcubic + b * tsquared + c * t + p0 - - -class RoundLabel(object): - """Round rectangle around the text label""" - - def __init__(self, ax, x1, x2, t, lw=0, fill=False, fc="lavender", **kwargs): - ax.text( - x1, - x2, - t, - ha="center", - 
bbox=dict(boxstyle="round", fill=fill, fc=fc, lw=lw), - **kwargs - ) - - -class RoundRect(object): - """Round rectangle directly""" - - def __init__(self, ax, xy, width, height, shrink=0.1, label=None, **kwargs): - shrink *= height - x, y = xy - pts = [] - # plot the four rounded cap one by one - pts += plot_cap( - (x + width - shrink, y + height - shrink), np.radians(range(0, 90)), shrink - ) - pts += [[x + width - shrink, y + height], [x + shrink, y + height]] - pts += plot_cap( - (x + shrink, y + height - shrink), np.radians(range(90, 180)), shrink - ) - pts += [[x, y + height - shrink], [x, y + shrink]] - pts += plot_cap((x + shrink, y + shrink), np.radians(range(180, 270)), shrink) - pts += [[x + shrink, y], [x + width - shrink, y]] - pts += plot_cap( - (x + width - shrink, y + shrink), np.radians(range(270, 360)), shrink - ) - pts += [[x + width, y + shrink], [x + width, y + height - shrink]] - p1 = Polygon(pts, **kwargs) - ax.add_patch(p1) - # add a white transparency ellipse filter - if label: - ax.text( - x + width / 2, - y + height / 2, - label, - size=10, - ha="center", - va="center", - color="w", - ) - - -class DoubleSquare(object): - """Square with a double-line margin""" - - def __init__(self, ax, x, y, radius=0.01, **kwargs): - d = radius * 1.5 - ax.add_patch(Rectangle((x - d, y - d), 2 * d, 2 * d, fc="w", ec="k", zorder=10)) - d = radius - ax.add_patch(Rectangle((x - d, y - d), 2 * d, 2 * d, zorder=10, **kwargs)) - - -class DoubleCircle(object): - """Circle with a double-line margin""" - - def __init__(self, ax, x, y, radius=0.01, **kwargs): - ax.add_patch(CirclePolygon((x, y), radius * 1.4, resolution=50, fc="w", ec="k")) - ax.add_patch(CirclePolygon((x, y), radius, resolution=50, **kwargs)) - - -def get_asymmetry(ax, radius): - """Calculates asymmetry of x and y axes. For axes that do not keep equal aspect ratio. 
- - Args: - ax (Axes): matplotlib axes - radius (float): - """ - x0, y0 = ax.transAxes.transform((0, 0)) # Lower left in pixels - x1, y1 = ax.transAxes.transform((1, 1)) # Upper right in pixels - dx = x1 - x0 - dy = y1 - y0 - maxd = max(dx, dy) - width = radius * maxd / dx - height = radius * maxd / dy - return width, height - - -class TextCircle(object): - """Circle with a character wrapped in""" - - def __init__( - self, - ax, - x, - y, - label, - radius=0.02, - fc="k", - color="w", - size=12, - zorder=4, - fontweight="bold", - **kwargs - ): - width, height = get_asymmetry(ax, radius) - circle = Ellipse((x, y), width, height, fc=fc, ec=fc, zorder=zorder, **kwargs) - ax.add_patch(circle) - ax.text( - x, - y, - label, - ha="center", - va="center", - color=color, - size=size, - zorder=zorder + 1, - fontweight=fontweight, - **kwargs - ) - - -class BasePalette(dict): - """Base class for coloring gene glyphs""" - - palette: dict - - def get_color_and_zorder(self, feature: str) -> tuple: - """Get color and zorder based on the orientation. - - Args: - feature (str): orientation, name etc. - - Returns: - (str, int): color and zorder for the given orientation - """ - color = self.palette.get(feature) - return color, 4 - - -class OrientationPalette(BasePalette): - """Color gene glyphs with forward/reverse""" - - forward, backward = "b", "g" # Genes with different orientations - palette = {"+": forward, "-": backward} - - -class OrthoGroupPalette(BasePalette): - """Color gene glyphs with random orthogroup color""" - - grouper: Grouper - palette = set3 - - def __init__(self, grouper: Grouper): - """Initialize with grouper instance indicating orthogroup assignments. - - Args: - grouper (Grouper): Orthogroup assignments - """ - super().__init__() - self.grouper = grouper - - def get_color_and_zorder(self, feature: str) -> tuple: - """Get color based on orthogroup assignement of a gene. 
- - Args: - feature (str): Name of the gene - - Returns: - str: color and zorder for the given gene_name based on the assignment - """ - if feature not in self.grouper: - return "gray", 3 - group = self.grouper[feature] - # Any gene part of an orthogroup gets a higher zorder - return self.palette[hash(group) % len(self.palette)], 4 - - -class BaseGlyph(list): - def __init__(self, ax): - super().__init__() - self.ax = ax - - def add_patches(self): - for p in self: - self.ax.add_patch(p) - - def set_transform(self, tr): - for p in self: - p.set_transform(tr) - - -class Glyph(BaseGlyph): - Styles = ("box", "arrow") - Palette = ("orientation", "orthogroup") - ArrowStyle = "Simple,head_length=1.5,head_width=7,tail_width=7" - - def __init__( - self, - ax, - x1, - x2, - y, - height=0.04, - gradient=True, - fc="gray", - ec="gainsboro", - lw=0, - style="box", - **kwargs - ): - """Draw a region that represent an interval feature, e.g. gene or repeat - - Args: - ax (matplotlib.axis): matplot axis object - x1 (float): start coordinate - x2 (float): end coordinate - y (float): y coordinate. Note that the feature is horizontally drawn. - height (float, optional): Height of the feature. Defaults to 0.04. - gradient (bool, optional): Shall we draw color gradient on the box? Defaults to True. - fc (str, optional): Face color of the feature. Defaults to "gray". - style (str, optional): Style, either box|arrow. Defaults to "box". 
- """ - - super().__init__(ax) - width = x2 - x1 - # Frame around the gradient rectangle - p1 = (x1, y - 0.5 * height) - if style == "arrow": - patch = FancyArrowPatch( - (x1, y), - (x2, y), - shrinkA=0, - shrinkB=0, - arrowstyle=self.ArrowStyle, - fc=fc, - ec=ec, - lw=lw, - **kwargs - ) - else: - patch = Rectangle(p1, width, height, fc=fc, ec=ec, lw=lw, **kwargs) - self.append(patch) - - # Several overlaying patches - if gradient: - for cascade in np.arange(0.1, 0.55, 0.05): - p1 = (x1, y - height * cascade) - self.append( - Rectangle( - p1, - width, - 2 * cascade * height, - fc="w", - lw=0, - alpha=0.1, - **kwargs - ) - ) - - self.add_patches() - - -class ExonGlyph(BaseGlyph): - """Multiple rectangles linked together.""" - - def __init__(self, ax, x, y, mrnabed, exonbeds, height=0.03, ratio=1, align="left"): - super().__init__(ax) - start, end = mrnabed.start, mrnabed.end - xa = lambda a: x + (a - start) * ratio - xb = lambda a: x - (end - a) * ratio - xc = xa if align == "left" else xb - - Glyph(ax, xc(start), xc(end), y, height=height / 3) - for b in exonbeds: - bstart, bend = b.start, b.end - Glyph(ax, xc(bstart), xc(bend), y, fc="orange") - - -class GeneGlyph(BaseGlyph): - """Draws an oriented gene symbol, with color gradient, to represent genes""" - - def __init__( - self, - ax, - x1, - x2, - y, - height, - gradient=True, - tip=0.0025, - color="k", - shadow=False, - **kwargs - ): - super().__init__(ax) - # Figure out the polygon vertices first - orientation = 1 if x1 < x2 else -1 - level = 10 - tip = min(tip, abs(x1 - x2)) - # Frame - p1 = (x1, y - height * 0.5) - p2 = (x2 - orientation * tip, y - height * 0.5) - p3 = (x2, y) - p4 = (x2 - orientation * tip, y + height * 0.5) - p5 = (x1, y + 0.5 * height) - if "fc" not in kwargs: - kwargs["fc"] = color - if "ec" not in kwargs: - kwargs["ec"] = color - P = Polygon([p1, p2, p3, p4, p5], **kwargs) - self.append(P) - - if gradient: - zz = kwargs.get("zorder", 1) - zz += 1 - # Patch (apply white mask) - for 
cascade in np.arange(0, 0.5, 0.5 / level): - p1 = (x1, y - height * cascade) - p2 = (x2 - orientation * tip, y - height * cascade) - p3 = (x2, y) - p4 = (x2 - orientation * tip, y + height * cascade) - p5 = (x1, y + height * cascade) - self.append( - Polygon([p1, p2, p3, p4, p5], fc="w", lw=0, alpha=0.2, zorder=zz) - ) - - if shadow: - import matplotlib.patheffects as pe - - P.set_path_effects([pe.withSimplePatchShadow((1, -1), alpha=0.4)]) - - self.add_patches() - - -class CartoonRegion(object): - """ - Draw a collection of GeneGlyphs along chromosome. - """ - - def __init__(self, n, k=12): - # Chromosome - self.n = n - self.orientations = [choice([-1, 1]) for i in range(n)] - self.assign_colors(k) - - def draw(self, ax, x, y, gene_len=0.012, strip=True, color=True): - if strip: - self.strip() - - t = gene_len * 1.2 - length = t * (self.n + 1) - x1, x2 = x - length / 2, x + length / 2 - self.x1, self.x2 = x1, x2 - self.y = y - ax.plot((x1, x2), (y, y), color="gray", lw=2, zorder=1) - bit = 0.008 - xs = (x1 - 2 * bit, x1 - bit, x2 + bit, x2 + 2 * bit) - ax.plot(xs, [y] * 4, ".", lw=2, color="gray") - pos = np.arange(x1 + t, x2, t)[: self.n] - assert len(pos) == self.n, "len(pos) = {0}".format(len(pos)) - - gl = gene_len / 2 - for x, c, o in zip(pos, self.colors, self.orientations): - x1, x2 = x - gl, x + gl - if o < 0: - x1, x2 = x2, x1 - if not color and c != "k": - c = "w" - GeneGlyph( - ax, - x1, - x2, - y, - gene_len, - color=c, - ec="k", - gradient=False, - shadow=True, - zorder=10, - ) - - def assign_colors(self, k): - from matplotlib.colors import rgb2hex - - colorset = get_map("Paired", "qualitative", k).mpl_colors - colorset = [rgb2hex(x) for x in colorset] - cs = colorset + ["w"] * (self.n - k - 1) - shuffle(cs) - self.colors = cs[: self.n / 2] + ["k"] + cs[self.n / 2 :] - lf, p, rf = self.find_k() - self.exchange(lf, p - 2) - self.exchange(rf, p + 2) - - def exchange(self, p1, p2): - self.colors[p1], self.colors[p2] = self.colors[p2], self.colors[p1] - 
self.orientations[p1], self.orientations[p2] = ( - self.orientations[p2], - self.orientations[p1], - ) - - def delete(self, p, waiver=None): - if waiver and self.colors[p] in waiver: - return - self.colors.pop(p) - self.orientations.pop(p) - self.n -= 1 - - def insert(self, p): - self.colors.insert(p, "w") - self.orientations.insert(p, choice([-1, 1])) - self.n += 1 - - def truncate(self, b, e): - b = max(b, 0) - e = min(self.n, e) - self.colors = self.colors[b:e] - self.orientations = self.orientations[b:e] - self.n = e - b - - def assign_flankers(self): - lf, p, rf = self.find_k() - self.flanks = [self.colors[lf], self.colors[rf]] - return p - - def truncate_between_flankers(self, target=0): - try: - lf, rf = self.flanks - except: - self.assign_flankers() - lf, rf = self.flanks - lf = self.colors.index(lf) if lf in self.colors else -1 - rf = self.colors.index(rf) if rf in self.colors else -1 - assert lf >= 0 or rf >= 0 - if rf < 0: - rf = lf - if lf < 0: - lf = rf - if rf + 1 - lf < target: - gap = target - rf - 1 + lf - lf -= gap / 2 - rf += gap / 2 - self.truncate(lf, rf + 1) - - def strip(self): - while self.colors[0] == "w": - self.delete(0) - while self.colors[-1] == "w": - self.delete(self.n - 1) - - def find_k(self): - p = self.colors.index("k") - lf = max(i for i, c in enumerate(self.colors[:p]) if c != "w") - rf = min(i for i, c in enumerate(self.colors[p + 1 :]) if c != "w") - return lf, p, rf + p + 1 - - def evolve(self, mode="S", target=10): - n = self.n - assert mode in ("S", "F", "G") - keep_k = mode == "S" - p = self.assign_flankers() - waiver = self.flanks[:] - if mode == "S": - waiver += ["k"] - if mode == "F": - self.delete(p) - elif mode == "G": - left_score = sum(1 for x in self.colors[:p] if x != "w") - right_score = sum(1 for x in self.colors[p + 1 :] if x != "w") - if left_score > right_score: - self.colors[: p + 1] = ["w"] * (p + 1) - else: - self.colors[p:] = ["w"] * (self.n - p) - while self.nonwhites > target: - if random() > 0.35: - 
self.delete(randint(0, self.n - 1), waiver=waiver) - if random() > 0.65 and self.n < n * 0.8: - self.insert(randint(0, self.n - 1)) - - @property - def nonwhites(self): - return sum(1 for x in self.colors if x != "w") - - -def plot_cap(center, t, r): - x, y = center - return zip(x + r * np.cos(t), y + r * np.sin(t)) - - -def main(): - actions = ( - ("demo", "run a demo to showcase some common usages of various glyphs"), - ("gff", "draw exons for genes based on gff files"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def get_cds_beds(gffile, noUTR=False): - from jcvi.formats.gff import Gff - - mrnabed = None - cdsbeds = [] - gf = Gff(gffile) - for g in gf: - if g.type == "mRNA": - mrnabed = g.bedline - elif g.type == "CDS": - cdsbeds.append(g.bedline) - - if noUTR: - mrnabed.start = min(x.start for x in cdsbeds) - mrnabed.end = max(x.end for x in cdsbeds) - - return mrnabed, cdsbeds - - -def get_setups(gffiles, canvas=0.6, noUTR=False): - setups = [] - for gffile in gffiles: - genename = op.basename(gffile).rsplit(".", 1)[0] - mrnabed, cdsbeds = get_cds_beds(gffile, noUTR=noUTR) - setups.append((genename, mrnabed, cdsbeds)) - - genenames, mrnabeds, cdsbedss = zip(*setups) - maxspan = max(x.span for x in mrnabeds) - ratio = canvas / maxspan - return setups, ratio - - -def gff(args): - """ - %prog gff *.gff - - Draw exons for genes based on gff files. Each gff file should contain only - one gene, and only the "mRNA" and "CDS" feature will be drawn on the canvas. 
- """ - align_choices = ("left", "center", "right") - p = OptionParser(gff.__doc__) - p.add_argument( - "--align", default="left", choices=align_choices, help="Horizontal alignment" - ) - p.add_argument( - "--noUTR", default=False, action="store_true", help="Do not plot UTRs" - ) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - fig = plt.figure(1, (8, 5)) - root = fig.add_axes([0, 0, 1, 1]) - - gffiles = args - ngenes = len(gffiles) - - canvas = 0.6 - setups, ratio = get_setups(gffiles, canvas=canvas, noUTR=opts.noUTR) - align = opts.align - xs = 0.2 if align == "left" else 0.8 - yinterval = canvas / ngenes - ys = 0.8 - tip = 0.01 - for genename, mrnabed, cdsbeds in setups: - ExonGlyph(root, xs, ys, mrnabed, cdsbeds, ratio=ratio, align=align) - if align == "left": - root.text(xs - tip, ys, genename, ha="right", va="center") - elif align == "right": - root.text(xs + tip, ys, genename, ha="left", va="center") - ys -= yinterval - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - figname = "exons.pdf" - savefig(figname, dpi=300) - - -def demo(args): - """ - %prog demo - - Draw sample gene features to illustrate the various fates of duplicate - genes - to be used in a book chapter. 
- """ - p = OptionParser(demo.__doc__) - opts, args = p.parse_args(args) - - fig = plt.figure(1, (8, 5)) - root = fig.add_axes([0, 0, 1, 1]) - - panel_space = 0.23 - dup_space = 0.025 - # Draw a gene and two regulatory elements at these arbitrary locations - locs = [ - (0.5, 0.9), # ancestral gene - (0.5, 0.9 - panel_space + dup_space), # identical copies - (0.5, 0.9 - panel_space - dup_space), - (0.5, 0.9 - 2 * panel_space + dup_space), # degenerate copies - (0.5, 0.9 - 2 * panel_space - dup_space), - (0.2, 0.9 - 3 * panel_space + dup_space), # sub-functionalization - (0.2, 0.9 - 3 * panel_space - dup_space), - (0.5, 0.9 - 3 * panel_space + dup_space), # neo-functionalization - (0.5, 0.9 - 3 * panel_space - dup_space), - (0.8, 0.9 - 3 * panel_space + dup_space), # non-functionalization - (0.8, 0.9 - 3 * panel_space - dup_space), - ] - - default_regulator = "gm" - regulators = [ - default_regulator, - default_regulator, - default_regulator, - "wm", - default_regulator, - "wm", - "gw", - "wb", - default_regulator, - "ww", - default_regulator, - ] - - width = 0.24 - for i, (xx, yy) in enumerate(locs): - regulator = regulators[i] - x1, x2 = xx - 0.5 * width, xx + 0.5 * width - Glyph(root, x1, x2, yy) - if i == 9: # upper copy for non-functionalization - continue - - # coding region - x1, x2 = xx - 0.16 * width, xx + 0.45 * width - Glyph(root, x1, x2, yy, fc="k") - - # two regulatory elements - x1, x2 = xx - 0.4 * width, xx - 0.28 * width - for xx, fc in zip((x1, x2), regulator): - if fc == "w": - continue - - DoubleCircle(root, xx, yy, fc=fc) - - rotation = 30 - tip = 0.02 - if i == 0: - ya = yy + tip - root.text(x1, ya, "Flower", rotation=rotation, va="bottom") - root.text(x2, ya, "Root", rotation=rotation, va="bottom") - elif i == 7: - ya = yy + tip - root.text(x2, ya, "Leaf", rotation=rotation, va="bottom") - - # Draw arrows between panels (center) - arrow_dist = 0.08 - ar_xpos = 0.5 - for ar_ypos in (0.3, 0.53, 0.76): - root.annotate( - " ", - (ar_xpos, ar_ypos), 
- (ar_xpos, ar_ypos + arrow_dist), - arrowprops=arrowprops, - ) - - ar_ypos = 0.3 - for ar_xpos in (0.2, 0.8): - root.annotate( - " ", (ar_xpos, ar_ypos), (0.5, ar_ypos + arrow_dist), arrowprops=arrowprops - ) - - # Duplication, Degeneration - xx = 0.6 - ys = (0.76, 0.53) - processes = ("Duplication", "Degeneration") - for yy, process in zip(ys, processes): - root.text(xx, yy + 0.02, process, fontweight="bold") - - # Label of fates - xs = (0.2, 0.5, 0.8) - fates = ("Subfunctionalization", "Neofunctionalization", "Nonfunctionalization") - yy = 0.05 - for xx, fate in zip(xs, fates): - RoundLabel(root, xx, yy, fate) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - figname = "demo.pdf" - savefig(figname, dpi=300) - - -if __name__ == "__main__": - main() diff --git a/jcvi/graphics/grabseeds.py b/jcvi/graphics/grabseeds.py deleted file mode 100644 index 756a2d78..00000000 --- a/jcvi/graphics/grabseeds.py +++ /dev/null @@ -1,881 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Image processing pipelines for phenotyping projects. 
-""" -import json -import os.path as op -import string -import sys - -from collections import Counter -from datetime import date -from math import cos, pi, sin -from typing import Any, List, Optional, Tuple - -import numpy as np - -from ..apps.base import setup_magick_home - -# Attempt to set MACICK_HOME ENV variable if imagemagick installed with homebrew on Mac -setup_magick_home() - -from PIL.Image import open as iopen -from pyefd import elliptic_fourier_descriptors -from pytesseract import image_to_string -from scipy.ndimage import binary_fill_holes, distance_transform_edt -from scipy.optimize import fmin_bfgs as fmin -from skimage.color import gray2rgb, rgb2gray -from skimage.feature import canny, peak_local_max -from skimage.filters import roberts, sobel, threshold_otsu -from skimage.measure import find_contours, regionprops, label -from skimage.morphology import disk, closing -from skimage.segmentation import clear_border, watershed -from wand.image import Image -from webcolors import rgb_to_hex, normalize_integer_triplet - -from ..algorithms.formula import get_kmeans, reject_outliers -from ..apps.base import ( - ActionDispatcher, - OptionParser, - datadir, - logger, - iglob, - mkdir, -) -from ..formats.base import must_open -from ..formats.pdf import cat -from ..utils.webcolors import closest_color - -from .base import ( - Rectangle, - latex, - load_image, - normalize_axes, - plt, - savefig, - set_helvetica_axis, -) - - -np.seterr(all="ignore") - -RGBTuple = Tuple[int, int, int] - - -class Seed(object): - """ - Seed object with metrics. 
- """ - - def __init__( - self, - imagename: str, - accession: str, - seedno: int, - rgb: RGBTuple, - props: Any, - efds: np.ndarray, - exif: dict, - ): - self.imagename = imagename - self.accession = accession - self.seedno = seedno - y, x = props.centroid - self.x, self.y = int(round(x)), int(round(y)) - self.location = f"{self.x}|{self.y}" - self.area = int(round(props.area)) - self.length = int(round(props.major_axis_length)) - self.width = int(round(props.minor_axis_length)) - self.props = props - self.efds = efds - self.circularity = 4 * pi * props.area / props.perimeter**2 - self.rgb = rgb - self.colorname = closest_color(rgb) - self.datetime = exif.get("exif:DateTimeOriginal", date.today()) - self.rgbtag = triplet_to_rgb(rgb) - self.pixeltag = f"length={self.length} width={self.width} area={self.area}" - self.hashtag = " ".join((self.rgbtag, self.colorname)) - self.calibrated = False - - def __str__(self): - fields = [ - self.imagename, - self.datetime, - self.accession, - self.seedno, - self.location, - self.area, - f"{self.circularity:.2f}", - self.length, - self.width, - self.colorname, - self.rgbtag, - ] - if self.calibrated: - fields += [ - self.pixelcmratio, - self.rgbtransform, - self.correctedlength, - self.correctedwidth, - self.correctedcolorname, - self.correctedrgb, - ] - fields += [",".join(f"{x:.3f}" for x in self.efds)] - return "\t".join(str(x) for x in fields) - - @classmethod - def header(cls, calibrated: bool = False) -> str: - """ - Return header line for the TSV file. - """ - fields = ( - "ImageName DateTime Accession SeedNum Location " - "Area Circularity Length(px) Width(px) ColorName RGB".split() - ) - if calibrated: - fields += ( - "PixelCMratio RGBtransform Length(cm)" - " Width(cm) CorrectedColorName CorrectedRGB".split() - ) - fields += ["EllipticFourierDescriptors"] - return "\t".join(fields) - - def calibrate(self, pixel_cm_ratio: float, tr: np.ndarray): - """ - Calibrate pixel-inch ratio and color adjustment. 
- """ - self.pixelcmratio = f"{pixel_cm_ratio:.2f}" - self.rgbtransform = ",".join([f"{x:.2f}" for x in tr.flatten()]) - self.correctedlength = f"{self.length / pixel_cm_ratio:.2f}" - self.correctedwidth = f"{self.width / pixel_cm_ratio:.2f}" - correctedrgb = np.dot(tr, np.array(self.rgb)) - self.correctedrgb = triplet_to_rgb(correctedrgb) - self.correctedcolorname = closest_color(correctedrgb) - self.calibrated = True - - -def sam(img: np.ndarray, checkpoint: str) -> List[dict]: - """ - Use Segment Anything Model (SAM) to segment objects. - """ - try: - from segment_anything import sam_model_registry, SamAutomaticMaskGenerator - except ImportError: - logger.fatal("segment_anything not installed. Please install it first.") - sys.exit(1) - - model_type = "vit_h" - if not op.exists(checkpoint): - raise AssertionError( - f"File `{checkpoint}` not found, please specify --sam-checkpoint" - ) - sam = sam_model_registry[model_type](checkpoint=checkpoint) - logger.info("Using SAM model `%s` (%s)", model_type, checkpoint) - mask_generator = SamAutomaticMaskGenerator(sam) - return mask_generator.generate(img) - - -def is_overlapping(mask1: dict, mask2: dict, threshold=0.5): - """ - Check if bounding boxes of mask1 and mask2 overlap more than the given - threshold. - """ - x1, y1, w1, h1 = mask1["bbox"] - x2, y2, w2, h2 = mask2["bbox"] - x_overlap = max(0, min(x1 + w1, x2 + w2) - max(x1, x2)) - y_overlap = max(0, min(y1 + h1, y2 + h2) - max(y1, y2)) - intersection = x_overlap * y_overlap - return intersection / min(w1 * h1, w2 * h2) > threshold - - -def deduplicate_masks(masks: List[dict], threshold=0.5): - """ - Deduplicate masks to retain only the foreground objects. 
- """ - masks_sorted = sorted(masks, key=lambda x: x["area"]) - retained_masks = [] - - for mask in masks_sorted: - if not any( - is_overlapping(mask, retained_mask, threshold) - for retained_mask in retained_masks - ): - retained_masks.append(mask) - return retained_masks - - -def rgb_to_triplet(rgb: str) -> RGBTuple: - """ - Convert RGB string to triplet. - """ - return tuple([int(x) for x in rgb.split(",")][:3]) - - -def triplet_to_rgb(triplet: RGBTuple) -> str: - """ - Convert triplet to RGB string. - """ - triplet = normalize_integer_triplet(triplet) - return ",".join(str(int(round(x))) for x in triplet) - - -def main(): - - actions = ( - ("batchseeds", "extract seed metrics for each image in a directory"), - ("seeds", "extract seed metrics from one image"), - ("calibrate", "calibrate pixel-inch ratio and color adjustment"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def total_error(x: np.ndarray, colormap: Tuple[Tuple[np.ndarray, np.ndarray]]) -> float: - """ - Calculate total error between observed and expected colors. - """ - xs = np.reshape(x, (3, 3)) - error_squared = sum(np.linalg.norm(np.dot(xs, o) - e) ** 2 for o, e in colormap) - return error_squared**0.5 - - -def calibrate(args): - """ - %prog calibrate calibrate.JPG boxsize - - Calibrate pixel-inch ratio and color adjustment. 
- - `calibrate.JPG` is the photo containig a colorchecker - - `boxsize` is the measured size for the boxes on printed colorchecker, in - squared centimeter (cm2) units - """ - xargs = args[2:] - p = OptionParser(calibrate.__doc__) - _, args, _ = add_seeds_options(p, args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - imagefile, boxsize = args - boxsize = float(boxsize) - - # Read in color checker - colorcheckerfile = op.join(datadir, "colorchecker.txt") - colorchecker = [] - expected = 0 - with open(colorcheckerfile, encoding="utf-8") as file: - for row in file: - boxes = row.split() - colorchecker.append(boxes) - expected += len(boxes) - - folder = op.split(imagefile)[0] - objects = seeds([imagefile, f"--outdir={folder}"] + xargs) - nseeds = len(objects) - logger.debug("Found %d boxes (expected=%d)", nseeds, expected) - assert ( - expected - 4 <= nseeds <= expected + 4 - ), f"Number of boxes drastically different from {expected}" - - # Calculate pixel-cm ratio - boxes = [t.area for t in objects] - reject = reject_outliers(boxes) - retained_boxes = [b for r, b in zip(reject, boxes) if not r] - mbox = np.median(retained_boxes) # in pixels - pixel_cm_ratio = (mbox / boxsize) ** 0.5 - logger.debug("Median box size: %d pixels. 
Measured box size: %d cm2", mbox, boxsize) - logger.debug("Pixel-cm ratio: %.2f", pixel_cm_ratio) - - xs = [t.x for t in objects] - ys = [t.y for t in objects] - xs = [float(itemx) for itemx in xs] - ys = [float(itemy) for itemy in ys] - idx_xs = get_kmeans(xs, 6) - idx_ys = get_kmeans(ys, 4) - for xi, yi, s in zip(idx_xs, idx_ys, objects): - s.rank = (yi, xi) - - objects.sort(key=lambda x: x.rank) - - colormap = [] - for s in objects: - x, y = s.rank - observed, expected = s.rgb, rgb_to_triplet(colorchecker[x][y]) - colormap.append((np.array(observed), np.array(expected))) - - # Color transfer - tr0 = np.eye(3).flatten() - print("Initial distance:", total_error(tr0, colormap), file=sys.stderr) - tr = fmin(total_error, tr0, args=(colormap,)) - tr.resize((3, 3)) - print("RGB linear transform:\n", tr, file=sys.stderr) - calib = {"PixelCMratio": pixel_cm_ratio, "RGBtransform": tr.tolist()} - - jsonfile = op.join(folder, "calibrate.json") - fw = must_open(jsonfile, "w") - print(json.dumps(calib, indent=4), file=fw) - fw.close() - logger.debug("Calibration specs written to `%s`.", jsonfile) - - return jsonfile - - -def add_seeds_options(p, args): - """ - Add options to the OptionParser for seeds() and batchseeds() functions. - """ - g1 = p.add_argument_group("Image manipulation") - g1.add_argument("--rotate", default=0, type=int, help="Rotate degrees clockwise") - g1.add_argument( - "--rows", default=":", help="Crop rows e.g. `:800` from first 800 rows" - ) - g1.add_argument( - "--cols", default=":", help="Crop cols e.g. `-800:` from last 800 cols" - ) - g1.add_argument("--labelrows", help="Label rows e.g. `:800` from first 800 rows") - g1.add_argument("--labelcols", help="Label cols e.g. 
`-800: from last 800 rows") - valid_colors = ("red", "green", "blue", "purple", "yellow", "orange", "INVERSE") - g1.add_argument( - "--changeBackground", - default=0, - choices=valid_colors, - help="Changes background color", - ) - - g2 = p.add_argument_group("Object recognition") - g2.add_argument( - "--minsize", - default=0.2, - type=float, - help="Min percentage of object to image", - ) - g2.add_argument( - "--maxsize", default=20, type=float, help="Max percentage of object to image" - ) - g2.add_argument( - "--count", default=100, type=int, help="Report max number of objects" - ) - g2.add_argument( - "--watershed", - default=False, - action="store_true", - help="Run watershed to segment touching objects", - ) - - g3 = p.add_argument_group("De-noise") - valid_filters = ("canny", "otsu", "roberts", "sam", "sobel") - g3.add_argument( - "--filter", - default="canny", - choices=valid_filters, - help="Edge detection algorithm", - ) - g3.add_argument( - "--sigma", - default=1, - type=int, - help="Canny edge detection sigma, higher for noisy image", - ) - g3.add_argument( - "--kernel", - default=2, - type=int, - help="Edge closure, higher if the object edges are dull", - ) - g3.add_argument( - "--border", default=5, type=int, help="Remove image border of certain pixels" - ) - g3.add_argument( - "--sam-checkpoint", default="sam_vit_h_4b8939.pth", help="SAM checkpoint file" - ) - - g4 = p.add_argument_group("Output") - g4.add_argument("--calibrate", help="JSON file to correct distance and color") - g4.add_argument( - "--edges", - default=False, - action="store_true", - help="Visualize edges in middle PDF panel", - ) - g4.add_argument( - "--outdir", default=".", help="Store intermediate images and PDF in folder" - ) - g4.add_argument("--prefix", help="Output prefix") - g4.add_argument( - "--noheader", default=False, action="store_true", help="Do not print header" - ) - opts, args, iopts = p.set_image_options(args, figsize="12x6", style="white") - - return opts, args, 
iopts - - -def batchseeds(args): - """ - %prog batchseeds folder - - Extract seed metrics for each image in a directory. - """ - xargs = args[1:] - p = OptionParser(batchseeds.__doc__) - opts, args, _ = add_seeds_options(p, args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (folder,) = args - folder = folder.rstrip("/") - outdir = folder + "-debug" - outfile = folder + "-output.tsv" - assert op.isdir(folder) - images = [] - jsonfile = opts.calibrate or op.join(folder, "calibrate.json") - if not op.exists(jsonfile): - jsonfile = None - for im in iglob(folder, "*.jpg,*.JPG,*.png"): - if im.endswith((".resize.jpg", ".main.jpg", ".label.jpg")): - continue - if op.basename(im).startswith("calibrate"): - continue - images.append(im) - - fw = must_open(outfile, "w") - print(Seed.header(calibrated=bool(jsonfile)), file=fw) - nseeds = 0 - for im in images: - imargs = [im, "--noheader", f"--outdir={outdir}"] + xargs - if jsonfile: - imargs += [f"--calibrate={jsonfile}"] - objects = seeds(imargs) - for o in objects: - print(o, file=fw) - nseeds += len(objects) - fw.close() - logger.debug("Processed %d images.", len(images)) - logger.debug("A total of %d objects written to `%s`.", nseeds, outfile) - - pdfs = iglob(outdir, "*.pdf") - outpdf = folder + "-output.pdf" - cat(pdfs + [f"--outfile={outpdf}"]) - - logger.debug("Debugging information written to `%s`.", outpdf) - return outfile - - -def p_round(n: int, precision: int = 5) -> int: - """ - Round to the nearest precision. - """ - precision = int(precision) - return int(round(n / float(precision))) * precision - - -def pixel_stats(img: List[RGBTuple]) -> RGBTuple: - """ - Get the most common pixel color. - """ - img = [(p_round(r), p_round(g), p_round(b)) for r, g, b in img] - c = Counter(img) - imgx, _ = c.most_common(1)[0] - return imgx - - -def slice_to_ints(s: str, m: int) -> Tuple[int, int]: - """ - Parse slice string. 
- """ - assert ":" in s - ra, rb = s.split(":") - ra = 0 if ra == "" else int(ra) - rb = m if rb == "" else int(rb) - return ra, rb - - -def convert_background(pngfile: str, new_background: str): - """ - Replace the background color with the specified background color, default is - blue. - """ - if new_background: - _name, _ext = op.splitext(op.basename(pngfile)) - _name += "_bgxform" - newfile = op.join(op.dirname(pngfile), _name + _ext) - - img = iopen(pngfile) - pixels = list(img.getdata()) - - # Get Standard Deviation of RGB - rgb_array = [] - for x in range(255): - rgb_array.append(x) - std_rgb = np.std(rgb_array) * 0.8 - - # Get average color - obcolor = [0, 0, 0] - pixel_values = [] - for t in range(3): - pixel_color = img.getdata(band=t) - for pixel in pixel_color: - if pixel > std_rgb: - pixel_values.append(pixel) - obcolor[t] = sum(pixel_values) // len(pixel_values) - - # Get background color using average color and standard deviation - for t in range(3): - pixel_color = img.getdata(band=t) - seed_pixel_values = [] - for i in pixel_color: - if obcolor[t] - std_rgb < i < obcolor[t] + std_rgb: - seed_pixel_values.append(i) - obcolor[t] = sum(seed_pixel_values) // len(seed_pixel_values) - # Selection of colors based on option parser - nbcolor = [0, 0, 0] - if new_background == "INVERSE": - for t in range(3): - nbcolor[t] = 255 - obcolor[t] - elif new_background == "red": - nbcolor = [255, 0, 0] - - elif new_background == "green": - nbcolor = [0, 255, 0] - - elif new_background == "blue": - nbcolor = [0, 0, 255] - - elif new_background == "yellow": - nbcolor = [255, 255, 0] - - elif new_background == "purple": - nbcolor = [255, 0, 255] - - elif new_background == "orange": - nbcolor = [255, 165, 0] - - # Change Background Color - obcolor = tuple(obcolor) - nbcolor = tuple(nbcolor) - for idx, pixel in enumerate(pixels): - if all(o - std_rgb <= p <= o + std_rgb for o, p in zip(obcolor, pixel)): - pixels[idx] = nbcolor - img.putdata(pixels) - img.save(newfile, 
"PNG") - return newfile - return pngfile - - -def convert_image( - pngfile: str, - pf: str, - outdir: str = ".", - resize: int = 1000, - img_format: str = "jpeg", - rotate: int = 0, - rows: str = ":", - cols: str = ":", - labelrows: Optional[str] = None, - labelcols: Optional[str] = None, -) -> Tuple[str, str, Optional[str], dict]: - """ - Convert image to JPEG format and resize it. - """ - resizefile = op.join(outdir, pf + ".resize.jpg") - mainfile = op.join(outdir, pf + ".main.jpg") - labelfile = op.join(outdir, pf + ".label.jpg") - img = Image(filename=pngfile) - exif = dict((k, img.metadata[k]) for k in img.metadata if k.startswith("exif:")) - - # Rotation, slicing and cropping of main image - if rotate: - img.rotate(rotate) - if resize: - w, h = img.size - if min(w, h) > resize: - if w < h: - nw, nh = resize, resize * h // w - else: - nw, nh = resize * w // h, resize - img.resize(nw, nh) - logger.debug( - "Image `%s` resized from %dpx:%dpx to %dpx:%dpx", pngfile, w, h, nw, nh - ) - img.format = img_format - img.save(filename=resizefile) - - rimg = img.clone() - if rows != ":" or cols != ":": - w, h = img.size - ra, rb = slice_to_ints(rows, h) - ca, cb = slice_to_ints(cols, w) - # left, top, right, bottom - logger.debug("Crop image to %d:%d %d:%d", ra, rb, ca, cb) - img.crop(ca, ra, cb, rb) - img.format = img_format - img.save(filename=mainfile) - else: - mainfile = resizefile - - # Extract text labels from image - if labelrows or labelcols: - w, h = rimg.size - if labelrows and not labelcols: - labelcols = ":" - if labelcols and not labelrows: - labelrows = ":" - ra, rb = slice_to_ints(labelrows, h) - ca, cb = slice_to_ints(labelcols, w) - logger.debug("Extract label from %d:%d %d:%d", ra, rb, ca, cb) - rimg.crop(ca, ra, cb, rb) - rimg.format = img_format - rimg.save(filename=labelfile) - else: - labelfile = None - - return resizefile, mainfile, labelfile, exif - - -def extract_label(labelfile: str) -> str: - """ - Extract accession number from label image. 
- """ - accession = image_to_string(iopen(labelfile)) - accession = " ".join(accession.split()) # normalize spaces - accession = "".join(x for x in accession if x in string.printable) - if not accession: - accession = "none" - return accession - - -def efd_feature(contour: np.ndarray) -> np.ndarray: - """ - To use EFD as features, one can write a small wrapper function. - - Based on: https://pyefd.readthedocs.io/en/latest - """ - coeffs = elliptic_fourier_descriptors(contour, normalize=True) - # skip the first three coefficients, which are always 1, 0, 0 - return coeffs.flatten()[3:] - - -def seeds(args): - """ - %prog seeds [pngfile|jpgfile] - - Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image. - """ - p = OptionParser(seeds.__doc__) - p.set_outfile() - opts, args, iopts = add_seeds_options(p, args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (pngfile,) = args - pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0] - sigma, kernel = opts.sigma, opts.kernel - rows, cols = opts.rows, opts.cols - labelrows, labelcols = opts.labelrows, opts.labelcols - ff = opts.filter - calib = opts.calibrate - outdir = opts.outdir - if outdir and outdir != ".": - mkdir(outdir) - if calib: - calib = json.load(must_open(calib)) - pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"] - tr = np.array(tr) - nbcolor = opts.changeBackground - pngfile = convert_background(pngfile, nbcolor) - resizefile, mainfile, labelfile, exif = convert_image( - pngfile, - pf, - outdir=outdir, - rotate=opts.rotate, - rows=rows, - cols=cols, - labelrows=labelrows, - labelcols=labelcols, - ) - oimg = load_image(resizefile) - img = load_image(mainfile) - - _, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, nrows=1, figsize=(iopts.w, iopts.h)) - # Edge detection - img_gray = rgb2gray(img) - w, h = img_gray.shape - canvas_size = w * h - min_size = int(round(canvas_size * opts.minsize / 100)) - max_size = int(round(canvas_size * opts.maxsize / 100)) - 
- logger.debug("Running %s edge detection …", ff) - if ff == "canny": - edges = canny(img_gray, sigma=opts.sigma) - elif ff == "otsu": - thresh = threshold_otsu(img_gray) - edges = img_gray > thresh - elif ff == "roberts": - edges = roberts(img_gray) - elif ff == "sobel": - edges = sobel(img_gray) - if ff == "sam": - masks = sam(img, opts.sam_checkpoint) - filtered_masks = [ - mask for mask in masks if min_size <= mask["area"] <= max_size - ] - deduplicated_masks = deduplicate_masks(filtered_masks) - logger.info( - "SAM: %d (raw) → %d (size filtered) → %d (deduplicated)", - len(masks), - len(filtered_masks), - len(deduplicated_masks), - ) - labels = np.zeros(img_gray.shape, dtype=int) - for i, mask in enumerate(deduplicated_masks): - labels[mask["segmentation"]] = i + 1 - labels = clear_border(labels) - else: - edges = clear_border(edges, buffer_size=opts.border) - selem = disk(kernel) - closed = closing(edges, selem) if kernel else edges - filled = binary_fill_holes(closed) - - # Watershed algorithm - if opts.watershed: - distance = distance_transform_edt(filled) - local_maxi = peak_local_max(distance, threshold_rel=0.05, indices=False) - coordinates = peak_local_max(distance, threshold_rel=0.05) - markers, nmarkers = label(local_maxi, return_num=True) - logger.debug("Identified %d watershed markers", nmarkers) - labels = watershed(closed, markers, mask=filled) - else: - labels = label(filled) - - # Object size filtering - logger.debug( - "Find objects with pixels between %d (%.2f%%) and %d (%d%%)", - min_size, - opts.minsize, - max_size, - opts.maxsize, - ) - - # Plotting - ax1.set_title("Original picture") - ax1.imshow(oimg) - - params = rf"{ff}, $\sigma$={sigma}, $k$={kernel}" - if opts.watershed: - params += ", watershed" - ax2.set_title(f"Edge detection\n({params})") - if ff != "sam": - closed = gray2rgb(closed) - ax2_img = labels - if opts.edges: - ax2_img = closed - elif opts.watershed: - ax2.plot(coordinates[:, 1], coordinates[:, 0], "g.") - 
ax2.imshow(ax2_img, cmap=iopts.cmap) - - ax3.set_title("Object detection") - ax3.imshow(img) - - filename = op.basename(pngfile) - if labelfile: - accession = extract_label(labelfile) - else: - accession = pf - - # Calculate region properties - rp = regionprops(labels) - rp = [x for x in rp if min_size <= x.area <= max_size] - rp.sort(key=lambda x: x.area, reverse=True) - nb_labels = len(rp) - logger.debug("A total of %d objects identified.", nb_labels) - objects = [] - for i, props in enumerate(rp): - i += 1 - if i > opts.count: - break - - contour = find_contours(labels == props.label, 0.5)[0] - efds = efd_feature(contour) - y0, x0 = props.centroid - orientation = props.orientation - major, minor = props.major_axis_length, props.minor_axis_length - major_dx = sin(orientation) * major / 2 - major_dy = cos(orientation) * major / 2 - minor_dx = cos(orientation) * minor / 2 - minor_dy = -sin(orientation) * minor / 2 - ax2.plot((x0 - major_dx, x0 + major_dx), (y0 - major_dy, y0 + major_dy), "r-") - ax2.plot((x0 - minor_dx, x0 + minor_dx), (y0 - minor_dy, y0 + minor_dy), "r-") - ax2.plot(contour[:, 1], contour[:, 0], "y-") - - npixels = int(props.area) - # Sample the center of the blob for color - d = min(int(round(minor / 2 * 0.35)) + 1, 50) - x0d, y0d = int(round(x0)), int(round(y0)) - square = img[(y0d - d) : (y0d + d), (x0d - d) : (x0d + d)] - pixels = [] - for row in square: - pixels.extend(row) - logger.debug( - "Seed #%d: %d pixels (%d sampled) - %.2f%%", - i, - npixels, - len(pixels), - 100.0 * npixels / canvas_size, - ) - - rgb = pixel_stats(pixels) - objects.append(Seed(filename, accession, i, rgb, props, efds, exif)) - minr, minc, maxr, maxc = props.bbox - rect = Rectangle( - (minc, minr), maxc - minc, maxr - minr, fill=False, ec="w", lw=1 - ) - ax3.add_patch(rect) - mc, mr = (minc + maxc) // 2, (minr + maxr) // 2 - ax3.text(mc, mr, f"{i}", color="w", ha="center", va="center", size=6) - - for ax in (ax2, ax3): - ax.set_xlim(0, h) - ax.set_ylim(w, 0) - - # 
Output identified seed stats - ax4.text(0.1, 0.92, f"File: {latex(filename)}", color="g") - ax4.text(0.1, 0.86, f"Label: {latex(accession)}", color="m") - yy = 0.8 - fw = must_open(opts.outfile, "w") - if not opts.noheader: - print(Seed.header(calibrated=calib), file=fw) - for o in objects: - if calib: - o.calibrate(pixel_cm_ratio, tr) - print(o, file=fw) - i = o.seedno - if i > 7: - continue - ax4.text(0.01, yy, str(i), va="center", bbox=dict(fc="none", ec="k")) - ax4.text(0.1, yy, o.pixeltag, va="center") - yy -= 0.04 - ax4.add_patch( - Rectangle((0.1, yy - 0.025), 0.12, 0.05, lw=0, fc=rgb_to_hex(o.rgb)) - ) - ax4.text(0.27, yy, o.hashtag, va="center") - yy -= 0.06 - ax4.text( - 0.1, - yy, - f"(A total of {nb_labels} objects displayed)", - color="darkslategray", - ) - normalize_axes(ax4) - - for ax in (ax1, ax2, ax3): - set_helvetica_axis(ax) - - image_name = op.join(outdir, pf + "." + iopts.format) - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - return objects - - -if __name__ == "__main__": - main() diff --git a/jcvi/graphics/heatmap.py b/jcvi/graphics/heatmap.py deleted file mode 100644 index f7fdb84c..00000000 --- a/jcvi/graphics/heatmap.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog csvfile - -Draw heatmap based on the data in the csv file. In a microarray setting, the -rows represent genes, and columns represent conditions. Some conditions can be -grouped which the script expect to see on the first row when --groups is on:: - -,WT+BL,,,,irx8+BL,,,,OE+BL,,,,WT,,,,irx8,,,,OE,,, -, Day 0,Day 3,Day 6,Day 9, Day 0,Day 3,Day 6,Day 9, Day 0,Day 3,Day 6,Day 9, ... -GAUT12,0.801069878,15.34822591,5.897076869,26.17286587,0,0,0,0,296.1121751, ... -MYB46,0.812252396,31.12495832,11.39240156,44.63179732,4.469148552,57.28160454, ... 
- -Option --rowgroups requires an additional file that group the genes:: - -I MYB46,GUX1 -II I14H/IRX14-L,IRX10 -III I9H/IRX9-L,IRX14 -IV IRX7,GUX2 -""" - - -import sys - -from itertools import groupby - -import numpy as np - -from ..apps.base import OptionParser - -from .base import mpl, plt, savefig - - -def parse_csv(csvfile, vmin=0, groups=False): - import csv - - reader = csv.reader(open(csvfile)) - if groups: - groups = next(reader)[1:] - # Fill in empty cells in groups - filled_groups = [] - lastg = "" - for g in groups: - g = g.strip() or lastg - filled_groups.append(g) - lastg = g - groups = filled_groups - - rows = [] - cols = next(reader)[1:] - data = [] - for row in reader: - name = row[0] - d = [max(vmin, float(x)) for x in row[1:]] - rows.append(name) - data.append(d) - - data = np.array(data) - - return groups, rows, cols, data - - -def main(): - p = OptionParser(__doc__) - p.add_argument( - "--groups", - default=False, - action="store_true", - help="The first row contains group info", - ) - p.add_argument("--rowgroups", help="Row groupings") - p.add_argument( - "--horizontalbar", - default=False, - action="store_true", - help="Horizontal color bar [default: vertical]", - ) - opts, args, iopts = p.set_image_options(figsize="8x8") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (datafile,) = args - pf = datafile.rsplit(".", 1)[0] - rowgroups = opts.rowgroups - - groups, rows, cols, data = parse_csv(datafile, vmin=1, groups=opts.groups) - cols = [x.replace("ay ", "") for x in cols] - - if rowgroups: - fp = open(rowgroups) - rgroups = [] - for row in fp: - a, b = row.split() - irows = [rows.index(x) for x in b.split(",")] - rgroups.append((a, min(irows), max(irows))) - - plt.rcParams["axes.linewidth"] = 0 - - xstart = 0.18 - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - ax = fig.add_axes([xstart, 0.15, 0.7, 0.7]) - - im = ax.matshow(data, cmap=iopts.cmap, norm=mpl.colors.LogNorm(vmin=1, vmax=10000)) - nrows, 
ncols = len(rows), len(cols) - - xinterval = 0.7 / ncols - yinterval = 0.7 / max(nrows, ncols) - - plt.xticks(range(ncols), cols, rotation=45, size=10, ha="center") - plt.yticks(range(nrows), rows, size=10) - - for x in ax.get_xticklines() + ax.get_yticklines(): - x.set_visible(False) - - ax.set_xlim(-0.5, ncols - 0.5) - - t = [1, 10, 100, 1000, 10000] - pad = 0.06 - if opts.horizontalbar: - ypos = 0.5 * (1 - nrows * yinterval) - pad - axcolor = fig.add_axes([0.3, ypos, 0.4, 0.02]) - orientation = "horizontal" - else: - axcolor = fig.add_axes([0.9, 0.3, 0.02, 0.4]) - orientation = "vertical" - fig.colorbar(im, cax=axcolor, ticks=t, orientation=orientation) - - if groups: - groups = [(key, len(list(nn))) for key, nn in groupby(groups)] - yy = 0.5 + 0.5 * nrows / ncols * 0.7 + 0.06 - e = 0.005 - sep = -0.5 - - for k, kl in groups: - # Separator in the array area - sep += kl - ax.plot([sep, sep], [-0.5, nrows - 0.5], "w-", lw=2) - # Group labels on the top - kl *= xinterval - root.plot([xstart + e, xstart + kl - e], [yy, yy], "-", color="gray", lw=2) - root.text(xstart + 0.5 * kl, yy + e, k, ha="center", color="gray") - xstart += kl - - if rowgroups: - from jcvi.graphics.glyph import TextCircle - - xpos = 0.04 - tip = 0.015 - assert rgroups - ystart = 1 - 0.5 * (1 - nrows * yinterval) - for gname, start, end in rgroups: - start = ystart - start * yinterval - end = ystart - (end + 1) * yinterval - start -= tip / 3 - end += tip / 3 - - # Bracket the groups - root.plot((xpos, xpos + tip), (start, start), "k-", lw=2) - root.plot((xpos, xpos), (start, end), "k-", lw=2) - root.plot((xpos, xpos + tip), (end, end), "k-", lw=2) - TextCircle(root, xpos, 0.5 * (start + end), gname) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - image_name = pf + "." + opts.cmap + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -if __name__ == "__main__": - main() diff --git a/jcvi/graphics/histogram.py b/jcvi/graphics/histogram.py deleted file mode 100644 index 65ad7a25..00000000 --- a/jcvi/graphics/histogram.py +++ /dev/null @@ -1,387 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Use R ggplot2 library to plot histogram, also contains an ASCII histogram (use ---text) when invoking histogram(). -""" -import os.path as op -import sys - -from math import log, ceil -from collections import defaultdict - -import numpy as np - -from ..apps.base import OptionParser, logger -from ..apps.r import RTemplate -from ..formats.base import DictFile - -from .base import asciiplot, quickplot - - -histogram_header = """ -library(ggplot2) -vmin <- $vmin -vmax <- $vmax -data <- read.table('$numberfile', skip=$skip) -data <- data[data >= vmin] -data <- data[data <= vmax] -data <- data.frame($xlabel=data) -m <- ggplot(data, aes(x=$xlabel)) + - theme(plot.title=element_text(size=11, colour="darkblue")) -""" - -histogram_template = ( - histogram_header - + """ -m + geom_histogram(colour="darkgreen", fill="$fill", binwidth=(vmax-vmin)/$bins) + -labs(title='$title') -ggsave('$outfile') -""" -) - -histogram_log_template = ( - histogram_header - + """ -library(scales) -m + geom_histogram(colour="darkgreen", fill="$fill", binwidth=0.33) + -labs(title='$title') + -scale_x_continuous(trans=log${base}_trans()) -ggsave('$outfile') -""" -) - -histogram_multiple_template = """ -library(ggplot2) -vmin <- $vmin -vmax <- $vmax -data <- read.table('$numberfile', header=T, sep="\t", skip=$skip) -""" - -histogram_multiple_template_a = ( - histogram_multiple_template - + """ -m <- ggplot(data, aes(x=$xlabel, fill=grp)) -m + geom_bar(binwidth=(vmax-vmin)/$bins, position="dodge") + -xlim(vmin, vmax) + -labs(title='$title') + -ggsave('$outfile') -""" -) - -histogram_multiple_template_b = ( - histogram_multiple_template - + """ -m <- ggplot(data, 
aes(x=$xlabel)) -m + geom_histogram(colour="darkgreen", fill="$fill", binwidth=(vmax-vmin)/$bins) + -xlim(vmin, vmax) + -labs(title='$title') + -facet_wrap(~grp) -ggsave('$outfile') -""" -) - - -def loghistogram(data, base=2, title="Counts", summary=False): - """ - bins is a dictionary with key: log(x, base), value: counts. - """ - from jcvi.utils.cbook import percentage - - if summary: - unique = len(data) - total = sum(data) - - # Print out a distribution - print("Unique: {0}".format(percentage(unique, total)), file=sys.stderr) - - bins = defaultdict(int) - for d in data: - logd = int(log(d, base)) - bins[logd] += 1 - - x, y = [], [] - for size, number in sorted(bins.items()): - lb, ub = base**size, base ** (size + 1) - x.append((lb, ub)) - y.append(number) - - asciiplot(x, y, title=title) - - -def get_data(filename, vmin=None, vmax=None, skip=0, col=0): - from jcvi.utils.cbook import SummaryStats - - fp = open(filename) - # Determine the data type - for s in range(skip): - next(fp) - for row in fp: - ntype = float if "." 
in row else int - break - - fp = open(filename) - for s in range(skip): - next(fp) - - data = np.array([ntype(x.split()[col]) for x in fp]) - s = SummaryStats(data, title=filename) - print(s, file=sys.stderr) - - vmin = min(data) if vmin is None else vmin - vmax = max(data) if vmax is None else vmax - data = data[(data >= vmin) & (data <= vmax)] - - return data, vmin, vmax - - -def stem_leaf_plot(data, vmin, vmax, bins, digit=1, title=None): - """ - Generate stem and leaf plot given a collection of numbers - """ - assert bins > 0 - range = vmax - vmin - step = range * 1.0 / bins - if isinstance(range, int): - step = int(ceil(step)) - - step = step or 1 - - bins = np.arange(vmin, vmax + step, step) - hist, bin_edges = np.histogram(data, bins=bins) - # By default, len(bin_edges) = len(hist) + 1 - bin_edges = bin_edges[: len(hist)] - asciiplot(bin_edges, hist, digit=digit, title=title) - print("Last bin ends in {0}, inclusive.".format(vmax), file=sys.stderr) - - return bin_edges, hist - - -def texthistogram(numberfiles, vmin, vmax, title=None, bins=20, skip=0, col=0, base=0): - - for nf in numberfiles: - logger.debug("Import `%s`.", nf) - data, vmin, vmax = get_data(nf, vmin, vmax, skip=skip, col=col) - if base: - loghistogram(data, base=base, title=title) - else: - stem_leaf_plot(data, vmin, vmax, bins, title=title) - - -def histogram( - numberfile, - vmin, - vmax, - xlabel, - title, - outfmt="pdf", - bins=50, - skip=0, - col=0, - ascii=False, - base=0, - fill="white", -): - """ - Generate histogram using number from numberfile, and only numbers in the - range of (vmin, vmax) - """ - if ascii: - return texthistogram( - [numberfile], - vmin, - vmax, - title=title, - bins=bins, - skip=skip, - col=col, - base=base, - ) - - data, vmin, vmax = get_data(numberfile, vmin, vmax, skip=skip, col=col) - outfile = ( - numberfile + ".base{0}.{1}".format(base, outfmt) - if base - else numberfile + ".pdf" - ) - template = histogram_log_template if base else histogram_template - 
rtemplate = RTemplate(template, locals()) - rtemplate.run() - - -def histogram_multiple( - numberfiles, - vmin, - vmax, - xlabel, - title, - outfmt="pdf", - tags=None, - bins=20, - skip=0, - ascii=False, - facet=False, - fill="white", - prefix="", -): - """ - Generate histogram using number from numberfile, and only numbers in the - range of (vmin, vmax). First combining multiple files. - """ - if ascii: - return texthistogram(numberfiles, vmin, vmax, title=title, bins=bins, skip=skip) - - newfile = "_".join(op.basename(x).split(".")[0] for x in numberfiles) - - fw = open(newfile, "w") - print("{0}\tgrp".format(xlabel), file=fw) - - if tags: - tags = tags.split(",") - - for i, f in enumerate(numberfiles): - data, va, vb = get_data(f, vmin, vmax, skip=skip) - vmin = min(vmin, va) - vmax = max(vmax, vb) - - fp = open(f) - if tags: - tag = tags[i] - else: - tag = op.basename(f).rsplit(".", 1)[0] - for row in fp: - val = row.strip() - print("\t".join((val, tag)), file=fw) - fw.close() - - numberfile = newfile - outfile = numberfile + "." + outfmt - if prefix: - outfile = prefix + outfile - htemplate = ( - histogram_multiple_template_b if facet else histogram_multiple_template_a - ) - rtemplate = RTemplate(htemplate, locals()) - rtemplate.run() - - -def main(): - """ - %prog numbers1.txt number2.txt ... - - Print histogram of the data files. The data files contain one number per - line. If more than one file is inputted, the program will combine the - histograms into the same plot. 
- """ - allowed_format = ("emf", "eps", "pdf", "png", "ps", "raw", "rgba", "svg", "svgz") - p = OptionParser(main.__doc__) - p.add_argument("--skip", default=0, type=int, help="skip the first several lines") - p.add_argument("--col", default=0, type=int, help="Get the n-th column") - p.set_histogram() - p.add_argument( - "--tags", - dest="tags", - default=None, - help="tags for data if multiple input files, comma sep", - ) - p.add_argument( - "--ascii", - default=False, - action="store_true", - help="print ASCII text stem-leaf plot", - ) - p.add_argument( - "--base", - default="0", - choices=("0", "2", "10"), - help="use logarithm axis with base, 0 to disable", - ) - p.add_argument( - "--facet", - default=False, - action="store_true", - help="place multiple histograms side-by-side", - ) - p.add_argument("--fill", default="white", help="color of the bin") - p.add_argument( - "--format", - default="pdf", - choices=allowed_format, - help="Generate image of format", - ) - p.add_argument( - "--quick", - default=False, - action="store_true", - help="Use quick plot, assuming bins are already counted", - ) - p.add_argument( - "--noprintstats", - default=False, - action="store_true", - help="Write basic stats when using --quick", - ) - opts, args = p.parse_args() - - if len(args) < 1: - sys.exit(not p.print_help()) - - skip = opts.skip - vmin, vmax = opts.vmin, opts.vmax - bins = opts.bins - xlabel, title = opts.xlabel, opts.title - title = title or args[0] - base = int(opts.base) - fileno = len(args) - - if opts.quick: - assert fileno == 1, "Single input file expected using --quick" - filename = args[0] - figname = filename.rsplit(".", 1)[0] + ".pdf" - data = DictFile(filename, keycast=int, cast=int) - quickplot( - data, - vmin, - vmax, - xlabel, - title, - figname=figname, - print_stats=(not opts.noprintstats), - ) - return - - if fileno == 1: - histogram( - args[0], - vmin, - vmax, - xlabel, - title, - outfmt=opts.format, - bins=bins, - skip=skip, - ascii=opts.ascii, - 
base=base, - fill=opts.fill, - col=opts.col, - ) - else: - histogram_multiple( - args, - vmin, - vmax, - xlabel, - title, - outfmt=opts.format, - tags=opts.tags, - bins=bins, - skip=skip, - ascii=opts.ascii, - facet=opts.facet, - fill=opts.fill, - ) - - -if __name__ == "__main__": - main() diff --git a/jcvi/graphics/karyotype.py b/jcvi/graphics/karyotype.py deleted file mode 100644 index 723628b7..00000000 --- a/jcvi/graphics/karyotype.py +++ /dev/null @@ -1,476 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog seqids layout - -Illustrate macrosynteny between tracks which represent individual genomes. - -seqids contain the chromosomes to plot. Each line correspond to a track. -layout provides configuration for placement of tracks and mapping file between tracks. - -Layout file example - first section specify how to draw each track. Then the "edges" -section specify which connections to draw. - -# y, xstart, xend, rotation, color, label, va, bed, label_va -.6, .1, .4, 0, m, Grape, top, grape.bed, center -.4, .3, .6, 60, k, Athaliana, top, athaliana.bed, center -# edges -e, 0, 1, athaliana.grape.4x1.simple -""" - - -import sys - -from typing import List, Optional - -from ..apps.base import OptionParser, logger -from ..compara.synteny import SimpleFile -from ..formats.bed import Bed - -from .base import ( - AbstractLayout, - markup, - mpl, - normalize_axes, - plt, - savefig, - update_figname, -) -from .chromosome import Chromosome, HorizontalChromosome -from .glyph import TextCircle -from .synteny import Shade, ymid_offset - - -class LayoutLine(object): - def __init__(self, row, delimiter=",", generank=True): - args = row.rstrip().split(delimiter) - args = [x.strip() for x in args] - - self.empty = False - if len(args) < 8: - self.empty = True - return - self.y = float(args[0]) - self.xstart = float(args[1]) - self.xend = float(args[2]) - self.rotation = int(args[3]) - self.color = args[4] - self.label = args[5] - self.va = args[6] - self.bed = 
Bed(args[7]) - if len(args) == 9: - self.label_va = args[8] - else: - self.label_va = "center" - self.order = self.bed.order - self.order_in_chr = self.bed.order_in_chr if generank else self.bed.bp_in_chr - - -class Layout(AbstractLayout): - def __init__( - self, filename, delimiter=",", generank=False, seed: Optional[int] = None - ): - super().__init__(filename) - fp = open(filename) - self.edges = [] - for row in fp: - if row[0] == "#": - continue - if row[0] == "e": - args = row.rstrip().split(delimiter) - args = [x.strip() for x in args] - i, j, fn = args[1:4] - if len(args) == 5 and args[4]: - samearc = args[4] - else: - samearc = None - i, j = int(i), int(j) - assert args[0] == "e" - blocks = self.parse_blocks(fn, i) - self.edges.append((i, j, blocks, samearc)) - else: - self.append(LayoutLine(row, delimiter=delimiter, generank=generank)) - - self.assign_colors(seed=seed) - - def parse_blocks(self, simplefile, i): - order = self[i].order - return SimpleFile(simplefile, order=order).blocks - - -MaxSeqids = 16 # above which no labels are written - - -def make_circle_name(sid, rev): - """Extract a succinct label based on sid. - - If there are numbers to be extracted, returns the first group of number. - Otherwise, the first letter is returned. - - If sid is in rev, then '-' gets appended to the label. 
- - Args: - sid (str): seqid - rev (set[str]): Set of seqids that are reversed - - Returns: - str: Single letter label for the sid - """ - import re - - in_reverse = sid in rev - sid = sid.rsplit("_", 1)[-1] - sid = sid.replace("chr", "").replace("Chr", "") - si = re.findall(r"\d+", sid) - if si: - si = str(int(si[0])) - else: - si = sid[0].upper() - if in_reverse: - si += "-" - return si - - -class Track(object): - def __init__( - self, - ax, - t, - gap=0.01, - height=0.01, - lw=1, - draw=True, - chrstyle="auto", - ): - self.empty = t.empty - if t.empty: - return - - # Copy the data from LayoutLine - self.y = t.y - self.sizes = sizes = t.sizes - self.label = t.label - self.rotation = t.rotation - self.va = t.va - self.label_va = t.label_va - self.color = t.color if t.color != "None" else None - self.seqids = t.seqids - self.bed = t.bed - self.order = t.order - self.order_in_chr = t.order_in_chr - self.rev = t.rev - self.ax = ax - self.height = height - - self.xstart = xstart = t.xstart - self.xend = t.xend - - # Rotation transform - self.x = x = (self.xstart + self.xend) / 2 - y = self.y - self.tr = ( - mpl.transforms.Affine2D().rotate_deg_around(x, y, self.rotation) - + ax.transAxes - ) - self.inv = ax.transAxes.inverted() - - nseqids = len(self.seqids) - if nseqids > MaxSeqids: - gap = min(gap, gap * MaxSeqids / nseqids + 0.001) - self.gap = gap - - rpad = 1 - t.xend - span = 1 - xstart - rpad - gap * (len(sizes) - 1) - self.total = total = sum(sizes.values()) - ratio = span / total - - self.ratio = ratio - self.update_offsets() - self.lw = lw - - if draw: - self.draw(chrstyle=chrstyle) - - def __str__(self): - return self.label - - def draw( - self, - chrstyle="auto", - keep_chrlabels=False, - plot_label=True, - plot_circles=True, - pad=0.03, - vpad=0.09, - ): - if self.empty: - return - - y = self.y - color = self.color - ax = self.ax - xstart = self.xstart - gap = self.gap - va = self.va - nseqids = len(self.seqids) - tr = self.tr - - for i, sid in 
enumerate(self.seqids): - size = self.sizes[sid] - rsize = self.ratio * size - xend = xstart + rsize - hc = HorizontalChromosome( - ax, - xstart, - xend, - y, - height=self.height, - lw=self.lw, - fc=color, - style=chrstyle, - ) - hc.set_transform(tr) - si = sid if keep_chrlabels else make_circle_name(sid, self.rev) - xx = (xstart + xend) / 2 - xstart = xend + gap - - step = 2 if nseqids <= 40 else 10 - if nseqids >= 2 * MaxSeqids and (i + 1) % step != 0: - continue - if nseqids < 5: - continue - - hpad = -pad if va == "bottom" else pad - if plot_circles: - TextCircle( - ax, - xx, - y + hpad, - si, - fc="w", - color=color, - size=10, - transform=tr, - ) - - label = markup(self.label) - c = color if color != "gainsboro" else "k" - if plot_label: - if self.label_va == "top": - x, y = self.x, self.y + vpad - elif self.label_va == "bottom": - x, y = self.x, self.y - vpad - else: # "center" - x, y = self.xstart - vpad / 2, self.y - ax.text(x, y, label, ha="center", va="center", color=c, transform=tr) - - def update_offsets(self): - self.offsets = {} - xs = self.xstart - gap = self.gap - for sid in self.seqids: - size = self.sizes[sid] - self.offsets[sid] = xs - xs += self.ratio * size + gap - - def get_coords(self, gene): - order_in_chr = self.order_in_chr - seqid, i, _ = order_in_chr[gene] - if seqid not in self.offsets: - return [None, None] - - x = self.offsets[seqid] - if seqid in self.rev: - x += self.ratio * (self.sizes[seqid] - i - 1) - else: - x += self.ratio * i - y = self.y - x, y = self.tr.transform((x, y)) - x, y = self.inv.transform((x, y)) - - return [x, y] - - -class ShadeManager(object): - def __init__(self, ax, tracks, layout, heightpad=0, style="curve"): - self.style = style - for i, j, blocks, samearc in layout.edges: - # if same track (duplication shades), shall we draw above or below? 
- self.draw_blocks( - ax, blocks, tracks[i], tracks[j], samearc=samearc, heightpad=heightpad - ) - - def draw_blocks( - self, ax, blocks, atrack, btrack, samearc: Optional[str], heightpad=0 - ): - for a, b, c, d, _, _, highlight in blocks: - p = atrack.get_coords(a), atrack.get_coords(b) - q = btrack.get_coords(c), btrack.get_coords(d) - if p[0] is None or q[0] is None: - continue - - ymid_pad = ymid_offset(samearc) - if heightpad: - if atrack.y < btrack.y: - p[0][1] = p[1][1] = atrack.y + heightpad - q[0][1] = q[1][1] = btrack.y - heightpad - else: - p[0][1] = p[1][1] = atrack.y - heightpad - q[0][1] = q[1][1] = btrack.y + heightpad - - zorder = 2 if highlight else 1 - lw = 1 if highlight else 0 - Shade( - ax, - p, - q, - ymid_pad, - highlight=highlight, - alpha=1, - fc="gainsboro", - ec="gainsboro", - lw=lw, - zorder=zorder, - style=self.style, - ) - - -class Karyotype(object): - def __init__( - self, - root, - seqidsfile, - layoutfile, - gap=0.01, - height=0.01, - lw=1, - generank=True, - sizes=None, - heightpad=0, - keep_chrlabels=False, - plot_label=True, - plot_circles=True, - shadestyle="curve", - chrstyle="auto", - seed: Optional[int] = None, - ): - layout = Layout(layoutfile, generank=generank, seed=seed) - - fp = open(seqidsfile) - # Strip the reverse orientation tag for e.g. 
chr3- - di = lambda x: x[:-1] if x[-1] == "-" else x - # Comments can cause layout and seqids to be out of sync - # https://github.com/tanghaibao/jcvi/issues/676 - for i, row in enumerate(_ for _ in fp if not _.startswith("#") and _.strip()): - logger.info("Processing `%s` (track %d)", row.strip(), i) - t = layout[i] - # There can be comments in seqids file: - # https://github.com/tanghaibao/jcvi/issues/335 - seqids = row.split("#", 1)[0].rstrip().split(",") - t.rev = set(x[:-1] for x in seqids if x[-1] == "-") - seqids = [di(x) for x in seqids] - if t.empty: - continue - - bed = t.bed - self.generank = generank - if generank: - sz = dict((x, len(list(bed.sub_bed(x)))) for x in seqids) - else: - sz = sizes or dict( - (x, max(z.end for z in list(bed.sub_bed(x)))) for x in seqids - ) - assert sz is not None, "sizes not available and cannot be inferred" - t.seqids = seqids - # validate if all seqids are non-empty - for k, v in sz.items(): - if v == 0: - logger.error("Size of `%s` is empty. 
Please check", k) - t.sizes = sz - - tracks = [] - for lo in layout: - if lo.empty: - continue - tr = Track(root, lo, gap=gap, height=height, lw=lw, draw=False) - tracks.append(tr) - - ShadeManager(root, tracks, layout, heightpad=heightpad, style=shadestyle) - - for tr in tracks: - tr.draw( - chrstyle=chrstyle, - keep_chrlabels=keep_chrlabels, - plot_label=plot_label, - plot_circles=plot_circles, - ) - - self.tracks = tracks - self.layout = layout - - -def main(args: List[str]): - p = OptionParser(__doc__) - p.add_argument( - "--basepair", - default=False, - action="store_true", - help="Use base pair position instead of gene rank", - ) - p.add_argument( - "--keep-chrlabels", - default=False, - action="store_true", - help="Keep chromosome labels", - ) - p.add_argument( - "--nocircles", - default=False, - action="store_true", - help="Do not plot chromosome circles", - ) - p.add_argument( - "--shadestyle", - default="curve", - choices=Shade.Styles, - help="Style of syntenic wedges", - ) - p.add_argument( - "--chrstyle", - default="auto", - choices=Chromosome.Styles, - help="Style of chromosome labels", - ) - p.set_outfile("karyotype.pdf") - opts, args, iopts = p.set_image_options(args, figsize="8x7") - - if len(args) != 2: - sys.exit(not p.print_help()) - - seqidsfile, layoutfile = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - Karyotype( - root, - seqidsfile, - layoutfile, - keep_chrlabels=opts.keep_chrlabels, - plot_circles=(not opts.nocircles), - shadestyle=opts.shadestyle, - chrstyle=opts.chrstyle, - generank=(not opts.basepair), - seed=iopts.seed, - ) - normalize_axes(root) - - image_name = update_figname(opts.outfile, iopts.format) - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - return image_name - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/jcvi/graphics/landscape.py b/jcvi/graphics/landscape.py deleted file mode 100644 index ad09c86d..00000000 --- a/jcvi/graphics/landscape.py +++ /dev/null @@ 
-1,1316 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Create chromosome landscape plots that are similar to the ones used in soybean -and sorghum paper. -""" - - -import os.path as op -import sys - -from collections import Counter, OrderedDict, defaultdict -from typing import Dict, List, Tuple, Optional - -import numpy as np -import seaborn as sns - -from ..algorithms.matrix import moving_sum -from ..apps.base import ActionDispatcher, OptionParser, logger -from ..formats.base import BaseFile, DictFile, LineFile, must_open -from ..formats.bed import Bed, bins, get_nbins -from ..formats.sizes import Sizes -from ..utils.cbook import autoscale, human_size, percentage - -from .base import ( - CirclePolygon, - Colormap, - Extent, - Rectangle, - adjust_extent, - adjust_spines, - human_readable_base, - latex, - markup, - normalize_axes, - plt, - savefig, - set_human_axis, - ticker, -) -from .chromosome import HorizontalChromosome - -# Colors picked from Schmutz soybean genome paper using ColorPic -palette = ["#ACABD5", "#DBF0F5", "#3EA77A", "#FBF5AB", "#C162A6"] + list("rgbymck") -gray = "#CCCCCB" -Registration = { - "Gypsy": "LTR-RT/Gypsy", - "Copia": "LTR-RT/Copia", - "hAT": "DNA-TE/hAT", - "Helitron": "DNA-TE/Helitron", - "Tourist": "DNA-TE/Tourist", - "Introns": "Genes (introns)", - "Exons": "Genes (exons)", -} - -# Consider a depth of 5 as minimum covered depth -MIN_COVERED_DEPTH = 5 - - -class BinLine: - def __init__(self, row): - args = row.split() - self.chr = args[0] - self.len = float(args[1]) - self.binlen = int(args[2]) - - def __str__(self): - return "\t".join(str(x) for x in (self.chr, self.len, self.binlen)) - - def subtract(self, o): - self.binlen -= o.len - - -class BinFile(LineFile): - def __init__(self, filename): - super().__init__(filename) - self.mapping = defaultdict(list) - - fp = open(filename, encoding="utf-8") - for row in fp: - b = BinLine(row) - self.append(b) - chr, len, binlen = b.chr, b.len, b.binlen - 
self.mapping[chr].append((len, binlen)) - fp.close() - - -class ChrInfoLine: - def __init__(self, row, delimiter=","): - args = [x.strip() for x in row.split(delimiter)] - self.name = args[0] - self.color = args[1] - if len(args) > 2: - self.new_name = args[2] - else: - self.new_name = self.name - - -class ChrInfoFile(BaseFile, OrderedDict): - def __init__(self, filename, delimiter=","): - super().__init__(filename) - with open(filename, encoding="utf-8") as fp: - for row in fp: - if row[0] == "#": - continue - line = ChrInfoLine(row, delimiter=delimiter) - self[line.name] = line - - -class TitleInfoLine: - def __init__(self, row, delimiter=","): - args = [x.strip() for x in row.split(delimiter)] - self.name = args[0] - self.title = args[1] - self.subtitle = None - if len(args) > 2: - self.subtitle = args[2] - - -class TitleInfoFile(BaseFile, OrderedDict): - def __init__(self, filename, delimiter=","): - super().__init__(filename) - with open(filename, encoding="utf-8") as fp: - for row in fp: - if row[0] == "#": - continue - line = TitleInfoLine(row, delimiter=delimiter) - self[line.name] = line - - -def main(): - - actions = ( - ("composite", "combine line plots, feature bars and alt-bars"), - ("depth", "show per chromosome depth plot across genome"), - ("heatmap", "similar to stack but adding heatmap"), - ("mosdepth", "plot depth vs. coverage per chromosome"), - ("multilineplot", "combine multiple line plots in one vertical stack"), - ("stack", "create landscape plot with genic/te composition"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def parse_distfile(filename): - """Parse mosdepth dist.txt file. The file has contents like: - - #chr start end depth (header added here for clarity) - chr01A 0 50000 31.00 - chr01A 50000 100000 36.00 - chr01A 100000 150000 280.00 - chr01A 150000 200000 190.00 - - Args: - filename (str): Path to the file. 
- """ - dists = defaultdict(Counter) - with must_open(filename) as fp: - for row in fp: - chromosome, _, _, depth = row.split() - depth = int(float(depth)) - dists[chromosome][depth] += 1 - logger.debug("Loaded %d seqids", len(dists)) - return dists - - -def parse_groupsfile(filename): - """Parse groupsfile, which contains the tracks to be plotted - in the vertically stacked mosdepth plot. - - chr01A,chr01B g,m - chr02A,chr02B g,m - chr03A,chr03B g,m - - Args: - filename (str): Path to the groups file. - """ - groups = [] - with open(filename, encoding="utf-8") as fp: - for row in fp: - chrs, colors = row.split() - groups.append((chrs.split(","), colors.split(","))) - logger.debug("Loaded %d groups", len(groups)) - return groups - - -def cumarray_to_array(ar): - """Convert cumulative array to normal array. - - Args: - ar (List): List of numbers - """ - ans = [] - for i, x in enumerate(ar): - ans.append(x if i == 0 else (ar[i] - ar[i - 1])) - return ans - - -def mosdepth(args): - """ - %prog mosdepth mosdepth.global.dist.txt groups - - Plot depth vs. coverage per chromosome. Inspired by mosdepth plot. 
See also: - https://github.com/brentp/mosdepth - """ - sns.set_style("darkgrid") - - p = OptionParser(mosdepth.__doc__) - p.add_argument("--maxdepth", default=100, type=int, help="Maximum depth to plot") - p.add_argument( - "--logscale", default=False, action="store_true", help="Use log-scale on depth" - ) - opts, args, iopts = p.set_image_options(args, style="dark", figsize="6x8") - - if len(args) != 2: - sys.exit(p.print_help()) - - # Read in datasets - distfile, groupsfile = args - dists = parse_distfile(distfile) - groups = parse_groupsfile(groupsfile) - logscale = opts.logscale - - # Construct a composite figure with N tracks indicated in the groups - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - rows = len(groups) - ypad = 0.05 - yinterval = (1 - 2 * ypad) / (rows + 1) - yy = 1 - ypad - - for group_idx, (chrs, colors) in enumerate(groups): - yy -= yinterval - ax = fig.add_axes((0.15, yy, 0.7, yinterval * 0.85)) - for c, color in zip(chrs, colors): - cdata = dists[c].items() - logger.debug("Importing %d records for %s", len(cdata), c) - cx, cy = zip(*sorted(cdata)) - ax.plot(cx, cy, "-", color=color) - if logscale: - ax.set_xscale("log", basex=2) - ax.set_xlim(1 if logscale else 0, opts.maxdepth) - ax.get_yaxis().set_visible(False) - if group_idx != rows - 1: - ax.get_xaxis().set_visible(False) - - # Add legend to the right of the canvas - label_pad = 0.02 - label_yy = yy + yinterval - for c, color in zip(chrs, colors): - label_yy -= label_pad - root.text(0.92, label_yy, c, color=color, ha="center", va="center") - - root.text( - 0.1, - 0.5, - "Proportion of bases at coverage", - rotation=90, - color="darkslategray", - ha="center", - va="center", - ) - root.text(0.5, 0.05, "Coverage", color="darkslategray", ha="center", va="center") - normalize_axes(root) - adjust_spines(ax, ["bottom"], outward=True) - - pf = "mosdepth" - image_name = pf + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def draw_depth( - root, - ax, - bed: Bed, - chrinfo: dict = {}, - defaultcolor: str = "k", - sepcolor: str = "w", - maxdepth: int = 100, - logscale: bool = False, - title: Optional[str] = None, - subtitle: Optional[str] = None, - median_line: bool = True, - draw_seqids: bool = True, - calculate_coverage: bool = False, - roi: Optional[List[Tuple[str, int]]] = None, -): - """Draw depth plot on the given axes, using data from bed - - Args: - root (matplotlib.Axes): Canvas axes - ax (matplotlib.Axes): Axes to plot data on - bed (Bed): Bed data from mosdepth - chrinfo (ChrInfoFile): seqid => color, new name - defaultcolor (str): matplotlib-compatible color for data points - sepcolor (str): matplotlib-compatible color for chromosome breaks - maxdepth (int): Upper limit of the y-axis (depth) - title (str): Title of the figure, to the right of the axis - subtitle (str): Subtitle of the figure, just below title - """ - if chrinfo is None: - chrinfo = {} - sizes = bed.max_bp_in_chr - seqids = chrinfo.keys() if chrinfo else sizes.keys() - starts = {} - ends = {} - label_positions = [] - start = 0 - end = 0 - for seqid in seqids: - if seqid not in sizes: - continue - starts[seqid] = start - end = start + sizes[seqid] - ends[seqid] = end - label_positions.append((seqid, (start + end) / 2)) - start = end - xsize = end - - # Extract plotting data - data = [] - data_by_seqid = defaultdict(list) - total_bp = 0 - covered_bp = 0 - for b in bed: - seqid = b.seqid - if seqid not in starts: - continue - # chr01A 2000000 3000000 113.00 - x = starts[seqid] + (b.start + b.end) / 2 - y = float(b.accn) - c = chrinfo[seqid].color if seqid in chrinfo else "k" - data.append((x, y, c)) - data_by_seqid[seqid].append(y) - if y >= MIN_COVERED_DEPTH: - covered_bp += b.end - b.start - total_bp += b.end - b.start - logger.debug("cov: %s", percentage(covered_bp, total_bp, precision=0)) - - x, y, c = zip(*data) - ax.scatter( - x, - y, 
- c=c, - edgecolors="none", - s=8, - lw=0, - ) - logger.debug("Obtained %d data points with depth data", len(data)) - - # Per seqid median - medians = {} - for seqid, values in data_by_seqid.items(): - c = chrinfo[seqid].color if seqid in chrinfo else defaultcolor - seqid_start = starts[seqid] - seqid_end = ends[seqid] - seqid_median = np.median(values) - medians[seqid] = seqid_median - if median_line: - ax.plot( - (seqid_start, seqid_end), - (seqid_median, seqid_median), - "-", - lw=4, - color=c, - alpha=0.5, - ) - - # Vertical lines for all the breaks - for pos in starts.values(): - ax.plot((pos, pos), (0, maxdepth), "-", lw=1, color=sepcolor) - - # Beautify the numeric axis - for tick in ax.get_xticklines() + ax.get_yticklines(): - tick.set_visible(False) - - median_depth_y = 0.88 - chr_label_y = 0.08 - rotation = 20 if len(label_positions) > 10 else 0 - for seqid, position in label_positions: - xpos = 0.1 + position * 0.8 / xsize - c = chrinfo[seqid].color if seqid in chrinfo else defaultcolor - newseqid = chrinfo[seqid].new_name if seqid in chrinfo else seqid - if draw_seqids: - root.text( - xpos, - chr_label_y, - newseqid, - color=c, - ha="center", - va="center", - rotation=rotation, - ) - seqid_median = medians[seqid] - if median_line: - root.text( - xpos, - median_depth_y, - str(int(seqid_median)), - color=c, - ha="center", - va="center", - ) - - # Plot the regions of interest - if roi: - for chrom, pos, name in roi: - if chrom not in starts: - continue - x = starts[chrom] + pos - # TODO: Remove this special case - color = {"II": "tomato", "low qual": "g"}.get(name, "gray") - ax.plot((x, x), (0, maxdepth), "-", lw=2, color=color) - - # Add an arrow to the right of the plot, indicating these are median depths - if median_line: - root.text( - 0.91, - 0.88, - r"$\leftarrow$median", - color="lightslategray", - va="center", - ) - - if title: - root.text( - 0.95, - 0.5, - markup(title), - color="darkslategray", - ha="center", - va="center", - size=15, - ) - if 
subtitle: - root.text( - 0.95, - 0.375, - markup(subtitle), - color="darkslategray", - ha="center", - va="center", - size=15, - ) - if calculate_coverage: - cov_pct = percentage(covered_bp, total_bp, precision=0, mode=None) - root.text( - 0.95, - 0.25, - latex(f"cov: {cov_pct}"), - color="darkslategray", - ha="center", - va="center", - size=15, - ) - - ax.set_xticks([]) - ax.set_xlim(0, xsize) - if logscale: - ax.set_yscale("log", basey=2) - ax.set_ylim(1 if logscale else 0, maxdepth) - ax.set_ylabel("Depth") - - set_human_axis(ax) - plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="gray", size=10) - normalize_axes(root) - - -def read_roi(roi_file: str) -> Dict[str, List[str]]: - """ - Read the regions of interest file, and return a dict of filename => regions. - """ - roi = defaultdict(list) - with open(roi_file, encoding="utf-8") as fp: - for row in fp: - filename, region, name = row.strip().split(",")[:3] - chrom, start_end = region.split(":", 1) - start, end = start_end.split("-") - region = (chrom, (int(start) + int(end)) // 2, name) - roi[filename].append(region) - logger.info("Read %d regions of interest", len(roi)) - return roi - - -def draw_multi_depth( - root, - panel_roots, - panel_axes, - bedfiles: List[str], - chrinfo_file: str, - titleinfo_file: str, - maxdepth: int, - logscale: bool, - median_line: bool = True, - calculate_coverage: bool = False, - roi: Optional[str] = None, -): - """ - Draw multiple depth plots on the same canvas. 
- """ - chrinfo = ChrInfoFile(chrinfo_file) if chrinfo_file else {} - titleinfo = TitleInfoFile(titleinfo_file) if titleinfo_file else {} - npanels = len(bedfiles) - yinterval = 1.0 / npanels - ypos = 1 - yinterval - roi = read_roi(roi) if roi else {} - for i, (bedfile, panel_root, panel_ax) in enumerate( - zip(bedfiles, panel_roots, panel_axes) - ): - pf = op.basename(bedfile).split(".", 1)[0] - bed = Bed(bedfile) - - if ypos > 0.001: - root.plot((0.02, 0.98), (ypos, ypos), "-", lw=2, color="lightgray") - - title = titleinfo.get(bedfile, pf.split("_", 1)[0]) - subtitle = None - if isinstance(title, TitleInfoLine): - subtitle = title.subtitle - title = title.title - - draw_seqids = i in (0, npanels - 1) - draw_depth( - panel_root, - panel_ax, - bed, - chrinfo=chrinfo, - maxdepth=maxdepth, - logscale=logscale, - title=title, - subtitle=subtitle, - median_line=median_line, - draw_seqids=draw_seqids, - calculate_coverage=calculate_coverage, - roi=roi.get(bedfile), - ) - ypos -= yinterval - - normalize_axes(root) - - -def depth(args): - """ - %prog depth *.regions.bed.gz - - Plot the mosdepth regions BED file. We recommend to generate this BED file - by (please adjust the --by parameter to your required resolution): - - $ mosdepth --no-per-base --use-median --fast-mode --by 1000000 sample.wgs - sample.bam - - Use --chrinfo to specify a colormap between seqid, desired color, and - optionally a new name. For example: - - chr01A, #c51b7d, 1A - chr01B, #4d9221, 1B - ... - - Only seqids that are in the colormap will be plotted, in the order that's - given in the file. When --colormap is not set, every seqid will be drawn in - black. - - Can take multiple BED files as input and then plot all of them in a - composite figure. 
- """ - p = OptionParser(depth.__doc__) - p.add_argument( - "--chrinfo", help="Comma-separated mappings between seqid, color, new_name" - ) - p.add_argument( - "--titleinfo", - help="Comma-separated titles mappings between filename, title", - ) - p.add_argument("--maxdepth", default=100, type=int, help="Maximum depth to show") - p.add_argument( - "--logscale", default=False, action="store_true", help="Use log-scale on depth" - ) - p.add_argument( - "--no-median-line", - default=False, - action="store_true", - help="Do not plot median depth line", - ) - p.add_argument( - "--calculate-coverage", - default=False, - action="store_true", - help="Calculate genome coverage", - ) - p.add_argument( - "--roi", - help="File that contains regions of interest, format: filename, chr:start-end", - ) - p.set_outfile("depth.pdf") - opts, args, iopts = p.set_image_options(args, style="dark", figsize="14x4") - - if len(args) < 1: - sys.exit(not p.print_help()) - - bedfiles = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - npanels = len(bedfiles) - yinterval = 1.0 / npanels - ypos = 1 - yinterval - panel_roots, panel_axes = [], [] - for _ in range(npanels): - panel_root = root if npanels == 1 else fig.add_axes((0, ypos, 1, yinterval)) - panel_ax = fig.add_axes((0.1, ypos + 0.2 * yinterval, 0.8, 0.65 * yinterval)) - panel_roots.append(panel_root) - panel_axes.append(panel_ax) - ypos -= yinterval - - draw_multi_depth( - root, - panel_roots, - panel_axes, - bedfiles, - opts.chrinfo, - opts.titleinfo, - opts.maxdepth, - opts.logscale, - median_line=not opts.no_median_line, - calculate_coverage=opts.calculate_coverage, - roi=opts.roi, - ) - - image_name = opts.outfile - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - return image_name - - -def add_window_options(p): - """ - Add options for window plotting. 
- """ - p.add_argument("--window", default=500000, type=int, help="Size of window") - p.add_argument("--shift", default=100000, type=int, help="Size of shift") - p.add_argument("--subtract", help="Subtract bases from window") - p.add_argument( - "--nomerge", default=False, action="store_true", help="Do not merge features" - ) - - -def check_window_options(opts): - """ - Check the window options, and return the values. - """ - window = opts.window - shift = opts.shift - subtract = opts.subtract - assert window % shift == 0, "--window must be divisible by --shift" - logger.debug( - "Line/stack-plot options: window=%d shift=%d subtract=%s", - window, - shift, - subtract, - ) - merge = not opts.nomerge - - return window, shift, subtract, merge - - -def get_beds(s: List[str], binned: bool = False) -> List[str]: - """ - Get the bed files for each feature, and return them as a list. - """ - return [x + ".bed" for x in s] if not binned else [x for x in s] - - -def linearray(binfile, chr, window, shift): - mn = binfile.mapping[chr] - m, _ = zip(*mn) - - m = np.array(m, dtype=float) - w = window // shift - m = moving_sum(m, window=w) - return m - - -def lineplot(ax, binfiles, nbins, chr, window, shift, color="br"): - assert len(binfiles) <= 2, "A max of two line plots are supported" - - t = np.arange(nbins) - bf = binfiles[0] - m = linearray(bf, chr, window, shift) - ax.plot(t, m, "{0}-".format(color[0]), lw=2) - - formatter = ticker.FuncFormatter( - lambda x, pos: human_readable_base(int(x) * shift, pos) - ) - ax.xaxis.set_major_formatter(formatter) - for tl in ax.get_xticklabels(): - tl.set_color("darkslategray") - - label = bf.filename.split(".")[0] - perw = "per {0}".format(human_size(window, precision=0)) - ax.set_ylabel(label + " " + perw, color=color[0]) - - if len(binfiles) == 2: - ax2 = ax.twinx() - bf = binfiles[1] - m = linearray(bf, chr, window, shift) - ax2.plot(t, m, "{0}-".format(color[1]), lw=2) - # Differentiate tick labels through colors - for tl in 
ax.get_yticklabels(): - tl.set_color(color[0]) - for tl in ax2.get_yticklabels(): - tl.set_color(color[1]) - - label = bf.filename.split(".")[0] - ax2.set_ylabel(label + " " + perw, color=color[1]) - - ax.set_xlim(0, nbins) - - -def composite(args): - """ - %prog composite fastafile chr1 - - Combine line plots, feature bars and alt-bars, different data types - specified in options. Inputs must be BED-formatted. Three types of viz are - currently supported: - - --lines: traditional line plots, useful for plotting feature freq - --bars: show where the extent of features are - --altbars: similar to bars, yet in two alternating tracks, e.g. scaffolds - """ - p = OptionParser(composite.__doc__) - p.add_argument("--lines", help="Features to plot in lineplot") - p.add_argument("--bars", help="Features to plot in bars") - p.add_argument("--altbars", help="Features to plot in alt-bars") - p.add_argument( - "--fatten", - default=False, - action="store_true", - help="Help visualize certain narrow features", - ) - p.add_argument( - "--mode", - default="span", - choices=("span", "count", "score"), - help="Accumulate feature based on", - ) - add_window_options(p) - opts, args, iopts = p.set_image_options(args, figsize="8x5") - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, chr = args - window, shift, _, merge = check_window_options(opts) - linebeds, barbeds, altbarbeds = [], [], [] - fatten = opts.fatten - if opts.lines: - lines = opts.lines.split(",") - linebeds = get_beds(lines) - if opts.bars: - bars = opts.bars.split(",") - barbeds = get_beds(bars) - if opts.altbars: - altbars = opts.altbars.split(",") - altbarbeds = get_beds(altbars) - - linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode, merge=merge) - - margin = 0.12 - clen = Sizes(fastafile).mapping[chr] - nbins, _ = get_nbins(clen, shift) - - plt.rcParams["xtick.major.size"] = 0 - plt.rcParams["ytick.major.size"] = 0 - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = 
fig.add_axes((0, 0, 1, 1)) - - root.text(0.5, 0.95, chr, ha="center", color="darkslategray") - - xstart, xend = margin, 1 - margin - xlen = xend - xstart - ratio = xlen / clen - # Line plots - ax = fig.add_axes((xstart, 0.6, xlen, 0.3)) - lineplot(ax, linebins, nbins, chr, window, shift) - - # Bar plots - yy = 0.5 - yinterval = 0.08 - xs = lambda x: xstart + ratio * x - r = 0.01 - fattend = 0.0025 - for bb in barbeds: - root.text(xend + 0.01, yy, bb.split(".")[0], va="center") - HorizontalChromosome(root, xstart, xend, yy, height=0.02) - bb = Bed(bb) - for b in bb: - start, end = xs(b.start), xs(b.end) - span = end - start - if fatten and span < fattend: - span = fattend - - root.add_patch( - Rectangle((start, yy - r), span, 2 * r, lw=0, fc="darkslategray") - ) - yy -= yinterval - - # Alternative bar plots - offset = r / 2 - for bb in altbarbeds: - root.text(xend + 0.01, yy, bb.split(".")[0], va="center") - bb = Bed(bb) - for b in bb: - start, end = xs(b.start), xs(b.end) - span = end - start - if span < 0.0001: - continue - offset = -offset - root.add_patch( - Rectangle( - (start, yy + offset), end - start, 0.003, lw=0, fc="darkslategray" - ) - ) - yy -= yinterval - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - image_name = chr + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def multilineplot(args): - """ - %prog multilineplot fastafile chr1 - - Combine multiple line plots in one vertical stack - Inputs must be BED-formatted. 
- - --lines: traditional line plots, useful for plotting feature freq - """ - p = OptionParser(multilineplot.__doc__) - p.add_argument("--lines", help="Features to plot in lineplot") - p.add_argument("--colors", help="List of colors matching number of input bed files") - p.add_argument( - "--mode", - default="span", - choices=("span", "count", "score"), - help="Accumulate feature based on", - ) - p.add_argument( - "--binned", - default=False, - action="store_true", - help="Specify whether the input is already binned; " - + "if True, input files are considered to be binfiles", - ) - p.add_argument("--ymax", type=int, help="Set Y-axis max") - add_window_options(p) - opts, args, iopts = p.set_image_options(args, figsize="8x5") - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, chr = args - window, shift, _, merge = check_window_options(opts) - linebeds = [] - colors = opts.colors - if opts.lines: - lines = opts.lines.split(",") - assert len(colors) == len(lines), ( - "Number of chosen colors must match" + " number of input bed files" - ) - linebeds = get_beds(lines, binned=opts.binned) - - linebins = get_binfiles( - linebeds, fastafile, shift, mode=opts.mode, binned=opts.binned, merge=merge - ) - - clen = Sizes(fastafile).mapping[chr] - nbins, _ = get_nbins(clen, shift) - - plt.rcParams["xtick.major.size"] = 0 - plt.rcParams["ytick.major.size"] = 0 - plt.rcParams["figure.figsize"] = iopts.w, iopts.h - - fig, axarr = plt.subplots(nrows=len(lines)) - if len(linebeds) == 1: - axarr = (axarr,) - fig.suptitle(latex(chr), color="darkslategray") - - for i, ax in enumerate(axarr): - lineplot( - ax, - [linebins[i]], - nbins, - chr, - window, - shift, - color="{0}{1}".format(colors[i], "r"), - ) - - if opts.ymax: - ax.set_ylim(0, opts.ymax) - - plt.subplots_adjust(hspace=0.5) - - image_name = chr + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def draw_heatmaps( - fig, - root, - root_extent: Extent, - fastafile: str, - chr: str, - stacks: List[str], - heatmaps: List[str], - window: int, - shift: int, - cmap: Colormap, - subtract: Optional[int] = None, - merge: bool = False, - meres: Optional[str] = None, -): - """ - Draw heatmap for the given chromosome. - """ - stackbeds = get_beds(stacks) - heatmapbeds = get_beds(heatmaps) - stackbins = get_binfiles( - stackbeds, fastafile, shift, subtract=subtract, merge=merge - ) - heatmapbins = get_binfiles( - heatmapbeds, fastafile, shift, subtract=subtract, merge=merge - ) - - margin = 0.06 - inner = 0.015 - clen = Sizes(fastafile).mapping[chr] - - # Gauge - ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin) - yinterval = 0.3 - xx = margin - yy = 1 - margin - yy -= yinterval - xlen = clen / ratio - cc = chr - if "_" in chr: - ca, cb = chr.split("_") - cc = ca[0].upper() + cb - - root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray)) - extent = (xx, yy, xlen, yinterval - inner) - adjusted = adjust_extent(extent, root_extent) - ax = fig.add_axes(adjusted) - - nbins, _ = get_nbins(clen, shift) - - owindow = clen / 100 - if owindow > window: - window = owindow // shift * shift - - stackplot(ax, stackbins, nbins, palette, chr, window, shift) - ax.text( - 0.05, - 0.9, - cc, - va="top", - zorder=100, - transform=ax.transAxes, - bbox=dict(boxstyle="round", fc="w", alpha=0.5), - ) - - # Legends - xx += xlen + 0.01 - yspace = (yinterval - inner) / (len(stackbins) + 1) - yy = 1 - margin - yinterval - for s, p in zip(stacks, palette): - s = s.replace("_", " ") - s = Registration.get(s, s) - - yy += yspace - root.add_patch(Rectangle((xx, yy), inner, inner, color=p, lw=0)) - root.text(xx + 1.5 * inner, yy, s, size=10) - - yh = 0.05 # Heatmap height - # Heatmaps - xx = margin - yy = 1 - margin - yinterval - inner - for s, p in zip(heatmaps, heatmapbins): - s = s.replace("_", " ") 
- s = Registration.get(s, s) - - yy -= yh - m = stackarray(p, chr, window, shift) - - Y = np.array([m, m]) - root.imshow( - Y, - extent=(xx, xx + xlen, yy, yy + yh - inner), - interpolation="nearest", - aspect="auto", - cmap=cmap, - ) - root.text(xx + xlen + 0.01, yy, s, size=10) - - yy -= yh - - if meres: - bed = Bed(meres) - for b in bed: - if b.seqid != chr: - continue - pos = (b.start + b.end) / 2 - cpos = pos / ratio - xx = margin + cpos - accn = b.accn.capitalize() - root.add_patch(CirclePolygon((xx, yy), radius=0.01, fc="m", ec="m")) - root.text(xx + 0.014, yy, accn, va="center", color="m") - - normalize_axes(root) - - -def heatmap(args): - """ - %prog heatmap fastafile chr1 - - Combine stack plot with heatmap to show abundance of various tracks along - given chromosome. Need to give multiple beds to --stacks and --heatmaps - """ - p = OptionParser(heatmap.__doc__) - p.add_argument( - "--stacks", - default="Exons,Introns,DNA_transposons,Retrotransposons", - help="Features to plot in stackplot", - ) - p.add_argument( - "--heatmaps", - default="Copia,Gypsy,hAT,Helitron,Introns,Exons", - help="Features to plot in heatmaps", - ) - p.add_argument("--meres", default=None, help="Extra centromere / telomere features") - add_window_options(p) - opts, args, iopts = p.set_image_options(args, figsize="8x5") - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, chr = args - window, shift, subtract, merge = check_window_options(opts) - - stacks = opts.stacks.split(",") - heatmaps = opts.heatmaps.split(",") - - fig = plt.figure(1, (iopts.w, iopts.h)) - root_extent = (0, 0, 1, 1) - root = fig.add_axes(root_extent) - - draw_heatmaps( - fig, - root, - root_extent, - fastafile, - chr, - stacks, - heatmaps, - window, - shift, - iopts.cmap, - subtract, - merge, - meres=opts.meres, - ) - - image_name = chr + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def draw_gauge(ax, margin: float, maxl: int, rightmargin: Optional[float] = None): - """ - Draw a gauge on the top of the canvas, showing the scale of the chromosome. - """ - rightmargin = rightmargin or margin - ax.plot([margin, 1 - rightmargin], [1 - margin, 1 - margin], "k-", lw=2) - - best_stride = autoscale(maxl) - nintervals = maxl / best_stride - - xx, yy = margin, 1 - margin - tip = 0.005 - xinterval = (1 - margin - rightmargin) / nintervals - l = human_size(best_stride) - if l[-1] == "b": - suffix = target = l[-2:] - - for i in range(0, maxl + 1, best_stride): - l = human_size(i, precision=0, target=target) - if l[-1] == "b": - l, suffix = l[:-2], l[-2:] - ax.plot([xx, xx], [yy, yy + tip], "k-", lw=2) - ax.text(xx, yy + 2 * tip, l, ha="center", size=13) - xx += xinterval - - xx += 4 * tip - xinterval - ax.text(xx + tip, yy + 2 * tip, suffix) - - return best_stride / xinterval - - -def get_binfiles( - inputfiles: List[str], - fastafile: str, - shift: int, - mode: str = "span", - subtract: Optional[int] = None, - binned: bool = False, - merge: bool = True, -): - """ - Get binfiles from input files. If not binned, then bin them first. - """ - if not binned: - binopts = [f"--binsize={shift}"] - binopts.append(f"--mode={mode}") - if subtract: - binopts.append(f"--subtract={subtract}") - if not merge: - binopts.append("--nomerge") - binfiles = [bins([x, fastafile] + binopts) for x in inputfiles if op.exists(x)] - else: - binfiles = inputfiles - binfiles = [BinFile(x) for x in binfiles] - - return binfiles - - -def stackarray(binfile: BinFile, chr: str, window: int, shift: int): - """ - Get stack array from binfile for the given chr. 
- """ - mn = binfile.mapping[chr] - m, n = zip(*mn) - - m = np.array(m, dtype=float) - n = np.array(n, dtype=float) - - w = window // shift - nw = m.shape[0] - if nw < w: - logger.info("%s length < window, using %d bins instead of %d", chr, nw, w) - w = nw - m = moving_sum(m, window=w) - n = moving_sum(n, window=w) - m /= n - - return m - - -def stackplot( - ax, - binfiles: List[BinFile], - nbins: int, - palette: List[str], - chr: str, - window: int, - shift: int, -): - """ - Plot stackplot on the given axes, using data from binfiles. - """ - t = np.arange(nbins, dtype=float) + 0.5 - m = np.zeros(nbins, dtype=float) - zorders = range(10)[::-1] - for binfile, p, z in zip(binfiles, palette, zorders): - s = stackarray(binfile, chr, window, shift) - m += s - ax.fill_between(t, m, color=p, lw=0, zorder=z) - - ax.set_xlim(0, nbins) - ax.set_ylim(0, 1) - ax.set_axis_off() - - -def draw_stacks( - fig, - root, - root_extent: Extent, - stacks: List[str], - fastafile: str, - window: int, - shift: int, - top: int, - merge: bool = True, - subtract: Optional[int] = None, - switch: Optional[DictFile] = None, -): - """ - Draw stack plot. 
- """ - bedfiles = get_beds(stacks) - binfiles = get_binfiles(bedfiles, fastafile, shift, subtract=subtract, merge=merge) - - sizes = Sizes(fastafile) - s = list(sizes.iter_sizes())[:top] - maxl = max(x[1] for x in s) - margin = 0.08 - inner = 0.02 # y distance between tracks - - # Gauge - ratio = draw_gauge(root, margin, maxl) - - # Per chromosome - yinterval = (1 - 2 * margin) / (top + 1) - xx = margin - yy = 1 - margin - for chr, clen in s: - yy -= yinterval - xlen = clen / ratio - cc = chr - if "_" in chr: - ca, cb = chr.split("_") - cc = ca[0].upper() + cb - - if switch and cc in switch: - cc = "\n".join((cc, f"({switch[cc]})")) - - extent = (xx, yy, xlen, yinterval - inner) - adjusted = adjust_extent(extent, root_extent) - root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray)) - ax = fig.add_axes(adjusted) - - nbins, _ = get_nbins(clen, shift) - - stackplot(ax, binfiles, nbins, palette, chr, window, shift) - root.text( - xx - 0.04, yy + 0.5 * (yinterval - inner), cc, ha="center", va="center" - ) - - # Legends - yy -= yinterval - xx = margin - for b, p in zip(bedfiles, palette): - b = b.rsplit(".", 1)[0].replace("_", " ") - b = Registration.get(b, b) - - root.add_patch(Rectangle((xx, yy), inner, inner, color=p, lw=0)) - xx += 2 * inner - root.text(xx, yy, b, size=13) - xx += len(b) * 0.015 + inner - - normalize_axes(root) - - -def stack(args): - """ - %prog stack fastafile - - Create landscape plots that show the amounts of genic sequences, and repetitive - sequences along the chromosomes. 
- """ - p = OptionParser(stack.__doc__) - p.add_argument("--top", default=10, type=int, help="Draw the first N chromosomes") - p.add_argument( - "--stacks", - default="Exons,Introns,DNA_transposons,Retrotransposons", - help="Features to plot in stackplot", - ) - p.add_argument("--switch", help="Change chr names based on two-column file") - add_window_options(p) - opts, args, iopts = p.set_image_options(args, figsize="8x8") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - top = opts.top - window, shift, subtract, merge = check_window_options(opts) - switch = opts.switch - if switch: - switch = DictFile(opts.switch) - - stacks = opts.stacks.split(",") - - fig = plt.figure(1, (iopts.w, iopts.h)) - root_extent = (0, 0, 1, 1) - root = fig.add_axes(root_extent) - - draw_stacks( - fig, - root, - root_extent, - stacks, - fastafile, - window, - shift, - top, - merge, - subtract, - switch, - ) - - pf = fastafile.rsplit(".", 1)[0] - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - return image_name - - -if __name__ == "__main__": - main() diff --git a/jcvi/graphics/mummerplot.py b/jcvi/graphics/mummerplot.py deleted file mode 100644 index 4d54cde6..00000000 --- a/jcvi/graphics/mummerplot.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Wrapper for mummerplot. Selecting a subset of queries and references to plot -main features in the dot plot. -""" -import os.path as op -import sys - -from ..apps.base import OptionParser, logger, sh -from ..formats.base import SetFile -from ..formats.coords import Coords, filter -from ..formats.sizes import Sizes - - -def writeXfile(ids, sizes_dict, filename): - fw = open(filename, "w") - for q in ids: - print("\t".join(str(x) for x in (q, sizes_dict[q], "+")), file=fw) - - logger.debug("%d ids written to `%s`.", len(ids), filename) - fw.close() - - -def main(args): - """ - %prog deltafile - - Plot one query. 
Extract the references that have major matches to this - query. Control "major" by option --refcov. - """ - p = OptionParser(main.__doc__) - p.add_argument("--refids", help="Use subset of contigs in the ref") - p.add_argument( - "--refcov", - default=0.01, - type=float, - help="Minimum reference coverage", - ) - p.add_argument( - "--all", - default=False, - action="store_true", - help="Plot one pdf file per ref in refidsfile", - ) - p.add_argument( - "--color", - default="similarity", - choices=("similarity", "direction", "none"), - help="Color the dots based on", - ) - p.add_argument( - "--nolayout", - default=False, - action="store_true", - help="Do not rearrange contigs", - ) - p.set_align(pctid=0, hitlen=0) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (deltafile,) = args - reffasta, queryfasta = open(deltafile).readline().split() - color = opts.color - layout = not opts.nolayout - prefix = op.basename(deltafile).split(".")[0] - qsizes = Sizes(queryfasta).mapping - rsizes = Sizes(reffasta).mapping - - refs = SetFile(opts.refids) if opts.refids else set(rsizes.keys()) - refcov = opts.refcov - pctid = opts.pctid - hitlen = opts.hitlen - deltafile = filter( - [deltafile, "--pctid={0}".format(pctid), "--hitlen={0}".format(hitlen)] - ) - - if opts.all: - for r in refs: - pdffile = plot_some_queries( - [r], - qsizes, - rsizes, - deltafile, - refcov, - prefix=prefix, - color=color, - layout=layout, - ) - if pdffile: - sh("mv {0} {1}.pdf".format(pdffile, r)) - else: - plot_some_queries( - refs, - qsizes, - rsizes, - deltafile, - refcov, - prefix=prefix, - color=color, - layout=layout, - ) - - -def plot_some_queries( - refs, - qsizes, - rsizes, - deltafile, - refcov, - prefix="out", - color="similarity", - layout=True, -): - - Qfile, Rfile = "Qfile", "Rfile" - coords = Coords(deltafile) - queries = set() - for c in coords: - if c.refcov < refcov: - continue - if c.ref not in refs: - continue - queries.add(c.query) - - if not 
queries or not refs: - logger.debug("Empty - %s vs. %s", queries, refs) - return None - - if not layout: - queries = sorted(queries) - refs = sorted(refs) - - writeXfile(queries, qsizes, Qfile) - writeXfile(refs, rsizes, Rfile) - - cmd = "mummerplot {0}".format(deltafile) - cmd += " -Rfile {0} -Qfile {1}".format(Rfile, Qfile) - cmd += " --postscript -p {0}".format(prefix) - if layout: - cmd += " --layout" - if color == "similarity": - cmd += " --color" - elif color == "none": - cmd += " --nocolor" - sh(cmd) - - cmd = "ps2pdf {0}.ps {0}.pdf".format(prefix) - sh(cmd) - - return prefix + ".pdf" - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/jcvi/graphics/synteny.py b/jcvi/graphics/synteny.py deleted file mode 100644 index 94c78e53..00000000 --- a/jcvi/graphics/synteny.py +++ /dev/null @@ -1,736 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -%prog mcscan.txt all.bed layout.csv - -Illustrate MCscan multiple collinearity alignments. Use layout.csv to indicate -the positions of tracks. For example: - -#x, y, rotation, ha, va, color, ratio -0.5, 0.6, 0, left, center, g -0.25, 0.7, 45, center, center, m - -With the row ordering corresponding to the column ordering in the MCscan output. 
- -For "ha" (horizontal alignment), accepted values are: left|right|leftalign|rightalign|center|"" -For "va" (vertical alignment), accepted values are: top|bottom|center|""(empty) -""" - -import sys - -from typing import List, Optional - -import numpy as np - -from matplotlib import transforms -from matplotlib.path import Path - -from ..apps.base import OptionParser, logger -from ..compara.synteny import BlockFile -from ..formats.base import DictFile -from ..formats.bed import Bed -from ..utils.cbook import human_size -from ..utils.validator import validate_in_choices, validate_in_range - -from .base import ( - AbstractLayout, - PathPatch, - markup, - plt, - savefig, -) -from .glyph import ( - BasePalette, - Glyph, - OrientationPalette, - OrthoGroupPalette, - RoundLabel, -) -from .tree import draw_tree, read_trees - - -HorizontalAlignments = ("left", "right", "leftalign", "rightalign", "center", "") -VerticalAlignments = ("top", "bottom", "center", "") -CANVAS_SIZE = 0.65 - - -class LayoutLine(object): - """ - Parse a line in the layout file. 
The line is in the following format: - - *0.5, 0.6, 0, left, center, g, 1, chr1 - """ - - def __init__(self, row, delimiter=","): - self.hidden = row[0] == "*" - if self.hidden: - row = row[1:] - args = row.rstrip().split(delimiter) - args = [x.strip() for x in args] - self.x = float(args[0]) - validate_in_range(self.x, 0, 1, "XPosition(x) column") - self.y = float(args[1]) - validate_in_range(self.y, 0, 1, "YPosition(y) column") - self.rotation = int(args[2]) - self.ha = args[3] - validate_in_choices( - self.ha, HorizontalAlignments, "HorizontaAlignment(ha) column" - ) - self.va = args[4] - validate_in_choices(self.va, VerticalAlignments, "VerticalAlignment(va) column") - self.color = args[5] - self.ratio = 1 - if len(args) > 6: - self.ratio = float(args[6]) - if len(args) > 7: - self.label = args[7].strip() - else: - self.label = None - if len(args) > 8: - self.label_fontsize = float(args[8]) - else: - self.label_fontsize = 10 - - -class Layout(AbstractLayout): - """ - Parse the layout file. - """ - - def __init__(self, filename, delimiter=",", seed: Optional[int] = None): - super().__init__(filename) - fp = open(filename, encoding="utf-8") - self.edges = [] - for row in fp: - if row[0] == "#": - continue - if row[0] == "e": - args = row.rstrip().split(delimiter) - args = [x.strip() for x in args] - a, b = args[1:3] - if len(args) >= 4 and args[3]: - blockcolor = args[3] - else: - blockcolor = None - if len(args) >= 5 and args[4]: - samearc = args[4] - else: - samearc = None - a, b = int(a), int(b) - assert args[0] == "e" - self.edges.append((a, b, blockcolor, samearc)) - else: - self.append(LayoutLine(row, delimiter=delimiter)) - - self.assign_colors(seed=seed) - - -class Shade(object): - """ - Draw a shade between two tracks. 
- """ - - Styles = ("curve", "line") - - def __init__( - self, - ax, - a, - b, - ymid_pad: float = 0.0, - highlight=False, - style="curve", - ec="k", - fc="k", - alpha=0.2, - lw=1, - zorder=1, - ): - """Create syntenic wedges between tracks. - - Args: - ax: matplotlib Axes - a (tuple of floats): ((start_x, start_y), (end_x, end_y)) - b (tuple of floats): ((start_x, start_y), (end_x, end_y)) - ymid_pad (float): Adjustment to y-mid position of Bezier controls, curve style only - highlight (bool, optional): Plot this shade if color is specified. Defaults to False. - style (str, optional): Style. Defaults to "curve", must be one of - ("curve", "line") - ec (str, optional): Edge color. Defaults to "k". - fc (str, optional): Face color. Defaults to "k". - alpha (float, optional): Transparency. Defaults to 0.2. - lw (int, optional): Line width. Defaults to 1. - zorder (int, optional): Z-order. Defaults to 1. - """ - fc = fc or "gainsboro" # Default block color is grayish - assert style in self.Styles, f"style must be one of {self.Styles}" - a1, a2 = a - b1, b2 = b - ax1, ay1 = a1 - ax2, ay2 = a2 - bx1, by1 = b1 - bx2, by2 = b2 - if ax1 is None or ax2 is None or bx1 is None or bx2 is None: - return - M, C4, L, CP = Path.MOVETO, Path.CURVE4, Path.LINETO, Path.CLOSEPOLY - if style == "curve": - ymid1 = (ay1 + by1) / 2 + ymid_pad - ymid2 = (ay2 + by2) / 2 + ymid_pad - pathdata = [ - (M, a1), - (C4, (ax1, ymid1)), - (C4, (bx1, ymid1)), - (C4, b1), - (L, b2), - (C4, (bx2, ymid2)), - (C4, (ax2, ymid2)), - (C4, a2), - (CP, a1), - ] - else: - pathdata = [(M, a1), (L, b1), (L, b2), (L, a2), (CP, a1)] - codes, verts = zip(*pathdata) - path = Path(verts, codes) - if highlight: - ec = fc = highlight - - pp = PathPatch(path, ec=ec, fc=fc, alpha=alpha, lw=lw, zorder=zorder) - ax.add_patch(pp) - - -class Region(object): - """ - Draw a region of synteny. 
- """ - - def __init__( - self, - ax, - ext, - layout, - bed, - scale, - switch=None, - chr_label=True, - loc_label=True, - gene_labels: Optional[set] = None, - genelabelsize=0, - genelabelrotation=25, - pad=0.05, - vpad=0.015, - extra_features=None, - glyphstyle="box", - glyphcolor: BasePalette = OrientationPalette(), - ): - x, y = layout.x, layout.y - ratio = layout.ratio - scale /= ratio - self.y = y - lr = layout.rotation - tr = transforms.Affine2D().rotate_deg_around(x, y, lr) + ax.transAxes - inv = ax.transAxes.inverted() - - start, end, si, ei, chrom, orientation, span = ext - flank = span / scale / 2 - xstart, xend = x - flank, x + flank - self.xstart, self.xend = xstart, xend - - cv = lambda t: xstart + abs(t - startbp) / scale - hidden = layout.hidden - - # Chromosome - if not hidden: - ax.plot((xstart, xend), (y, y), color="gray", transform=tr, lw=2, zorder=1) - - self.genes = genes = bed[si : ei + 1] - startbp, endbp = start.start, end.end - if orientation == "-": - startbp, endbp = endbp, startbp - - if switch: - chrom = switch.get(chrom, chrom) - if layout.label: - chrom = layout.label - - label = "-".join( - ( - human_size(startbp, target="Mb", precision=2)[:-2], - human_size(endbp, target="Mb", precision=2), - ) - ) - - height = 0.012 - self.gg = {} - # Genes - for g in genes: - gstart, gend = g.start, g.end - strand = g.strand - if strand == "-": - gstart, gend = gend, gstart - if orientation == "-": - strand = "+" if strand == "-" else "-" - - x1, x2, a, b = self.get_coordinates(gstart, gend, y, cv, tr, inv) - gene_name = g.accn - self.gg[gene_name] = (a, b) - - color, zorder = ( - glyphcolor.get_color_and_zorder(strand) - if isinstance(glyphcolor, OrientationPalette) - else glyphcolor.get_color_and_zorder(gene_name) - ) - - if hidden: - continue - gp = Glyph( - ax, - x1, - x2, - y, - height, - gradient=False, - fc=color, - style=glyphstyle, - zorder=zorder, - ) - gp.set_transform(tr) - if genelabelsize and (not gene_labels or gene_name in 
gene_labels): - if genelabelrotation == 0: - text_x = x1 if x1 > x2 else x2 - text_y = y - else: - text_x = (x1 + x2) / 2 - text_y = y + height / 2 + genelabelsize * vpad / 3 - ax.text( - text_x, - text_y, - markup(gene_name), - size=genelabelsize, - rotation=genelabelrotation, - ha="left", - va="center", - color="lightslategray", - ) - - # Extra features (like repeats) - if extra_features: - for g in extra_features: - gstart, gend = g.start, g.end - x1, x2, a, b = self.get_coordinates(gstart, gend, y, cv, tr, inv) - gp = Glyph( - ax, - x1, - x2, - y, - height * 3 / 4, - gradient=False, - fc="#ff7f00", - style=glyphstyle, - zorder=2, - ) - gp.set_transform(tr) - - ha, va = layout.ha, layout.va - - hpad = 0.02 - if ha == "left": - xx = xstart - hpad - ha = "right" - elif ha == "leftalign": - xx = 0.5 - CANVAS_SIZE / 2 - hpad - ha = "right" - elif ha == "right": - xx = xend + hpad - ha = "left" - elif ha == "rightalign": - xx = 0.5 + CANVAS_SIZE / 2 + hpad - ha = "left" - else: - xx = x - ha = "center" - - # Tentative solution to labels stick into glyph - magic = 40.0 - cc = abs(lr) / magic if abs(lr) > magic else 1 - if va == "top": - yy = y + cc * pad - elif va == "bottom": - yy = y - cc * pad - else: - yy = y - - l = np.array((xx, yy)) - trans_angle = ax.transAxes.transform_angles(np.array((lr,)), l.reshape((1, 2)))[ - 0 - ] - lx, ly = l - if not hidden: - bbox = dict(boxstyle="round", fc="w", ec="w", alpha=0.5) - kwargs = dict( - ha=ha, va="center", rotation=trans_angle, bbox=bbox, zorder=10 - ) - - chr_label = markup(chrom) if chr_label else None - loc_label = label if loc_label else None - if chr_label: - if loc_label: - ax.text( - lx, - ly + vpad, - chr_label, - size=layout.label_fontsize, - color=layout.color, - **kwargs, - ) - ax.text( - lx, - ly - vpad, - loc_label, - color="lightslategrey", - size=layout.label_fontsize, - **kwargs, - ) - else: - ax.text(lx, ly, chr_label, color=layout.color, **kwargs) - - def get_coordinates(self, gstart, gend, y, cv, tr, 
inv): - """ - Get coordinates of a gene. - """ - x1, x2 = cv(gstart), cv(gend) - a, b = tr.transform((x1, y)), tr.transform((x2, y)) - a, b = inv.transform(a), inv.transform(b) - return x1, x2, a, b - - -def ymid_offset(samearc: Optional[str], pad: float = 0.05): - """ - Adjustment to ymid, this is useful to adjust the appearance of the Bezier - curves between the tracks. - """ - if samearc == "above": - return 2 * pad - if samearc == "above2": - return 4 * pad - if samearc == "below": - return -2 * pad - if samearc == "below2": - return -4 * pad - return 0 - - -class Synteny(object): - """ - Draw the synteny plot. - """ - - def __init__( - self, - fig, - root, - datafile, - bedfile, - layoutfile, - switch=None, - tree=None, - extra_features=None, - chr_label: bool = True, - loc_label: bool = True, - gene_labels: Optional[set] = None, - genelabelsize: int = 0, - genelabelrotation: int = 25, - pad: float = 0.05, - vpad: float = 0.015, - scalebar: bool = False, - shadestyle: str = "curve", - glyphstyle: str = "arrow", - glyphcolor: str = "orientation", - seed: Optional[int] = None, - prune_features=True, - ): - _, h = fig.get_figwidth(), fig.get_figheight() - bed = Bed(bedfile) - order = bed.order - bf = BlockFile(datafile) - self.layout = lo = Layout(layoutfile, seed=seed) - switch = DictFile(switch, delimiter="\t") if switch else None - if extra_features: - extra_features = Bed(extra_features) - - exts = [] - extras = [] - for i in range(bf.ncols): - ext = bf.get_extent(i, order) - exts.append(ext) - if extra_features: - start, end, _, _, chrom, _, span = ext - start, end = start.start, end.end # start, end coordinates - ef = list(extra_features.extract(chrom, start, end)) - - # Pruning removes minor features with < 0.1% of the region - if prune_features: - ef_pruned = [x for x in ef if x.span >= span / 1000] - logger.info( - "Extracted %d features (%d after pruning)", - len(ef), - len(ef_pruned), - ) - extras.append(ef_pruned) - else: - logger.info("Extracted %d 
features", len(ef)) - extras.append(ef) - - maxspan = max(exts, key=lambda x: x[-1])[-1] - scale = maxspan / CANVAS_SIZE - - self.gg = gg = {} - self.rr = [] - ymids = [] - glyphcolor = ( - OrientationPalette() - if glyphcolor == "orientation" - else OrthoGroupPalette(bf.grouper()) - ) - for i in range(bf.ncols): - ext = exts[i] - ef = extras[i] if extras else None - r = Region( - root, - ext, - lo[i], - bed, - scale, - switch, - gene_labels=gene_labels, - genelabelsize=genelabelsize, - genelabelrotation=genelabelrotation, - chr_label=chr_label, - loc_label=loc_label, - vpad=vpad, - extra_features=ef, - glyphstyle=glyphstyle, - glyphcolor=glyphcolor, - ) - self.rr.append(r) - # Use tid and accn to store gene positions - gg.update(dict(((i, k), v) for k, v in r.gg.items())) - ymids.append(r.y) - - for i, j, blockcolor, samearc in lo.edges: - ymid_pad = ymid_offset(samearc, pad) - for ga, gb, h in bf.iter_pairs(i, j): - a, b = gg[(i, ga)], gg[(j, gb)] - Shade( - root, a, b, ymid_pad, fc=blockcolor, lw=0, alpha=1, style=shadestyle - ) - - for ga, gb, h in bf.iter_pairs(i, j, highlight=True): - a, b = gg[(i, ga)], gg[(j, gb)] - Shade( - root, - a, - b, - ymid_pad, - alpha=1, - highlight=h, - zorder=2, - style=shadestyle, - ) - - if scalebar: - logger.info("Build scalebar (scale=%.3f)", scale) - # Find the best length of the scalebar - ar = [1, 2, 5] - candidates = ( - [1000 * x for x in ar] - + [10000 * x for x in ar] - + [100000 * x for x in ar] - ) - # Find the one that's close to an optimal canvas size - dists = [(abs(x / scale - 0.12), x) for x in candidates] - dist, candidate = min(dists) - dist = candidate / scale - x, y, yp = 0.22, 0.92, 0.005 - a, b = x - dist / 2, x + dist / 2 - lsg = "lightslategrey" - root.plot([a, a], [y - yp, y + yp], "-", lw=2, color=lsg) - root.plot([b, b], [y - yp, y + yp], "-", lw=2, color=lsg) - root.plot([a, b], [y, y], "-", lw=2, color=lsg) - root.text( - x, - y + 0.02, - human_size(candidate, precision=0), - ha="center", - 
va="center", - ) - - if tree: - trees = read_trees(tree) - ntrees = len(trees) - logger.debug("A total of %d trees imported.", ntrees) - xiv = 1.0 / ntrees - yiv = 0.3 - xstart = 0 - ystart = min(ymids) - 0.4 - for i in range(ntrees): - ax = fig.add_axes([xstart, ystart, xiv, yiv]) - label, outgroup, color, tx = trees[i] - draw_tree( - ax, - tx, - outgroup=outgroup, - rmargin=0.4, - leaffont=11, - treecolor=color, - supportcolor=color, - leafcolor=color, - ) - xstart += xiv - RoundLabel(ax, 0.5, 0.3, label, fill=True, fc="lavender", color=color) - - -def draw_gene_legend( - ax, - x1: float, - x2: float, - ytop: float, - d: float = 0.04, - text: bool = False, - repeat: bool = False, - glyphstyle: str = "box", -): - """ - Draw a legend for gene glyphs. - """ - forward, backward = OrientationPalette.forward, OrientationPalette.backward - ax.plot([x1, x1 + d], [ytop, ytop], ":", color=forward, lw=2) - ax.plot([x1 + d], [ytop], ">", color=forward, mec=forward) - ax.plot([x2, x2 + d], [ytop, ytop], ":", color=backward, lw=2) - ax.plot([x2], [ytop], "<", color=backward, mec="g") - if text: - ax.text(x1 + d / 2, ytop + d / 2, "gene (+)", ha="center") - ax.text(x2 + d / 2, ytop + d / 2, "gene (-)", ha="center") - if repeat: - xr = (x1 + x2 + d) / 2 - Glyph( - ax, - xr - d / 2, - xr + d / 2, - ytop, - 0.012 * 3 / 4, - gradient=False, - fc="#ff7f00", - style=glyphstyle, - zorder=2, - ) - ax.text(xr, ytop + d / 2, "repeat", ha="center") - - -def main(args: List[str]): - p = OptionParser(__doc__) - p.add_argument("--switch", help="Rename the seqid with two-column file") - p.add_argument("--tree", help="Display trees on the bottom of the figure") - p.add_argument("--extra", help="Extra features in BED format") - p.add_argument( - "--genelabels", - help='Show only these gene labels, separated by comma. Example: "At1g12340,At5g54690"', - ) - p.add_argument( - "--genelabelsize", - default=0, - type=int, - help="Show gene labels at this font size, useful for debugging. 
" - + "However, plot may appear visually crowded. " - + "Reasonably good values are 2 to 6 [Default: disabled]", - ) - p.add_argument( - "--genelabelrotation", - default=25, - type=int, - help="Rotate gene labels at this angle (anti-clockwise), useful for debugging.", - ) - p.add_argument( - "--scalebar", - default=False, - action="store_true", - help="Add scale bar to the plot", - ) - p.add_argument( - "--glyphstyle", - default="box", - choices=Glyph.Styles, - help="Style of feature glyphs", - ) - p.add_argument( - "--glyphcolor", - default="orientation", - choices=Glyph.Palette, - help="Glyph coloring based on", - ) - p.add_argument( - "--shadestyle", - default="curve", - choices=Shade.Styles, - help="Style of syntenic wedges", - ) - p.add_argument( - "--outputprefix", - default="", - help="Prefix for the output file", - ) - p.add_argument( - "--noprune", - default=False, - action="store_true", - help="If set, do not exclude small features from annotation track (<1%% of region)", - ) - opts, args, iopts = p.set_image_options(args, figsize="8x7") - - if len(args) != 3: - sys.exit(not p.print_help()) - - datafile, bedfile, layoutfile = args - switch = opts.switch - tree = opts.tree - gene_labels = None if not opts.genelabels else set(opts.genelabels.split(",")) - prune_features = not opts.noprune - - pf = datafile.rsplit(".", 1)[0] - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - Synteny( - fig, - root, - datafile, - bedfile, - layoutfile, - switch=switch, - tree=tree, - extra_features=opts.extra, - gene_labels=gene_labels, - genelabelsize=opts.genelabelsize, - genelabelrotation=opts.genelabelrotation, - scalebar=opts.scalebar, - shadestyle=opts.shadestyle, - glyphstyle=opts.glyphstyle, - glyphcolor=opts.glyphcolor, - seed=iopts.seed, - prune_features=prune_features, - ) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - outputprefix = opts.outputprefix - if outputprefix: - pf = outputprefix - image_name = pf + 
"." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - return image_name - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/jcvi/graphics/table.py b/jcvi/graphics/table.py deleted file mode 100644 index 73f9fc92..00000000 --- a/jcvi/graphics/table.py +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- -# -# table.py -# graphics -# -# Created by Haibao Tang on 05/25/20 -# Copyright © 2020 Haibao Tang. All rights reserved. -# -import csv -import sys - -from ..apps.base import OptionParser - -from .base import ( - Rectangle, - load_image, - markup, - normalize_axes, - plt, - savefig, -) - - -class CsvTable(list): - def __init__(self, csvfile="table.csv"): - super().__init__() - with open(csvfile) as csvfile: - reader = csv.reader(csvfile, skipinitialspace=True) - self.header = [markup(x) for x in next(reader)] - self.append(self.header) - for row in reader: - is_image_file = row[0].startswith("file://") - if is_image_file: - images = [] - for filenames in row: - images.append( - [ - load_image(filename=f.replace("file://", "")) - for f in filenames.split("|") - ] - ) - self.append(images) - else: - self.append(row) - print(self.header) - - def column_widths(self, total=1): - # Get the maximum width for each column - max_widths = [0] * self.columns - for row in self: - for j, cell in enumerate(row): - if isinstance(cell, list): - continue - max_widths[j] = max(max_widths[j], len(cell)) - total_width = sum(max_widths) - return [x * total / total_width for x in max_widths] - - @property - def rows(self): - return len(self) - - @property - def columns(self): - return len(self.header) - - -def draw_multiple_images_in_rectangle(ax, images, rect, box_width, yinflation=1): - """Draw multiple images in given rectangle. Used by draw_table(). 
- - Args: - ax (matplotlib axes): matplotlib axes - images (List[image]): List of images - rect (Tuple[float]): (left, bottom, width, height) - box_width (float): Width of the image square - yinflation (float): inflation along the y-axis - """ - n_images = len(images) - left, bottom, width, height = rect - box_start = (width - n_images * box_width) / 2 - left += box_start - bottom += (height - box_width * yinflation) / 2 - for image in images: - extent = (left, left + box_width, bottom, bottom + box_width * yinflation) - ax.imshow(image, extent=extent, aspect="auto") - left += box_width - - -def draw_table(ax, csv_table, extent=(0, 1, 0, 1), stripe_color="beige", yinflation=1): - """Draw table on canvas. - - Args: - ax (matplotlib axes): matplotlib axes - csv_table (CsvTable): Parsed CSV table - extent (tuple, optional): (left, right, bottom, top). Defaults to (0, 1, 0, 1). - stripe_color (str, optional): Stripe color of the table. Defaults to - "beige". - yinflation (float, optional): Inflate on y since imshow aspect ratio - sometimes create warped images. Defaults to 1. 
- """ - left, right, bottom, top = extent - width = right - left - height = top - bottom - rows = csv_table.rows - column_widths = csv_table.column_widths(width) - print(column_widths) - - yinterval = height / rows - for i, row in enumerate(csv_table): - should_stripe = i % 2 == 0 - contain_images = isinstance(row[0], list) - xstart = left - if contain_images: - box_width = min( - min(column_widths[j] / len(x) for j, x in enumerate(row)), yinterval - ) - for j, cell in enumerate(row): - xinterval = column_widths[j] - xmid = xstart + xinterval / 2 - ymid = top - (i + 0.5) * yinterval - if contain_images: - # There may be multiple images, center them - rect = (xstart, top - (i + 1) * yinterval, xinterval, yinterval) - draw_multiple_images_in_rectangle( - ax, cell, rect, box_width, yinflation=yinflation - ) - should_stripe = False - else: - ax.text( - xmid, - ymid, - cell, - ha="center", - va="center", - ) - - xstart += column_widths[j] - - if not should_stripe: - continue - - # Draw the stripes, extend a little longer horizontally - xpad = 0.01 - ax.add_patch( - Rectangle( - (left - xpad, top - (i + 1) * yinterval), - width + 2 * xpad, - yinterval, - fc=stripe_color, - ec=stripe_color, - ) - ) - - -def main(args): - """ - %prog table.csv - - Render a table on canvas. Input is a CSV file. - """ - p = OptionParser(main.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="7x7") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (csvfile,) = args - pf = csvfile.rsplit(".", 1)[0] - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - csv_table = CsvTable(csvfile) - - draw_table(root, csv_table) - - normalize_axes(root) - - image_name = pf + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/jcvi/graphics/tree.py b/jcvi/graphics/tree.py deleted file mode 100644 index 8d96dc77..00000000 --- a/jcvi/graphics/tree.py +++ /dev/null @@ -1,688 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -import sys - -from collections import defaultdict -from itertools import groupby - -from ete3 import Tree - -from ..apps.base import OptionParser, glob, logger -from ..formats.base import LineFile -from ..formats.sizes import Sizes - -from .base import ( - FancyBboxPatch, - Rectangle, - linear_shade, - markup, - normalize_axes, - plt, - savefig, - set3_n, -) -from .glyph import ExonGlyph, TextCircle, get_setups - - -class LeafInfoLine: - def __init__(self, row, delimiter=","): - args = [x.strip() for x in row.split(delimiter)] - self.name = args[0] - self.color = args[1] - self.new_name = None - if len(args) > 2: - self.new_name = args[2] - - -class LeafInfoFile(LineFile): - def __init__(self, filename, delimiter=","): - super().__init__(filename) - self.cache = {} - with open(filename) as fp: - for row in fp: - if row[0] == "#": - continue - line = LeafInfoLine(row, delimiter=delimiter) - self.cache[line.name] = line - - -class WGDInfoLine: - def __init__(self, row, delimiter=",", defaultcolor="#7fc97f"): - args = [x.strip() for x in row.split(delimiter)] - self.node_name = args[0] - self.divergence = float(args[1]) / 100 - self.name = args[2] - self.color = args[3] or defaultcolor - self.style = args[4] - - -class WGDInfoFile(LineFile): - def __init__(self, filename, delimiter=","): - super().__init__(filename) - self.cache = defaultdict(list) - with open(filename) as fp: - for row in fp: - if row[0] == "#": - continue - line = WGDInfoLine(row, delimiter=delimiter) - self.cache[line.node_name].append(line) - - -def truncate_name(name, rule=None): - """ - shorten taxa names for tree display - - Options of rule. 
This only affects tree display. - - headn (eg. head3 truncates first 3 chars) - - oheadn (eg. ohead3 retains only the first 3 chars) - - tailn (eg. tail3 truncates last 3 chars) - - otailn (eg. otail3 retains only the last 3 chars) - n = 1 ~ 99 - """ - import re - - if rule is None: - return name - - k = re.search("(?<=^head)[0-9]{1,2}$", rule) - if k: - k = k.group(0) - tname = name[int(k) :] - else: - k = re.search("(?<=^ohead)[0-9]{1,2}$", rule) - if k: - k = k.group(0) - tname = name[: int(k)] - else: - k = re.search("(?<=^tail)[0-9]{1,2}$", rule) - if k: - k = k.group(0) - tname = name[: -int(k)] - else: - k = re.search("(?<=^otail)[0-9]{1,2}$", rule) - if k: - k = k.group(0) - tname = name[-int(k) :] - else: - print(truncate_name.__doc__, file=sys.stderr) - raise ValueError("Wrong rule for truncation!") - return tname - - -def draw_wgd_xy(ax, xx, yy, wgdline): - """Draw WGD at (xx, yy) position - - Args: - ax (axis): Matplotlib axes - xx (float): x position - yy (float): y position - wgdline (WGDInfo): WGDInfoLines that contains the styling information - """ - TextCircle( - ax, - xx, - yy, - wgdline.name, - fc=wgdline.color, - radius=0.0225, - color="k", - fontweight="bold", - ) - - -def draw_wgd(ax, y, rescale, name, wgdcache): - """Draw WGD given a name and the WGDInfo cache. 
- - Args: - ax (matplotlib.axes): matplotlib axes - y (float): y position - rescale (function): Rescale function to generate x position - name (str): Name of the line (usually the taxon/internal name) - wgdcache (Dict): Dictionary containing WGDInfoLines - """ - if not wgdcache or name not in wgdcache: - return - for line in wgdcache[name]: - draw_wgd_xy(ax, rescale(line.divergence), y, line) - - -def draw_tree( - ax, - t, - hpd=None, - margin=0.1, - rmargin=0.2, - ymargin=0.1, - tip=0.01, - treecolor="k", - supportcolor="k", - internal=True, - outgroup=None, - dashedoutgroup=False, - reroot=True, - gffdir=None, - sizes=None, - trunc_name=None, - SH=None, - scutoff=0, - leafcolor="k", - leaffont=12, - leafinfo=None, - wgdinfo=None, - geoscale=False, - groups=[], -): - """ - main function for drawing phylogenetic tree - """ - - if reroot: - if outgroup: - R = t.get_common_ancestor(*outgroup) - else: - # Calculate the midpoint node - R = t.get_midpoint_outgroup() - - if R is not t: - t.set_outgroup(R) - - # By default, the distance to outgroup and non-outgroup is the same - # we re-adjust the distances so that the outgroups will appear - # farthest from everything else - if dashedoutgroup: - a, b = t.children - # Avoid even split - total = a.dist + b.dist - newR = t.get_common_ancestor(*outgroup) - a.dist = 0.9 * total - b.dist = total - a.dist - - farthest, max_dist = t.get_farthest_leaf() - print("max_dist = {}".format(max_dist), file=sys.stderr) - - xstart = margin - ystart = 2 * ymargin - # scale the tree - scale = (1 - margin - rmargin) / max_dist - - def rescale(dist): - return xstart + scale * dist - - def rescale_divergence(divergence): - return rescale(max_dist - divergence) - - num_leaves = len(t.get_leaf_names()) - yinterval = (1 - ystart) / num_leaves - ytop = ystart + (num_leaves - 0.5) * yinterval - - # get exons structures, if any - structures = {} - if gffdir: - gffiles = glob("{0}/*.gff*".format(gffdir)) - setups, ratio = get_setups(gffiles, 
canvas=rmargin / 2, noUTR=True) - structures = dict((a, (b, c)) for a, b, c in setups) - - if sizes: - sizes = Sizes(sizes).mapping - - coords = {} - i = 0 - color_groups = [] # Used to plot groups to the right of the tree - for n in t.traverse("postorder"): - dist = n.get_distance(t) - xx = rescale(dist) - - if n.is_leaf(): - yy = ystart + i * yinterval - i += 1 - - if trunc_name: - name = truncate_name(n.name, rule=trunc_name) - else: - name = n.name - - if leafinfo and n.name in leafinfo: - line = leafinfo[n.name] - lc = line.color - sname = line.new_name - else: - lc = leafcolor - sname = None - lc = lc or "k" - sname = sname or name.replace("_", "-") - # if color is given as "R,G,B" - if "," in lc: - lc = [float(x) for x in lc.split(",")] - - ax.text( - xx + tip, - yy, - markup(sname), - va="center", - fontstyle="italic", - size=leaffont, - color=lc, - ) - color_groups.append((lc, yy, xx)) - - gname = n.name.split("_")[0] - if gname in structures: - mrnabed, cdsbeds = structures[gname] - ExonGlyph( - ax, - 1 - rmargin / 2, - yy, - mrnabed, - cdsbeds, - align="right", - ratio=ratio, - ) - if sizes and gname in sizes: - size = sizes[gname] - size = size / 3 - 1 # base pair converted to amino acid - size = "{0}aa".format(size) - ax.text(1 - rmargin / 2 + tip, yy, size, size=leaffont) - - else: - linestyle = "--" if (dashedoutgroup and n is t) else "-" - children = [coords[x] for x in n.get_children()] - children_x, children_y = zip(*children) - min_y, max_y = min(children_y), max(children_y) - # plot the vertical bar - ax.plot((xx, xx), (min_y, max_y), linestyle, color=treecolor) - # plot the horizontal bar - for cx, cy in children: - ax.plot((xx, cx), (cy, cy), linestyle, color=treecolor) - yy = sum(children_y) * 1.0 / len(children_y) - # plot HPD if exists - if hpd and n.name in hpd: - a, b = hpd[n.name] - ax.plot( - (rescale_divergence(a), rescale_divergence(b)), - (yy, yy), - "-", - color="darkslategray", - alpha=0.4, - lw=2, - ) - support = n.support - if 
support > 1: - support = support / 100.0 - if not n.is_root() and supportcolor: - if support > scutoff / 100.0: - ax.text( - xx, - yy + 0.005, - "{0:d}".format(int(abs(support * 100))), - ha="right", - size=leaffont, - color=supportcolor, - ) - if internal and n.name: - TextCircle(ax, xx, yy, n.name, size=9) - else: # Just a dot - TextCircle(ax, xx, yy, None, radius=0.002) - - coords[n] = (xx, yy) - # WGD info - draw_wgd(ax, yy, rescale_divergence, n.name, wgdinfo) - - # scale bar - if geoscale: - draw_geoscale( - ax, ytop, margin=margin, rmargin=rmargin, yy=ymargin, max_dist=max_dist - ) - else: - br = 0.1 - x1 = xstart + 0.1 - x2 = x1 + br * scale - yy = ymargin - ax.plot([x1, x1], [yy - tip, yy + tip], "-", color=treecolor) - ax.plot([x2, x2], [yy - tip, yy + tip], "-", color=treecolor) - ax.plot([x1, x2], [yy, yy], "-", color=treecolor) - ax.text( - (x1 + x2) / 2, - yy - tip, - "{0:g}".format(br), - va="top", - ha="center", - size=leaffont, - color=treecolor, - ) - - # Groupings on the right, often to used to show groups such as phylogenetic - # clades - if groups: - color_groups.sort() - group_extents = [] - for color, group in groupby(color_groups, key=lambda x: x[0]): - group = list(group) - _, min_yy, xx = min(group) - _, max_yy, xx = max(group) - group_extents.append((min_yy, max_yy, xx, color)) - group_extents.sort(reverse=True) - - for group_name, (min_yy, max_yy, xx, color) in zip(groups, group_extents): - group_color = linear_shade(color, fraction=0.85) - ax.add_patch( - FancyBboxPatch( - (xx, min_yy - yinterval / 2), - rmargin - 0.01, - max_yy - min_yy + yinterval, - boxstyle="round,pad=-0.002,rounding_size=0.005", - fc=group_color, - ec=group_color, - ) - ) - # Add the group label - horizontal = (max_yy - min_yy) < 0.2 - mid_yy = (min_yy + max_yy) / 2 - label_rightend = 0.98 - if horizontal: - ax.text( - label_rightend, - mid_yy, - markup(group_name), - color="darkslategray", - ha="right", - va="center", - ) - else: - ax.text( - label_rightend, - 
mid_yy, - markup(group_name), - color="darkslategray", - ha="right", - va="center", - rotation=-90, - ) - - if SH is not None: - xs = x1 - ys = (ymargin + yy) / 2.0 - ax.text( - xs, - ys, - "SH test against ref tree: {0}".format(SH), - ha="left", - size=leaffont, - color="g", - ) - - -def read_trees(tree): - from urllib.parse import parse_qs - from jcvi.formats.base import read_block - - trees = [] - - fp = open(tree) - for header, tx in read_block(fp, "#"): - header = parse_qs(header[1:]) - label = header["label"][0].strip('"') - outgroup = header["outgroup"] - (color,) = header.get("color", ["k"]) - trees.append((label, outgroup, color, "".join(tx))) - - return trees - - -def draw_geoscale( - ax, ytop, margin=0.1, rmargin=0.2, yy=0.1, max_dist=3.0, contrast_epochs=True -): - """ - Draw geological epoch on million year ago (mya) scale. - max_dist = 3.0 => max is 300 mya - """ - import math - - a, b = margin, 1 - rmargin # Correspond to 300mya and 0mya - minx, maxx = 0, int(max_dist * 100) - - def cv(x): - return b - (x - b) / (maxx - minx) * (b - a) - - ax.plot((a, b), (yy, yy), "k-") - tick = 0.0125 - scale_start = int(math.ceil(maxx / 25) * 25) - for mya in range(scale_start - 25, 0, -25): - p = cv(mya) - ax.plot((p, p), (yy, yy - tick), "k-") - ax.text(p, yy - 2.5 * tick, str(mya), ha="center", va="center") - - ax.text( - (a + b) / 2, - yy - 5 * tick, - "Time before present (million years)", - ha="center", - va="center", - ) - - # Source: - # https://en.wikipedia.org/wiki/Geological_period - Geo = ( - ("Neogene", 2.588, 23.03), - ("Paleogene", 23.03, 66.0), - ("Cretaceous", 66.0, 145.5), - ("Jurassic", 145.5, 201.3), - ("Triassic", 201.3, 252.17), - ("Permian", 252.17, 298.9), - ("Carboniferous", 298.9, 358.9), - ) - h = 0.05 - for (era, start, end), color in zip(Geo, set3_n(len(Geo))): - if maxx - start < 10: # not visible enough - continue - start, end = cv(start), cv(end) - end = max(a, end) - p = Rectangle((end, yy + tick / 2), abs(start - end), h, lw=1, 
ec="w", fc=color) - ax.text( - (start + end) / 2, - yy + (tick + h) / 2, - era, - ha="center", - va="center", - size=8, - ) - ax.add_patch(p) - - # We highlight recent epochs for better visualization, we just highlight - # Neogene and Cretaceous as these are more relevant for most phylogeny - if contrast_epochs: - for era, start, end in Geo: - if not era in ("Neogene", "Cretaceous"): - continue - - # Make a beige patch - start, end = cv(start), cv(end) - ax.add_patch( - Rectangle( - (end, yy + tick + h), - abs(start - end), - ytop - yy - tick - h, - fc="beige", - ec="beige", - ) - ) - - -def parse_tree(infile): - """Parse newick formatted tree file and returns a tuple consisted of a - Tree object, and a HPD dictionary if 95%HPD is found in the newick string, - otherwise None - - Args: - infile (str): Path to the tree file - """ - import re - - with open(infile) as fp: - treedata = fp.read() - hpd_re = re.compile(r"( \[&95%HPD=[^[]*\])") - - def repl(match): - repl.count += 1 - name = "N{}".format(repl.count) - lb, ub = re.findall(r"HPD=\{(.*), (.*)\}", match.group(0))[0] - repl.hpd[name] = (float(lb), float(ub)) - return name - - repl.count = 0 - repl.hpd = {} - - treedata, changed = re.subn(hpd_re, repl, treedata) - if repl.hpd: - print(repl.hpd, file=sys.stderr) - - return (Tree(treedata, format=1), repl.hpd) if changed else (Tree(treedata), None) - - -def main(args): - """ - %prog newicktree - - Plot Newick formatted tree. The gene structure can be plotted along if - --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is - on, also show the number of amino acids. - """ - p = OptionParser(main.__doc__) - p.add_argument( - "--outgroup", - help="Outgroup for rerooting the tree. 
" - + "Use comma to separate multiple taxa.", - ) - p.add_argument( - "--noreroot", - default=False, - action="store_true", - help="Don't reroot the input tree", - ) - p.add_argument( - "--rmargin", default=0.2, type=float, help="Set blank rmargin to the right" - ) - p.add_argument( - "--gffdir", default=None, help="The directory that contain GFF files" - ) - p.add_argument("--sizes", default=None, help="The FASTA file or the sizes file") - p.add_argument("--SH", default=None, type=str, help="SH test p-value") - - group = p.add_argument_group("Node style") - group.add_argument("--leafcolor", default="k", help="Font color for the OTUs") - group.add_argument("--leaffont", default=12, help="Font size for the OTUs") - group.add_argument( - "--leafinfo", help="CSV file specifying the leaves: name,color,new_name" - ) - group.add_argument( - "--scutoff", - default=0, - type=int, - help="cutoff for displaying node support, 0-100", - ) - group.add_argument( - "--no_support", - dest="support", - default=True, - action="store_false", - help="Do not print node support values", - ) - group.add_argument( - "--no_internal", - dest="internal", - default=True, - action="store_false", - help="Do not show internal nodes", - ) - - group = p.add_argument_group("Edge style") - group.add_argument( - "--dashedoutgroup", - default=False, - action="store_true", - help="Gray out the edges connecting outgroup and non-outgroup", - ) - - group = p.add_argument_group("Additional annotations") - group.add_argument( - "--geoscale", - default=False, - action="store_true", - help="Plot geological scale", - ) - group.add_argument( - "--wgdinfo", help="CSV specifying the position and style of WGD events" - ) - group.add_argument( - "--groups", - help="Group names from top to bottom, to the right of the tree. " - "Each distinct color in --leafinfo is considered part of the same group. " - "Separate the names with comma, such as 'eudicots,,monocots,'. 
" - "Empty names will be ignored for that specific group. ", - ) - - opts, args, iopts = p.set_image_options(args, figsize="10x7") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (datafile,) = args - outgroup = None - reroot = not opts.noreroot - if opts.outgroup: - outgroup = opts.outgroup.split(",") - - hpd = None - if datafile == "demo": - t = Tree( - """(((Os02g0681100:0.1151,Sb04g031800:0.11220)1.0:0.0537, - (Os04g0578800:0.04318,Sb06g026210:0.04798)-1.0:0.08870)1.0:0.06985, - ((Os03g0124100:0.08845,Sb01g048930:0.09055)1.0:0.05332, - (Os10g0534700:0.06592,Sb01g030630:0.04824)-1.0:0.07886):0.09389);""" - ) - else: - logger.debug("Load tree file `%s`", datafile) - t, hpd = parse_tree(datafile) - - pf = datafile.rsplit(".", 1)[0] - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - supportcolor = "k" if opts.support else None - margin, rmargin = 0.1, opts.rmargin # Left and right margin - leafinfo = LeafInfoFile(opts.leafinfo).cache if opts.leafinfo else None - wgdinfo = WGDInfoFile(opts.wgdinfo).cache if opts.wgdinfo else None - - draw_tree( - root, - t, - hpd=hpd, - margin=margin, - rmargin=rmargin, - ymargin=margin, - supportcolor=supportcolor, - internal=opts.internal, - outgroup=outgroup, - dashedoutgroup=opts.dashedoutgroup, - reroot=reroot, - gffdir=opts.gffdir, - sizes=opts.sizes, - SH=opts.SH, - scutoff=opts.scutoff, - leafcolor=opts.leafcolor, - leaffont=opts.leaffont, - leafinfo=leafinfo, - wgdinfo=wgdinfo, - geoscale=opts.geoscale, - groups=opts.groups.split(",") if opts.groups else [], - ) - - normalize_axes(root) - - image_name = pf + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/jcvi/graphics/wheel.py b/jcvi/graphics/wheel.py deleted file mode 100644 index 13fc3dc7..00000000 --- a/jcvi/graphics/wheel.py +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Wheel plot that shows continuous data in radial axes. -""" -import sys - -from math import degrees -from collections import OrderedDict -from itertools import groupby - -import numpy as np - -from ..apps.base import ActionDispatcher, OptionParser - -from .base import normalize_axes, plt, savefig - - -def main(): - - actions = (("wheel", "wheel plot that shows continuous data in radial axes"),) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def closed_plot(ax, theta, r, *args, **kwargs): - theta = list(theta) + [theta[0]] - r = list(r) + [r[0]] - ax.plot(theta, r, *args, **kwargs) - - -def sector(ax, theta_min, theta_max, theta_pad, r, R=30, *args, **kwargs): - theta = np.linspace(theta_min - theta_pad, theta_max + theta_pad, num=100) - r = len(theta) * [r] - theta = list(theta) + [0] - r = list(r) + [-R] - closed_plot(ax, theta, r, *args, **kwargs) - - -def parse_data(datafile, score_column="score"): - data = {} - fp = open(datafile) - for row in fp: - atoms = row.split(",") - if len(atoms) == 4: # First column is SampleID - atoms = atoms[1:] - label, score, percentile = atoms - label = label.strip() - label = label.strip('"') - score = float(score.strip()) - percentile = float(percentile.strip()) - if score_column == "score": - data[label] = score - else: - data[label] = percentile - return data - - -def parse_groups(groupsfile): - groups = OrderedDict() - fp = open(groupsfile) - for row in fp: - group, label = row.split(",") - group = group.strip() - label = label.strip() - groups[label] = group - return groups - - -def wheel(args): - """ - %prog wheel datafile.csv groups.csv - - Wheel plot that shows 
continous data in radial axes. - """ - p = OptionParser(wheel.__doc__) - p.add_argument( - "--column", - default="score", - choices=("score", "percentile"), - help="Which column to extract from `datafile.csv`", - ) - opts, args, iopts = p.set_image_options(args, figsize="5x5", format="png") - - if len(args) != 2: - sys.exit(not p.print_help()) - - datafile, groupsfile = args - column = opts.column - linecolor = "#d6d6d6" - df = parse_data(datafile, score_column=opts.column) - groups = parse_groups(groupsfile) - labels = [g for g in groups if g in df] - print(labels) - df = [df[g] for g in labels] - print(df) - groups = [groups[g] for g in labels] - print(groups) - - pf = datafile.rsplit(".", 1)[0] - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - categories = len(df) - # ax = plt.subplot(111, projection='polar') - ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True) - - brewer = [ - "#FF3B30", - "#DD43A0", - "#5856D6", - "#007AFE", - "#56BDEC", - "#4CD8BA", - "#4CD864", - "#B0F457", - "#FEF221", - "#FFCC01", - "#FF9500", - "#FF3B30", - ] - - # Baseline - theta = np.linspace(1.5 * np.pi, 3.5 * np.pi, endpoint=False, num=categories) - _theta = np.linspace(1.5 * np.pi, 3.5 * np.pi) - R = max(max(df), 10) - xlim = (-R, R) if column == "score" else (-100, 100) - plim = (-R / 2, R) if column == "score" else (0, 100) - ci = (-0.5, 2) if column == "score" else (10, 90) - - # Grid - if column == "score": - for t in theta: - ax.plot([t, t], plim, color=linecolor) - ax.axis("off") - - # Contours - for t in plim: - ax.plot(_theta, [t] * len(_theta), color=linecolor) - - # Sectors (groupings) - collapsed_groups = [] - gg = [] - for group, c in groupby(enumerate(groups), lambda x: x[1]): - c = [x[0] for x in list(c)] - collapsed_groups.append(group) - gg.append(c) - - show_sector = False - if show_sector: - theta_interval = 2 * np.pi / categories - theta_pad = theta_interval / 2 * 0.9 - for color, group in zip(brewer, gg): - tmin, tmax = min(group), 
max(group) - sector( - ax, - theta[tmin], - theta[tmax], - theta_pad, - R * 0.95, - ls="-", - color=color, - lw=2, - ) - - # Data - r = df - closed_plot(ax, theta, r, color="lightslategray", alpha=0.25) - for color, group in zip(brewer, gg): - hidden_data = [(theta[x], r[x]) for x in group if (ci[0] <= r[x] <= ci[1])] - shown_data = [(theta[x], r[x]) for x in group if (r[x] < ci[0] or r[x] > ci[1])] - for alpha, data in zip((1, 1), (hidden_data, shown_data)): - if not data: - continue - color_theta, color_r = zip(*data) - ax.plot(color_theta, color_r, "o", color=color, alpha=alpha) - - # Print out data - diseaseNames, risks = labels, df - print( - "var theta = [{}]".format(",".join("{:.1f}".format(degrees(x)) for x in theta)) - ) - print("var risks = [{}]".format(",".join(str(x) for x in risks))) - print( - "var diseaseNames = [{}]".format( - ",".join(['"{}"'.format(x) for x in diseaseNames]) - ) - ) - - # Labels - from math import cos, sin - - r = 0.5 - for i, label in enumerate(labels): - tl = theta[i] - x, y = 0.5 + r * cos(tl), 0.5 + r * sin(tl) - d = degrees(tl) - if 90 < d % 360 < 270: # On the left quardrants - d -= 180 - root.text( - x, y, label, size=4, rotation=d, ha="center", va="center", color=linecolor - ) - print(x, y, label) - - # Add baseline - baseline = 0 if column == "score" else 50 - _r = len(_theta) * [baseline] - closed_plot(ax, _theta, _r, "k:", lw=1, ms=4) - - # Add confidence interval - if column == "percentile": - barcolor = "#eeeeee" - ax.bar([0], [ci[1] - ci[0]], width=2 * np.pi, bottom=ci[0], fc=barcolor) - - ax.set_rmin(xlim[0]) - ax.set_rmax(xlim[1]) - - normalize_axes(root) - - image_name = pf + "-" + column + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/__init__.py b/jcvi/projects/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/jcvi/projects/__main__.py b/jcvi/projects/__main__.py deleted file mode 100644 index 9e4c5a75..00000000 --- a/jcvi/projects/__main__.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Compilation of project specific scripts, used to execute specific analysis routines and generate publication-ready figures -""" - -from ..apps.base import dmain - - -if __name__ == "__main__": - dmain(__file__) diff --git a/jcvi/projects/age.py b/jcvi/projects/age.py deleted file mode 100644 index 92c42b25..00000000 --- a/jcvi/projects/age.py +++ /dev/null @@ -1,738 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Scripts related to age prediction model. -""" -import json -import os -import os.path as op -import sys - -import numpy as np -import pandas as pd -import seaborn as sns - -from jinja2 import Template - -from ..apps.base import ActionDispatcher, OptionParser, iglob, logger -from ..graphics.base import panel_labels, plt, savefig - - -def main(): - - actions = ( - ("compile", "extract telomere length and ccn"), - ("traits", "make HTML page that reports eye and skin color"), - # Age paper plots - ("qc", "plot distributions of basic statistics of a sample"), - ("correlation", "plot correlation of age vs. postgenomic features"), - ("heritability", "plot composite on heritability estimates"), - ("regression", "plot chronological vs. predicted age"), - ("ccn", "plot several ccn plots including chr1,chrX,chrY,chrM"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -traits_template = """ - - - ART traits - - - - - - - - - - - - - - - {% for sample in samples %} - - - - - - - - {% endfor %} - -
Sample IDSkinEyes
{{ sample.sample_id }} -
-
- - -
-
- - -
- - -""" - - -def lab2rgb(L, A, B): - # Borrowed from: - # - y = (L + 16) / 116 - x = A / 500 + y - z = y - B / 200 - - x = 0.95047 * (x * x * x if (x * x * x > 0.008856) else (x - 16 / 116) / 7.787) - y = 1.00000 * (y * y * y if (y * y * y > 0.008856) else (y - 16 / 116) / 7.787) - z = 1.08883 * (z * z * z if (z * z * z > 0.008856) else (z - 16 / 116) / 7.787) - - r = x * 3.2406 + y * -1.5372 + z * -0.4986 - g = x * -0.9689 + y * 1.8758 + z * 0.0415 - b = x * 0.0557 + y * -0.2040 + z * 1.0570 - - r = (1.055 * r ** (1 / 2.4) - 0.055) if (r > 0.0031308) else 12.92 * r - g = (1.055 * g ** (1 / 2.4) - 0.055) if (g > 0.0031308) else 12.92 * g - b = (1.055 * b ** (1 / 2.4) - 0.055) if (b > 0.0031308) else 12.92 * b - - return max(0, min(1, r)) * 255, max(0, min(1, g)) * 255, max(0, min(1, b)) * 255 - - -def make_rgb(L, A, B): - r, g, b = lab2rgb(L, A, B) - r = int(round(r)) - g = int(round(g)) - b = int(round(b)) - return "rgb({}, {}, {})".format(r, g, b) - - -def traits(args): - """ - %prog traits directory - - Make HTML page that reports eye and skin color. 
- """ - p = OptionParser(traits.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - samples = [] - for folder in args: - targets = iglob(folder, "*-traits.json") - if not targets: - continue - filename = targets[0] - js = json.load(open(filename)) - js["skin_rgb"] = make_rgb( - js["traits"]["skin-color"]["L"], - js["traits"]["skin-color"]["A"], - js["traits"]["skin-color"]["B"], - ) - js["eye_rgb"] = make_rgb( - js["traits"]["eye-color"]["L"], - js["traits"]["eye-color"]["A"], - js["traits"]["eye-color"]["B"], - ) - samples.append(js) - - template = Template(traits_template) - fw = open("report.html", "w") - print(template.render(samples=samples), file=fw) - logger.debug("Report written to `%s`", fw.name) - fw.close() - - -def plot_fit_line(ax, x, y): - from numpy.polynomial.polynomial import polyfit - - t = np.arange(100) - xy = [(a, b) for (a, b) in zip(x, y) if np.isfinite(a) and np.isfinite(b)] - x, y = zip(*xy) - b, m = polyfit(x, y, 1) - print("y = {} + {} * x".format(b, m)) - ax.plot(t, b + m * t, "-", lw=3, color="k") - - -def composite_ccn(df, size=(12, 8)): - """Plot composite ccn figure""" - fig = plt.figure(1, size) - ax1 = plt.subplot2grid((2, 2), (0, 0)) - ax2 = plt.subplot2grid((2, 2), (0, 1)) - ax3 = plt.subplot2grid((2, 2), (1, 0)) - ax4 = plt.subplot2grid((2, 2), (1, 1)) - mf = df[df["hli_calc_gender"] == "Male"] - - age_label = "Chronological age (yr)" - ax1.scatter( - mf["hli_calc_age_sample_taken"], - mf["ccn.chrX"], - s=10, - marker=".", - color="lightslategray", - ) - ax1.set_ylim(0.8, 1.1) - plot_fit_line(ax1, mf["hli_calc_age_sample_taken"], mf["ccn.chrX"]) - ax1.set_ylabel("ChrX copy number") - ax1.set_title("ChrX copy number in Male") - - ax2.scatter( - mf["hli_calc_age_sample_taken"], - mf["ccn.chrY"], - s=10, - marker=".", - color="lightslategray", - ) - plot_fit_line(ax2, mf["hli_calc_age_sample_taken"], mf["ccn.chrY"]) - ax2.set_ylim(0.8, 1.1) - ax2.set_ylabel("ChrY copy number") - 
ax2.set_title("ChrY copy number in Male") - - ax3.scatter( - df["hli_calc_age_sample_taken"], - df["ccn.chr1"], - s=10, - marker=".", - color="lightslategray", - ) - plot_fit_line(ax3, df["hli_calc_age_sample_taken"], df["ccn.chr1"]) - ax3.set_ylim(1.8, 2.1) - ax3.set_ylabel("Chr1 copy number") - ax3.set_title("Chr1 copy number") - - ax4.scatter( - df["hli_calc_age_sample_taken"], - df["ccn.chrM"], - s=10, - marker=".", - color="lightslategray", - ) - plot_fit_line(ax4, df["hli_calc_age_sample_taken"], df["ccn.chrM"]) - ax4.set_ylim(0, 400) - ax4.set_ylabel("Mitochondria copy number") - ax4.set_title("Mitochondria copy number") - - from matplotlib.lines import Line2D - - for ax in (ax1, ax2, ax3, ax4): - ax.set_xlabel(age_label) - - plt.tight_layout() - root = fig.add_axes((0, 0, 1, 1)) - labels = ((0.02, 0.98, "A"), (0.52, 0.98, "B"), (0.02, 0.5, "C"), (0.52, 0.5, "D")) - panel_labels(root, labels) - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - -def ccn(args): - """ - %prog ccn combined.tsv - - Plot several ccn plots including chr1,chrX,chrY,chrM - """ - p = OptionParser(ccn.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="12x8") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (tsvfile,) = args - df = pd.read_csv(tsvfile, sep="\t") - composite_ccn(df, size=(iopts.w, iopts.h)) - outfile = tsvfile.rsplit(".", 1)[0] + ".ccn.pdf" - savefig(outfile) - - -def regression(args): - """ - %prog regression postgenomic-s.tsv - - Plot chronological vs. predicted age. 
- """ - p = OptionParser(regression.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="8x8") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (tsvfile,) = args - df = pd.read_csv(tsvfile, sep="\t") - chrono = "Chronological age (yr)" - pred = "Predicted age (yr)" - resdf = pd.DataFrame( - {chrono: df["hli_calc_age_sample_taken"], pred: df["Predicted Age"]} - ) - g = sns.jointplot( - chrono, pred, resdf, joint_kws={"s": 6}, xlim=(0, 100), ylim=(0, 80) - ) - g.fig.set_figwidth(iopts.w) - g.fig.set_figheight(iopts.h) - outfile = tsvfile.rsplit(".", 1)[0] + ".regression.pdf" - savefig(outfile) - - -def composite_correlation(df, size=(12, 8)): - """Plot composite correlation figure""" - fig = plt.figure(1, size) - ax1 = plt.subplot2grid((2, 2), (0, 0)) - ax2 = plt.subplot2grid((2, 2), (0, 1)) - ax3 = plt.subplot2grid((2, 2), (1, 0)) - ax4 = plt.subplot2grid((2, 2), (1, 1)) - chemistry = ["V1", "V2", "V2.5", float("nan")] - colors = sns.color_palette("Set2", 8) - color_map = dict(zip(chemistry, colors)) - - age_label = "Chronological age (yr)" - ax1.scatter( - df["hli_calc_age_sample_taken"], - df["teloLength"], - s=10, - marker=".", - color=df["Chemistry"].map(color_map), - ) - ax1.set_ylim(0, 15) - ax1.set_ylabel("Telomere length (Kb)") - - ax2.scatter( - df["hli_calc_age_sample_taken"], - df["ccn.chrX"], - s=10, - marker=".", - color=df["Chemistry"].map(color_map), - ) - ax2.set_ylim(1.8, 2.1) - ax2.set_ylabel("ChrX copy number") - - ax4.scatter( - df["hli_calc_age_sample_taken"], - df["ccn.chrY"], - s=10, - marker=".", - color=df["Chemistry"].map(color_map), - ) - ax4.set_ylim(0.8, 1.1) - ax4.set_ylabel("ChrY copy number") - - ax3.scatter( - df["hli_calc_age_sample_taken"], - df["TRA.PPM"], - s=10, - marker=".", - color=df["Chemistry"].map(color_map), - ) - ax3.set_ylim(0, 250) - ax3.set_ylabel("$TCR-\\alpha$ deletions (count per million reads)") - - from matplotlib.lines import Line2D - - legend_elements = [ - Line2D( - [0], - [0], - 
marker=".", - color="w", - label=chem, - markerfacecolor=color, - markersize=16, - ) - for (chem, color) in zip(chemistry, colors)[:3] - ] - for ax in (ax1, ax2, ax3, ax4): - ax.set_xlabel(age_label) - ax.legend(handles=legend_elements, loc="upper right") - - plt.tight_layout() - root = fig.add_axes((0, 0, 1, 1)) - labels = ((0.02, 0.98, "A"), (0.52, 0.98, "B"), (0.02, 0.5, "C"), (0.52, 0.5, "D")) - panel_labels(root, labels) - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - -def correlation(args): - """ - %prog correlation postgenomic-s.tsv - - Plot correlation of age vs. postgenomic features. - """ - p = OptionParser(correlation.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="12x8") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (tsvfile,) = args - df = pd.read_csv(tsvfile, sep="\t") - composite_correlation(df, size=(iopts.w, iopts.h)) - outfile = tsvfile.rsplit(".", 1)[0] + ".correlation.pdf" - savefig(outfile) - - -def composite_qc(df_orig, size=(16, 12)): - """Plot composite QC figures""" - df = df_orig.rename( - columns={ - "hli_calc_age_sample_taken": "Age", - "hli_calc_gender": "Gender", - "eth7_max": "Ethnicity", - "MeanCoverage": "Mean coverage", - "Chemistry": "Sequencing chemistry", - "Release Client": "Cohort", - } - ) - - fig = plt.figure(1, size) - ax1 = plt.subplot2grid((2, 7), (0, 0), rowspan=1, colspan=2) - ax2 = plt.subplot2grid((2, 7), (0, 2), rowspan=1, colspan=2) - ax3 = plt.subplot2grid((2, 7), (0, 4), rowspan=1, colspan=3) - ax4 = plt.subplot2grid((2, 7), (1, 0), rowspan=1, colspan=2) - ax5 = plt.subplot2grid((2, 7), (1, 2), rowspan=1, colspan=2) - ax6 = plt.subplot2grid((2, 7), (1, 4), rowspan=1, colspan=3) - - sns.distplot(df["Age"].dropna(), kde=False, ax=ax1) - sns.countplot(x="Gender", data=df, ax=ax2) - sns.countplot( - x="Ethnicity", data=df, ax=ax3, order=df["Ethnicity"].value_counts().index - ) - sns.distplot(df["Mean coverage"].dropna(), kde=False, ax=ax4) - ax4.set_xlim(0, 100) - 
sns.countplot(x="Sequencing chemistry", data=df, ax=ax5) - sns.countplot(x="Cohort", data=df, ax=ax6, order=df["Cohort"].value_counts().index) - # Anonymize the cohorts - cohorts = ax6.get_xticklabels() - newCohorts = [] - for i, c in enumerate(cohorts): - if c.get_text() == "Spector": - c = "TwinsUK" - elif c.get_text() != "Health Nucleus": - c = "C{}".format(i + 1) - newCohorts.append(c) - ax6.set_xticklabels(newCohorts) - - for ax in (ax6,): - ax.set_xticklabels(ax.get_xticklabels(), ha="right", rotation=30) - - for ax in (ax1, ax2, ax3, ax4, ax5, ax6): - ax.set_title(ax.get_xlabel()) - ax.set_xlabel("") - - plt.tight_layout() - - root = fig.add_axes((0, 0, 1, 1)) - labels = ( - (0.02, 0.96, "A"), - (0.3, 0.96, "B"), - (0.6, 0.96, "C"), - (0.02, 0.52, "D"), - (0.3, 0.52, "E"), - (0.6, 0.52, "F"), - ) - panel_labels(root, labels) - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - -def qc(args): - """ - %prog qc postgenomic-s.tsv - - Plot basic statistics of a given sample: - Age, Gender, Ethnicity, Cohort, Chemistry - """ - p = OptionParser(heritability.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="10x6") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (tsvfile,) = args - df = pd.read_csv(tsvfile, sep="\t") - composite_qc(df, size=(iopts.w, iopts.h)) - outfile = tsvfile.rsplit(".", 1)[0] + ".qc.pdf" - savefig(outfile) - - -def extract_trait(df, id_field, trait_field): - traits = {} - # Get the gender information for filtering DZ twins - for i, row in df.iterrows(): - sample_id = str(row[id_field]) - traits[sample_id] = row[trait_field] - return traits - - -def filter_same_gender(pairs, gender): - notPresent = 0 - diffGender = 0 - # Need to screen same gender dizygotic twins - for a, b in pairs: - if not (a in gender and b in gender): - notPresent += 1 - continue - if gender[a] != gender[b]: - diffGender += 1 - continue - yield a, b, gender[a] - print(notPresent, "not found") - print(diffGender, "different gender") 
- - -def extract_twin_values(triples, traits, gender=None): - """Calculate the heritability of certain traits in triplets. - - Parameters - ========== - triples: (a, b, "Female/Male") triples. The sample IDs are then used to query - the traits dictionary. - traits: sample_id => value dictionary - gender: - - Returns - ======= - tuples of size 2, that contain paired trait values of the twins - """ - # Construct the pairs of trait values - traitValuesAbsent = 0 - nanValues = 0 - genderSkipped = 0 - twinValues = [] - for a, b, t in triples: - if gender is not None and t != gender: - genderSkipped += 1 - continue - if not (a in traits and b in traits): - traitValuesAbsent += 1 - continue - if np.isnan(traits[a]) or np.isnan(traits[b]): - nanValues += 1 - continue - twinValues.append((traits[a], traits[b])) - - print( - "A total of {} pairs extracted ({} absent; {} nan; {} genderSkipped)".format( - len(twinValues), traitValuesAbsent, nanValues, genderSkipped - ) - ) - return twinValues - - -def plot_paired_values( - ax, - mzValues, - dzValues, - label=None, - gender=None, - palette=sns.color_palette("PRGn", 10), -): - from scipy.stats import pearsonr - - mzx, mzy = zip(*mzValues) - dzx, dzy = zip(*dzValues) - (mzline,) = ax.plot(mzx, mzy, ".", color=palette[0], alpha=0.75) - (dzline,) = ax.plot(dzx, dzy, ".", color=palette[-1], alpha=0.75) - ax.set_xlabel(label + r" in twin \#1") - ax.set_ylabel(label + r" in twin \#2") - ax.legend( - (mzline, dzline), - ( - "Monozygotic twins ($N$={}{})".format( - len(mzValues), ((" " + gender) if gender else "") - ), - "Dizygotic twins ($N$={}{})".format( - len(dzValues), (" " + gender) if gender else "" - ), - ), - loc="upper left", - ) - rho_mz, p_mz = pearsonr(mzx, mzy) - rho_dz, p_dz = pearsonr(dzx, dzy) - heritability = 2 * (rho_mz - rho_dz) - ax.set_title( - "{} ($\\rho_{{MZ}}$={:.2f}, $\\rho_{{DZ}}$={:.2f}, $heritability$={:.2f})".format( - label, rho_mz, rho_dz, heritability - ) - ) - - -def plot_abs_diff(ax, mzValues, 
dzValues, label=None, palette="PRGn"): - # Let's visualize the feature differences using boxplot - mzDelta = [abs(x - y) for (x, y) in mzValues] - dzDelta = [abs(x - y) for (x, y) in dzValues] - x = ["MZ twins"] * len(mzDelta) + ["DZ twins"] * len(dzDelta) - y = mzDelta + dzDelta - sns.boxplot(x, y, palette=palette, ax=ax) - ax.set_ylabel("Absolute difference in {}".format(label)) - - -def filter_low_values(data, cutoff): - newData = [(a, b) for a, b in data if a > cutoff and b > cutoff] - print("Removed {} outliers (<= {})".format(len(data) - len(newData), cutoff)) - return newData - - -def composite(df, sameGenderMZ, sameGenderDZ, size=(16, 24)): - """Embed both absdiff figures and heritability figures.""" - fig = plt.figure(1, size) - - ax1a = plt.subplot2grid((6, 4), (0, 0), rowspan=2, colspan=1) - ax2a = plt.subplot2grid((6, 4), (0, 1), rowspan=2, colspan=1) - ax3a = plt.subplot2grid((6, 4), (0, 2), rowspan=2, colspan=1) - ax4a = plt.subplot2grid((6, 4), (0, 3), rowspan=2, colspan=1) - ax1b = plt.subplot2grid((6, 4), (2, 0), rowspan=2, colspan=2) - ax2b = plt.subplot2grid((6, 4), (2, 2), rowspan=2, colspan=2) - ax3b = plt.subplot2grid((6, 4), (4, 0), rowspan=2, colspan=2) - ax4b = plt.subplot2grid((6, 4), (4, 2), rowspan=2, colspan=2) - - # Telomeres - telomeres = extract_trait(df, "Sample name", "telomeres.Length") - mzTelomeres = extract_twin_values(sameGenderMZ, telomeres) - dzTelomeres = extract_twin_values(sameGenderDZ, telomeres) - plot_paired_values(ax1b, mzTelomeres, dzTelomeres, label="Telomere length") - plot_abs_diff(ax1a, mzTelomeres, dzTelomeres, label="Telomere length") - - # CCNX - CCNX = extract_trait(df, "Sample name", "ccn.chrX") - mzCCNX = extract_twin_values(sameGenderMZ, CCNX, gender="Female") - dzCCNX = extract_twin_values(sameGenderDZ, CCNX, gender="Female") - dzCCNX = filter_low_values(dzCCNX, 1.75) - plot_paired_values( - ax2b, mzCCNX, dzCCNX, gender="Female only", label="ChrX copy number" - ) - plot_abs_diff(ax2a, mzCCNX, dzCCNX, 
label="ChrX copy number") - - # CCNY - CCNY = extract_trait(df, "Sample name", "ccn.chrY") - mzCCNY = extract_twin_values(sameGenderMZ, CCNY, gender="Male") - dzCCNY = extract_twin_values(sameGenderDZ, CCNY, gender="Male") - dzCCNY = filter_low_values(dzCCNY, 0.75) - - plot_paired_values( - ax3b, mzCCNY, dzCCNY, gender="Male only", label="ChrY copy number" - ) - plot_abs_diff(ax3a, mzCCNY, dzCCNY, label="ChrY copy number") - - # CCNY - TRA = extract_trait(df, "Sample name", "TRA.PPM") - mzTRA = extract_twin_values(sameGenderMZ, TRA) - dzTRA = extract_twin_values(sameGenderDZ, TRA) - plot_paired_values(ax4b, mzTRA, dzTRA, label="TCR-$\\alpha$ deletions") - plot_abs_diff(ax4a, mzTRA, dzTRA, label="TCR-$\\alpha$ deletions") - - plt.tight_layout() - - root = fig.add_axes((0, 0, 1, 1)) - # ABCD absdiff, EFGH heritability - labels = ( - (0.03, 0.99, "A"), - (0.27, 0.99, "B"), - (0.53, 0.99, "C"), - (0.77, 0.99, "D"), - (0.03, 0.67, "E"), - (0.53, 0.67, "F"), - (0.03, 0.34, "G"), - (0.53, 0.34, "H"), - ) - panel_labels(root, labels) - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - -def heritability(args): - """ - %prog pg.tsv MZ-twins.csv DZ-twins.csv - - Plot composite figures ABCD on absolute difference of 4 traits, - EFGH on heritability of 4 traits. 
The 4 traits are: - telomere length, ccn.chrX, ccn.chrY, TRA.PPM - """ - p = OptionParser(heritability.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="12x18") - - if len(args) != 3: - sys.exit(not p.print_help()) - - combined, mz, dz = args - - # Prepare twins data - def get_pairs(filename): - with open(filename) as fp: - for row in fp: - yield row.strip().split(",") - - MZ = list(get_pairs(mz)) - DZ = list(get_pairs(dz)) - - print(len(MZ), "monozygotic twins") - print(len(DZ), "dizygotic twins") - - df = pd.read_csv(combined, sep="\t", index_col=0) - df["Sample name"] = np.array(df["Sample name"], dtype=np.str) - gender = extract_trait(df, "Sample name", "hli_calc_gender") - sameGenderMZ = list(filter_same_gender(MZ, gender)) - sameGenderDZ = list(filter_same_gender(DZ, gender)) - - composite(df, sameGenderMZ, sameGenderDZ, size=(iopts.w, iopts.h)) - savefig("heritability.pdf") - - -def compile(args): - """ - %prog compile directory - - Extract telomere length and ccn. 
- """ - p = OptionParser(compile.__doc__) - p.set_outfile(outfile="age.tsv") - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - dfs = [] - for folder in args: - ofolder = os.listdir(folder) - - # telomeres - subdir = [x for x in ofolder if x.startswith("telomeres")][0] - subdir = op.join(folder, subdir) - filename = op.join(subdir, "tel_lengths.txt") - df = pd.read_csv(filename, sep="\t") - d1 = df.ix[0].to_dict() - - # ccn - subdir = [x for x in ofolder if x.startswith("ccn")][0] - subdir = op.join(folder, subdir) - filename = iglob(subdir, "*.ccn.json")[0] - js = json.load(open(filename)) - d1.update(js) - df = pd.DataFrame(d1, index=[0]) - dfs.append(df) - - df = pd.concat(dfs, ignore_index=True) - df.to_csv(opts.outfile, sep="\t", index=False) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/allmaps.py b/jcvi/projects/allmaps.py deleted file mode 100644 index edf714c3..00000000 --- a/jcvi/projects/allmaps.py +++ /dev/null @@ -1,532 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Scripts for the ALLMAPS manuscript -""" -import sys -import numpy as np - -from ..apps.base import OptionParser, ActionDispatcher -from ..assembly.allmaps import AGP, GapEstimator, Map, normalize_lms_axis, spearmanr -from ..formats.bed import Bed -from ..graphics.base import ( - latex, - normalize_axes, - panel_labels, - plt, - savefig, - set_ticklabels_helvetica, - set2, -) -from ..graphics.chromosome import HorizontalChromosome -from ..utils.cbook import percentage - - -def main(): - - actions = ( - ("lms", "ALLMAPS cartoon to illustrate LMS metric"), - ("estimategaps", "illustrate ALLMAPS gap estimation algorithm"), - ("simulation", "plot ALLMAPS accuracy across a range of simulated data"), - ("comparebed", "compare the scaffold links indicated in two bed files"), - ("resamplestats", "prepare resample results table"), - ("resample", "plot ALLMAPS performance across resampled real data"), - ) - p = 
ActionDispatcher(actions) - p.dispatch(globals()) - - -def resample(args): - """ - %prog resample yellow-catfish-resample.txt medicago-resample.txt - - Plot ALLMAPS performance across resampled real data. - """ - p = OptionParser(resample.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="8x4", dpi=300) - - if len(args) != 2: - sys.exit(not p.print_help()) - - dataA, dataB = args - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - A = fig.add_axes([0.1, 0.18, 0.32, 0.64]) - B = fig.add_axes([0.6, 0.18, 0.32, 0.64]) - dataA = import_data(dataA) - dataB = import_data(dataB) - xlabel = "Fraction of markers" - ylabels = ("Anchor rate", "Runtime (m)") - legend = ("anchor rate", "runtime") - subplot_twinx(A, dataA, xlabel, ylabels, title="Yellow catfish", legend=legend) - subplot_twinx(B, dataB, xlabel, ylabels, title="Medicago", legend=legend) - - labels = ((0.04, 0.92, "A"), (0.54, 0.92, "B")) - panel_labels(root, labels) - - normalize_axes(root) - image_name = "resample." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def resamplestats(args): - """ - %prog resamplestats prefix run.log - - Prepare resample results table. Ten subsets of original data were generated - and ALLMAPS were iterated through them, creating `run.log` which contains the - timing results. The anchor rate can be found in `prefix.0.{1-10}.summary.txt`. 
- """ - p = OptionParser(resamplestats.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - pf, runlog = args - fp = open(runlog) - Real = "real" - times = [] - for row in fp: - # real 10m31.513s - if not row.startswith(Real): - continue - tag, time = row.split() - assert tag == Real - m, s = time.split("m") - s = s.rstrip("s") - m, s = float(m), float(s) - time = m + s / 60 - times.append(time) - - N = len(times) - - rates = [] - for i in range(-N + 1, 1, 1): - summaryfile = "{0}.{1}.summary.txt".format(pf, 2**i) - fp = open(summaryfile) - lines = fp.readlines() - # Total bases 580,791,244 (80.8%) 138,298,666 (19.2%) - pct = float(lines[-2].split()[3].strip("()%")) - rates.append(pct / 100.0) - - assert len(rates) == N - - print("ratio\tanchor-rate\ttime(m)") - for j, i in enumerate(range(-N + 1, 1, 1)): - print("{0}\t{1:.3f}\t{2:.3f}".format(i, rates[j], times[j])) - - -def query_links(abed, bbed): - abedlinks = abed.links - bbedlinks = bbed.links - # Reverse complement bbedlinks - bxbedlinks = bbedlinks[:] - for (a, ai), (b, bi) in bbedlinks: - ai = {"+": "-", "?": "-", "-": "+"}[ai] - bi = {"+": "-", "?": "-", "-": "+"}[bi] - bxbedlinks.append(((b, bi), (a, ai))) - - atotal = len(abedlinks) - print("Total links in {0}: {1}".format(abed.filename, atotal), file=sys.stderr) - recovered = set(abedlinks) & set(bxbedlinks) - print("Recovered {0}".format(percentage(len(recovered), atotal)), file=sys.stderr) - print(set(abedlinks) - set(bxbedlinks), file=sys.stderr) - - -def comparebed(args): - """ - %prog comparebed AP.chr.bed infer.bed - - Compare the scaffold links indicated in two bed files. 
- """ - p = OptionParser(comparebed.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - abed, bbed = args - abed = Bed(abed) - bbed = Bed(bbed) - query_links(abed, bbed) - query_links(bbed, abed) - - -def estimategaps(args): - """ - %prog estimategaps JM-4 chr1 JMMale-1 - - Illustrate ALLMAPS gap estimation algorithm. - """ - p = OptionParser(estimategaps.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300) - - if len(args) != 3: - sys.exit(not p.print_help()) - - pf, seqid, mlg = args - bedfile = pf + ".lifted.bed" - agpfile = pf + ".agp" - - function = lambda x: x.cm - cc = Map(bedfile, scaffold_info=True, function=function) - agp = AGP(agpfile) - - g = GapEstimator(cc, agp, seqid, mlg, function=function) - pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize - spl, spld = g.spl, g.spld - g.compute_all_gaps(verbose=False) - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - # Panel A - xstart, ystart = 0.15, 0.65 - w, h = 0.7, 0.3 - t = np.linspace(0, chrsize, 1000) - ax = fig.add_axes([xstart, ystart, w, h]) - mx, my = zip(*g.scatter_data) - rho = spearmanr(mx, my) - - dsg = "g" - ax.vlines(pp, 0, mlgsize, colors="beige") - ax.plot(mx, my, ".", color=set2[3]) - ax.plot(t, spl(t), "-", color=dsg) - ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes) - normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize, ylabel="Genetic distance (cM)") - if rho < 0: - ax.invert_yaxis() - - # Panel B - ystart -= 0.28 - h = 0.25 - ax = fig.add_axes([xstart, ystart, w, h]) - ax.vlines(pp, 0, mlgsize, colors="beige") - ax.plot(t, spld(t), "-", lw=2, color=dsg) - ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5) - normalize_lms_axis( - ax, - xlim=chrsize, - ylim=25 * 1e-6, - xfactor=1e-6, - xlabel="Physical position (Mb)", - yfactor=1000000, - ylabel="Recomb. 
rate\n(cM / Mb)", - ) - ax.xaxis.grid(False) - - # Panel C (specific to JMMale-1) - a, b = "scaffold_1076", "scaffold_861" - sizes = dict( - (x.component_id, (x.object_beg, x.object_end, x.component_span, x.orientation)) - for x in g.agp - if not x.is_gap - ) - a_beg, a_end, asize, ao = sizes[a] - b_beg, b_end, bsize, bo = sizes[b] - gapsize = g.get_gapsize(a) - total_size = asize + gapsize + bsize - ratio = 0.6 / total_size - y = 0.16 - pad = 0.03 - pb_ratio = w / chrsize - - # Zoom - lsg = "lightslategray" - root.plot((0.15 + pb_ratio * a_beg, 0.2), (ystart, ystart - 0.14), ":", color=lsg) - root.plot((0.15 + pb_ratio * b_end, 0.3), (ystart, ystart - 0.08), ":", color=lsg) - ends = [] - for tag, size, marker, beg in zip( - (a, b), (asize, bsize), (49213, 81277), (0.2, 0.2 + (asize + gapsize) * ratio) - ): - end = beg + size * ratio - marker = beg + marker * ratio - ends.append((beg, end, marker)) - root.plot((marker,), (y,), "o", color=lsg) - root.text((beg + end) / 2, y + pad, latex(tag), ha="center", va="center") - HorizontalChromosome(root, beg, end, y, height=0.025, fc="gainsboro") - - begs, ends, markers = zip(*ends) - fontprop = dict(color=lsg, ha="center", va="center") - ypos = y + pad * 2 - root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg) - root.text( - sum(markers) / 2, - ypos + pad, - "Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)", - **fontprop - ) - - ypos = y - pad - xx = markers[0], ends[0] - root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg) - root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop) - xx = markers[1], begs[1] - root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg) - root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop) - - root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg) - root.text( - sum(markers) / 2, - ypos - 3 * pad, - r"$\textit{Estimated gap size: 96,433bp}$", - color="r", - ha="center", - va="center", - ) - - labels = ((0.05, 0.95, "A"), (0.05, 0.6, "B"), (0.05, 0.27, "C")) - panel_labels(root, 
labels) - normalize_axes(root) - - pf = "estimategaps" - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def lms(args): - """ - %prog lms - - ALLMAPS cartoon to illustrate LMS metric. - """ - from random import randint - from jcvi.graphics.chromosome import HorizontalChromosome - - p = OptionParser(lms.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300) - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - # Panel A - w, h = 0.7, 0.35 - ax = fig.add_axes([0.15, 0.6, w, h]) - - xdata = [x + randint(-3, 3) for x in range(10, 110, 10)] - ydata = [x + randint(-3, 3) for x in range(10, 110, 10)] - ydata[3:7] = ydata[3:7][::-1] - xydata = zip(xdata, ydata) - lis = xydata[:3] + [xydata[4]] + xydata[7:] - lds = xydata[3:7] - xlis, ylis = zip(*lis) - xlds, ylds = zip(*lds) - ax.plot( - xlis, - ylis, - "r-", - lw=12, - alpha=0.3, - solid_capstyle="round", - solid_joinstyle="round", - ) - ax.plot( - xlds, - ylds, - "g-", - lw=12, - alpha=0.3, - solid_capstyle="round", - solid_joinstyle="round", - ) - ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12) - HorizontalChromosome(root, 0.15, 0.15 + w, 0.57, height=0.02, lw=2) - root.text(0.15 + w / 2, 0.55, "Chromosome location (bp)", ha="center", va="top") - - ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center") - ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center") - ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center") - normalize_lms_axis(ax, xlim=110, ylim=110) - - # Panel B - w = 0.37 - p = (0, 45, 75, 110) - ax = fig.add_axes([0.1, 0.12, w, h]) - xdata = [x for x in range(10, 110, 10)] - ydata = ydata_orig = [x for x in range(10, 110, 10)] - ydata = ydata[:4] + ydata[7:] + ydata[4:7][::-1] - xydata = zip(xdata, ydata) - lis = xydata[:7] - xlis, ylis = zip(*lis) - ax.plot( - xlis, - ylis, - "r-", - lw=12, - alpha=0.3, - solid_capstyle="round", - solid_joinstyle="round", - ) - 
ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12) - ax.vlines(p, 0, 110, colors="beige", lw=3) - normalize_lms_axis(ax, xlim=110, ylim=110) - patch = [0.1 + w * x / 110.0 for x in p] - HorizontalChromosome(root, 0.1, 0.1 + w, 0.09, patch=patch, height=0.02, lw=2) - scaffolds = ("a", "b", "c") - for i, s in enumerate(scaffolds): - xx = (patch[i] + patch[i + 1]) / 2 - root.text(xx, 0.09, s, va="center", ha="center") - root.text(0.1 + w / 2, 0.04, "LMS($a||b||c$) = 7", ha="center") - - # Panel C - ax = fig.add_axes([0.6, 0.12, w, h]) - patch = [0.6 + w * x / 110.0 for x in p] - ydata = ydata_orig - ax.plot( - xdata, - ydata, - "r-", - lw=12, - alpha=0.3, - solid_capstyle="round", - solid_joinstyle="round", - ) - ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12) - ax.vlines(p, [0], [110], colors="beige", lw=3) - normalize_lms_axis(ax, xlim=110, ylim=110) - HorizontalChromosome(root, 0.6, 0.6 + w, 0.09, patch=patch, height=0.02, lw=2) - scaffolds = ("a", "-c", "b") - for i, s in enumerate(scaffolds): - xx = (patch[i] + patch[i + 1]) / 2 - root.text(xx, 0.09, s, va="center", ha="center") - root.text(0.6 + w / 2, 0.04, "LMS($a||-c||b$) = 10", ha="center") - - labels = ((0.05, 0.95, "A"), (0.05, 0.48, "B"), (0.55, 0.48, "C")) - panel_labels(root, labels) - - normalize_axes(root) - - pf = "lms" - image_name = pf + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def import_data(datafile): - data = [] - fp = open(datafile) - fp.readline() - for row in fp: - atoms = row.split() - atoms = [float(x) for x in atoms] - data.append(atoms) - return data - - -def subplot_twinx( - ax, - data, - xlabel, - ylabels, - title=None, - legend=None, - loc="upper left", -): - columned_data = zip(*data) - x, yy = columned_data[0], columned_data[1:] - assert len(ylabels) == 2 - assert len(yy) == 2 - lines = [] - ax2 = ax.twinx() - for a, y, m, yl in zip((ax, ax2), yy, "ox", ylabels): - (line,) = a.plot(x, y, "k:", marker=m, mec="k", mfc="w", ms=4) - lines.append(line) - a.set_ylabel(yl) - if legend: - assert len(legend) == 2 - ax.legend(lines, legend, loc=loc) - ax.set_xlabel(xlabel) - if title: - ax.set_title(title) - - ax.set_ylim(0, 1.1) - xticklabels = [ - r"$\frac{{1}}{" + str(int(2 ** -float(x))) + "}$" for x in ax.get_xticks() - ] - xticklabels[-1] = r"$1$" - yticklabels = [float(x) for x in ax.get_yticks()] - ax.set_xticklabels(xticklabels) - ax.set_yticklabels(yticklabels, family="Helvetica") - - yb = ax2.get_ybound()[1] - yb = yb // 5 * 5 # make integer interval - ax2.set_yticks(np.arange(0, 1.1 * yb, yb / 5)) - ax2.set_ylim(0, 1.1 * yb) - yticklabels = [int(x) for x in ax2.get_yticks()] - ax2.set_xticklabels(xticklabels) - ax2.set_yticklabels(yticklabels, family="Helvetica") - ax2.grid(False) - - -def subplot( - ax, data, xlabel, ylabel, xlim=None, ylim=1.1, xcast=float, ycast=float, legend=None -): - columned_data = zip(*data) - x, yy = columned_data[0], columned_data[1:] - lines = [] - for y, m in zip(yy, "o^x"): - (line,) = ax.plot(x, y, "k:", marker=m, mec="k", mfc="w", ms=4) - lines.append(line) - if legend: - assert len(lines) == len(legend) - ax.legend(lines, legend, loc="best") - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - if xlim: - ax.set_xlim(0, xlim) - if ylim: - ax.set_ylim(0, ylim) - set_ticklabels_helvetica(ax, xcast=xcast, ycast=ycast) - - -def 
simulation(args): - """ - %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt - - Plot ALLMAPS accuracy across a range of simulated datasets. - """ - p = OptionParser(simulation.__doc__) - opts, args, iopts = p.set_image_options(args, dpi=300) - - if len(args) != 4: - sys.exit(not p.print_help()) - - dataA, dataB, dataC, dataD = args - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - A = fig.add_axes([0.12, 0.62, 0.35, 0.35]) - B = fig.add_axes([0.62, 0.62, 0.35, 0.35]) - C = fig.add_axes([0.12, 0.12, 0.35, 0.35]) - D = fig.add_axes([0.62, 0.12, 0.35, 0.35]) - dataA = import_data(dataA) - dataB = import_data(dataB) - dataC = import_data(dataC) - dataD = import_data(dataD) - subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=0.5) - subplot( - B, - dataB, - "Translocation error rate", - "Accuracy", - xlim=0.5, - legend=("intra-chromosomal", "inter-chromosomal", r"75\% intra + 25\% inter"), - ) - subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int) - subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int) - - labels = ( - (0.03, 0.97, "A"), - (0.53, 0.97, "B"), - (0.03, 0.47, "C"), - (0.53, 0.47, "D"), - ) - panel_labels(root, labels) - - normalize_axes(root) - image_name = "simulation." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/bites.py b/jcvi/projects/bites.py deleted file mode 100644 index 176e9686..00000000 --- a/jcvi/projects/bites.py +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Scripts for the Brapa bites paper - -Tang et al. (2012) Altered Patterns of Fractionation and Exon Deletions in -Brassica rapa Support a Two-Step Model of Paleohexaploidy. Genetics. 
- -""" -from more_itertools import pairwise - -from ..apps.base import ActionDispatcher, OptionParser, fname -from ..graphics.base import CirclePolygon, Polygon, Rectangle, plt, savefig -from ..graphics.chromosome import Chromosome -from ..graphics.glyph import RoundLabel, TextCircle, arrowprops - - -def main(): - - actions = ( - ("excision", "show intra-chromosomal recombination"), - ("bites", "show the bites calling pipeline"), - ("scenario", "show step-wise genome merger events in brapa"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def excision(args): - """ - %prog excision - - Illustrate the mechanism of illegitimate recombination. - """ - p = OptionParser(__doc__) - opts, args = p.parse_args(args) - - fig = plt.figure(1, (5, 5)) - root = fig.add_axes([0, 0, 1, 1]) - - plt.plot((0.2, 0.8), (0.6, 0.6), "r-", lw=3) - plt.plot((0.4, 0.6), (0.6, 0.6), "b>-", mfc="g", mec="w", ms=12, lw=3) - plt.plot((0.3, 0.7), (0.5, 0.5), "r-", lw=3) - plt.plot((0.5,), (0.5,), "b>-", mfc="g", mec="w", ms=12, lw=3) - - # Circle excision - plt.plot((0.5,), (0.45,), "b>-", mfc="g", mec="w", ms=12, lw=3) - circle = CirclePolygon((0.5, 0.4), 0.05, fill=False, lw=3, ec="b") - root.add_patch(circle) - - arrow_dist = 0.07 - ar_xpos, ar_ypos = 0.5, 0.52 - root.annotate( - " ", (ar_xpos, ar_ypos), (ar_xpos, ar_ypos + arrow_dist), arrowprops=arrowprops - ) - - RoundLabel(root, 0.2, 0.64, "Gene") - RoundLabel(root, 0.3, 0.54, "Excision") - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - figname = fname() + ".pdf" - savefig(figname, dpi=300) - - -def bites(args): - """ - %prog bites - - Illustrate the pipeline for automated bite discovery. 
- """ - - p = OptionParser(__doc__) - opts, args = p.parse_args() - - fig = plt.figure(1, (6, 6)) - root = fig.add_axes([0, 0, 1, 1]) - - # HSP pairs - hsps = ( - ((50, 150), (60, 180)), - ((190, 250), (160, 235)), - ((300, 360), (270, 330)), - ((430, 470), (450, 490)), - ((570, 620), (493, 543)), - ((540, 555), (370, 385)), # non-collinear hsps - ) - - titlepos = (0.9, 0.65, 0.4) - titles = ("Compare orthologous region", "Find collinear HSPs", "Scan paired gaps") - ytip = 0.01 - mrange = 650.0 - m = lambda x: x / mrange * 0.7 + 0.1 - for i, (ya, title) in enumerate(zip(titlepos, titles)): - yb = ya - 0.1 - plt.plot((0.1, 0.8), (ya, ya), "-", color="gray", lw=2, zorder=1) - plt.plot((0.1, 0.8), (yb, yb), "-", color="gray", lw=2, zorder=1) - RoundLabel(root, 0.5, ya + 4 * ytip, title) - root.text(0.9, ya, "A. thaliana", ha="center", va="center") - root.text(0.9, yb, "B. rapa", ha="center", va="center") - myhsps = hsps - if i >= 1: - myhsps = hsps[:-1] - for (a, b), (c, d) in myhsps: - a, b, c, d = [m(x) for x in (a, b, c, d)] - r1 = Rectangle((a, ya - ytip), b - a, 2 * ytip, fc="r", lw=0, zorder=2) - r2 = Rectangle((c, yb - ytip), d - c, 2 * ytip, fc="r", lw=0, zorder=2) - r3 = Rectangle((a, ya - ytip), b - a, 2 * ytip, fill=False, zorder=3) - r4 = Rectangle((c, yb - ytip), d - c, 2 * ytip, fill=False, zorder=3) - r5 = Polygon( - ((a, ya - ytip), (c, yb + ytip), (d, yb + ytip), (b, ya - ytip)), - fc="r", - alpha=0.2, - ) - rr = (r1, r2, r3, r4, r5) - if i == 2: - rr = rr[:-1] - for r in rr: - root.add_patch(r) - - # Gap pairs - hspa, hspb = zip(*myhsps) - gapa, gapb = [], [] - for (a, b), (c, d) in pairwise(hspa): - gapa.append((b + 1, c - 1)) - for (a, b), (c, d) in pairwise(hspb): - gapb.append((b + 1, c - 1)) - gaps = zip(gapa, gapb) - tpos = titlepos[-1] - - yy = tpos - 0.05 - for i, ((a, b), (c, d)) in enumerate(gaps): - i += 1 - a, b, c, d = [m(x) for x in (a, b, c, d)] - xx = (a + b + c + d) / 4 - TextCircle(root, xx, yy, str(i)) - - # Bites - ystart = 0.24 - 
ytip = 0.05 - bites = ( - ("Bite(40=>-15)", True), - ("Bite(50=>35)", False), - ("Bite(70=>120)", False), - ("Bite(100=>3)", True), - ) - for i, (bite, selected) in enumerate(bites): - xx = 0.15 if (i % 2 == 0) else 0.55 - yy = ystart - i / 2 * ytip - i += 1 - TextCircle(root, xx, yy, str(i)) - color = "k" if selected else "gray" - root.text(xx + ytip, yy, bite, size=10, color=color, va="center") - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - figname = fname() + ".pdf" - savefig(figname, dpi=300) - - -def scenario(args): - """ - %prog scenario - - Illustration of the two-step genome merger process for B. rapa companion paper. - """ - p = OptionParser(__doc__) - p.parse_args() - - fig = plt.figure(1, (5, 5)) - root = fig.add_axes([0, 0, 1, 1]) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - # Layout format: (x, y, label, (chr lengths)) - anc = (0.5, 0.9, "Ancestor", (1,)) - s1 = (0.2, 0.6, "Genome I", (1,)) - s2 = (0.5, 0.6, "Genome II", (1,)) - s3 = (0.8, 0.6, "Genome III", (1,)) - tetra = (0.35, 0.4, "Tetraploid I / II", (0.5, 0.9)) - hexa = (0.5, 0.1, "Hexaploid I / II / III", (0.36, 0.46, 0.9)) - labels = (anc, s1, s2, s3, tetra, hexa) - connections = ( - (anc, s1), - (anc, s2), - (anc, s3), - (s1, tetra), - (s2, tetra), - (tetra, hexa), - (s3, hexa), - ) - - xinterval = 0.02 - yratio = 0.05 - for xx, yy, label, chrl in labels: - # RoundLabel(root, xx, yy, label) - root.text(xx, yy, label, ha="center", va="center") - offset = len(label) * 0.012 - for i, c in enumerate(chrl): - ya = yy + yratio * c - yb = yy - yratio * c - Chromosome(root, xx - offset + i * xinterval, ya, yb, width=0.01) - - # Comments - comments = ((0.15, 0.33, "II dominant"), (0.25, 0.03, "III dominant")) - - for xx, yy, c in comments: - root.text(xx, yy, c, size=9, ha="center", va="center") - - # Branches - tip = 0.04 - for a, b in connections: - xa, ya, la, chra = a - xb, yb, lb, chrb = b - plt.plot((xa, xb), (ya - tip, yb + 2 * tip), "k-", 
lw=2, alpha=0.5) - - figname = fname() + ".pdf" - savefig(figname, dpi=300) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/ies.py b/jcvi/projects/ies.py deleted file mode 100644 index 6c9ad7ed..00000000 --- a/jcvi/projects/ies.py +++ /dev/null @@ -1,426 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Locate IES sequences within MIC genome of tetrahymena. -""" -import os.path as op -import sys - -from collections import Counter -from itertools import groupby - -from ..algorithms.formula import outlier_cutoff -from ..apps.base import ActionDispatcher, OptionParser, logger, need_update, sh -from ..formats.base import must_open -from ..formats.bed import Bed, depth, mergeBed, some, sort -from ..utils.cbook import percentage -from ..utils.range import Range, range_interleave, range_chain - - -class EndPoint(object): - def __init__(self, label): - args = label.split("-") - self.label = label - self.leftright = args[0] - self.position = int(args[1]) - self.reads = int(args[2].strip("r")) - - -def main(): - - actions = ( - ("deletion", "find IES based on mapping MAC reads"), - ("insertion", "find IES excision points based on mapping MIC reads"), - ("insertionpairs", "pair up the candidate insertions"), - ("variation", "associate IES in parents and progeny"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def variation(args): - """ - %prog variation P1.bed P2.bed F1.bed - - Associate IES in parents and progeny. 
- """ - p = OptionParser(variation.__doc__) - p.add_argument( - "--diversity", - choices=("breakpoint", "variant"), - default="variant", - help="Plot diversity", - ) - opts, args, iopts = p.set_image_options(args, figsize="6x6") - - if len(args) != 3: - sys.exit(not p.print_help()) - - pfs = [op.basename(x).split("-")[0] for x in args] - P1, P2, F1 = pfs - newbedfile = "-".join(pfs) + ".bed" - if need_update(args, newbedfile): - newbed = Bed() - for pf, filename in zip(pfs, args): - bed = Bed(filename) - for b in bed: - b.accn = "-".join((pf, b.accn)) - b.score = None - newbed.append(b) - newbed.print_to_file(newbedfile, sorted=True) - - neworder = Bed(newbedfile).order - mergedbedfile = mergeBed(newbedfile, nms=True) - bed = Bed(mergedbedfile) - valid = 0 - total_counts = Counter() - F1_counts = [] - bp_diff = [] - novelbedfile = "novel.bed" - fw = open(novelbedfile, "w") - for b in bed: - accns = b.accn.split(",") - pfs_accns = [x.split("-")[0] for x in accns] - pfs_counts = Counter(pfs_accns) - if len(pfs_counts) != 3: - print(b, file=fw) - continue - - valid += 1 - total_counts += pfs_counts - F1_counts.append(pfs_counts[F1]) - - # Collect breakpoint positions between P1 and F1 - P1_accns = [x for x in accns if x.split("-")[0] == P1] - F1_accns = [x for x in accns if x.split("-")[0] == F1] - if len(P1_accns) != 1: - continue - - ri, ref = neworder[P1_accns[0]] - P1_accns = [neworder[x][-1] for x in F1_accns] - bp_diff.extend(x.start - ref.start for x in P1_accns) - bp_diff.extend(x.end - ref.end for x in P1_accns) - - print( - "A total of {} sites show consistent deletions across samples.".format( - percentage(valid, len(bed)) - ), - file=sys.stderr, - ) - for pf, count in total_counts.items(): - print( - "{:>9}: {:.2f} deletions/site".format(pf, count * 1.0 / valid), - file=sys.stderr, - ) - - F1_counts = Counter(F1_counts) - - # Plot the IES variant number diversity - from jcvi.graphics.base import plt, savefig, set_ticklabels_helvetica - - plt.figure(1, 
(iopts.w, iopts.h)) - if opts.diversity == "variant": - left, height = zip(*sorted(F1_counts.items())) - for l, h in zip(left, height): - print("{0:>9} variants: {1}".format(l, h), file=sys.stderr) - plt.text( - l, - h + 5, - str(h), - color="darkslategray", - size=8, - ha="center", - va="bottom", - rotation=90, - ) - - plt.bar(left, height, align="center") - plt.xlabel("Identified number of IES per site") - plt.ylabel("Counts") - plt.title("IES variation in progeny pool") - ax = plt.gca() - set_ticklabels_helvetica(ax) - savefig(F1 + ".counts.pdf") - - # Plot the IES breakpoint position diversity - else: - bp_diff = Counter(bp_diff) - bp_diff_abs = Counter() - for k, v in bp_diff.items(): - bp_diff_abs[abs(k)] += v - plt.figure(1, (iopts.w, iopts.h)) - left, height = zip(*sorted(bp_diff_abs.items())) - for l, h in list(zip(left, height))[:21]: - plt.text( - l, - h + 50, - str(h), - color="darkslategray", - size=8, - ha="center", - va="bottom", - rotation=90, - ) - - plt.bar(left, height, align="center") - plt.xlabel("Progeny breakpoint relative to SB210") - plt.ylabel("Counts") - plt.xlim(-0.5, 20.5) - ax = plt.gca() - set_ticklabels_helvetica(ax) - savefig(F1 + ".breaks.pdf") - # Serialize the data to a file - fw = open("Breakpoint-offset-histogram.csv", "w") - for k, v in sorted(bp_diff.items()): - print("{0},{1}".format(k, v), file=fw) - fw.close() - - total = sum(height) - zeros = bp_diff[0] - within_20 = sum([v for i, v in bp_diff.items() if -20 <= i <= 20]) - print("No deviation: {0}".format(percentage(zeros, total)), file=sys.stderr) - print(" Within 20bp: {0}".format(percentage(within_20, total)), file=sys.stderr) - - -def insertionpairs(args): - """ - %prog insertionpairs endpoints.bed - - Pair up the candidate endpoints. A candidate exision point would contain - both left-end (LE) and right-end (RE) within a given distance. 
- - -----------| |------------ - -------| |-------- - ---------| |---------- - (RE) (LE) - """ - p = OptionParser(insertionpairs.__doc__) - p.add_argument( - "--extend", - default=10, - type=int, - help="Allow insertion sites to match up within distance", - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - mergedbedfile = mergeBed(bedfile, d=opts.extend, nms=True) - bed = Bed(mergedbedfile) - fw = must_open(opts.outfile, "w") - support = lambda x: -x.reads - for b in bed: - names = b.accn.split(",") - ends = [EndPoint(x) for x in names] - REs = sorted([x for x in ends if x.leftright == "RE"], key=support) - LEs = sorted([x for x in ends if x.leftright == "LE"], key=support) - if not (REs and LEs): - continue - mRE, mLE = REs[0], LEs[0] - pRE, pLE = mRE.position, mLE.position - if pLE < pRE: - b.start, b.end = pLE - 1, pRE - else: - b.start, b.end = pRE - 1, pLE - b.accn = "{0}|{1}".format(mRE.label, mLE.label) - b.score = pLE - pRE - 1 - print(b, file=fw) - - -def insertion(args): - """ - %prog insertion mic.mac.bed - - Find IES based on mapping MIC reads to MAC genome. Output a bedfile with - 'lesions' (stack of broken reads) in the MAC genome. 
- """ - p = OptionParser(insertion.__doc__) - p.add_argument( - "--mindepth", default=6, type=int, help="Minimum depth to call an insertion" - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bedfile,) = args - mindepth = opts.mindepth - bed = Bed(bedfile) - fw = must_open(opts.outfile, "w") - for seqid, feats in bed.sub_beds(): - left_ends = Counter([x.start for x in feats]) - right_ends = Counter([x.end for x in feats]) - selected = [] - for le, count in left_ends.items(): - if count >= mindepth: - selected.append((seqid, le, "LE-{0}".format(le), count)) - for re, count in right_ends.items(): - if count >= mindepth: - selected.append((seqid, re, "RE-{0}".format(re), count)) - selected.sort() - for seqid, pos, label, count in selected: - label = "{0}-r{1}".format(label, count) - print("\t".join((seqid, str(pos - 1), str(pos), label)), file=fw) - - -def deletion(args): - """ - %prog deletion [mac.mic.bam|mac.mic.bed] mic.gaps.bed - - Find IES based on mapping MAC reads to MIC genome. 
- """ - p = OptionParser(deletion.__doc__) - p.add_argument( - "--mindepth", default=3, type=int, help="Minimum depth to call a deletion" - ) - p.add_argument( - "--minspan", default=30, type=int, help="Minimum span to call a deletion" - ) - p.add_argument( - "--split", - default=False, - action="store_true", - help="Break at cigar N into separate parts", - ) - p.set_tmpdir() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bedfile, gapsbedfile = args - if bedfile.endswith(".bam"): - bamfile = bedfile - bedfile = bamfile.replace(".sorted.", ".").replace(".bam", ".bed") - if need_update(bamfile, bedfile): - cmd = "bamToBed -i {0}".format(bamfile) - if opts.split: - cmd += " -split" - cmd += " | cut -f1-4" - sh(cmd, outfile=bedfile) - - sort_tmpdir = "--tmpdir={0}".format(opts.tmpdir) - if bedfile.endswith(".sorted.bed"): - pf = bedfile.rsplit(".", 2)[0] - sortedbedfile = bedfile - else: - pf = bedfile.rsplit(".", 1)[0] - sortedbedfile = pf + ".sorted.bed" - if need_update(bedfile, sortedbedfile): - sort([bedfile, "-u", "--accn", sort_tmpdir]) - - # Find reads that contain multiple matches - ibedfile = pf + ".d.bed" - if need_update(sortedbedfile, ibedfile): - bed = Bed(sortedbedfile, sorted=False) - fw = open(ibedfile, "w") - logger.debug("Write deletions to `{0}`.".format(ibedfile)) - for accn, bb in groupby(bed, key=lambda x: x.accn): - bb = list(bb) - branges = [(x.seqid, x.start, x.end) for x in bb] - iranges = range_interleave(branges) - for seqid, start, end in iranges: - if end - start + 1 < opts.minspan: - continue - print( - "\t".join(str(x) for x in (seqid, start - 1, end, accn + "-d")), - file=fw, - ) - fw.close() - - # Uniqify the insertions and count occurrences - countbedfile = pf + ".uniq.bed" - if need_update(ibedfile, countbedfile): - bed = Bed(ibedfile) - fw = open(countbedfile, "w") - logger.debug("Write counts to `{0}`.".format(countbedfile)) - registry = Counter((x.seqid, x.start, x.end) for x in bed) - 
ies_id = 1 - for (seqid, start, end), count in registry.items(): - ies_name = "{0:05d}-r{1}".format(ies_id, count) - if count < opts.mindepth: - continue - print("\t".join(str(x) for x in (seqid, start - 1, end, ies_name)), file=fw) - ies_id += 1 - fw.close() - sort([countbedfile, "-i", sort_tmpdir]) - - # Remove deletions that contain some read depth - depthbedfile = pf + ".depth.bed" - if need_update((sortedbedfile, countbedfile), depthbedfile): - depth([sortedbedfile, countbedfile, "--outfile={0}".format(depthbedfile)]) - - validbedfile = pf + ".valid.bed" - if need_update(depthbedfile, validbedfile): - fw = open(validbedfile, "w") - logger.debug("Filter valid deletions to `{0}`.".format(validbedfile)) - bed = Bed(depthbedfile) - all_scores = [float(b.score) for b in bed] - lb, ub = outlier_cutoff(all_scores) - logger.debug("Bounds for depths: LB={:.2f} (ignored) UB={:.2f}".format(lb, ub)) - for b in bed: - if float(b.score) > ub: - continue - print(b, file=fw) - fw.close() - - # Remove deletions that contain sequencing gaps on its flanks - selectedbedfile = pf + ".selected.bed" - if need_update(validbedfile, selectedbedfile): - flanksbedfile = pf + ".flanks.bed" - fw = open(flanksbedfile, "w") - bed = Bed(validbedfile) - flank = 100 - logger.debug("Write deletion flanks to `{0}`.".format(flanksbedfile)) - for b in bed: - start, end = b.start, b.end - b.start, b.end = start, min(start + flank - 1, end) - print(b, file=fw) - b.start, b.end = max(start, end - flank + 1), end - print(b, file=fw) - fw.close() - - intersectidsfile = pf + ".intersect.ids" - cmd = "intersectBed -a {0} -b {1}".format(flanksbedfile, gapsbedfile) - cmd += " | cut -f4 | sort -u" - sh(cmd, outfile=intersectidsfile) - some( - [ - validbedfile, - intersectidsfile, - "-v", - "--outfile={}".format(selectedbedfile), - ] - ) - - # Find best-scoring non-overlapping set - iesbedfile = pf + ".ies.bed" - if need_update(selectedbedfile, iesbedfile): - bed = Bed(selectedbedfile) - fw = open(iesbedfile, 
"w") - logger.debug("Write IES to `{0}`.".format(iesbedfile)) - branges = [ - Range(x.seqid, x.start, x.end, int(x.accn.rsplit("r")[-1]), i) - for i, x in enumerate(bed) - ] - iranges, iscore = range_chain(branges) - logger.debug("Best chain score: {} ({} IES)".format(iscore, len(iranges))) - ies_id = 1 - for seqid, start, end, score, id in iranges: - ies_name = "IES-{0:05d}-r{1}".format(ies_id, score) - span = end - start + 1 - print( - "\t".join(str(x) for x in (seqid, start - 1, end, ies_name, span)), - file=fw, - ) - ies_id += 1 - fw.close() - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/jcvi.py b/jcvi/projects/jcvi.py deleted file mode 100644 index e89986da..00000000 --- a/jcvi/projects/jcvi.py +++ /dev/null @@ -1,335 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Functions in this script produce figures in the JCVI manuscript. -""" - -import sys - -import networkx as nx - -from ..apps.base import ActionDispatcher, OptionParser, logger -from ..assembly.geneticmap import draw_geneticmap_heatmap -from ..assembly.hic import draw_hic_heatmap -from ..assembly.kmer import draw_ks_histogram -from ..compara.pedigree import Pedigree, calculate_inbreeding -from ..compara.synteny import check_beds -from ..graphics.base import ( - cm, - load_image, - normalize_axes, - panel_labels, - plt, - savefig, - set1, - setup_theme, -) -from ..graphics.chromosome import draw_chromosomes -from ..graphics.dotplot import dotplot -from ..graphics.karyotype import Karyotype -from ..graphics.landscape import draw_heatmaps, draw_multi_depth, draw_stacks -from ..graphics.synteny import Synteny, draw_gene_legend - - -def synteny(args): - """ - %prog synteny grape.peach.anchors seqids layout blocks grape_peach.bed blocks.layout - - Plot synteny composite figure, including: - A. Synteny dotplot - B. 
Karyotype plot - """ - p = OptionParser(synteny.__doc__) - p.set_beds() - opts, args, iopts = p.set_image_options(args, figsize="14x7") - setup_theme(style="dark") - - if len(args) != 6: - sys.exit(not p.print_help()) - - anchorfile, seqidsfile, layoutfile, datafile, bedfile, blockslayoutfile = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - ax1_root = fig.add_axes((0, 0, 0.5, 1)) - ax1_canvas = fig.add_axes((0.05, 0.1, 0.4, 0.8)) # the dot plot - ax2_root = fig.add_axes((0.5, 0.5, 0.5, 0.5)) - ax3_root = fig.add_axes((0.5, 0, 0.5, 0.5)) - - # Panel A - logger.info("Plotting synteny dotplot") - qbed, sbed, _, _, is_self = check_beds(anchorfile, p, opts) - dotplot( - anchorfile, - qbed, - sbed, - fig, - ax1_root, - ax1_canvas, - is_self=is_self, - chrlw=0.5, - sepcolor=set1[3], - ) - - # Panel B - logger.info("Plotting karyotype plot") - Karyotype(ax2_root, seqidsfile, layoutfile) - - # Panel C - logger.info("Plotting synteny blocks") - Synteny(fig, ax3_root, datafile, bedfile, blockslayoutfile, pad=0.1, vpad=0.03) - draw_gene_legend(root, 0.69, 0.8, 0.34) - - labels = ((0.02, 0.95, "A"), (0.52, 0.95, "B"), (0.52, 0.45, "C")) - panel_labels(root, labels) - normalize_axes(root, ax1_root, ax2_root, ax3_root) - - image_name = "synteny.pdf" - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def diversity(args): - """ - %prog diversity pedigree.ped VAR?_srtd.wgs.regions.bed.gz - - Plot diversity composite figure, including: - A. Pedigree - B. 
Depth distribution across genomes - """ - p = OptionParser(diversity.__doc__) - _, args, iopts = p.set_image_options(args, figsize="14x7") - - if len(args) < 2: - sys.exit(not p.print_help()) - - pedfile, bedfiles = args[0], args[1:] - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - ax1_root = fig.add_axes((0, 0, 0.25, 1)) - ax2_root = fig.add_axes((0.25, 0, 0.75, 1)) - - # Panel A - logger.info("Plotting pedigree") - ped = Pedigree(pedfile) - pngfile = f"{pedfile}.png" - inb = calculate_inbreeding(ped, ploidy=4, N=10000) - - G = ped.to_graph(inb, title="Pedigree of Variety1") - A = nx.nx_agraph.to_agraph(G) - dpi = 300 - A.draw(pngfile, prog="dot", args=f"-Gdpi={dpi}") - logger.info("Pedigree graph written to `%s`", pngfile) - - # Show the image as is - ax1_root.imshow(load_image(pngfile)) - ax1_root.set_axis_off() - - # Panel B - logger.info("Plotting depth distribution across genomes") - npanels = len(bedfiles) - yinterval = 1.0 / npanels - ypos = 1 - yinterval - panel_roots, panel_axes = [], [] - for _ in range(npanels): - panel_root = fig.add_axes((0.25, ypos, 0.75, yinterval)) - panel_ax = fig.add_axes( - (0.25 + 0.1 * 0.75, ypos + 0.2 * yinterval, 0.8 * 0.75, 0.65 * yinterval) - ) - panel_roots.append(panel_root) - panel_axes.append(panel_ax) - ypos -= yinterval - - draw_multi_depth( - ax2_root, - panel_roots, - panel_axes, - bedfiles, - chrinfo_file="chrinfo.txt", - titleinfo_file="titleinfo.txt", - maxdepth=100, - logscale=False, - ) - - labels = ( - (0.02, 0.95, "A"), - (0.25 + 0.25 * 0.1, 0.95, "B"), - ) - panel_labels(root, labels) - normalize_axes(root, ax2_root) - - image_name = "diversity.pdf" - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def landscape(args): - """ - %prog landscape features.bed athaliana.sizes TAIR10_chr_all.fas Chr2 - - Plot landscape composite figure, including: - A. Example genomic features painted on Arabidopsis genome - B. 
Landscape of genomic features across the genome - """ - p = OptionParser(landscape.__doc__) - _, args, iopts = p.set_image_options(args, figsize="12x8") - - if len(args) != 4: - sys.exit(not p.print_help()) - - bedfile, sizesfile, fastafile, ch = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - aspect_ratio = iopts.w / iopts.h - ax1_root = fig.add_axes((0, 1 / 4, 0.4, 0.5 * aspect_ratio)) - ax2_root_extent = (0.4, 0.5, 0.6, 0.47) - ax2_root = fig.add_axes(ax2_root_extent) - ax3_root_extent = (0.41, 0, 0.6, 0.47) - ax3_root = fig.add_axes(ax3_root_extent) - - # Panel A - logger.info("Plotting example genomic features painted on Arabidopsis genome") - draw_chromosomes( - ax1_root, - bedfile, - sizesfile, - iopts=iopts, - mergedist=0, - winsize=50000, - gauge=True, - legend=True, - empty=False, - title="*Arabidopsis* genome features", - ) - - # Panel B - logger.info("Plotting landscape of genomic features across the genome") - stacks = ["Repeats", "Exons"] - heatmaps = ["Copia", "Gypsy", "Helitron", "hAT", "Exons"] - window = 250000 - shift = 50000 - draw_stacks( - fig, - ax2_root, - ax2_root_extent, - stacks, - fastafile, - window, - shift, - top=5, - ) - - # Panel C - draw_heatmaps( - fig, - ax3_root, - ax3_root_extent, - fastafile, - "Chr2", - stacks, - heatmaps, - window, - shift, - cmap=cm.viridis, - ) - - ax2_root.set_axis_off() - ax3_root.set_axis_off() - - labels = ((0.02, 0.95, "A"), (0.42, 0.95, "B"), (0.42, 0.48, "C")) - panel_labels(root, labels) - normalize_axes(root, ax1_root) - - image_name = "landscape.pdf" - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def genomebuild(args): - """ - %prog genomebuild reads.histo geneticmap.matrix hic.resolution_500000.npy hic.resolution_500000.json - - Plot genome build composite figure, including: - A. Read kmer histogram - B. Genetic map concordance - C. 
Hi-C contact map concordance - """ - p = OptionParser(genomebuild.__doc__) - _, args, iopts = p.set_image_options(args, figsize="21x7") - - if len(args) != 4: - sys.exit(not p.print_help()) - - reads_histo, mstmap, hic_matrix, hic_json = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - ax1_root = fig.add_axes((0, 0, 1 / 3, 1)) - ax2_root = fig.add_axes((1 / 3, 0, 1 / 3, 1)) - ax3_root = fig.add_axes((2 / 3, 0, 1 / 3, 1)) - ax1 = fig.add_axes((1 / 3 * 0.1, 0.1, 1 / 3 * 0.8, 0.8)) - ax2 = fig.add_axes((1 / 3 * 1.1, 0.1, 1 / 3 * 0.8, 0.8)) - ax3 = fig.add_axes((1 / 3 * 2.1, 0.1, 1 / 3 * 0.8, 0.8)) - - # Panel A - logger.info("Plotting read kmer histogram") - _ = draw_ks_histogram( - ax1, - reads_histo, - method="nbinom", - coverage=0, - vmin=2, - vmax=200, - species="*S. species* ‘Variety 1’", - K=21, - maxiter=100, - peaks=False, - ) - - # Panel B - logger.info("Plotting genetic map concordance") - draw_geneticmap_heatmap(ax2_root, ax2, mstmap, 1000) - - # Panel C - logger.info("Plotting Hi-C contact map concordance") - draw_hic_heatmap( - ax3_root, - ax3, - hic_matrix, - hic_json, - contig=None, - groups_file="groups", - title="*S. 
species* Hi-C contact map", - vmin=1, - vmax=6, - plot_breaks=True, - ) - - labels = ( - (1 / 3 * 0.1, 0.95, "A"), - (1 / 3 * 1.1, 0.95, "B"), - (1 / 3 * 2.1, 0.95, "C"), - ) - panel_labels(root, labels) - normalize_axes(root, ax1_root, ax2_root, ax3_root) - - image_name = "genomebuild.pdf" - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def main(): - - actions = ( - ("synteny", "Plot synteny composite figure"), - ("diversity", "Plot diversity composite figure"), - ("genomebuild", "Plot genome build composite figure"), - ("landscape", "Plot landscape composite figure"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/misc.py b/jcvi/projects/misc.py deleted file mode 100644 index 971082ff..00000000 --- a/jcvi/projects/misc.py +++ /dev/null @@ -1,777 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Functions in this script produce figures in various manuscripts. -""" - -import os.path as op -import sys - -import numpy as np -import pandas as pd - -from sklearn.decomposition import PCA -from sklearn.preprocessing import StandardScaler - -from ..apps.base import ActionDispatcher, OptionParser, fname, logger -from ..graphics.base import ( - Polygon, - normalize_axes, - panel_labels, - plt, - savefig, - set_helvetica_axis, -) -from ..graphics.glyph import DoubleSquare, GeneGlyph, RoundRect, TextCircle, plot_cap -from ..graphics.karyotype import Karyotype -from ..graphics.synteny import Synteny, draw_gene_legend - - -def main(): - - actions = ( - # Epoch paper (Woodhouse et al., 2012 Plant Cell) - ("epoch", "show the methods used in epoch paper"), - # Cotton paper (Paterson et al., 2012 Nature) - ("cotton", "plot cotton macro- and micro-synteny (requires data)"), - # Amborella paper (Albert et al., 2013 Science) - ("amborella", "plot amborella macro- and micro-synteny (requires data)"), - # Mt4.0 paper (Tang et al., 2014 BMC Genomics) - ("mtdotplots", "plot Mt3.5 and 
Mt4.0 side-by-side"), - # Oropetium paper (Vanburen et al., 2015 Nature) - ("oropetium", "plot oropetium micro-synteny (requires data)"), - # Pomegranate paper (Qin et al., 2017 Plant Journal) - ("pomegranate", "plot pomegranate macro- and micro-synteny (requires data)"), - ("birch", "plot birch macro-synteny (requires data)"), - ("litchi", "plot litchi micro-synteny (requires data)"), - ("utricularia", "plot utricularia micro-synteny (requires data)"), - ( - "waterlilyGOM", - "waterlily phylogeny and related infographics (requires data)", - ), - ("grabseeds", "GRABSEEDS PCA plot"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def rgb_to_hex(r: float, g: float, b: float): - """ - Convert RGB to hex. - """ - r, g, b = int(round(r)), int(round(g)), int(round(b)) - return f"#{r:02x}{g:02x}{b:02x}" - - -def grabseeds(args): - """ - %prog FINAL_DATA_FOR_ANOVA_HERITABILITY_ANALYSIS_SEED_COLOR_SHAPE_SIZE.csv - - Plot the PCA plot from GRABSEEDS. - """ - p = OptionParser(grabseeds.__doc__) - _, args, iopts = p.set_image_options(args, figsize="8x6") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (csvfile,) = args - df = pd.read_csv(csvfile).dropna() - features = [ - x - for x in df.columns - if x.startswith("Avg") - if x not in ("AvgOfL", "AvgOfa", "AvgOfb") - ] - x = df.loc[:, features].values - x = StandardScaler().fit_transform(x) - pca = PCA(n_components=2) - principal_components = pca.fit_transform(x) - logger.info("Explained variance: %s", pca.explained_variance_ratio_) - pc1_var, pc2_var = pca.explained_variance_ratio_ - - pc_df = pd.DataFrame(data=principal_components, columns=["PC1", "PC2"]) - final_df = pd.concat([pc_df, df[features]], axis=1).dropna() - final_df["Color"] = final_df.apply( - lambda x: rgb_to_hex(x["AvgOfRed"], x["AvgOfGreen"], x["AvgOfGreen"]), axis=1 - ) - final_df["ScatterSize"] = final_df["AvgOfArea"] / 500 - - fig = plt.figure(1, (iopts.w, iopts.h)) - ax = fig.add_subplot(1, 1, 1) - ax.set_xlabel(f"Principal 
Component 1 ({pc1_var * 100:.0f}\%)", fontsize=15) - ax.set_ylabel(f"Principal Component 2 ({pc2_var * 100:.0f}\%)", fontsize=15) - ax.set_title("Sorghum kernels, PCA Plot", fontsize=20) - ax.scatter("PC1", "PC2", s="ScatterSize", c="Color", data=final_df) - set_helvetica_axis(ax) - - image_name = "grabseeds_pca." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def waterlilyGOM(args): - """ - %prog mcmctree.tre table.csv - - Customized figure to plot phylogeny and related infographics. - """ - from ..graphics.tree import ( - LeafInfoFile, - WGDInfoFile, - draw_tree, - parse_tree, - draw_wgd_xy, - ) - from ..graphics.table import CsvTable, draw_table - - p = OptionParser(waterlilyGOM.__doc__) - _, args, iopts = p.set_image_options(args, figsize="12x9") - - if len(args) != 2: - sys.exit(not p.print_help()) - - (datafile, csvfile) = args - outgroup = ["ginkgo"] - - logger.debug("Load tree file `%s`", datafile) - t, hpd = parse_tree(datafile) - - pf = datafile.rsplit(".", 1)[0] - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - margin, rmargin = 0.15, 0.19 # Left and right margin - leafinfo = LeafInfoFile("leafinfo.csv").cache - wgdinfo = WGDInfoFile("wgdinfo.csv").cache - groups = "Monocots,Eudicots,ANA-grade,Gymnosperms" - - draw_tree( - root, - t, - hpd=hpd, - margin=margin, - rmargin=rmargin, - supportcolor=None, - internal=False, - outgroup=outgroup, - leafinfo=leafinfo, - wgdinfo=wgdinfo, - geoscale=True, - groups=groups.split(","), - ) - - # Bottom right show legends for the WGD circles - pad = 0.02 - ypad = 0.04 - xstart = 1 - rmargin + pad - ystart = 0.2 - waterlily_wgdline = wgdinfo["waterlily"][0] - ypos = ystart - 2 * ypad - draw_wgd_xy(root, xstart, ypos, waterlily_wgdline) - root.text( - xstart + pad, - ypos, - "Nymphaealean WGD", - color=waterlily_wgdline.color, - va="center", - ) - other_wgdline = wgdinfo["banana"][0] - ypos = ystart - 3 * ypad - draw_wgd_xy(root, xstart, ypos, other_wgdline) - 
root.text( - xstart + pad, - ypos, - "Other known WGDs", - color=other_wgdline.color, - va="center", - ) - - # Top left draw the comparison table - csv_table = CsvTable(csvfile) - draw_table( - root, - csv_table, - extent=(0.02, 0.44, 0.55, 0.985), - stripe_color="lavender", - yinflation=iopts.w / iopts.h, - ) - - normalize_axes(root) - - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def pomegranate(args): - """ - %prog cotton seqids karyotype.layout mcscan.out all.bed synteny.layout - - Build a figure that calls graphics.karyotype to illustrate the high ploidy - of WGD history of pineapple genome. The script calls both graphics.karyotype - and graphic.synteny. - """ - p = OptionParser(pomegranate.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="9x7") - - if len(args) != 5: - sys.exit(not p.print_help()) - - seqidsfile, klayout, datafile, bedfile, slayout = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - Karyotype(root, seqidsfile, klayout) - Synteny(fig, root, datafile, bedfile, slayout) - - # legend showing the orientation of the genes - draw_gene_legend(root, 0.42, 0.52, 0.48) - - labels = ((0.04, 0.96, "A"), (0.04, 0.52, "B")) - panel_labels(root, labels) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "pomegranate-karyotype" - image_name = pf + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def utricularia(args): - from ..graphics.synteny import main as synteny_main - - p = OptionParser(synteny_main.__doc__) - p.add_argument("--switch", help="Rename the seqid with two-column file") - opts, args, iopts = p.set_image_options(args, figsize="8x7") - - if len(args) != 3: - sys.exit(not p.print_help()) - - datafile, bedfile, layoutfile = args - switch = opts.switch - - pf = datafile.rsplit(".", 1)[0] - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - s = Synteny( - fig, root, datafile, bedfile, layoutfile, loc_label=False, switch=switch - ) - light = "lightslategrey" - RoundRect(root, (0.02, 0.69), 0.96, 0.24, fill=False, lw=2, ec=light) - RoundRect(root, (0.02, 0.09), 0.96, 0.48, fill=False, lw=2, ec=light) - za, zb = s.layout[1].ratio, s.layout[-1].ratio # zoom level - if za != 1: - root.text( - 0.96, - 0.89, - "{}x zoom".format(za).replace(".0x", "x"), - color=light, - ha="right", - va="center", - size=14, - ) - if zb != 1: - root.text( - 0.96, - 0.12, - "{}x zoom".format(zb).replace(".0x", "x"), - color=light, - ha="right", - va="center", - size=14, - ) - - # legend showing the orientation of the genes - draw_gene_legend(root, 0.22, 0.3, 0.64, text=True) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - image_name = pf + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def join_nodes( - root, coords, a, b, x, slope=2.4, fc="lightslategray", rectangle=True, circle=True -): - # Join node a and b to make an internal node - ax, ay = coords[a] - bx, by = coords[b] - if ay < by: - ax, ay, bx, by = bx, by, ax, ay - if rectangle: - nx, ny = x, (ay + by) / 2 - root.plot((nx, ax), (ay, ay), lw=2, color=fc) - root.plot((nx, bx), (by, by), lw=2, color=fc) - root.plot((nx, nx), (ay, by), lw=2, color=fc) - else: - dx = (abs(ay - by) / slope - abs(ax - bx)) / 2 - nx = max(ax, bx) + dx - ny = by + (nx - bx) * slope - root.plot((nx, ax), (ny, ay), lw=2, color=fc) - root.plot((nx, bx), (ny, by), lw=2, color=fc) - if circle: - DoubleSquare(root, nx, ny, fc=fc) - return nx, ny - - -def branch_length(ax, start, end, text, ha="left", va="bottom", color="r"): - xs, ys = start - xe, ye = end - text = r"$\mathsf{" + text + "}$" - ax.text((xs + xe) / 2, (ys + ye) / 2, text, ha=ha, va=va, color=color) - - -def birch(args): - """ - %prog birch seqids layout - - Plot birch macro-synteny, with an embedded phylogenetic tree to the right. 
- """ - p = OptionParser(birch.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="8x6") - - if len(args) != 2: - sys.exit(not p.print_help()) - - seqids, layout = args - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - K = Karyotype(root, seqids, layout) - L = K.layout - - xs = 0.79 - dt = dict(rectangle=False, circle=False) - # Embed a phylogenetic tree to the right - coords = {} - coords["Amborella"] = (xs, L[0].y) - coords["Vitis"] = (xs, L[1].y) - coords["Prunus"] = (xs, L[2].y) - coords["Betula"] = (xs, L[3].y) - coords["Populus"] = (xs, L[4].y) - coords["Arabidopsis"] = (xs, L[5].y) - coords["fabids"] = join_nodes(root, coords, "Prunus", "Betula", xs, **dt) - coords["malvids"] = join_nodes(root, coords, "Populus", "Arabidopsis", xs, **dt) - coords["rosids"] = join_nodes(root, coords, "fabids", "malvids", xs, **dt) - coords["eudicots"] = join_nodes(root, coords, "rosids", "Vitis", xs, **dt) - coords["angiosperm"] = join_nodes(root, coords, "eudicots", "Amborella", xs, **dt) - - # Show branch length - branch_length(root, coords["Amborella"], coords["angiosperm"], ">160.0") - branch_length(root, coords["eudicots"], coords["angiosperm"], ">78.2", va="top") - branch_length(root, coords["Vitis"], coords["eudicots"], "138.5") - branch_length(root, coords["rosids"], coords["eudicots"], "19.8", va="top") - branch_length( - root, coords["Prunus"], coords["fabids"], "104.2", ha="right", va="top" - ) - branch_length(root, coords["Arabidopsis"], coords["malvids"], "110.2", va="top") - branch_length( - root, coords["fabids"], coords["rosids"], "19.8", ha="right", va="top" - ) - branch_length(root, coords["malvids"], coords["rosids"], "8.5", va="top") - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "birch" - image_name = pf + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def mtdotplots(args): - """ - %prog mtdotplots Mt3.5 Mt4.0 medicago.medicago.lifted.1x1.anchors - - Plot Mt3.5 and Mt4.0 side-by-side. This is essentially combined from two - graphics.dotplot() function calls as panel A and B. - """ - from ..graphics.dotplot import check_beds, dotplot - - p = OptionParser(mtdotplots.__doc__) - p.set_beds() - opts, args, iopts = p.set_image_options(args, figsize="16x8", dpi=90) - - if len(args) != 3: - sys.exit(not p.print_help()) - - a, b, ac = args - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - r1 = fig.add_axes([0, 0, 0.5, 1]) - r2 = fig.add_axes([0.5, 0, 0.5, 1]) - a1 = fig.add_axes([0.05, 0.1, 0.4, 0.8]) - a2 = fig.add_axes([0.55, 0.1, 0.4, 0.8]) - - anchorfile = op.join(a, ac) - qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) - dotplot( - anchorfile, qbed, sbed, fig, r1, a1, is_self=is_self, genomenames="Mt3.5_Mt3.5" - ) - - opts.qbed = opts.sbed = None - anchorfile = op.join(b, ac) - qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts) - dotplot( - anchorfile, qbed, sbed, fig, r2, a2, is_self=is_self, genomenames="Mt4.0_Mt4.0" - ) - - root.text(0.03, 0.95, "A", ha="center", va="center", size=36) - root.text(0.53, 0.95, "B", ha="center", va="center", size=36) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "mtdotplots" - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def oropetium(args): - """ - %prog oropetium mcscan.out all.bed layout switch.ids - - Build a composite figure that calls graphis.synteny. 
- """ - p = OptionParser(oropetium.__doc__) - p.add_argument("--extra", help="Extra features in BED format") - opts, args, iopts = p.set_image_options(args, figsize="9x6") - - if len(args) != 4: - sys.exit(not p.print_help()) - - datafile, bedfile, slayout, switch = args - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - Synteny( - fig, root, datafile, bedfile, slayout, switch=switch, extra_features=opts.extra - ) - - # legend showing the orientation of the genes - draw_gene_legend(root, 0.4, 0.57, 0.74, text=True, repeat=True) - - # On the left panel, make a species tree - fc = "lightslategrey" - - coords = {} - xs, xp = 0.16, 0.03 - coords["oropetium"] = (xs, 0.7) - coords["setaria"] = (xs, 0.6) - coords["sorghum"] = (xs, 0.5) - coords["rice"] = (xs, 0.4) - coords["brachypodium"] = (xs, 0.3) - xs -= xp - coords["Panicoideae"] = join_nodes(root, coords, "setaria", "sorghum", xs) - xs -= xp - coords["BEP"] = join_nodes(root, coords, "rice", "brachypodium", xs) - coords["PACMAD"] = join_nodes(root, coords, "oropetium", "Panicoideae", xs) - xs -= xp - coords["Poaceae"] = join_nodes(root, coords, "BEP", "PACMAD", xs) - - # Names of the internal nodes - for tag in ("BEP", "Poaceae"): - nx, ny = coords[tag] - nx, ny = nx - 0.005, ny - 0.02 - root.text(nx, ny, tag, rotation=90, ha="right", va="top", color=fc) - for tag in ("PACMAD",): - nx, ny = coords[tag] - nx, ny = nx - 0.005, ny + 0.02 - root.text(nx, ny, tag, rotation=90, ha="right", va="bottom", color=fc) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "oropetium" - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def litchi(args): - """ - %prog litchi mcscan.out all.bed layout switch.ids - - Build a composite figure that calls graphis.synteny. 
- """ - p = OptionParser(litchi.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="9x6") - - if len(args) != 4: - sys.exit(not p.print_help()) - - datafile, bedfile, slayout, switch = args - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - Synteny(fig, root, datafile, bedfile, slayout, switch=switch) - - # legend showing the orientation of the genes - draw_gene_legend(root, 0.4, 0.7, 0.82) - - # On the left panel, make a species tree - fc = "lightslategrey" - - coords = {} - xs, xp = 0.16, 0.03 - coords["lychee"] = (xs, 0.37) - coords["clementine"] = (xs, 0.5) - coords["cacao"] = (xs, 0.6) - coords["strawberry"] = (xs, 0.7) - coords["grape"] = (xs, 0.8) - xs -= xp - coords["Sapindales"] = join_nodes(root, coords, "clementine", "lychee", xs) - xs -= xp - coords["Rosid-II"] = join_nodes(root, coords, "cacao", "Sapindales", xs) - xs -= xp - coords["Rosid"] = join_nodes(root, coords, "strawberry", "Rosid-II", xs) - xs -= xp - coords["crown"] = join_nodes(root, coords, "grape", "Rosid", xs, circle=False) - - # Names of the internal nodes - for tag in ("Rosid", "Rosid-II", "Sapindales"): - nx, ny = coords[tag] - nx, ny = nx - 0.01, ny - 0.02 - root.text(nx, ny, tag, rotation=90, ha="right", va="top", color=fc) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "litchi" - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def amborella(args): - """ - %prog amborella seqids karyotype.layout mcscan.out all.bed synteny.layout - - Build a composite figure that calls graphics.karyotype and graphics.synteny. 
- """ - p = OptionParser(amborella.__doc__) - p.add_argument("--tree", help="Display trees on the bottom of the figure") - p.add_argument("--switch", help="Rename the seqid with two-column file") - opts, args, iopts = p.set_image_options(args, figsize="8x7") - - if len(args) != 5: - sys.exit(not p.print_help()) - - seqidsfile, klayout, datafile, bedfile, slayout = args - switch = opts.switch - tree = opts.tree - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - Karyotype(root, seqidsfile, klayout) - Synteny(fig, root, datafile, bedfile, slayout, switch=switch, tree=tree) - - # legend showing the orientation of the genes - draw_gene_legend(root, 0.5, 0.68, 0.5) - - # annotate the WGD events - fc = "lightslategrey" - x = 0.05 - radius = 0.012 - TextCircle(root, x, 0.86, r"$\gamma$", radius=radius) - TextCircle(root, x, 0.95, r"$\epsilon$", radius=radius) - root.plot([x, x], [0.83, 0.9], ":", color=fc, lw=2) - pts = plot_cap((x, 0.95), np.radians(range(-70, 250)), 0.02) - x, y = zip(*pts) - root.plot(x, y, ":", color=fc, lw=2) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "amborella" - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def cotton(args): - """ - %prog cotton seqids karyotype.layout mcscan.out all.bed synteny.layout - - Build a composite figure that calls graphics.karyotype and graphic.synteny. 
- """ - p = OptionParser(cotton.__doc__) - p.add_argument("--depthfile", help="Use depth info in this file") - p.add_argument("--switch", help="Rename the seqid with two-column file") - opts, args, iopts = p.set_image_options(args, figsize="8x7") - - if len(args) != 5: - sys.exit(p.print_help()) - - seqidsfile, klayout, datafile, bedfile, slayout = args - switch = opts.switch - depthfile = opts.depthfile - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - kt = Karyotype(root, seqidsfile, klayout) - Synteny(fig, root, datafile, bedfile, slayout, switch=switch) - - light = "lightslategrey" - # Show the dup depth along the cotton chromosomes - if depthfile: - ymin, ymax = 0.9, 0.95 - root.text(0.11, 0.96, "Cotton duplication level", color="gray", size=10) - root.plot([0.1, 0.95], [ymin, ymin], color="gray") - root.text(0.96, 0.9, "1x", color="gray", va="center") - root.plot([0.1, 0.95], [ymax, ymax], color="gray") - root.text(0.96, 0.95, "6x", color="gray", va="center") - - fp = open(depthfile) - track = kt.tracks[0] # Cotton - depths = [] - for row in fp: - a, b, depth = row.split() - depth = int(depth) - try: - p = track.get_coords(a) - depths.append((p, depth)) - except KeyError: - pass - - depths.sort(key=lambda x: (x[0], -x[1])) - xx, yy = zip(*depths) - yy = [ymin + 0.01 * (x - 1) for x in yy] - root.plot(xx, yy, "-", color=light) - - # legend showing the orientation of the genes - draw_gene_legend(root, 0.5, 0.68, 0.5) - - # Zoom - xpos = 0.835 - ytop = 0.9 - xmin, xmax = 0.18, 0.82 - ymin, ymax = ytop, 0.55 - lc = "k" - kwargs = dict(lw=3, color=lc, mec=lc, mfc="w", zorder=3) - root.plot((xpos, xpos), (ymax, 0.63), ":o", **kwargs) - root.plot((xpos, xmin), (ymax, ymin), ":o", **kwargs) - root.plot((xpos, xmax), (ymax, ymin), ":o", **kwargs) - RoundRect(root, (0.06, 0.17), 0.92, 0.35, fill=False, lw=2, ec=light) - - # Panels - root.text(0.05, 0.95, "a", size=20, fontweight="bold") - root.text(0.1, 0.45, "b", size=20, 
fontweight="bold") - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "cotton" - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def plot_diagram(ax, x, y, label="S", title="syntenic", gradient=True): - """ - Part of the diagrams that are re-used. (x, y) marks the center of the - diagram. Label determines the modification to the "S" graph. - """ - trackgap = 0.06 - tracklen = 0.12 - xa, xb = x - tracklen, x + tracklen - ya, yb = y + trackgap, y - trackgap - hsps = (((60, 150), (50, 130)), ((190, 225), (200, 240)), ((330, 280), (360, 310))) - - for yy in (ya, yb): - ax.plot((xa, xb), (yy, yy), "-", color="gray", lw=2, zorder=1) - - ytip = 0.015 - mrange = 400 - m = lambda t: xa + t * 1.0 / mrange * tracklen * 2 - - for i, ((a, b), (c, d)) in enumerate(hsps): - fb = False - if label == "FB" and i == 1: - c, d = 270, 280 - fb = True - if label == "G" and i == 0: - c, d = 120, 65 - - a, b, c, d = [m(t) for t in (a, b, c, d)] - color = "g" if i == 1 else "r" - GeneGlyph(ax, a, b, ya, 2 * ytip, fc=color, gradient=gradient, zorder=10) - - if i == 1 and label in ("F", "G", "FN"): - pass - else: - if fb: - GeneGlyph( - ax, c, d, yb, 2 * ytip, fc="w", tip=0, gradient=gradient, zorder=10 - ) - else: - GeneGlyph(ax, c, d, yb, 2 * ytip, fc="r", gradient=gradient, zorder=10) - - r = Polygon( - ((a, ya - ytip), (c, yb + ytip), (d, yb + ytip), (b, ya - ytip)), - fc="r", - alpha=0.2, - ) - - if i == 1 and label not in ("S", "FB"): - pass - elif i == 0 and label == "G": - pass - else: - ax.add_patch(r) - - if label == "FN": - ax.text(x + 0.005, yb, "NNNNN", ha="center", size=7) - - title = "{0}: {1}".format(label, title) - ax.text(x, ya + 5 * ytip, title, size=8, ha="center") - - -def epoch(args): - """ - %prog epoch - - Illustrate the methods used in Maggie's epoch paper, in particular, how to - classifiy S/G/F/FB/FN for the genes. 
- """ - p = OptionParser(__doc__) - p.parse_args(args) - - fig = plt.figure(1, (6, 4)) - root = fig.add_axes((0, 0, 1, 1)) - - # Separators - linestyle = dict(lw=2, color="b", alpha=0.2, zorder=2) - root.plot((0, 1), (0.5, 0.5), "--", **linestyle) - for i in (1.0 / 3, 2.0 / 3): - root.plot((i, i), (0.5, 1), "--", **linestyle) - for i in (1.0 / 6, 3.0 / 6, 5.0 / 6): - root.plot((i, i), (0, 0.5), "--", **linestyle) - - # Diagrams - plot_diagram(root, 1.0 / 6, 3.0 / 4, "S", "syntenic") - plot_diagram(root, 3.0 / 6, 3.0 / 4, "F", "missing, with both flankers") - plot_diagram(root, 5.0 / 6, 3.0 / 4, "G", "missing, with one flanker") - plot_diagram(root, 2.0 / 6, 1.0 / 4, "FB", "has non-coding matches") - plot_diagram(root, 4.0 / 6, 1.0 / 4, "FN", "syntenic region has gap") - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - figname = fname() + ".pdf" - savefig(figname, dpi=300) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/napus.py b/jcvi/projects/napus.py deleted file mode 100644 index 8e767e6c..00000000 --- a/jcvi/projects/napus.py +++ /dev/null @@ -1,858 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Scripts for the Brassica napus genome manuscript (Chalhoub et al. Science 2014). 
-""" -import os.path as op -import sys - -import numpy as np - -from ..apps.base import ActionDispatcher, OptionParser, logger -from ..formats.base import LineFile -from ..graphics.base import ( - FancyArrowPatch, - Rectangle, - adjust_spines, - mpl, - normalize_axes, - panel_labels, - plt, - savefig, -) -from ..graphics.coverage import Coverage, Sizes, XYtrack, setup_gauge_ax -from ..graphics.glyph import TextCircle -from ..graphics.karyotype import Karyotype -from ..graphics.synteny import Synteny - - -template_cov = """# y, xstart, xend, rotation, color, label, va, bed -.56, {0}, {1}, 0, darkslategray, , top, AN.bed -.48, {2}, {3}, 0, darkslategray, , top, CN.bed -# edges -e, 0, 1, AN.CN.1x1.lifted.simple -""" -template_f3a = r"""# y, xstart, xend, rotation, color, label, va, bed -.65, {0}, {1}, 0, gainsboro, \noindent\textit{{B. napus}} A$\mathsf{{_n}}$2\\(cv Darmor-\textit{{bzh}}), top, AN.bed -.55, {2}, {3}, 0, gainsboro, \textit{{B. rapa}} A$\mathsf{{_r}}$2, top, brapa.bed -.45, {4}, {5}, 0, gainsboro, \textit{{B. oleracea}} C$\mathsf{{_o}}$2, top, boleracea.bed -.35, {6}, {7}, 0, gainsboro, \noindent\textit{{B. 
napus}} C$\mathsf{{_n}}$2\\(cv Darmor-\textit{{bzh}}), top, CN.bed -# edges -e, 0, 1, AN.brapa.1x1.lifted.simple -e, 1, 2, brapa.boleracea.1x1.lifted.simple -e, 3, 2, CN.boleracea.1x1.lifted.simple""" - -gap = 0.03 - - -class F4ALayoutLine(object): - def __init__(self, row, delimiter=","): - args = row.rstrip().split(delimiter) - args = [x.strip() for x in args] - self.region = args[0] - self.seqid, se = self.region.split(":") - start, end = se.split("-") - self.start, self.end = int(start), int(end) - self.center = (self.start + self.end) / 2 - self.span = self.end - self.start + 1 - self.box_region = args[1] - self.y = float(args[2]) - self.i = int(args[3]) - - -class F4ALayout(LineFile): - def __init__(self, filename, delimiter=","): - super().__init__(filename) - fp = open(filename) - self.edges = [] - for row in fp: - if row[0] == "#": - continue - self.append(F4ALayoutLine(row, delimiter=delimiter)) - - -def main(): - actions = ( - ("ploidy", "plot napus macro-synteny (requires data)"), - ("expr", "plot expression values between homeologs (requires data)"), - ("cov", "plot coverage graphs between homeologs (requires data)"), - ("deletion", "plot histogram for napus deletions (requires data)"), - ("fig3", "plot Figure-3"), - ("fig4", "plot Figure-4 (not in main text)"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def calc_ratio(chrs, sizes): - chr_sizes = [[sizes[x] for x in z] for z in chrs] - chr_sum_sizes = [sum(x) for x in chr_sizes] - ratio = 0.8 / max(chr_sum_sizes) - return chr_sizes, chr_sum_sizes, ratio - - -def center_panel(chr, chr_size, ratio, gap=gap, shift=0): - # Center two panels - w = (ratio * chr_size + (len(chr) - 1) * gap) / 2 - return 0.5 - w + shift, 0.5 + w + shift - - -def make_seqids(chrs, seqidsfile="seqids"): - fw = open(seqidsfile, "w") - for chr in chrs: - print(",".join(chr), file=fw) - fw.close() - logger.debug("File `{0}` written.".format(seqidsfile)) - return seqidsfile - - -def make_layout(chrs, chr_sizes, 
ratio, template, klayout="layout", shift=0): - coords = [] - for chr, chr_size in zip(chrs, chr_sizes): - coords.extend(center_panel(chr, chr_size, ratio, shift=shift)) - - fw = open(klayout, "w") - print(template.format(*coords), file=fw) - fw.close() - logger.debug("File `{0}` written.".format(klayout)) - - return klayout - - -def cov(args): - """ - %prog cov chrA01 chrC01 chr.sizes data AN.CN.1x1.lifted.anchors.simple - - Plot coverage graphs between homeologs, the middle panel show the - homeologous gene pairs. Allow multiple chromosomes to multiple chromosomes. - """ - p = OptionParser(cov.__doc__) - p.add_argument( - "--order", - default="swede,kale,h165,yudal,aviso,abu,bristol,bzh", - help="The order to plot the tracks, comma-separated", - ) - p.add_argument( - "--reverse", - default=False, - action="store_true", - help="Plot the order in reverse", - ) - p.add_argument( - "--gauge_step", default=5000000, type=int, help="Step size for the base scale" - ) - p.add_argument( - "--hlsuffix", - default="regions.forhaibao", - help="Suffix for the filename to be used to highlight regions", - ) - opts, args, iopts = p.set_image_options(args, figsize="11x8") - - if len(args) != 4: - sys.exit(not p.print_help()) - - chr1, chr2, sizesfile, datadir = args - chr1 = chr1.split(",") - chr2 = chr2.split(",") - - order = opts.order - hlsuffix = opts.hlsuffix - if order: - order = order.split(",") - if opts.reverse: - order.reverse() - sizes = Sizes(sizesfile).mapping - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - chrs = (chr1, chr2) - chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes) - chr_size1, chr_size2 = chr_sum_sizes - chr_sizes1, chr_sizes2 = chr_sizes - - w1_start, w1_end = center_panel(chr1, chr_size1, ratio) - w2_start, w2_end = center_panel(chr2, chr_size2, ratio) - w1s = w1_start - w2s = w2_start - - dsg = "gray" - i = 0 - for c1, s1 in zip(chr1, chr_sizes1): - w1 = ratio * s1 - plot_label = i == 0 - i += 1 - canvas1 = 
(w1s, 0.6, w1, 0.3) - Coverage( - fig, - root, - canvas1, - c1, - (0, s1), - datadir, - order=order, - gauge="top", - plot_label=plot_label, - gauge_step=opts.gauge_step, - palette=dsg, - cap=40, - hlsuffix=hlsuffix, - ) - w1s += w1 + gap - - i = 0 - for c2, s2 in zip(chr2, chr_sizes2): - w2 = ratio * s2 - plot_label = i == 0 - i += 1 - canvas2 = (w2s, 0.15, w2, 0.3) - Coverage( - fig, - root, - canvas2, - c2, - (0, s2), - datadir, - order=order, - gauge="bottom", - plot_label=plot_label, - gauge_step=opts.gauge_step, - palette=dsg, - cap=40, - hlsuffix=hlsuffix, - ) - w2s += w2 + gap - - # Synteny panel - seqidsfile = make_seqids(chrs) - klayout = make_layout(chrs, chr_sum_sizes, ratio, template_cov) - Karyotype(root, seqidsfile, klayout, gap=gap, generank=False, sizes=sizes) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - chr2 = "_".join(chr2) - if opts.reverse: - chr2 += ".reverse" - image_name = chr2 + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def conversion_track(order, filename, col, label, ax, color, ypos=0, asterisk=False): - ids = [] - fp = open(filename) - for row in fp: - if asterisk and row[0] != "*": - continue - if (not asterisk) and row[0] == "*": - continue - if asterisk: - row = row[1:] - atoms = row.split() - gid = atoms[col].rsplit(".", 1)[0] - gid = gid.replace("T", "G") - ids.append(gid) - - beds = [order[x][1] for x in ids if x in order] - pts = [x.start for x in beds if x.seqid == label] - if len(pts): - logger.debug("A total of {0} converted loci imported.".format(len(pts))) - else: - logger.error("Array empty. 
Skipped scatterplot.") - return - - ax.vlines(pts, [-1], [ypos], color=color) - ax.set_axis_off() - - -def make_affix_axis(fig, t, yoffset, height=0.001): - x, y = t.xstart, t.y + yoffset - w = t.xend - t.xstart - ax = fig.add_axes([x, y, w, height]) - return ax - - -def fig3(args): - """ - %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data - - Napus Figure 3 displays alignments between quartet chromosomes, inset - with read histograms. - """ - from jcvi.formats.bed import Bed - - p = OptionParser(fig3.__doc__) - p.add_argument( - "--gauge_step", - default=10000000, - type=int, - help="Step size for the base scale", - ) - opts, args, iopts = p.set_image_options(args, figsize="12x9") - - if len(args) != 4: - sys.exit(not p.print_help()) - - chrs, sizes, bedfile, datadir = args - gauge_step = opts.gauge_step - diverge = iopts.diverge - rr, gg = diverge - chrs = [[x] for x in chrs.split(",")] - sizes = Sizes(sizes).mapping - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes) - - # Synteny panel - seqidsfile = make_seqids(chrs) - klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=0.05) - height = 0.07 - r = height / 4 - K = Karyotype( - root, - seqidsfile, - klayout, - gap=gap, - height=height, - lw=2, - generank=False, - sizes=sizes, - heightpad=r, - plot_label=False, - ) - - # Chromosome labels - for kl in K.layout: - if kl.empty: - continue - lx, ly = kl.xstart, kl.y - if lx < 0.11: - lx += 0.1 - ly += 0.06 - label = kl.label - root.text(lx - 0.015, ly, label, fontsize=15, ha="right", va="center") - - # Inset with datafiles - datafiles = ( - "chrA02.bzh.forxmgr", - "parent.A02.per10kb.forxmgr", - "parent.C2.per10kb.forxmgr", - "chrC02.bzh.forxmgr", - ) - datafiles = [op.join(datadir, x) for x in datafiles] - tracks = K.tracks - hlfile = op.join(datadir, "bzh.regions.forhaibao") - xy_axes = [] - for t, datafile in zip(tracks, datafiles): - ax = 
make_affix_axis(fig, t, -r, height=2 * r) - xy_axes.append(ax) - chr = t.seqids[0] - xy = XYtrack(ax, datafile, color="lightslategray") - start, end = 0, t.total - xy.interpolate(end) - xy.cap(ymax=40) - xy.import_hlfile(hlfile, chr, diverge=diverge) - xy.draw() - ax.set_xlim(start, end) - gauge_ax = make_affix_axis(fig, t, -r) - adjust_spines(gauge_ax, ["bottom"]) - setup_gauge_ax(gauge_ax, start, end, gauge_step) - - # Converted gene tracks - ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2) - ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2) - - order = Bed(bedfile).order - for asterisk in (False, True): - conversion_track( - order, - "data/Genes.Converted.seuil.0.6.AtoC.txt", - 0, - "A02", - ax_Ar, - rr, - asterisk=asterisk, - ) - conversion_track( - order, - "data/Genes.Converted.seuil.0.6.AtoC.txt", - 1, - "C2", - ax_Co, - gg, - asterisk=asterisk, - ) - conversion_track( - order, - "data/Genes.Converted.seuil.0.6.CtoA.txt", - 0, - "A02", - ax_Ar, - gg, - ypos=1, - asterisk=asterisk, - ) - conversion_track( - order, - "data/Genes.Converted.seuil.0.6.CtoA.txt", - 1, - "C2", - ax_Co, - rr, - ypos=1, - asterisk=asterisk, - ) - - Ar, Co = xy_axes[1:3] - annotations = ( - (Ar, "Bra028920 Bra028897", "center", "1DAn2+"), - (Ar, "Bra020081 Bra020171", "right", "2DAn2+"), - (Ar, "Bra020218 Bra020286", "left", "3DAn2+"), - (Ar, "Bra008143 Bra008167", "left", "4DAn2-"), - (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"), - (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"), - (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"), - (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"), - (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"), - (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"), - ) - - for ax, genes, ha, label in annotations: - g1, g2 = genes.split() - x1, x2 = order[g1][1].start, order[g2][1].start - if ha == "center": - x = (x1 + x2) / 2 * 0.8 - elif ha == "left": - x = x2 - else: - x = x1 - label = r"\textit{{{0}}}".format(label) - color = rr if "+" in 
label else gg - ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center") - - ax_Ar.set_xlim(0, tracks[1].total) - ax_Ar.set_ylim(-1, 1) - ax_Co.set_xlim(0, tracks[2].total) - ax_Co.set_ylim(-1, 1) - - # Plot coverage in resequencing lines - gstep = 5000000 - order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",") - labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"} - hlsuffix = "regions.forhaibao" - chr1, chr2 = "chrA02", "chrC02" - t1, t2 = tracks[0], tracks[-1] - s1, s2 = sizes[chr1], sizes[chr2] - - canvas1 = (t1.xstart, 0.75, t1.xend - t1.xstart, 0.2) - c = Coverage( - fig, - root, - canvas1, - chr1, - (0, s1), - datadir, - order=order, - gauge=None, - plot_chr_label=False, - gauge_step=gstep, - palette="gray", - cap=40, - hlsuffix=hlsuffix, - labels_dict=labels_dict, - diverge=diverge, - ) - yys = c.yys - x1, x2 = 0.37, 0.72 - tip = 0.02 - annotations = ( - (x1, yys[2] + 0.3 * tip, tip, tip / 2, "FLC"), - (x1, yys[3] + 0.6 * tip, tip, tip / 2, "FLC"), - (x1, yys[5] + 0.6 * tip, tip, tip / 2, "FLC"), - (x2, yys[0] + 0.9 * tip, -1.2 * tip, 0, "GSL"), - (x2, yys[4] + 0.9 * tip, -1.2 * tip, 0, "GSL"), - (x2, yys[6] + 0.9 * tip, -1.2 * tip, 0, "GSL"), - ) - - arrowprops = dict(facecolor="black", shrink=0.05, frac=0.5, width=1, headwidth=4) - for x, y, dx, dy, label in annotations: - label = r"\textit{{{0}}}".format(label) - root.annotate( - label, - xy=(x, y), - xytext=(x + dx, y + dy), - arrowprops=arrowprops, - color=rr, - fontsize=9, - ha="center", - va="center", - ) - - canvas2 = (t2.xstart, 0.05, t2.xend - t2.xstart, 0.2) - Coverage( - fig, - root, - canvas2, - chr2, - (0, s2), - datadir, - order=order, - gauge=None, - plot_chr_label=False, - gauge_step=gstep, - palette="gray", - cap=40, - hlsuffix=hlsuffix, - labels_dict=labels_dict, - diverge=diverge, - ) - - pad = 0.03 - labels = ( - (0.1, 0.67, "A"), - (t1.xstart - 3 * pad, 0.95 + pad, "B"), - (t2.xstart - 3 * pad, 0.25 + pad, "C"), - ) - panel_labels(root, labels) - 
normalize_axes(root) - - image_name = "napus-fig3." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def fig4(args): - """ - %prog fig4 layout data - - Napus Figure 4A displays an example deleted region for quartet chromosomes, - showing read alignments from high GL and low GL lines. - """ - p = OptionParser(fig4.__doc__) - p.add_argument( - "--gauge_step", default=200000, type=int, help="Step size for the base scale" - ) - opts, args, iopts = p.set_image_options(args, figsize="9x7") - - if len(args) != 2: - sys.exit(not p.print_help()) - - layout, datadir = args - layout = F4ALayout(layout) - - gs = opts.gauge_step - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - block, napusbed, slayout = "r28.txt", "all.bed", "r28.layout" - s = Synteny(fig, root, block, napusbed, slayout, chr_label=False) - synteny_exts = [(x.xstart, x.xend) for x in s.rr] - - h = 0.1 - order = "bzh,yudal".split(",") - labels = ( - r"\textit{B. napus} A$\mathsf{_n}$2", - r"\textit{B. rapa} A$\mathsf{_r}$2", - r"\textit{B. oleracea} C$\mathsf{_o}$2", - r"\textit{B. 
napus} C$\mathsf{_n}$2", - ) - for t in layout: - xstart, xend = synteny_exts[2 * t.i] - canvas = [xstart, t.y, xend - xstart, h] - root.text(xstart - h, t.y + h / 2, labels[t.i], ha="center", va="center") - ch, ab = t.box_region.split(":") - a, b = ab.split("-") - vlines = [int(x) for x in (a, b)] - Coverage( - fig, - root, - canvas, - t.seqid, - (t.start, t.end), - datadir, - order=order, - gauge="top", - plot_chr_label=False, - gauge_step=gs, - palette="gray", - cap=40, - hlsuffix="regions.forhaibao", - vlines=vlines, - ) - - # Highlight GSL biosynthesis genes - a, b = (3, "Bra029311"), (5, "Bo2g161590") - for gid in (a, b): - start, end = s.gg[gid] - xstart, ystart = start - xend, yend = end - x = (xstart + xend) / 2 - arrow = FancyArrowPatch( - posA=(x, ystart - 0.04), - posB=(x, ystart - 0.005), - arrowstyle="fancy,head_width=6,head_length=8", - lw=3, - fc="k", - ec="k", - zorder=20, - ) - root.add_patch(arrow) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - image_name = "napus-fig4." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def deletion(args): - """ - %prog deletion [deletion-genes|deletion-bases] C2-deletions boleracea.bed - - Plot histogram for napus deletions. Can plot deletion-genes or - deletion-bases. The three largest segmental deletions will be highlighted - along with a drawing of the C2 chromosome. 
- """ - import math - from jcvi.formats.bed import Bed - from jcvi.graphics.chromosome import HorizontalChromosome - from jcvi.graphics.base import kb_formatter - - p = OptionParser(deletion.__doc__) - opts, args, iopts = p.set_image_options(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - deletion_genes, deletions, bed = args - dg = [int(x) for x in open(deletion_genes)] - dsg, lsg = "darkslategray", "lightslategray" - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - ax = fig.add_axes([0.1, 0.1, 0.8, 0.8]) - minval = 2 if deletion_genes == "deleted-genes" else 2048 - bins = np.logspace(math.log(minval, 10), math.log(max(dg), 10), 16) - ax.hist(dg, bins=bins, fc=lsg, alpha=0.75) - ax.set_xscale("log", basex=2) - if deletion_genes == "deleted-genes": - ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter("%d")) - ax.set_xlabel("No. of deleted genes in each segment") - else: - ax.xaxis.set_major_formatter(kb_formatter) - ax.set_xlabel("No. of deleted bases in each segment") - ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter("%d")) - ax.set_ylabel("No. 
of segments") - ax.patch.set_alpha(0.1) - - # Draw chromosome C2 - na, nb = 0.45, 0.85 - root.text((na + nb) / 2, 0.54, "ChrC02", ha="center") - HorizontalChromosome(root, na, nb, 0.5, height=0.025, fc=lsg) - - order = Bed(bed).order - fp = open(deletions) - scale = lambda x: na + x * (nb - na) / 52886895 - for i, row in enumerate(fp): - i += 1 - num, genes = row.split() - genes = genes.split("|") - ia, a = order[genes[0]] - mi, mx = a.start, a.end - mi, mx = scale(mi), scale(mx) - root.add_patch(Rectangle((mi, 0.475), mx - mi, 0.05, fc="red", ec="red")) - if i == 1: # offset between two adjacent regions for aesthetics - mi -= 0.015 - elif i == 2: - mi += 0.015 - TextCircle(root, mi, 0.44, str(i), fc="red") - - for i, mi in zip(range(1, 4), (0.83, 0.78, 0.73)): - TextCircle(root, mi, 0.2, str(i), fc="red") - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - image_name = deletion_genes + ".pdf" - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def ploidy(args): - """ - %prog ploidy seqids layout - - Build a figure that calls graphics.karyotype to illustrate the high ploidy - of B. napus genome. 
- """ - p = OptionParser(ploidy.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="8x7") - - if len(args) != 2: - sys.exit(not p.print_help()) - - seqidsfile, klayout = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - Karyotype(root, seqidsfile, klayout) - - fc = "darkslategrey" - radius = 0.012 - ot = -0.05 # use this to adjust vertical position of the left panel - TextCircle(root, 0.1, 0.9 + ot, r"$\gamma$", radius=radius, fc=fc) - root.text(0.1, 0.88 + ot, r"$\times3$", ha="center", va="top", color=fc) - TextCircle(root, 0.08, 0.79 + ot, r"$\alpha$", radius=radius, fc=fc) - TextCircle(root, 0.12, 0.79 + ot, r"$\beta$", radius=radius, fc=fc) - root.text( - 0.1, 0.77 + ot, r"$\times3\times2\times2$", ha="center", va="top", color=fc - ) - root.text( - 0.1, - 0.67 + ot, - r"Brassica triplication", - ha="center", - va="top", - color=fc, - size=11, - ) - root.text( - 0.1, - 0.65 + ot, - r"$\times3\times2\times2\times3$", - ha="center", - va="top", - color=fc, - ) - root.text( - 0.1, 0.42 + ot, r"Allo-tetraploidy", ha="center", va="top", color=fc, size=11 - ) - root.text( - 0.1, - 0.4 + ot, - r"$\times3\times2\times2\times3\times2$", - ha="center", - va="top", - color=fc, - ) - - bb = dict(boxstyle="round,pad=.5", fc="w", ec="0.5", alpha=0.5) - root.text( - 0.5, - 0.2 + ot, - r"\noindent\textit{Brassica napus}\\(A$\mathsf{_n}$C$\mathsf{_n}$ genome)", - ha="center", - size=16, - color="k", - bbox=bb, - ) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "napus" - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def expr(args): - """ - %prog expr block exp layout napus.bed - - Plot a composite figure showing synteny and the expression level between - homeologs in two tissues - total 4 lists of values. block file contains the - gene pairs between AN and CN. 
- """ - from jcvi.graphics.base import red_purple as default_cm - - p = OptionParser(expr.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="8x5") - - if len(args) != 4: - sys.exit(not p.print_help()) - - block, exp, layout, napusbed = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - s = Synteny(fig, root, block, napusbed, layout) - - # Import the expression values - # Columns are: leaf-A, leaf-C, root-A, root-C - fp = open(exp) - data = {} - for row in fp: - gid, lf, rt = row.split() - lf, rt = float(lf), float(rt) - data[gid] = (lf, rt) - - rA, rB = s.rr - gA = [x.accn for x in rA.genes] - gC = [x.accn for x in rB.genes] - - A = [data.get(x, (0, 0)) for x in gA] - C = [data.get(x, (0, 0)) for x in gC] - A = np.array(A) - C = np.array(C) - A = np.transpose(A) - C = np.transpose(C) - - d, h = 0.01, 0.1 - lsg = "lightslategrey" - coords = s.gg # Coordinates of the genes - axes = [] - for j, (y, gg) in enumerate(((0.79, gA), (0.24, gC))): - r = s.rr[j] - x = r.xstart - w = r.xend - r.xstart - ax = fig.add_axes([x, y, w, h]) - axes.append(ax) - root.add_patch( - Rectangle((x - h, y - d), w + h + d, h + 2 * d, fill=False, ec=lsg, lw=1) - ) - root.text(x - d, y + 3 * h / 4, "root", ha="right", va="center") - root.text(x - d, y + h / 4, "leaf", ha="right", va="center") - ty = y - 2 * d if y > 0.5 else y + h + 2 * d - nrows = len(gg) - for i, g in enumerate(gg): - start, end = coords[(j, g)] - sx, sy = start - ex, ey = end - assert sy == ey - sy = sy + 2 * d if sy > 0.5 else sy - 2 * d - root.plot( - ((sx + ex) / 2, x + w * (i + 0.5) / nrows), - (sy, ty), - lw=1, - ls=":", - color="k", - alpha=0.2, - ) - - axA, axC = axes - axA.pcolormesh(A, cmap=default_cm) - axC.pcolormesh(C, cmap=default_cm) - axA.set_xlim(0, len(gA)) - axC.set_xlim(0, len(gC)) - - x, y, w, h = 0.35, 0.1, 0.3, 0.05 - ax_colorbar = fig.add_axes([x, y, w, h]) - fig.colorbar(p, cax=ax_colorbar, orientation="horizontal") - root.text(x - d, y + h / 2, 
"RPKM", ha="right", va="center") - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - for x in (axA, axC, root): - x.set_axis_off() - - image_name = "napusf4b." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/pineapple.py b/jcvi/projects/pineapple.py deleted file mode 100644 index b092a47d..00000000 --- a/jcvi/projects/pineapple.py +++ /dev/null @@ -1,411 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Scripts for the pineapple genome paper. -""" -import sys - -from ..annotation.ahrd import read_interpro -from ..apps.base import ActionDispatcher, OptionParser, logger -from ..formats.base import DictFile, LineFile, SetFile, get_number, must_open -from ..formats.bed import Bed -from ..formats.sizes import Sizes -from ..graphics.base import Rectangle, panel_labels, plt, savefig -from ..graphics.chromosome import Chromosome -from ..graphics.glyph import TextCircle -from ..graphics.karyotype import Karyotype -from ..graphics.synteny import Synteny, draw_gene_legend - - -class RegionsLine(object): - def __init__(self, line): - args = line.split() - self.karyotype = args[0][0] - self.group = args[0][1] - self.chromosome = int(args[1]) - self.start = int(args[5]) - self.end = int(args[8]) - self.span = abs(self.start - self.end) - - -class RegionsFile(LineFile): - def __init__(self, filename): - super().__init__(filename) - fp = open(filename) - next(fp) - for row in fp: - self.append(RegionsLine(row)) - - @property - def karyotypes(self): - return sorted(set(x.karyotype for x in self)) - - def get_karyotype(self, k): - return [x for x in self if x.karyotype == k] - - -def main(): - - actions = ( - # main figures in text - ("ancestral", "karoytype evolution of pineapple (requires data)"), - ("ploidy", "plot pineapple macro-synteny (requires data)"), - # build pseudomolecule - ("agp", "make agp file"), - ("breakpoints", "make breakpoints"), - ("check", "check agreement"), - # 
build gene info table - ("geneinfo", "build gene info table"), - ("flanking", "extract flanking genes for given SI loci"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def flanking(args): - """ - %prog flanking SI.ids liftover.bed master.txt master-removed.txt - - Extract flanking genes for given SI loci. - """ - p = OptionParser(flanking.__doc__) - p.add_argument("-N", default=50, type=int, help="How many genes on both directions") - opts, args = p.parse_args(args) - - if len(args) != 4: - sys.exit(not p.print_help()) - - SI, liftover, master, te = args - N = opts.N - SI = SetFile(SI, column=0, delimiter=".") - liftover = Bed(liftover) - order = liftover.order - neighbors = set() - for s in SI: - si, s = order[s] - LB = max(si - N, 0) - RB = min(si + N, len(liftover)) - for j in range(LB, RB + 1): - a = liftover[j] - if a.seqid != s.seqid: - continue - neighbors.add(a.accn) - - dmain = DictFile(master, keypos=0, valuepos=None, delimiter="\t") - dte = DictFile(te, keypos=0, valuepos=None, delimiter="\t") - header = next(open(master)) - print("\t".join(("SI/Neighbor", "Gene/TE", header.strip()))) - for a in liftover: - s = a.accn - if s not in neighbors: - continue - - tag = "SI" if s in SI else "neighbor" - if s in dmain: - d = dmain[s] - print("\t".join([tag, "gene"] + d)) - elif s in dte: - d = dte[s] - print("\t".join([tag, "TE"] + d)) - - -def join_nodes_vertical(root, coords, a, b, y, lw=2): - # Join node a and b to make an internal node - ax, ay = coords[a] - bx, by = coords[b] - nx, ny = (ax + bx) / 2, y - root.plot((ax, ax), (ay, ny), "k-", lw=lw) - root.plot((bx, bx), (ay, ny), "k-", lw=lw) - root.plot((ax, bx), (ny, ny), "k-", lw=lw) - return nx, ny - - -def ancestral(args): - """ - %prog ancestral ancestral.txt assembly.fasta - - Karyotype evolution of pineapple. The figure is inspired by Amphioxus paper - Figure 3 and Tetradon paper Figure 9. 
- """ - p = OptionParser(ancestral.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="8x7") - - if len(args) != 2: - sys.exit(not p.print_help()) - - regionsfile, sizesfile = args - regions = RegionsFile(regionsfile) - sizes = Sizes(sizesfile).mapping - sizes = dict((k, v) for (k, v) in sizes.iteritems() if k[:2] == "LG") - maxsize = max(sizes.values()) - ratio = 0.5 / maxsize - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - from jcvi.graphics.base import set2 - - a, b, c, d, e, f, g = set2[:7] - set2 = (c, g, b, e, d, a, f) - - # Upper panel is the evolution of segments - # All segments belong to one of seven karyotypes 1 to 7 - karyotypes = regions.karyotypes - xgap = 1.0 / (1 + len(karyotypes)) - ygap = 0.05 - mgap = xgap / 4.5 - gwidth = mgap * 0.75 - tip = 0.02 - coords = {} - for i, k in enumerate(regions.karyotypes): - x = (i + 1) * xgap - y = 0.9 - root.text(x, y + tip, "Anc" + k, ha="center") - root.plot((x, x), (y, y - ygap), "k-", lw=2) - y -= 2 * ygap - coords["a"] = (x - 1.5 * mgap, y) - coords["b"] = (x - 0.5 * mgap, y) - coords["c"] = (x + 0.5 * mgap, y) - coords["d"] = (x + 1.5 * mgap, y) - coords["ab"] = join_nodes_vertical(root, coords, "a", "b", y + ygap / 2) - coords["cd"] = join_nodes_vertical(root, coords, "c", "d", y + ygap / 2) - coords["abcd"] = join_nodes_vertical(root, coords, "ab", "cd", y + ygap) - for n in "abcd": - nx, ny = coords[n] - root.text(nx, ny - tip, n, ha="center") - coords[n] = (nx, ny - ygap / 2) - - kdata = regions.get_karyotype(k) - for kd in kdata: - g = kd.group - gx, gy = coords[g] - gsize = ratio * kd.span - gy -= gsize - p = Rectangle((gx - gwidth / 2, gy), gwidth, gsize, lw=0, color=set2[i]) - root.add_patch(p) - root.text( - gx, gy + gsize / 2, kd.chromosome, ha="center", va="center", color="w" - ) - coords[g] = (gx, gy - tip) - - # Bottom panel shows the location of segments on chromosomes - # TODO: redundant code, similar to graphics.chromosome - ystart = 0.54 - 
chr_number = len(sizes) - xstart, xend = xgap - 2 * mgap, 1 - xgap + 2 * mgap - xinterval = (xend - xstart - gwidth) / (chr_number - 1) - chrpos = {} - for a, (chr, clen) in enumerate(sorted(sizes.items())): - chr = get_number(chr) - xx = xstart + a * xinterval + gwidth / 2 - chrpos[chr] = xx - root.text(xx, ystart + 0.01, chr, ha="center") - Chromosome(root, xx, ystart, ystart - clen * ratio, width=gwidth) - - # Start painting - for r in regions: - xx = chrpos[r.chromosome] - yystart = ystart - r.start * ratio - yyend = ystart - r.end * ratio - p = Rectangle( - (xx - gwidth / 2, yystart), - gwidth, - yyend - yystart, - color=set2[int(r.karyotype) - 1], - lw=0, - ) - root.add_patch(p) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "pineapple-karyotype" - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def geneinfo(args): - """ - %prog geneinfo pineapple.20141004.bed liftover.bed pineapple.20150413.bed \ - note.txt interproscan.txt - - Build gene info table from various sources. The three beds contain - information on the original scaffolds, linkage groups, and final selected - loci (after removal of TEs and split loci). The final two text files contain - AHRD and domain data. 
- """ - p = OptionParser(geneinfo.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 5: - sys.exit(not p.print_help()) - - scfbed, liftoverbed, lgbed, note, ipr = args - note = DictFile(note, delimiter="\t") - scfbed = Bed(scfbed) - lgorder = Bed(lgbed).order - liftover = Bed(liftoverbed).order - header = ( - "Accession Scaffold-position LG-position " - "Description Interpro-domain Interpro-description " - "GO-term KEGG".split() - ) - ipr = read_interpro(ipr) - - fw_clean = must_open("master.txt", "w") - fw_removed = must_open("master-removed.txt", "w") - - for fw in (fw_clean, fw_removed): - print("\t".join(header), file=fw) - - for b in scfbed: - accession = b.accn - scaffold_position = b.tag - if accession in liftover: - lg_position = liftover[accession][-1].tag - else: - lg_position = "split" - fw = fw_clean if accession in lgorder else fw_removed - description = note[accession] - interpro = interpro_description = go = kegg = "" - if accession in ipr: - interpro, interpro_description, go, kegg = ipr[accession] - print( - "\t".join( - ( - accession, - scaffold_position, - lg_position, - description, - interpro, - interpro_description, - go, - kegg, - ) - ), - file=fw, - ) - fw.close() - - -def ploidy(args): - """ - %prog ploidy seqids karyotype.layout mcscan.out all.bed synteny.layout - - Build a figure that calls graphics.karyotype to illustrate the high ploidy - of WGD history of pineapple genome. The script calls both graphics.karyotype - and graphic.synteny. 
- """ - p = OptionParser(ploidy.__doc__) - p.add_argument("--switch", help="Rename the seqid with two-column file") - opts, args, iopts = p.set_image_options(args, figsize="9x7") - - if len(args) != 5: - sys.exit(not p.print_help()) - - seqidsfile, klayout, datafile, bedfile, slayout = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - - Karyotype(root, seqidsfile, klayout) - Synteny(fig, root, datafile, bedfile, slayout, switch=opts.switch) - - # legend showing the orientation of the genes - draw_gene_legend(root, 0.27, 0.37, 0.52) - - # annotate the WGD events - fc = "lightslategrey" - x = 0.09 - radius = 0.012 - TextCircle(root, x, 0.825, r"$\tau$", radius=radius, fc=fc) - TextCircle(root, x, 0.8, r"$\sigma$", radius=radius, fc=fc) - TextCircle(root, x, 0.72, r"$\rho$", radius=radius, fc=fc) - for ypos in (0.825, 0.8, 0.72): - root.text(0.12, ypos, r"$\times2$", color=fc, ha="center", va="center") - root.plot([x, x], [0.85, 0.775], ":", color=fc, lw=2) - root.plot([x, x], [0.75, 0.675], ":", color=fc, lw=2) - - labels = ((0.04, 0.96, "A"), (0.04, 0.54, "B")) - panel_labels(root, labels) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "pineapple-karyotype" - image_name = pf + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -scaffold = "scaffold_" - - -def check(args): - fp = open("assembly-order.txt") - next(fp) - d = {} - for row in fp: - atoms = row.split() - scaf, tag, linkage, no = atoms[:4] - d[scaf] = tag - - fp = open("chimeric-scaffolds.txt") - next(fp) - for row in fp: - old, new, tag, start, end = row.strip().split("\t") - if new not in d: - print(new, "not in sheet1") - continue - if d[new] != tag: - print("{0} => {1} in sheet1 but {2} in sheet2".format(new, d[new], tag)) - - -def agp(args): - fp = open("assembly-order.txt") - next(fp) - sizes = Sizes("SCAFFOLD-SPLIT.fasta").mapping - for row in fp: - atoms = row.split() - assert len(atoms) in (4, 5) - if len(atoms) == 4: - atoms.append("?") - scaf, tag, linkage, no, strand = atoms - strand = strand.lower() - strand = {"f": "+", "r": "-", "?": "?"}[strand] - scaf = "scaffold_" + scaf - scaf_size = sizes[scaf] - linkage = "LG{0:02d}".format(ord(linkage.lower()) - ord("a") + 1) - print("\t".join(str(x) for x in (scaf, 0, scaf_size, linkage, 1000, strand))) - - -def breakpoints(args): - fp = open("chimeric-scaffolds.txt") - next(fp) - scaffolds = set() - nbreaks = 0 - for row in fp: - atoms = row.strip().split("\t") - if len(atoms) == 3: - continue - old, new, tag, start, end = atoms - old = scaffold + old - start, end = int(start), int(end) - if start >= end: - logger.warning("%s %d >= %d", old, start, end) - start, end = end, start - print("\t".join(str(x) for x in (old, start - 1, end))) - nbreaks += 1 - scaffolds.add(old) - print( - "{0} breakpoints in total, {1} scaffolds broken".format( - nbreaks, len(scaffolds) - ), - file=sys.stderr, - ) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/str.py b/jcvi/projects/str.py deleted file mode 100644 index f31e2e12..00000000 --- a/jcvi/projects/str.py +++ /dev/null @@ -1,2271 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Related scripts for the HLI-STR (TREDPARSE) paper. 
-""" -import os.path as op -import os -import csv -import sys -import json -import numpy as np -import pandas as pd - -from collections import defaultdict -from itertools import product -from random import sample - -from Bio import SeqIO -from Bio.Seq import Seq -from Bio.SeqRecord import SeqRecord -from natsort import natsorted -from pyfaidx import Fasta - -try: - import vcf -except ImportError: - pass - -from ..apps.base import ActionDispatcher, OptionParser, cleanup, iglob, logger, mkdir -from ..apps.base import datafile, sh -from ..apps.bwa import align -from ..apps.grid import Parallel -from ..assembly.sim import eagle, wgsim -from ..formats.base import is_number, must_open -from ..formats.sam import get_minibam_bed, index -from ..graphics.base import ( - FancyArrow, - normalize_axes, - panel_labels, - plt, - savefig, - set_helvetica_axis, -) -from ..utils.cbook import percentage -from ..utils.table import tabulate -from ..variation.str import TREDsRepo, af_to_counts, read_treds - - -# Huntington risk allele -infected_thr = 40 -ref_thr = 19 -SIMULATED_HAPLOID = r"Simulated haploid $\mathit{h}$" -SIMULATED_DIPLOID = r"Simulated diploid $\mathit{20/h}$" -lsg = "lightslategray" - -# List of TRED loci excluded from plots -ignore = ("AR",) - - -class TREDPARSEvcf(object): - def __init__(self, vcffile): - samplekey = op.basename(vcffile).split(".")[0] - reader = vcf.Reader(open(vcffile, "rb")) - res = "-1/-1" - for rec in reader: - sample = rec.samples[0] - res = sample["GB"] - ci = sample["CI"] - break - print(samplekey, res, ci) - - -class TrioOrDuo: - def __init__(self, parents, child, family): - self.parents = dict((x, family[x]) for x in parents) - self.child = dict((x, family[x]) for x in child) - self.is_trio = len(self.parents) == 2 - - def __len__(self): - return len(self.parents) + len(self.child) - - def __key(self): - return tuple(sorted(self.parents.values()) + self.child.values()) - - def __hash__(self): - return hash(self.__key()) - - def __eq__(self, 
other): - return self.__key() == other.__key() - - def __str__(self): - return str(self.parents) + "=>" + str(self.child) - - __repr__ = __str__ - - def check_mendelian(self, df, tred, tolerance=0, x_linked=False, verbose=False): - child_key = self.child.values()[0] - c = get_alleles(df, child_key, tred) - if c is None: - return 0 - if self.is_trio: - parent_keys = self.parents.values() - p1 = get_alleles(df, parent_keys[0], tred) - p2 = get_alleles(df, parent_keys[1], tred) - if (p1 is None) or (p2 is None): - return 0 - possible_progenies = get_progenies( - p1, p2, x_linked=x_linked, tolerance=tolerance - ) - mendelian_error = not (c in possible_progenies) - if verbose: - print( - parent_keys[0], - parent_keys[1], - child_key, - p1, - p2, - c, - not mendelian_error, - ) - else: - parent_key = self.parents.values()[0] - p1 = get_alleles(df, parent_key, tred) - if p1 is None: - return 0 - _p1 = expand_alleles(p1, tolerance=tolerance) - mendelian_error = len(set(_p1) & set(c)) == 0 - if mendelian_error and x_linked: - # Do not count case where - progeny is male, parent is male - if (c[0] == c[1]) and (p1[0] == p1[1]): - mendelian_error = 0 - if verbose: - print(parent_key, child_key, p1, c, not mendelian_error) - return mendelian_error - - -def expand_alleles(p, tolerance=0): - """ - Returns expanded allele set given the tolerance. - """ - _p = set() - for x in p: - _p |= set(range(x - tolerance, x + tolerance + 1)) - return _p - - -def get_progenies(p1, p2, x_linked=False, tolerance=0): - """ - Returns possible progenies in a trio. 
- """ - _p1 = expand_alleles(p1, tolerance=tolerance) - _p2 = expand_alleles(p2, tolerance=tolerance) - possible_progenies = set(tuple(sorted(x)) for x in product(_p1, _p2)) - if x_linked: # Add all hemizygotes - possible_progenies |= set((x, x) for x in (set(_p1) | set(_p2))) - return possible_progenies - - -def get_alleles(df, sample, tred): - try: - s = df.ix[sample] - a = int(s[tred + ".1"]) - b = int(s[tred + ".2"]) - except: - return None - if a == -1 or b == -1: - return None - return a, b - - -def main(): - - actions = ( - # Prepare data - ("simulate", "simulate bams with varying inserts with dwgsim"), - ("mergebam", "merge sets of BAMs to make diploid"), - ("mini", "prepare mini-BAMs that contain only the STR loci"), - ("alts", "build alternative loci based on simulation data"), - # Compile results - ("batchlobstr", "run lobSTR on a list of BAMs"), - ("compilevcf", "compile vcf outputs into lists"), - # Plotting - ("evidences", "plot distribution of evidences"), - ("likelihood", "plot likelihood surface"), - ("likelihood2", "plot likelihood surface and marginals"), - ("likelihood3", "plot likelihood surface and marginals for two settings"), - ("compare", "compare callers on fake HD patients"), - ("compare2", "compare TREDPARSE and lobSTR on fake HD patients"), - ("power", "compare TREDPARSE on fake HD patients adding evidence"), - ("tredparse", "compare TREDPARSE on fake HD patients adding coverage"), - ("allelefreq", "plot the allele frequencies of some STRs"), - ("allelefreqall", "plot all 30 STR allele frequencies"), - ("depth", "plot read depths across all TREDs"), - # Diagram - ("diagram", "plot the predictive power of various evidences"), - # Extra analysis for reviews - ("mendelian", "calculate Mendelian errors based on trios and duos"), - ("mendelian2", "second iteration of Mendelian error calculation"), - ("mendelian_errors", "plot Mendelian errors calculated by mendelian"), - ("mendelian_errors2", "plot Mendelian errors calculated by 
mendelian2"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def mendelian_errors2(args): - """ - %prog mendelian_errors2 Trios.summary.csv - - Plot Mendelian errors as calculated by mendelian(). File - `Trios.summary.csv` looks like: - - Name,Motif,Inheritance,N_Correct,N_Error,N_missing,ErrorRate [N_Error / (N_Correct + N_Error))] - DM1,CTG,AD,790,12,0,1.5% - DM2,CCTG,AD,757,45,0,5.6% - DRPLA,CAG,AD,791,11,0,1.4% - """ - p = OptionParser(mendelian_errors2.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="7x7", format="png") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (csvfile,) = args - fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - ymin = -0.2 - df = pd.read_csv(csvfile) - data = [] - for i, d in df.iterrows(): - tred = d["Name"] - motif = d["Motif"] - if tred in ignore: - logger.debug("Ignore {}".format(d["TRED"])) - continue - - if len(motif) > 6: - if "/" in motif: # CTG/CAG - motif = motif.split("/")[0] - else: - motif = motif[:6] + ".." 
- xtred = "{} {}".format(tred, motif) - accuracy = d[-1] - data.append((xtred, accuracy)) - - key = lambda x: float(x.rstrip("%")) - data.sort(key=lambda x: key(x[-1])) - print(data) - treds, accuracies = zip(*data) - ntreds = len(treds) - ticks = range(ntreds) - accuracies = [key(x) for x in accuracies] - - for tick, accuracy in zip(ticks, accuracies): - ax.plot([tick, tick], [ymin, accuracy], "-", lw=2, color="lightslategray") - - (trios,) = ax.plot(accuracies, "o", mfc="w", mec="b") - ax.set_title("Mendelian errors based on STR calls in trios in HLI samples") - ntrios = "Mendelian errors in 802 trios" - ax.legend([trios], [ntrios], loc="best") - - ax.set_xticks(ticks) - ax.set_xticklabels(treds, rotation=45, ha="right", size=8) - ax.set_yticklabels([int(x) for x in ax.get_yticks()], family="Helvetica") - ax.set_ylabel(r"Mendelian errors (\%)") - ax.set_ylim(ymin, 100) - - normalize_axes(root) - - image_name = "mendelian_errors2." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def mendelian2(args): - """ - %prog mendelian2 - XC_kinship_TRIO_annotationed_age_sex_PaternalMaternalAgeWhenChildWasBorn.txt - hli.20170805.tsv - - Second iteration of Mendelian error calculation. This includes all the read - counts and gender information to correct error estimate of X-linked loci. 
- """ - p = OptionParser(mendelian2.__doc__) - p.add_argument( - "--treds", default=None, help="Extract specific treds, use comma to separate" - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - triofile, hlitsv = args - repo = TREDsRepo() - treds = opts.treds.split(",") if opts.treds else repo.names - triodata = pd.read_csv(triofile, sep="\t") - samplekey = lambda x: x.split("_")[1] - trios = [] - for i, row in triodata.iterrows(): - proband = row["proband"] - parents = row["parents"] - proband_sex = row["proband_sex"] - parents_sex = row["parent1_sex,parent2_sex"] - proband = samplekey(proband) - p1, p2 = parents.split(",") - p1, p2 = samplekey(p1), samplekey(p2) - p1_sex, p2_sex = parents_sex.split(",") - if p1_sex == "Male": - p1, p2 = p2, p1 - p1_sex, p2_sex = p2_sex, p1_sex - trios.append((proband, proband_sex, p1, p1_sex, p2, p2_sex)) - - header = "{0}_ID {0}_Sex {0}_Calls" - header += " {0}_Full {0}_Partial {0}_Repeat {0}_Paired" - tredsdata = pd.read_csv(hlitsv, sep="\t", low_memory=False) - tsvfiles = [] - summary = open("Trios.summary.csv", "w") - summary_header = ( - "Name,Motif,Inheritance,N_Correct,N_Error,N_missing," - "ErrorRate [N_Error / (N_Correct + N_Error))]" - ) - print(summary_header, file=summary) - print(summary_header) - for tred in treds: - if tred in ("FXS", "AR"): - continue - tr = repo[tred] - tsvfile = "{}.details.tsv".format(tred) - fw = open(tsvfile, "w") - td = {} - for _, row in tredsdata.iterrows(): - s = str(row["SampleKey"]) - inferredGender = row["inferredGender"] - try: - calls = row[tred + ".calls"] - fdp = int(row[tred + ".FDP"]) - pdp = int(row[tred + ".PDP"]) - rdp = int(row[tred + ".RDP"]) - pedp = int(row[tred + ".PEDP"]) - td[s] = [str(x) for x in (inferredGender, calls, fdp, pdp, rdp, pedp)] - except ValueError: - logger.error("Invalid row: {}".format(row)) - continue - - h = " ".join((header.format("P1"), header.format("P2"), header.format("Kid"))) - 
print("\t".join(["MendelianError"] + h.split()), file=fw) - tredcall = lambda x: td.get(x, ["", "-1|-1", "", "", "", ""])[:] - counts = defaultdict(int) - is_xlinked = repo[tred].is_xlinked - shorten = lambda x: str(int(x[-4:])) # Simplify SampleKey - for proband, proband_sex, p1, p1_sex, p2, p2_sex in trios: - tp1 = tredcall(p1) - tp2 = tredcall(p2) - tpp = tredcall(proband) - m = mendelian_check(tp1, tp2, tpp, is_xlinked=is_xlinked) - counts[m] += 1 - if is_xlinked: - for p, p_sex in ((tp1, p1_sex), (tp2, p2_sex), (tpp, proband_sex)): - if p[1].startswith("-"): - p[1] = "n.a." - cells = [shorten(p1), p1_sex] + tp1[1:] - cells += [shorten(p2), p2_sex] + tp2[1:] - cells += [shorten(proband), proband_sex] + tpp[1:] - print("\t".join([m] + cells), file=fw) - fw.close() - tsvfiles.append(tsvfile) - - error_rate = counts["Error"] * 100.0 / (counts["Correct"] + counts["Error"]) - line = ",".join( - str(x) - for x in ( - tred, - tr.motif, - tr.inheritance, - counts["Correct"], - counts["Error"], - counts["Missing"], - "{:.1f}%".format(error_rate), - ) - ) - print(line, file=summary) - print(line) - - # Combine into a master spreadsheet - import xlwt - - wb = xlwt.Workbook() - converter = lambda x: int(x) if is_number(x, cast=int) else x - header = xlwt.easyxf("font: bold on, name Helvetica; align: horiz center") - hc = "font: name Helvetica; align: horiz center;" - horiz_center = xlwt.Style.easyxf(hc) - correct = xlwt.Style.easyxf(hc + "pattern: pattern solid, fore_colour light_green;") - error = xlwt.Style.easyxf(hc + "pattern: pattern solid, fore_colour rose;") - missing = xlwt.Style.easyxf( - hc + "pattern: pattern solid, fore_colour light_yellow;" - ) - for tsvfile in tsvfiles: - sheet = op.basename(tsvfile).split(".", 1)[0] - ws = wb.add_sheet(sheet) - fp = open(tsvfile, "rb") - reader = csv.reader(fp, delimiter="\t") - for r, row in enumerate(reader): - style = header if r == 0 else horiz_center - for c, col in enumerate(row): - if c == 0 and r: - style = 
{"Correct": correct, "Error": error, "Missing": missing}[ - col - ] - ws.write(r, c, converter(col), style) - ws.set_panes_frozen(True) - ws.set_horz_split_pos(1) - - wb.save("Trios.xls") - summary.close() - - -def mendelian_check(tp1, tp2, tpp, is_xlinked=False): - """ - Compare TRED calls for Parent1, Parent2 and Proband. - """ - call_to_ints = lambda x: tuple(int(_) for _ in x.split("|") if _ != ".") - tp1_sex, tp1_call = tp1[:2] - tp2_sex, tp2_call = tp2[:2] - tpp_sex, tpp_call = tpp[:2] - # tp1_evidence = sum(int(x) for x in tp1[2:]) - # tp2_evidence = sum(int(x) for x in tp2[2:]) - # tpp_evidence = sum(int(x) for x in tpp[2:]) - tp1_call = call_to_ints(tp1_call) - tp2_call = call_to_ints(tp2_call) - tpp_call = call_to_ints(tpp_call) - possible_progenies = set(tuple(sorted(x)) for x in product(tp1_call, tp2_call)) - if is_xlinked and tpp_sex == "Male": - possible_progenies = set(tuple((x,)) for x in tp1_call) - if -1 in tp1_call or -1 in tp2_call or -1 in tpp_call: - tag = "Missing" - else: - tag = "Correct" if tpp_call in possible_progenies else "Error" - return tag - - -def in_region(rname, rstart, target_chr, target_start, target_end): - """ - Quick check if a point is within the target region. - """ - return (rname == target_chr) and (target_start <= rstart <= target_end) - - -def alts(args): - """ - %prog alts HD - - Build alternative loci based on simulation data. 
- """ - import pysam - from more_itertools import pairwise - from jcvi.utils.grouper import Grouper - - p = OptionParser(alts.__doc__) - p.set_outfile(outfile="TREDs.alts.csv") - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - treds = args - repo = TREDsRepo() - if "all" in treds: - treds = repo.names - - pad_left, pad_right = 1000, 10000 - READLEN = 150 - fw = must_open(opts.outfile, "w") - print("TRED,alts,alts.hg19", file=fw) # Header - for tred in treds: - ref_regions = [] - - # Simulate a depth 1000 BAM with 300 repeats - for ref in ("hg38", "hg19"): - - # This is the region that involves the TRED locus - repo = TREDsRepo(ref=ref) - t = repo[tred] - chr, start, end = t.chr, t.repeat_start, t.repeat_end - start -= pad_left - end += pad_right - - tred_ref = "{}_{}".format(tred, ref) - if not op.isdir(tred_ref): - simulate( - [ - tred_ref, - "300", - "300", - "--depth=1000", - "--ref={}".format(ref), - "--tred={}".format(tred), - ] - ) - bamfile = op.join(tred_ref, "300.bam") - - # Parse the BAM file, retrieve all regions - bamfile = pysam.AlignmentFile(bamfile, "rb") - nreads = altreads = 0 - alt_points = set() - for read in bamfile.fetch(): - fname, fstart = ( - bamfile.getrname(read.reference_id), - read.reference_start, - ) - rname, rstart = ( - bamfile.getrname(read.next_reference_id), - read.next_reference_start, - ) - f_in_region = in_region(fname, fstart, chr, start, end) - r_in_region = in_region(rname, rstart, chr, start, end) - if (not f_in_region) and r_in_region: - alt_points.add((fname, fstart)) - altreads += 1 - if (not r_in_region) and f_in_region: - alt_points.add((rname, rstart)) - altreads += 1 - nreads += 1 - - logger.debug( - "A total of {} reads ({} alts) processed".format(nreads, altreads) - ) - alt_points = natsorted(alt_points) - - # Chain these points together into regions - g = Grouper() - for a in alt_points: - g.join(a) - for a, b in pairwise(alt_points): - achr, apos = a - bchr, bpos = b - if 
achr != bchr: - continue - if (bpos - apos) > READLEN: - continue - g.join(a, b) - - # All regions that contain ALT - alt_sum = 0 - regions = [] - for c in g: - chr_min, pos_min = min(c) - chr_max, pos_max = max(c) - assert chr_min, chr_max - pos_min -= READLEN - pos_max += READLEN - regions.append((chr_min, pos_min, pos_max)) - alt_sum += pos_max - pos_min - - regions = "|".join( - [ - "{}:{}-{}".format(c, start, end) - for c, start, end in natsorted(regions) - ] - ) - ref_regions.append(regions) - - line = ",".join([tred] + ref_regions) - print(line, file=sys.stderr) - print(line, file=fw) - logger.debug("Alternative region sum: {} bp".format(alt_sum)) - - fw.close() - - -def depth(args): - """ - %prog depth DP.tsv - - Plot read depths across all TREDs. - """ - import seaborn as sns - - p = OptionParser(depth.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="14x14") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (tsvfile,) = args - fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots( - ncols=2, nrows=2, figsize=(iopts.w, iopts.h) - ) - plt.tight_layout(pad=6) - - data = pd.read_csv(tsvfile, sep="\t", low_memory=False) - - ids, treds = read_treds() - for dp, ax, title in zip( - ("FDP", "PDP", "RDP", "PEDP"), - (ax1, ax2, ax3, ax4), - ("Spanning reads", "Partial reads", "Repeat-only reads", "Paired-end reads"), - ): - logger.debug("Build {}".format(title)) - # Construct related data structure - xd = [] # (tred, dp) - mdp = [] # (tred, median_dp) - for tred, motif in zip(treds["abbreviation"], treds["motif"]): - if tred in ignore: - logger.debug("Ignore {}".format(tred)) - continue - if len(motif) > 4: - if "/" in motif: # CTG/CAG - motif = motif.split("/")[0] - else: - motif = motif[:4] + ".." - xtred = "{} {}".format(tred, motif) - md = [x for x in data[tred + "." 
+ dp] if x >= 0] - subsample = 10000 if dp == "RDP" else 1000 - md = sample(md, subsample) - pmd = [x for x in md if x > 0] - median = np.median(pmd) if pmd else 0 - mdp.append((xtred, median)) - for d in md: - xd.append((xtred, d)) - - # Determine order - mdp.sort(key=lambda x: x[1]) - order, mdp = zip(*mdp) - - # OK, now plot - xt, xd = zip(*xd) - sns.boxplot(xt, xd, ax=ax, order=order, fliersize=2) - xticklabels = ax.get_xticklabels() - ax.set_xticklabels(xticklabels, rotation=45, ha="right") - ax.set_title("Number of {} per locus".format(title), size=18) - ylim = 30 if dp == "RDP" else 100 - ax.set_ylim(0, ylim) - - yticklabels = [int(x) for x in ax.get_yticks()] - ax.set_yticklabels(yticklabels, family="Helvetica", size=14) - - root = fig.add_axes([0, 0, 1, 1]) - pad = 0.04 - panel_labels( - root, - ( - (pad, 1 - pad, "A"), - (1 / 2.0 + pad / 2, 1 - pad, "B"), - (pad, 0.5 - pad / 2, "C"), - (1 / 2.0 + pad / 2, 0.5 - pad / 2, "D"), - ), - ) - normalize_axes(root) - - image_name = "depth." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def mendelian_errors(args): - """ - %prog mendelian_errors STR-Mendelian-errors.csv - - Plot Mendelian errors as calculated by mendelian(). File - `STR-Mendelian-errors.csv` looks like: - - ,Duos - Mendelian errors,Trios - Mendelian errors - SCA36,1.40%,0.60% - ULD,0.30%,1.50% - BPES,0.00%,1.80% - - One TRED disease per line, followed by duo errors and trio errors. 
- """ - p = OptionParser(mendelian_errors.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="6x6") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (csvfile,) = args - fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - ymin = -0.2 - df = pd.read_csv(csvfile) - data = [] - for i, d in df.iterrows(): - if d["TRED"].split()[0] in ignore: - logger.debug("Ignore {}".format(d["TRED"])) - continue - data.append(d) - treds, duos, trios = zip(*data) - ntreds = len(treds) - ticks = range(ntreds) - treds = [x.split()[0] for x in treds] - duos = [float(x.rstrip("%")) for x in duos] - trios = [float(x.rstrip("%")) for x in trios] - - for tick, duo, trio in zip(ticks, duos, trios): - m = max(duo, trio) - ax.plot([tick, tick], [ymin, m], "-", lw=2, color="lightslategray") - - (duos,) = ax.plot(duos, "o", mfc="w", mec="g") - (trios,) = ax.plot(trios, "o", mfc="w", mec="b") - ax.set_title("Mendelian errors based on trios and duos in HLI samples") - nduos = "Mendelian errors in 362 duos" - ntrios = "Mendelian errors in 339 trios" - ax.legend([trios, duos], [ntrios, nduos], loc="best") - - ax.set_xticks(ticks) - ax.set_xticklabels(treds, rotation=45, ha="right", size=8) - yticklabels = [int(x) for x in ax.get_yticks()] - ax.set_yticklabels(yticklabels, family="Helvetica") - ax.set_ylabel(r"Mendelian errors (\%)") - ax.set_ylim(ymin, 20) - - normalize_axes(root) - - image_name = "mendelian_errors." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def extract_trios(family): - """ - Identify all trios/duos inside a family, where a family contains dictionary - of relationship: individual, for example: - { - "ChildSelf": "176531498", - "DzTwin": "176531497", - "Parent": "176449143" - } - """ - self_key = ["ChildSelf"] - keys = family.keys() - spouse_key = [x for x in keys if ("spouse" in x.lower())] - assert len(spouse_key) <= 1 - parent_keys = [ - x for x in keys if ("parent" in x.lower()) and ("grand" not in x.lower()) - ] - sib_keys = [ - x for x in keys if ("sibling" in x.lower()) or ("twin" in x.lower()) - ] + self_key - child_keys = [ - x - for x in keys - if ("child" in x.lower()) - and ("grand" not in x.lower()) - and ("self" not in x.lower()) - ] - - for sk in sib_keys: - yield TrioOrDuo(parent_keys, [sk], family) - for ck in child_keys: - yield TrioOrDuo(self_key + spouse_key, [ck], family) - - -def read_tred_tsv(tsvfile): - """ - Read the TRED table into a dataframe. - """ - df = pd.read_csv(tsvfile, sep="\t", index_col=0, dtype={"SampleKey": str}) - return df - - -def mendelian(args): - """ - %prog mendelian trios_candidate.json hli.20170424.tred.tsv - - Calculate Mendelian errors based on trios and duos. 
- """ - p = OptionParser(mendelian.__doc__) - p.add_argument("--tolerance", default=0, type=int, help="Tolernace for differences") - p.set_verbose() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - triosjson, tredtsv = args - verbose = opts.verbose - tolerance = opts.tolerance - - js = json.load(open(triosjson)) - allterms = set() - duos = set() - trios = set() - for v in js: - allterms |= set(v.keys()) - for trio_or_duo in extract_trios(v): - assert len(trio_or_duo) in (2, 3) - if len(trio_or_duo) == 2: - duos.add(trio_or_duo) - else: - trios.add(trio_or_duo) - # print "\n".join(allterms) - print("A total of {} families imported".format(len(js))) - - # Read in all data - df = read_tred_tsv(tredtsv) - - ids, treds = read_treds() - table = {} - for tred, inheritance in zip(treds["abbreviation"], treds["inheritance"]): - x_linked = inheritance[0] == "X" # X-linked - name = tred - if x_linked: - name += " (X-linked)" - print("[TRED] {}".format(name)) - - n_total = len(duos) - n_error = 0 - for duo in duos: - n_error += duo.check_mendelian( - df, tred, tolerance=tolerance, x_linked=x_linked, verbose=verbose - ) - tag = "Duos - Mendelian errors" - print("{}: {}".format(tag, percentage(n_error, n_total))) - duo_error = percentage(n_error, n_total, mode=2) - table[(name, tag)] = "{0:.1f}%".format(duo_error) - - n_total = len(trios) - n_error = 0 - for trio in trios: - n_error += trio.check_mendelian( - df, tred, tolerance=tolerance, x_linked=x_linked, verbose=verbose - ) - tag = "Trios - Mendelian errors" - print("{}: {}".format(tag, percentage(n_error, n_total))) - trio_error = percentage(n_error, n_total, mode=2) - table[(name, tag)] = "{0:.1f}%".format(trio_error) - - # Summarize - print(tabulate(table)) - - -def make_STR_bed(filename="STR.bed", pad=0, treds=None): - tredsfile = datafile("TREDs.meta.csv") - tf = pd.read_csv(tredsfile) - - tds = list(tf["abbreviation"]) - regions = list(tf["repeat_location"]) - fw = 
must_open(filename, "w") - extract_Y = False - for td, region in zip(tds, regions): - if treds and (td not in treds): - continue - c, startend = region.split(":") - extract_Y = extract_Y or (c == "chrY") - start, end = startend.split("-") - start, end = int(start), int(end) - print("\t".join(str(x) for x in (c, start - pad, end + pad, td)), file=fw) - - if not extract_Y: - return filename - - UNIQY = datafile("chrY.hg38.unique_ccn.gc") - fp = open(UNIQY) - nregions = 0 - for i, row in enumerate(fp): - # Some regions still have mapped reads, exclude a few - if i in (1, 4, 6, 7, 10, 11, 13, 16, 18, 19): - continue - if nregions >= 5: - break - c, start, end, gc = row.split() - start, end = int(start), int(end) - print( - "\t".join( - str(x) - for x in ( - c, - start - pad, - end + pad, - "chrY.unique_ccn.{}".format(nregions), - ) - ), - file=fw, - ) - nregions += 1 - - fw.close() - return filename - - -def mini(args): - """ - %prog mini bamfile minibamfile - - Prepare mini-BAMs that contain only the STR loci. 
- """ - p = OptionParser(mini.__doc__) - p.add_argument( - "--pad", default=20000, type=int, help="Add padding to the STR reigons" - ) - p.add_argument( - "--treds", default=None, help="Extract specific treds, use comma to separate" - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bamfile, minibam = args - treds = opts.treds.split(",") if opts.treds else None - pad = opts.pad - bedfile = make_STR_bed(pad=pad, treds=treds) - - get_minibam_bed(bamfile, bedfile, minibam) - logger.debug("Mini-BAM written to `{}`".format(minibam)) - - -def parse_log(logfile): - fp = open(logfile) - likelihood = {} - for row in fp: - if row.startswith("DEBUG:IntegratedCaller:***"): - atoms = row.split() - i = int(atoms[1].strip("(,")) - j = int(atoms[2].strip(")")) - lnL = float(atoms[-1]) - likelihood[(i, j)] = lnL - if row.startswith("DEBUG:IntegratedCaller:CI(h1)"): - CI_h1 = [int(x.strip()) for x in row.split("=")[1].split("-")] - if row.startswith("DEBUG:IntegratedCaller:CI(h2)"): - CI_h2 = [int(x.strip()) for x in row.split("=")[1].split("-")] - if row.startswith("DEBUG:IntegratedCaller:ML estimate:"): - MLE = row.split(":")[3].split("=")[1].split()[:2] - MLE = [int(x.strip("[],")) for x in MLE] - - return likelihood, CI_h1, CI_h2, MLE - - -def likelihood(args): - """ - %prog likelihood - - Plot likelihood surface. 
Look for two files in the current folder: - - 100_100.log, haploid model - - 100_20.log, diploid model - """ - p = OptionParser(likelihood.__doc__) - opts, args, iopts = p.set_image_options( - args, figsize="10x5", style="white", cmap="coolwarm" - ) - - if len(args) != 0: - sys.exit(not p.print_help()) - - fig, (ax1, ax2) = plt.subplots(ncols=2, nrows=1, figsize=(iopts.w, iopts.h)) - plt.tight_layout(pad=4) - - # Haploid model - LL, CI_h1, CI_h2, MLE = parse_log("100_100.log") - data = [] - for k, v in LL.items(): - data.append((k[0], v)) - data.sort() - x, y = zip(*data) - x = np.array(x) - (curve,) = ax1.plot(x, y, "-", color=lsg, lw=2) - ax1.set_title("Simulated haploid ($h^{truth}=100$)") - - h_hat, max_LL = max(data, key=lambda x: x[-1]) - _, min_LL = min(data, key=lambda x: x[-1]) - ymin, ymax = ax1.get_ylim() - ax1.set_ylim([ymin, ymax + 30]) - - LL_label = "log(Likelihood)" - ax1.plot([h_hat, h_hat], [ymin, max_LL], ":", color=lsg, lw=2) - ax1.text(h_hat, max_LL + 10, r"$\hat{h}=93$", color=lsg) - ax1.set_xlabel(r"$h$") - ax1.set_ylabel(LL_label) - - a, b = CI_h1 - ci = ax1.fill_between( - x, [ymin] * len(x), y, where=(x >= a) & (x <= b), color=lsg, alpha=0.5 - ) - ax1.legend([curve, ci], ["Likelihood curve", r"95$\%$ CI"], loc="best") - - # Diploid model - LL, CI_h1, CI_h2, MLE = parse_log("100_20.log") - _, min_LL = min(data, key=lambda x: x[-1]) - data = np.ones((301, 301)) * min_LL - for k, v in LL.items(): - a, b = k - data[a, b] = v - data[b, a] = v - - data = mask_upper_triangle(data) - ax_imshow(ax2, data, opts.cmap, LL_label, 20, 104) - - root = fig.add_axes([0, 0, 1, 1]) - pad = 0.04 - panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2.0, 1 - pad, "B"))) - normalize_axes(root) - - image_name = "likelihood." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def mask_upper_triangle(data): - mask = np.zeros_like(data) - mask[np.triu_indices_from(mask)] = True - data = np.ma.array(data, mask=mask) - return data - - -def ax_plot(ax, P_h, h_hat, CI_h, xlabel, ylabel, ticks=True): - max_P = max(P_h.values()) - a, b = CI_h - - ax.plot([h_hat, h_hat], [0, max_P], ":", color=lsg, lw=2) - ax.set_xlabel(r"$%s$" % xlabel) - ax.set_ylabel(ylabel) - - data = [] - for k, v in sorted(P_h.items()): - data.append((int(k), v)) - data.sort() - x, y = zip(*data) - x = np.array(x) - ax.plot(x, y, "-", color=lsg, lw=2) - title = "Marginal distribution for $%s$" % xlabel - ax.set_title(title) - if not ticks: - ax.set_yticks([]) - - if a == b: - ax.plot([h_hat, h_hat], [0, max_P], "-", color=lsg, lw=2) - else: - ax.fill_between( - x, [0] * len(x), y, where=(x >= a) & (x <= b), color=lsg, alpha=0.5 - ) - ax.set_xlim(0, 300) - - ymin, ymax = ax.get_ylim() - if h_hat < 150: - ax.text( - h_hat + 20, - ymax * 4.0 / 5, - r"$\hat{%s}=%d$" % (xlabel, h_hat), - color=lsg, - va="center", - ) - ax.text( - h_hat + 20, - ymax * 3.0 / 5, - r"95$\%$ CI" + r"$=%s-%s$" % (a, b), - color=lsg, - va="center", - ) - else: - ax.text( - h_hat - 30, - ymax * 4.0 / 5, - r"$\hat{%s}=%d$" % (xlabel, h_hat), - color=lsg, - ha="right", - va="center", - ) - ax.text( - h_hat - 30, - ymax * 3.0 / 5, - r"95$\%$ CI" + r"$=%s-%s$" % (a, b), - color=lsg, - ha="right", - va="center", - ) - - ymin, ymax = ax.get_ylim() - ax.set_ylim(ymin, ymax * 1.05) - - -def ax_imshow( - ax, - P_h1h2, - cmap, - label, - h1_hat, - h2_hat, - h1_truth, - h2_truth, - r=4, - draw_circle=True, - ticks=True, -): - im = ax.imshow(P_h1h2, cmap=cmap, origin="lower") - - from mpl_toolkits.axes_grid1 import make_axes_locatable - - divider = make_axes_locatable(ax) - cax = divider.append_axes("right", size="5%", pad=0.05) - cb = plt.colorbar(im, cax) - cb.set_label(label) - if not ticks: - cb.set_ticks([]) - - if draw_circle: - circle = 
plt.Circle((h1_hat, h2_hat), r, ec="w", fill=False) - ax.add_artist(circle) - - annotation = r"$\hat{h_1}=%d, \hat{h_2}=%d$" % (h1_hat, h2_hat) - ax.text(200, 100, annotation, color=lsg, ha="center", va="center") - - ax.set_xlabel(r"$h_1$") - ax.set_ylabel(r"$h_2$") - title = "Simulated diploid ($h_{1}^{truth}=%d, h_{2}^{truth}=%d$)" % ( - h1_truth, - h2_truth, - ) - ax.set_title(title) - - -def likelihood2(args): - """ - %prog likelihood2 100_20.json - - Plot the likelihood surface and marginal distributions. - """ - from matplotlib import gridspec - - p = OptionParser(likelihood2.__doc__) - opts, args, iopts = p.set_image_options( - args, figsize="10x5", style="white", cmap="coolwarm" - ) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (jsonfile,) = args - fig = plt.figure(figsize=(iopts.w, iopts.h)) - gs = gridspec.GridSpec(2, 2) - ax1 = fig.add_subplot(gs[:, 0]) - ax2 = fig.add_subplot(gs[0, 1]) - ax3 = fig.add_subplot(gs[1, 1]) - plt.tight_layout(pad=3) - pf = plot_panel(jsonfile, ax1, ax2, ax3, opts.cmap) - - root = fig.add_axes([0, 0, 1, 1]) - normalize_axes(root) - - image_name = "likelihood2.{}.".format(pf) + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def likelihood3(args): - """ - %prog likelihood3 140_20.json 140_70.json - - Plot the likelihood surface and marginal distributions for two settings. 
- """ - from matplotlib import gridspec - - p = OptionParser(likelihood3.__doc__) - opts, args, iopts = p.set_image_options( - args, figsize="10x10", style="white", cmap="coolwarm" - ) - if len(args) != 2: - sys.exit(not p.print_help()) - - jsonfile1, jsonfile2 = args - fig = plt.figure(figsize=(iopts.w, iopts.h)) - gs = gridspec.GridSpec(9, 2) - ax1 = fig.add_subplot(gs[:4, 0]) - ax2 = fig.add_subplot(gs[:2, 1]) - ax3 = fig.add_subplot(gs[2:4, 1]) - ax4 = fig.add_subplot(gs[5:, 0]) - ax5 = fig.add_subplot(gs[5:7, 1]) - ax6 = fig.add_subplot(gs[7:, 1]) - plt.tight_layout(pad=2) - - plot_panel(jsonfile1, ax1, ax2, ax3, opts.cmap) - plot_panel(jsonfile2, ax4, ax5, ax6, opts.cmap) - - root = fig.add_axes([0, 0, 1, 1]) - pad = 0.02 - panel_labels(root, ((pad, 1 - pad, "A"), (pad, 4.0 / 9, "B"))) - normalize_axes(root) - - image_name = "likelihood3." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def plot_panel(jsonfile, ax1, ax2, ax3, cmap, tred="HD"): - j = json.load(open(jsonfile)) - calls = j["tredCalls"] - P_h1h2 = calls[tred + ".P_h1h2"] - data = np.zeros((301, 301)) - for k, v in P_h1h2.items(): - a, b = k.split(",") - a, b = int(a), int(b) - data[a, b] = v - data[b, a] = v - - label = "Probability density" - data = mask_upper_triangle(data) - h1_hat, h2_hat = calls[tred + ".1"], calls[tred + ".2"] - pf = op.basename(jsonfile).split(".")[0] - h1_truth, h2_truth = sorted([int(x) for x in pf.split("_")]) - ax_imshow( - ax1, - data, - cmap, - label, - h1_hat, - h2_hat, - h1_truth, - h2_truth, - draw_circle=False, - ticks=False, - ) - - CI = calls[tred + ".CI"] - CI_h1, CI_h2 = CI.split("|") - CI_h1 = [int(x) for x in CI_h1.split("-")] - CI_h2 = [int(x) for x in CI_h2.split("-")] - P_h1 = calls[tred + ".P_h1"] - P_h2 = calls[tred + ".P_h2"] - - ax_plot(ax2, P_h1, h1_hat, CI_h1, "h_1", label, ticks=False) - ax_plot(ax3, P_h2, h2_hat, CI_h2, "h_2", label, ticks=False) - - return pf - - -def diagram(args): - """ - %prog diagram - - Plot the 
predictive power of various evidences. - """ - p = OptionParser(diagram.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="8x4", format="png") - - if len(args) != 0: - sys.exit(not p.print_help()) - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - # Gauge on top, this is log-scale - yy = 0.7 - yinterval = 0.1 - height = 0.05 - yp = yy - yinterval - height - canvas = 0.95 - xstart = 0.025 - convert = lambda x: xstart + x * canvas / 600 - # Symbols - root.text( - 0.5, 0.9, r"$L$: Read length, $F$: Flank size, $V$: Pair distance", ha="center" - ) - root.text(0.5, 0.85, r"ex. $L=150bp, F=9bp, V=500bp$", ha="center") - root.text( - xstart + canvas, - yy - height, - "STR repeat length", - ha="center", - color=lsg, - size=10, - ) - - # Mark the key events - pad = 0.02 - arrowlen = canvas * 1.05 - arrowprops = dict( - length_includes_head=True, - width=0.01, - fc=lsg, - lw=0, - head_length=arrowlen * 0.12, - head_width=0.04, - ) - p = FancyArrow(xstart, yy, arrowlen, 0, shape="right", **arrowprops) - root.add_patch(p) - - ppad = 30 - keyevents = ( - (0, 0, -1, r"$0$"), - (150 - 18, 150 - 18 - ppad, 0, r"$L - 2F$"), - (150 - 9, 150 - 9, 1, r"$L - F$"), - (150, 150 + ppad, 2, r"$L$"), - (500 - 9, 500 - 9, 3, r"$V - F$"), - ) - for event, pos, i, label in keyevents: - _event = convert(event) - _pos = convert(pos) - root.plot((_event, _event), (yy - height / 4, yy + height / 4), "-", color="k") - root.text(_pos, yy + pad, label, rotation=45, va="bottom", size=8) - if i < 0: - continue - ystart = yp - i * yinterval - root.plot((_event, _event), (ystart, yy - height / 4), ":", color=lsg) - - # Range on bottom. These are simple 4 rectangles, with the range indicating - # the predictive range. 
- CLOSED, OPEN = range(2) - ranges = ( - (0, 150 - 18, CLOSED, "Spanning reads"), - (9, 150 - 9, OPEN, "Partial reads"), - (150, 500 - 9, CLOSED, "Repeat reads"), - (0, 500 - 9, CLOSED, "Paired-end reads"), - ) - for start, end, starttag, label in ranges: - _start = convert(start) - _end = convert(end) - data = ( - [[0.0, 1.0], [0.0, 1.0]] if starttag == OPEN else [[1.0, 0.0], [1.0, 0.0]] - ) - root.imshow( - data, - interpolation="bicubic", - cmap=plt.cm.Greens, - extent=[_start, _end, yp, yp + height], - ) - root.text(_end + pad, yp + height / 2, label, va="center") - yp -= yinterval - - normalize_axes(root) - - image_name = "diagram." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def plot_allelefreq(ax, df, locus, color="lightslategray"): - tred = df.ix[locus] - cnt = af_to_counts(tred["allele_freq"]) - - cntx, cnty = zip(*cnt.items()) - - motif = tred["motif"] - cutoff_prerisk = tred["cutoff_prerisk"] - cutoff_risk = tred["cutoff_risk"] - npredisease = sum(v for (k, v) in cnt.items() if cutoff_prerisk <= k < cutoff_risk) - npatients = sum(v for (k, v) in cnt.items() if k >= cutoff_risk) - - ax.bar(cntx, cnty, fc=color) - - ymin, ymax = ax.get_ylim() - xmax = (cutoff_risk / 10 + 1) * 10 if cutoff_risk > 50 else 50 - pad = xmax * 0.03 - if cutoff_prerisk < cutoff_risk and npredisease: - ax.axvline(x=cutoff_prerisk, color="k", lw=2) - ax.text( - cutoff_prerisk + pad, - 0.5 * ymax, - r"Pre-disease ($\geq${}$\times${}) - {} alleles".format( - cutoff_prerisk, motif, npredisease - ), - rotation=90, - color="k", - ha="center", - va="center", - ) - ax.axvline(x=cutoff_risk, color="r", lw=2) - - if locus == "AR": - npatients = sum(v for (k, v) in cnt.items() if k <= cutoff_risk) - ax.text( - cutoff_risk - pad, - 0.5 * ymax, - r"Disease ($\leq${}$\times${}) - {} alleles".format( - cutoff_risk, motif, npatients - ), - rotation=90, - color="r", - ha="center", - va="center", - ) - else: - ax.text( - cutoff_risk + pad, - 0.5 * ymax, - r"Disease 
($\geq${}$\times${}) - {} alleles".format( - cutoff_risk, motif, npatients - ), - rotation=90, - color="r", - ha="center", - va="center", - ) - - x = [] # All allelels - for k, v in cnt.items(): - x.extend([k] * v) - - ax.set_xlabel("Number of repeat units") - ax.set_ylabel("Number of alleles") - ax.set_xlim(0, xmax) - ax.set_title(r"{} ({})".format(locus, tred["title"], motif)) - set_helvetica_axis(ax) - - -def allelefreqall(args): - """ - %prog allelefreqall HN_Platinum_Gold.20180525.tsv.report.txt - - Plot all 30 STR allele frequencies. - """ - p = OptionParser(allelefreqall.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (reportfile,) = args - treds, df = read_treds(reportfile) - # Prepare 5 pages, each page with 6 distributions - treds = sorted(treds) - count = 6 - pdfs = [] - for page in range(len(treds) / count + 1): - start = page * count - page_treds = treds[start : start + count] - if not page_treds: - break - allelefreq( - [ - ",".join(page_treds), - "--usereport", - reportfile, - "--nopanels", - "--figsize", - "12x16", - ] - ) - outpdf = "allelefreq.{}.pdf".format(page) - sh("mv allelefreq.pdf {}".format(outpdf)) - pdfs.append(outpdf) - - from jcvi.formats.pdf import cat - - pf = op.basename(reportfile).split(".")[0] - finalpdf = pf + ".allelefreq.pdf" - logger.debug("Merging pdfs into `{}`".format(finalpdf)) - cat(pdfs + ["-o", finalpdf, "--cleanup"]) - - -def allelefreq(args): - """ - %prog allelefreq HD,DM1,SCA1,SCA17,FXTAS,FRAXE - - Plot the allele frequencies of some STRs. 
- """ - p = OptionParser(allelefreq.__doc__) - p.add_argument( - "--nopanels", - default=False, - action="store_true", - help="No panel labels A, B, ...", - ) - p.add_argument("--usereport", help="Use allele frequency in report file") - opts, args, iopts = p.set_image_options(args, figsize="9x13") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (loci,) = args - fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots( - ncols=2, nrows=3, figsize=(iopts.w, iopts.h) - ) - plt.tight_layout(pad=4) - if opts.usereport: - treds, df = read_treds(tredsfile=opts.usereport) - else: - treds, df = read_treds() - - df = df.set_index(["abbreviation"]) - - axes = (ax1, ax2, ax3, ax4, ax5, ax6) - loci = loci.split(",") - for ax, locus in zip(axes, loci): - plot_allelefreq(ax, df, locus) - - # Delete unused axes - for ax in axes[len(loci) :]: - ax.set_axis_off() - - root = fig.add_axes([0, 0, 1, 1]) - pad = 0.03 - if not opts.nopanels: - panel_labels( - root, - ( - (pad / 2, 1 - pad, "A"), - (0.5 + pad, 1 - pad, "B"), - (pad / 2, 2 / 3.0 - pad / 2, "C"), - (0.5 + pad, 2 / 3.0 - pad / 2, "D"), - (pad / 2, 1 / 3.0, "E"), - (0.5 + pad, 1 / 3.0, "F"), - ), - ) - normalize_axes(root) - - image_name = "allelefreq." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def make_fasta(seq, fastafile, id): - rec = SeqRecord(Seq(seq), description="", id=id) - fw = open(fastafile, "w") - SeqIO.write([rec], fw, "fasta") - fw.close() - - -def add_simulate_options(p): - p.add_argument("--readlen", default=150, type=int, help="Length of the read") - p.add_argument( - "--distance", - default=500, - type=int, - help="Outer distance between the two ends", - ) - p.set_depth(depth=20) - - -def simulate(args): - """ - %prog simulate run_dir 1 300 - - Simulate BAMs with varying inserts with dwgsim. The above command will - simulate between 1 to 300 CAGs in the HD region, in a directory called - `run_dir`. 
- """ - p = OptionParser(simulate.__doc__) - p.add_argument( - "--method", choices=("wgsim", "eagle"), default="eagle", help="Read simulator" - ) - p.add_argument( - "--ref", - default="hg38", - choices=("hg38", "hg19"), - help="Reference genome version", - ) - p.add_argument("--tred", default="HD", help="TRED locus") - add_simulate_options(p) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - rundir, startunits, endunits = args - ref = opts.ref - ref_fasta = "/mnt/ref/{}.upper.fa".format(ref) - startunits, endunits = int(startunits), int(endunits) - basecwd = os.getcwd() - mkdir(rundir) - os.chdir(rundir) - cwd = os.getcwd() - - # TRED region (e.g. Huntington) - pad_left, pad_right = 1000, 10000 - repo = TREDsRepo(ref=ref) - tred = repo[opts.tred] - chr, start, end = tred.chr, tred.repeat_start, tred.repeat_end - - logger.debug("Simulating {}".format(tred)) - fasta = Fasta(ref_fasta) - seq_left = fasta[chr][start - pad_left : start - 1] - seq_right = fasta[chr][end : end + pad_right] - motif = tred.repeat - - simulate_method = wgsim if opts.method == "wgsim" else eagle - # Write fake sequence - for units in range(startunits, endunits + 1): - pf = str(units) - mkdir(pf) - os.chdir(pf) - seq = str(seq_left) + motif * units + str(seq_right) - fastafile = pf + ".fasta" - make_fasta(seq, fastafile, id=chr.upper()) - - # Simulate reads on it - simulate_method( - [ - fastafile, - "--depth={}".format(opts.depth), - "--readlen={}".format(opts.readlen), - "--distance={}".format(opts.distance), - "--outfile={}".format(pf), - ] - ) - - read1 = pf + ".bwa.read1.fastq" - read2 = pf + ".bwa.read2.fastq" - samfile, _ = align([ref_fasta, read1, read2]) - indexed_samfile = index([samfile]) - - sh("mv {} ../{}.bam".format(indexed_samfile, pf)) - sh("mv {}.bai ../{}.bam.bai".format(indexed_samfile, pf)) - - os.chdir(cwd) - cleanup(pf) - - os.chdir(basecwd) - - -def mergebam(args): - """ - %prog mergebam dir1 homo_outdir - or - %prog mergebam 
dir1 dir2/20.bam het_outdir - - Merge sets of BAMs to make diploid. Two modes: - - Homozygous mode: pair-up the bams in the two folders and merge - - Heterozygous mode: pair the bams in first folder with a particular bam - """ - p = OptionParser(mergebam.__doc__) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) not in (2, 3): - sys.exit(not p.print_help()) - - if len(args) == 2: - idir1, outdir = args - dir1 = [idir1] if idir1.endswith(".bam") else iglob(idir1, "*.bam") - logger.debug("Homozygous mode") - dir2 = [""] * len(dir1) - elif len(args) == 3: - idir1, idir2, outdir = args - dir1 = [idir1] if idir1.endswith(".bam") else iglob(idir1, "*.bam") - dir2 = [idir2] if idir2.endswith(".bam") else iglob(idir2, "*.bam") - assert len(dir2) == 1, "Second pile must contain a single bam" - dir2 = [idir2] * len(dir1) - - assert len(dir1) == len(dir2), "Two piles must contain same number of bams" - cmd = "samtools merge {} {} {} && samtools index {}" - cmds = [] - mkdir(outdir) - for a, b in zip(dir1, dir2): - ia = op.basename(a).split(".")[0] - ib = op.basename(b).split(".")[0] if b else ia - outfile = op.join(outdir, "{}_{}.bam".format(ia, ib)) - cmds.append(cmd.format(outfile, a, b, outfile)) - - p = Parallel(cmds, cpus=opts.cpus) - p.run() - - -def batchlobstr(args): - """ - %prog batchlobstr bamlist - - Run lobSTR on a list of BAMs. 
The corresponding batch command for TREDPARSE: - $ tred.py bamlist --haploid chr4 --workdir tredparse_results - """ - p = OptionParser(batchlobstr.__doc__) - p.add_argument( - "--haploid", default="chrY,chrM", help="Use haploid model for these chromosomes" - ) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bamlist,) = args - cmd = "python -m jcvi.variation.str lobstr TREDs" - cmd += " --input_bam_path {}" - cmd += " --haploid {}".format(opts.haploid) - cmd += " --simulation" - cmds = [cmd.format(x.strip()) for x in open(bamlist).readlines()] - p = Parallel(cmds, cpus=opts.cpus) - p.run() - - -def compilevcf(args): - """ - %prog compilevcf dir - - Compile vcf outputs into lists. - """ - from jcvi.variation.str import LobSTRvcf - - p = OptionParser(compilevcf.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (folder,) = args - vcf_files = iglob(folder, "*.vcf,*.vcf.gz") - for vcf_file in vcf_files: - try: - p = LobSTRvcf(columnidsfile=None) - p.parse(vcf_file, filtered=False) - res = p.items() - if res: - k, v = res[0] - res = v.replace(",", "/") - else: - res = "-1/-1" - num = op.basename(vcf_file).split(".")[0] - print(num, res) - except (TypeError, AttributeError) as e: - p = TREDPARSEvcf(vcf_file) - continue - - -def evidences(args): - """ - %prog evidences - - Plot distribution of evidences against two factors: - - Sample mean coverage - - Longer allele - """ - p = OptionParser(evidences.__doc__) - p.add_argument( - "--csv", default="hli.20170328.tred.tsv", help="TRED csv output to plot" - ) - opts, args, iopts = p.set_image_options(args, format="pdf") - - if len(args) != 0: - sys.exit(not p.print_help()) - - format = iopts.format - - # Extract sample coverage first - df = pd.read_csv( - "qc-export-MeanCoverage.csv", - header=None, - names=["Samplekey", "MeanCoverage"], - index_col=0, - ) - - # Find coverage for HD - xf = pd.read_csv(opts.csv, 
sep="\t", index_col=0) - dp = {} - tred = "HD" - for sk, row in xf.iterrows(): - sk = str(sk) - a1 = row[tred + ".1"] - a2 = row[tred + ".2"] - fdp = row[tred + ".FDP"] - pdp = row[tred + ".PDP"] - pedp = row[tred + ".PEDP"] - dp[sk] = (a1, a2, fdp, pdp, pedp) - - # Build a consolidated dataframe - ef = pd.DataFrame.from_dict(dp, orient="index") - ef.columns = [ - tred + ".1", - tred + ".2", - tred + ".FDP", - tred + ".PDP", - tred + ".PEDP", - ] - ef.index.name = "SampleKey" - mf = df.merge(ef, how="right", left_index=True, right_index=True) - - # Plot a bunch of figures - outdir = "output" - mkdir(outdir) - xlim = ylim = (0, 100) - draw_jointplot( - outdir + "/A", - "MeanCoverage", - "HD.FDP", - data=mf, - xlim=xlim, - ylim=ylim, - format=format, - ) - draw_jointplot( - outdir + "/B", - "MeanCoverage", - "HD.PDP", - data=mf, - color="g", - xlim=xlim, - ylim=ylim, - format=format, - ) - draw_jointplot( - outdir + "/C", - "MeanCoverage", - "HD.PEDP", - data=mf, - color="m", - xlim=xlim, - ylim=ylim, - format=format, - ) - - xlim = (0, 50) - draw_jointplot( - outdir + "/D", "HD.2", "HD.FDP", data=mf, xlim=xlim, ylim=ylim, format=format - ) - draw_jointplot( - outdir + "/E", - "HD.2", - "HD.PDP", - data=mf, - color="g", - xlim=xlim, - ylim=ylim, - format=format, - ) - draw_jointplot( - outdir + "/F", - "HD.2", - "HD.PEDP", - data=mf, - color="m", - xlim=xlim, - ylim=ylim, - format=format, - ) - - -def draw_jointplot( - figname, x, y, data=None, kind="reg", color=None, xlim=None, ylim=None, format="pdf" -): - """ - Wraps around sns.jointplot - """ - import seaborn as sns - - sns.set_context("talk") - plt.clf() - - register = { - "MeanCoverage": "Sample Mean Coverage", - "HD.FDP": "Depth of full spanning reads", - "HD.PDP": "Depth of partial spanning reads", - "HD.PEDP": "Depth of paired-end reads", - "HD.2": "Repeat size of the longer allele", - } - - g = sns.jointplot(x, y, data=data, kind=kind, color=color, xlim=xlim, ylim=ylim) - 
g.ax_joint.set_xlabel(register.get(x, x)) - g.ax_joint.set_ylabel(register.get(y, y)) - savefig(figname + "." + format, cleanup=False) - - -def long_allele(s, default=19, exclude=None): - if "_" in s: - a, b = s.split("_") - elif "/" in s: - a, b = s.split("/") - else: - raise Exception("Don't know how to split string {}".format(s)) - - res = [int(a), int(b)] - if exclude and exclude in res: - res.remove(exclude) - res = max(res) - return default if res < 0 else res - - -def get_lo_hi_from_CI(s, exclude=None): - """ - Parse the confidence interval from CI. - - >>> get_lo_hi_from_CI("20-20/40-60") - (40, 60) - """ - a, b = s.split("|") - ai, aj = a.split("-") - bi, bj = b.split("-") - - los = [int(ai), int(bi)] - his = [int(aj), int(bj)] - if exclude and exclude in los: - los.remove(exclude) - if exclude and exclude in his: - his.remove(exclude) - return max(los), max(his) - - -def parse_results(datafile, exclude=None): - fp = open(datafile) - data = [] - for row in fp: - atoms = row.split() - truth, call = atoms[:2] - t = long_allele(truth, exclude=exclude) - c = long_allele(call, exclude=exclude) - if len(atoms) == 3: - ci = atoms[2] - lo, hi = get_lo_hi_from_CI(ci, exclude=exclude) - if lo > c: - lo = c - if hi < c: - hi = c - data.append((t, c, lo, hi)) - else: - data.append((t, c)) - return data - - -def compute_rmsd(truth, a, limit=150): - truth = truth[:limit] - a = a[:limit] - if len(a) > len(truth): - a = a[: len(truth)] - return (sum((i - j) ** 2 for (i, j) in zip(truth, a)) / len(truth)) ** 0.5 - - -def compare(args): - """ - %prog compare Evaluation.csv - - Compare performances of various variant callers on simulated STR datasets. 
- """ - p = OptionParser(compare.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="10x10") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (datafile,) = args - pf = datafile.rsplit(".", 1)[0] - fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots( - ncols=2, nrows=2, figsize=(iopts.w, iopts.h) - ) - plt.tight_layout(pad=3) - - bbox = {"facecolor": "tomato", "alpha": 0.2, "ec": "w"} - pad = 2 - - # Read benchmark data - df = pd.read_csv("Evaluation.csv") - truth = df["Truth"] - axes = (ax1, ax2, ax3, ax4) - progs = ("Manta", "Isaac", "GATK", "lobSTR") - markers = ("bx-", "yo-", "md-", "c+-") - - for ax, prog, marker in zip(axes, progs, markers): - ax.plot(truth, df[prog], marker) - ax.plot(truth, truth, "k--") # to show diagonal - ax.axhline(infected_thr, color="tomato") - ax.text( - max(truth) - pad, - infected_thr + pad, - "Risk threshold", - bbox=bbox, - ha="right", - ) - ax.axhline(ref_thr, color="tomato") - ax.text( - max(truth) - pad, - ref_thr - pad, - "Reference repeat count", - bbox=bbox, - ha="right", - va="top", - ) - ax.set_title(SIMULATED_HAPLOID) - ax.set_xlabel(r"Num of CAG repeats inserted ($\mathit{h}$)") - ax.set_ylabel("Num of CAG repeats called") - ax.legend([prog, "Truth"], loc="best") - - root = fig.add_axes([0, 0, 1, 1]) - pad = 0.03 - panel_labels( - root, - ( - (pad / 2, 1 - pad, "A"), - (1 / 2.0, 1 - pad, "B"), - (pad / 2, 1 / 2.0, "C"), - (1 / 2.0, 1 / 2.0, "D"), - ), - ) - normalize_axes(root) - - image_name = pf + "." 
+ iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def plot_compare( - ax, - title, - tredparse_results, - lobstr_results, - pad=8, - ms=3, - max_insert=300, - color="g", - risk=True, -): - truth = range(1, max_insert + 1) - tx, ty, tl, th = zip(*tredparse_results) - trmsd = compute_rmsd(truth, ty) - if lobstr_results: - lx, ly = zip(*lobstr_results) - lrmsd = compute_rmsd(truth, ly) - - rmsd_tag = "$RMSD_{1:150}$" - if lobstr_results: - ax.plot( - lx, ly, "c+-", ms=ms, label="lobSTR ({}={:.2f})".format(rmsd_tag, lrmsd) - ) - ax.plot( - tx, - ty, - ".-", - color=color, - ms=ms, - label="TREDPARSE ({}={:.2f})".format(rmsd_tag, trmsd), - ) - ax.plot(truth, truth, "k--", label="Truth") - ax.fill_between( - tx, tl, th, facecolor=color, alpha=0.25, label=r"TREDPARSE 95$\%$ CI" - ) - - ax.set_xlabel(r"Num of CAG repeats inserted ($\mathit{h}$)") - ax.set_ylabel("Num of CAG repeats called") - ax.set_title(title) - ax.legend(loc="best") - - bbox = {"facecolor": "tomato", "alpha": 0.2, "ec": "w"} - if risk: - ax.axhline(infected_thr, color="tomato") - ax.text( - max(truth) - pad, - infected_thr + pad, - "Risk cutoff={}".format(infected_thr) + r"$\times$CAGs", - bbox=bbox, - ha="right", - ) - else: - readlength, pairdistance = 150 / 3, 500 / 3 - ax.axhline(readlength, color="tomato") - ax.text( - max(truth) - pad, - readlength + pad, - "Read Length ($L$)", - bbox=bbox, - ha="right", - ) - ax.axhline(pairdistance, color="tomato") - ax.text( - max(truth) - pad, - pairdistance + pad, - "Paired-end distance($V$)", - bbox=bbox, - ha="right", - ) - - -def compare2(args): - """ - %prog compare2 - - Compare performances of various variant callers on simulated STR datasets. 
- """ - p = OptionParser(compare2.__doc__) - p.add_argument( - "--maxinsert", default=300, type=int, help="Maximum number of repeats" - ) - add_simulate_options(p) - opts, args, iopts = p.set_image_options(args, figsize="10x5") - - if len(args) != 0: - sys.exit(not p.print_help()) - - depth = opts.depth - readlen = opts.readlen - distance = opts.distance - max_insert = opts.maxinsert - fig, (ax1, ax2) = plt.subplots(ncols=2, nrows=1, figsize=(iopts.w, iopts.h)) - plt.tight_layout(pad=2) - - # ax1: lobSTR vs TREDPARSE with haploid model - lobstr_results = parse_results("lobstr_results_homo.txt") - tredparse_results = parse_results("tredparse_results_homo.txt") - title = SIMULATED_HAPLOID + r" ($D=%s\times, L=%dbp, V=%dbp$)" % ( - depth, - readlen, - distance, - ) - plot_compare(ax1, title, tredparse_results, lobstr_results, max_insert=max_insert) - - # ax2: lobSTR vs TREDPARSE with diploid model - lobstr_results = parse_results("lobstr_results_het.txt", exclude=20) - tredparse_results = parse_results("tredparse_results_het.txt", exclude=20) - title = SIMULATED_DIPLOID + r" ($D=%s\times, L=%dbp, V=%dbp$)" % ( - depth, - readlen, - distance, - ) - plot_compare(ax2, title, tredparse_results, lobstr_results, max_insert=max_insert) - - for ax in (ax1, ax2): - ax.set_xlim(0, max_insert) - ax.set_ylim(0, max_insert) - - root = fig.add_axes([0, 0, 1, 1]) - pad = 0.03 - panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2.0, 1 - pad, "B"))) - normalize_axes(root) - - image_name = "tredparse." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def power(args): - """ - %prog power - - Compare performances of various variant callers on simulated STR datasets. - This compares the power of various evidence types. 
- """ - p = OptionParser(power.__doc__) - p.add_argument( - "--maxinsert", default=300, type=int, help="Maximum number of repeats" - ) - add_simulate_options(p) - opts, args, iopts = p.set_image_options(args, figsize="10x10", format="png") - - if len(args) != 0: - sys.exit(not p.print_help()) - - max_insert = opts.maxinsert - fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots( - ncols=2, nrows=2, figsize=(iopts.w, iopts.h) - ) - plt.tight_layout(pad=3) - - color = "lightslategray" - # ax1: Spanning - tredparse_results = parse_results("tredparse_results_het-spanning.txt") - title = SIMULATED_DIPLOID + " (Sub-model 1: Spanning reads)" - plot_compare( - ax1, - title, - tredparse_results, - None, - color=color, - max_insert=max_insert, - risk=False, - ) - - # ax2: Partial - tredparse_results = parse_results("tredparse_results_het-partial.txt", exclude=20) - title = SIMULATED_DIPLOID + " (Sub-model 2: Partial reads)" - plot_compare( - ax2, - title, - tredparse_results, - None, - color=color, - max_insert=max_insert, - risk=False, - ) - - # ax3: Repeat - tredparse_results = parse_results("tredparse_results_het-repeat.txt", exclude=20) - # HACK (repeat reads won't work under 50) - tredparse_results = [x for x in tredparse_results if x[0] > 50] - title = SIMULATED_DIPLOID + " (Sub-model 3: Repeat-only reads)" - plot_compare( - ax3, - title, - tredparse_results, - None, - color=color, - max_insert=max_insert, - risk=False, - ) - - # ax4: Pair - tredparse_results = parse_results("tredparse_results_het-pair.txt", exclude=20) - title = SIMULATED_DIPLOID + " (Sub-model 4: Paired-end reads)" - plot_compare( - ax4, - title, - tredparse_results, - None, - color=color, - max_insert=max_insert, - risk=False, - ) - - for ax in (ax1, ax2, ax3, ax4): - ax.set_xlim(0, max_insert) - ax.set_ylim(0, max_insert) - - root = fig.add_axes([0, 0, 1, 1]) - pad = 0.03 - panel_labels( - root, - ( - (pad / 2, 1 - pad, "A"), - (1 / 2.0, 1 - pad, "B"), - (pad / 2, 1 / 2.0, "C"), - (1 / 2.0, 1 / 2.0, "D"), 
- ), - ) - normalize_axes(root) - - image_name = "power." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def tredparse(args): - """ - %prog tredparse - - Compare performances of various variant callers on simulated STR datasets. - Adds coverage comparisons as panel C and D. - """ - p = OptionParser(tredparse.__doc__) - p.add_argument( - "--maxinsert", default=300, type=int, help="Maximum number of repeats" - ) - add_simulate_options(p) - opts, args, iopts = p.set_image_options(args, figsize="10x10") - - if len(args) != 0: - sys.exit(not p.print_help()) - - depth = opts.depth - max_insert = opts.maxinsert - fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots( - ncols=2, nrows=2, figsize=(iopts.w, iopts.h) - ) - plt.tight_layout(pad=3) - - # ax1: lobSTR vs TREDPARSE with haploid model - lobstr_results = parse_results("lobstr_results_homo-20x-150bp-500bp.txt") - tredparse_results = parse_results("tredparse_results_homo-20x-150bp-500bp.txt") - title = SIMULATED_HAPLOID + r" (Depth=$%s\times$)" % depth - plot_compare(ax1, title, tredparse_results, lobstr_results, max_insert=max_insert) - - # ax2: lobSTR vs TREDPARSE with diploid model (depth=20x) - lobstr_results = parse_results("lobstr_results_het-20x-150bp-500bp.txt", exclude=20) - tredparse_results = parse_results( - "tredparse_results_het-20x-150bp-500bp.txt", exclude=20 - ) - title = SIMULATED_DIPLOID + r" (Depth=$%s\times$)" % depth - plot_compare(ax2, title, tredparse_results, lobstr_results, max_insert=max_insert) - - # ax3: lobSTR vs TREDPARSE with diploid model (depth=5x) - lobstr_results = parse_results("lobstr_results_het-5x-150bp-500bp.txt", exclude=20) - tredparse_results = parse_results( - "tredparse_results_het-5x-150bp-500bp.txt", exclude=20 - ) - title = SIMULATED_DIPLOID + r" (Depth=$%s\times$)" % 5 - plot_compare(ax3, title, tredparse_results, lobstr_results, max_insert=max_insert) - - # ax4: lobSTR vs TREDPARSE with diploid model (depth=80x) - lobstr_results = 
parse_results("lobstr_results_het-80x-150bp-500bp.txt", exclude=20) - tredparse_results = parse_results( - "tredparse_results_het-80x-150bp-500bp.txt", exclude=20 - ) - title = SIMULATED_DIPLOID + r" (Depth=$%s\times$)" % 80 - plot_compare(ax4, title, tredparse_results, lobstr_results, max_insert=max_insert) - - for ax in (ax1, ax2, ax3, ax4): - ax.set_xlim(0, max_insert) - ax.set_ylim(0, max_insert) - - root = fig.add_axes([0, 0, 1, 1]) - pad = 0.03 - panel_labels( - root, - ( - (pad / 2, 1 - pad, "A"), - (1 / 2.0, 1 - pad, "B"), - (pad / 2, 1 / 2.0, "C"), - (1 / 2.0, 1 / 2.0, "D"), - ), - ) - normalize_axes(root) - - image_name = "tredparse." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/sugarcane.py b/jcvi/projects/sugarcane.py deleted file mode 100644 index 1fd63131..00000000 --- a/jcvi/projects/sugarcane.py +++ /dev/null @@ -1,807 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- -# -# sugarcane.py -# projects -# -# Created by Haibao Tang on 12/02/19 -# Copyright © 2019 Haibao Tang. All rights reserved. -# -""" -Simulate sugarcane genomes and analyze the diversity in the progeny genomes. -""" - -import os.path as op -import sys - -from collections import Counter, defaultdict -from enum import Enum -from itertools import combinations, groupby, product -from random import random, sample -from typing import Dict, List - -import numpy as np -import matplotlib.pyplot as plt -import seaborn as sns -import pandas as pd - -from ..apps.base import ActionDispatcher, OptionParser, logger, mkdir -from ..formats.blast import Blast -from ..graphics.base import adjust_spines, markup, normalize_axes, savefig - -SoColor = "#7436a4" # Purple -SsColor = "#5a8340" # Green - - -class CrossMode(Enum): - """ - How the F1 is generated. 
- """ - - nplusn = "n+n" - nx2plusn = "nx2+n" - twoplusnFDR = "2n+n_FDR" - twoplusnSDR = "2n+n_SDR" - - -# Computed using prepare(), corrected with real sizes -ChrSizes = { - "SO-chr01": 148750011, - "SO-chr02": 119865146, - "SO-chr03": 103845728, - "SO-chr04": 104559946, - "SO-chr05": 93134056, - "SO-chr06": 74422021, - "SO-chr07": 81308893, - "SO-chr08": 71010813, - "SO-chr09": 86380266, - "SO-chr10": 73923121, - "SS-chr01": 114519418, - "SS-chr02": 119157314, - "SS-chr03": 85009228, - "SS-chr04": 79762909, - "SS-chr05": 90584537, - "SS-chr06": 95848354, - "SS-chr07": 83589369, - "SS-chr08": 64028871, -} - - -# Simulate genome composition -class Genome: - def __init__( - self, name: str, prefix: str, ploidy: int, haploid_chromosome_count: int - ): - """ - Simulate a genome with given ploidy and haploid_chromosome_count. Example: - - >>> print(Genome("t", "pf", 2, 3)) - test: pf-chr01_a,pf-chr01_b,pf-chr02_a,pf-chr02_b,pf-chr03_a,pf-chr03_b - """ - self.name = name - chromosomes = [] - for i in range(haploid_chromosome_count): - chromosomes += [ - f"{prefix}-chr{i + 1:02d}_{chr(ord('a') + j)}" for j in range(ploidy) - ] - self.chromosomes = chromosomes - - def __len__(self): - return len(self.chromosomes) - - @classmethod - def make(cls, name: str, chromosomes: List[str]): - genome = Genome(name, "", 0, 0) - genome.chromosomes = chromosomes - return genome - - @property - def gamete(self): - """Randomly generate a gamete from current genome that""" - self.chromosomes.sort() - gamete_chromosomes = [] - - # Check for any chromosome that have 2 identical copies, if so, we will assume disomic - # inheritance for that chromosome and always keep one and only copy - duplicate_chromosomes = [] - singleton_chromosomes = [] - for chromosome, chromosomes in groupby(self.chromosomes): - chromosomes = list(chromosomes) - ncopies = len(chromosomes) - duplicate_chromosomes += [chromosome] * (ncopies // 2) - if ncopies % 2 == 1: - singleton_chromosomes.append(chromosome) - - # 
Get one copy of each duplicate chromosome first - gamete_chromosomes += duplicate_chromosomes - - def prefix(x): - return x.split("_", 1)[0] - - # Randomly assign the rest, singleton chromosomes - for _, chromosomes in groupby(singleton_chromosomes, key=prefix): - chromosomes = list(chromosomes) - halfn = len(chromosomes) // 2 - # Odd number, e.g. 5, equal chance to be 2 or 3 - if len(chromosomes) % 2 != 0 and random() < 0.5: - halfn += 1 - gamete_chromosomes += sorted(sample(chromosomes, halfn)) - return Genome.make(self.name + " gamete", gamete_chromosomes) - - def mate_nplusn(self, name: str, other_genome: "Genome", verbose: bool = True): - if verbose: - print( - f"Crossing '{self.name}' x '{other_genome.name}' (n+n)", file=sys.stderr - ) - f1_chromosomes = sorted( - self.gamete.chromosomes + other_genome.gamete.chromosomes - ) - return Genome.make(name, f1_chromosomes) - - def mate_nx2plusn(self, name: str, other_genome: "Genome", verbose: bool = True): - if verbose: - print( - f"Crossing '{self.name}' x '{other_genome.name}' (2xn+n)", - file=sys.stderr, - ) - f1_chromosomes = sorted( - 2 * self.gamete.chromosomes + other_genome.gamete.chromosomes - ) - return Genome.make(name, f1_chromosomes) - - def mate_2nplusn_FDR(self, name: str, other_genome: "Genome", verbose: bool = True): - if verbose: - print( - f"Crossing '{self.name}' x '{other_genome.name}' (2n+n_FDR)", - file=sys.stderr, - ) - f1_chromosomes = sorted(self.chromosomes + other_genome.gamete.chromosomes) - return Genome.make(name, f1_chromosomes) - - def mate_2nplusn_SDR(self, name: str, other_genome: "Genome", verbose: bool = True): - if verbose: - print( - f"Crossing '{self.name}' x '{other_genome.name}' (2n+n_SDR)", - file=sys.stderr, - ) - raise NotImplementedError("2n+n_SDR not yet supported") - - def __str__(self): - return self.name + ": " + ",".join(self.chromosomes) - - @property - def summary(self): - def prefix(x, sep="-"): - return x.split(sep, 1)[0] - - def size(chromosomes): - return 
sum(ChrSizes[prefix(x, sep="_")] for x in chromosomes) - - # Chromosome count - total_count = 0 - total_unique = 0 - total_size = 0 - total_so_size = 0 - ans = [] - for group, chromosomes in groupby(self.chromosomes, prefix): - chromosomes = list(chromosomes) - uniq_chromosomes = set(chromosomes) - group_count = len(chromosomes) - group_unique = len(uniq_chromosomes) - group_so_size = size({x for x in uniq_chromosomes if x[:2] == "SO"}) - group_size = size(uniq_chromosomes) - total_count += group_count - total_unique += group_unique - total_so_size += group_so_size - total_size += group_size - ans.append((group, group_count, group_unique, group_so_size, group_size)) - ans.append(("Total", total_count, total_unique, total_so_size, total_size)) - return ans - - def print_summary(self): - print("[SUMMARY]") - for group, group_count, group_unique in self.summary: - print(f"{group}: count={group_count}, unique={group_unique}") - - -class GenomeSummary: - def __init__(self, SO_data, SS_data, percent_SO_data): - self.SO_data = SO_data - self.SS_data = SS_data - self.percent_SO_data = percent_SO_data - self.percent_SS_data = [100 - x for x in percent_SO_data] - - def _summary(self, a, tag, precision=0): - mean, mn, mx = ( - round(np.mean(a), precision), - round(np.min(a), precision), - round(np.max(a), precision), - ) - s = f"*{tag}* chr: {mean:.0f}" - if mn == mean and mx == mean: - return s - return s + f" ({mn:.0f}-{mx:.0f})" - - def _percent_summary(self, a, tag, precision=1): - mean, mn, mx = ( - round(np.mean(a), precision), - round(np.min(a), precision), - round(np.max(a), precision), - ) - s = f"*{tag}*%: {mean:.1f}%" - print(s) - if mn == mean and mx == mean: - return s - return s + f" ({mn:.1f}-{mx:.1f}%)" - - @property - def percent_SO_summary(self): - return self._percent_summary(self.percent_SO_data, "So") - - @property - def percent_SS_summary(self): - return self._percent_summary(self.percent_SS_data, "Ss") - - @property - def SO_summary(self): - return 
self._summary(self.SO_data, "So") - - @property - def SS_summary(self): - return self._summary(self.SS_data, "Ss") - - -def simulate_F1(SO: Genome, SS: Genome, mode: CrossMode, verbose: bool = False): - if mode == CrossMode.nx2plusn: - SO_SS_F1 = SO.mate_nx2plusn("SOxSS F1", SS, verbose=verbose) - elif mode == CrossMode.twoplusnFDR: - SO_SS_F1 = SO.mate_2nplusn_FDR("SOxSS F1", SS, verbose=verbose) - elif mode == CrossMode.twoplusnSDR: - SO_SS_F1 = SO.mate_2nplusn_SDR("SOxSS F1", SS, verbose=verbose) - if verbose: - SO_SS_F1.print_summary() - return SO_SS_F1 - - -def simulate_F2(SO: Genome, SS: Genome, mode: CrossMode, verbose: bool = False): - SO_SS_F1 = simulate_F1(SO, SS, mode=mode, verbose=verbose) - SO_SS_F2_nplusn = SO_SS_F1.mate_nplusn("SOxSS F2", SO_SS_F1, verbose=verbose) - if verbose: - SO_SS_F2_nplusn.print_summary() - return SO_SS_F2_nplusn - - -def simulate_F1intercross(SO: Genome, SS: Genome, mode: CrossMode, verbose=False): - SO_SS_F1_1 = simulate_F1(SO, SS, mode=mode, verbose=verbose) - SO_SS_F1_2 = simulate_F1(SO, SS, mode=mode, verbose=verbose) - SO_SS_F1intercross_nplusn = SO_SS_F1_1.mate_nplusn( - "SOxSS F1 intercross", SO_SS_F1_2, verbose=verbose - ) - return SO_SS_F1intercross_nplusn - - -def simulate_BCn(n: int, SO: Genome, SS: Genome, mode: CrossMode, verbose=False): - SS_SO_F1 = simulate_F1(SO, SS, mode=mode, verbose=verbose) - SS_SO_BC1, SS_SO_BC2_nplusn, SS_SO_BC3_nplusn, SS_SO_BC4_nplusn = ( - None, - None, - None, - None, - ) - # BC1 - if n >= 1: - if mode == CrossMode.nx2plusn: - SS_SO_BC1 = SO.mate_nx2plusn("SSxSO BC1", SS_SO_F1, verbose=verbose) - elif mode == CrossMode.twoplusnFDR: - SS_SO_BC1 = SO.mate_2nplusn_FDR("SSxSO BC1", SS_SO_F1, verbose=verbose) - elif mode == CrossMode.twoplusnSDR: - SS_SO_BC1 = SO.mate_2nplusn_SDR("SSxSO BC1", SS_SO_F1, verbose=verbose) - # BC2 - if n >= 2: - SS_SO_BC2_nplusn = SO.mate_nplusn("SSxSO BC2", SS_SO_BC1, verbose=verbose) - # BC3 - if n >= 3: - SS_SO_BC3_nplusn = SO.mate_nplusn( - "SSxSO BC3", 
SS_SO_BC2_nplusn, verbose=verbose - ) - # BC4 - if n >= 4: - SS_SO_BC4_nplusn = SO.mate_nplusn( - "SSxSO BC4", SS_SO_BC3_nplusn, verbose=verbose - ) - return [ - None, - SS_SO_BC1, - SS_SO_BC2_nplusn, - SS_SO_BC3_nplusn, - SS_SO_BC4_nplusn, - ][n] - - -def plot_summary(ax, samples: list[Genome]) -> GenomeSummary: - """Plot the distribution of chromosome numbers given simulated samples. - - Args: - ax (Axes): Matplotlib axes. - samples (list[Genome]): Summarized genomes. - - Returns: - GenomeSummary: Summary statistics of simulated genomes. - """ - SO_data = [] - SS_data = [] - percent_SO_data = [] - for s in samples: - summary = s.summary - try: - _, _, group_unique, _, _ = [x for x in summary if x[0] == "SO"][0] - except: - group_unique = 0 - SO_data.append(group_unique) - try: - _, _, group_unique, _, _ = [x for x in summary if x[0] == "SS"][0] - except: - group_unique = 0 - SS_data.append(group_unique) - total_tag, _, _, total_so_size, total_size = summary[-1] - assert total_tag == "Total" - percent_SO = total_so_size * 100.0 / total_size - percent_SO_data.append(percent_SO) - # Avoid overlapping bars - SS_counter, SO_counter = Counter(SS_data), Counter(SO_data) - overlaps = SS_counter.keys() & SO_counter.keys() - shift = 0.5 # used to offset bars a bit to avoid cluttering - if overlaps: - for overlap in overlaps: - logger.debug("Modify bar offsets at %s due to SS and SO overlaps", overlap) - SS_counter[overlap - shift] = SS_counter[overlap] - del SS_counter[overlap] - SO_counter[overlap + shift] = SO_counter[overlap] - del SO_counter[overlap] - - def modify_range_end(d: dict, value: int): - if value not in d: - return - # Has data at the range end, but no adjacent data points (i.e. 
isolated bar) - if value in d and (value - 1 in d or value + 1 in d): - return - logger.debug("Modify bar offsets at %d due to end of range ends", value) - d[value - shift if value else value + shift] = d[80] - del d[value] - - modify_range_end(SS_counter, 0) - modify_range_end(SS_counter, 80) - modify_range_end(SO_counter, 0) - modify_range_end(SO_counter, 80) - - x, y = zip(*sorted(SS_counter.items())) - ax.bar(np.array(x), y, color=SsColor, ec=SsColor) - x, y = zip(*sorted(SO_counter.items())) - ax.bar(np.array(x), y, color=SoColor, ec=SoColor) - ax.set_xlim(80, 0) - ax.set_ylim(0, len(samples) / 2) - ax.set_yticks([]) - summary = GenomeSummary(SO_data, SS_data, percent_SO_data) - - # Write the stats summary within the plot - summary_style = dict( - size=9, - ha="center", - va="center", - transform=ax.transAxes, - ) - ax.text(0.75, 0.85, markup(summary.SS_summary), color=SsColor, **summary_style) - ax.text( - 0.75, 0.65, markup(summary.percent_SS_summary), color=SsColor, **summary_style - ) - ax.text(0.25, 0.85, markup(summary.SO_summary), color=SoColor, **summary_style) - ax.text( - 0.25, 0.65, markup(summary.percent_SO_summary), color=SoColor, **summary_style - ) - - return summary - - -def write_chromosomes(genomes: list[Genome], filename: str): - """Write simulated chromosomes to file - - Args: - genomes (list[Genome]): List of simulated genomes. - filename (str): File path to write to. - """ - print(f"Write chromosomes to `{filename}`", file=sys.stderr) - with open(filename, "w", encoding="utf-8") as fw: - for genome in genomes: - print(genome, file=fw) - - -def write_SO_percent(summary: GenomeSummary, filename: str): - """Write SO % to file - - Args: - summary (GenomeSummary): List of simulated genomes. - filename (str): File path to write to. 
- """ - print(f"Write SO percent to `{filename}`", file=sys.stderr) - with open(filename, "w", encoding="utf-8") as fw: - print("\n".join(str(x) for x in sorted(summary.percent_SO_data)), file=fw) - - -def simulate(args): - """ - %prog simulate [2n+n_FDR|2n+n_SDR|nx2+n] - - Run simulation on female restitution. There are two modes: - - 2n+n_FDR: merger between a somatic and a germline - - 2n+n_SDR: merger between a recombined germline and a germline (not yet supported) - - nx2+n: merger between a doubled germline and a germline - - These two modes would impact the sequence diversity in the progeny - genome in F1, F2, BCn ... the goal of this simulation, is thus to - understand the mode and the spread of such diversity in the hybrid - progenies. - """ - sns.set_style("darkgrid") - - p = OptionParser(simulate.__doc__) - p.add_argument( - "--verbose", - default=False, - action="store_true", - help="Verbose logging during simulation", - ) - p.add_argument("-N", default=10000, type=int, help="Number of simulated samples") - opts, args, iopts = p.set_image_options(args, figsize="6x6") - if len(args) != 1: - sys.exit(not p.print_help()) - - (mode,) = args - mode = CrossMode(mode) - logger.info("Transmission: %s", mode) - - # Construct a composite figure with 6 tracks - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - rows = 6 - ypad = 0.05 - yinterval = (1 - 2 * ypad) / (rows + 1) - yy = 1 - ypad - xpad = 0.2 - xwidth = 0.7 - - # Axes are vertically stacked, and share x-axis - axes = [] - yy_positions = [] # Save yy positions so we can show details to the right laterr - for idx in range(rows): - yy_positions.append(yy) - yy -= yinterval - ax = fig.add_axes([xpad, yy, xwidth, yinterval * 0.85]) - if idx != rows - 1: - plt.setp(ax.get_xticklabels(), visible=False) - axes.append(ax) - ax1, ax2, ax3, ax4, ax5, ax6 = axes - - # Prepare the simulated data - # Simulate two parents - SS = Genome("SS", "SS", 10, 8) - SO = Genome("SO", "SO", 8, 10) - - 
verbose = opts.verbose - N = opts.N - all_F1s = [simulate_F1(SO, SS, mode=mode, verbose=verbose) for _ in range(N)] - all_F2s = [simulate_F2(SO, SS, mode=mode, verbose=verbose) for _ in range(N)] - all_BC1s = [simulate_BCn(1, SO, SS, mode=mode, verbose=verbose) for _ in range(N)] - all_BC2s = [simulate_BCn(2, SO, SS, mode=mode, verbose=verbose) for _ in range(N)] - all_BC3s = [simulate_BCn(3, SO, SS, mode=mode, verbose=verbose) for _ in range(N)] - all_BC4s = [simulate_BCn(4, SO, SS, mode=mode, verbose=verbose) for _ in range(N)] - - # Plotting - all_F1s_summary = plot_summary(ax1, all_F1s) - all_F2s_summary = plot_summary(ax2, all_F2s) - plot_summary(ax3, all_BC1s) - plot_summary(ax4, all_BC2s) - plot_summary(ax5, all_BC3s) - plot_summary(ax6, all_BC4s) - - # Show title to the left - xx = xpad / 2 - for (title, subtitle), yy in zip( - ( - (r"$\mathrm{F_1}$", None), - (r"$\mathrm{F_2}$", None), - (r"$\mathrm{BC_1}$", None), - (r"$\mathrm{BC_2}$", None), - (r"$\mathrm{BC_3}$", None), - (r"$\mathrm{BC_4}$", None), - ), - yy_positions, - ): - if subtitle: - yy -= 0.06 - else: - yy -= 0.07 - root.text( - xx, - yy, - title, - color="darkslategray", - ha="center", - va="center", - fontweight="semibold", - ) - if subtitle: - yy -= 0.02 - root.text( - xx, yy, subtitle, color="lightslategray", ha="center", va="center" - ) - - axes[-1].set_xlabel("Number of unique chromosomes") - adjust_spines(axes[-1], ["bottom"], outward=True) - normalize_axes(root) - - # Title - if mode == CrossMode.nx2plusn: - mode_title = r"$n_1\times2 + n_2$" - elif mode == CrossMode.twoplusnFDR: - mode_title = r"$2n + n$ (FDR)" - elif mode == CrossMode.twoplusnSDR: - mode_title = r"$2n + n$ (SDR)" - root.text(0.5, 0.95, f"Transmission: {mode_title}", ha="center") - - savefig(f"{mode}.pdf", dpi=120) - - outdir = f"simulations_{mode}" - mkdir(outdir) - # Write chromosomes to disk - for genomes, filename in ( - (all_F1s, "all_F1s"), - (all_F2s, "all_F2s"), - (all_BC1s, "all_BC1s"), - (all_BC2s, 
"all_BC2s"), - (all_BC3s, "all_BC3s"), - (all_BC4s, "all_BC4s"), - ): - write_chromosomes(genomes, op.join(outdir, filename)) - - # Write the SO percent in simulated samples so that we can compute P-value - for summary, SO_percent_filename in ( - (all_F1s_summary, "all_F1s_SO_percent"), - (all_F2s_summary, "all_F2s_SO_percent"), - ): - write_SO_percent(summary, op.join(outdir, SO_percent_filename)) - - -def _get_sizes(filename, prefix_length, tag, target_size=None): - """Returns a dictionary of chromome lengths from a given file. - - Args: - filename ([str]): Path to the input file. Input file is 2-column file - with rows `seqid length`. - prefix_length (int): Extract first N characters. - tag (str): Prepend `tag-` to the seqid. - target_size (int): Expected genome size. Defaults to None. - """ - sizes_list = defaultdict(list) - with open(filename, encoding="utf-8") as fp: - for row in fp: - if not row.startswith("Chr"): - continue - name, size = row.split() - idx = int(name[3:prefix_length]) - size = int(size) - name = f"{tag}-chr{idx:02d}" - sizes_list[name].append(size) - - # Get the average length - sizes = dict( - (name, int(round(np.mean(size_list)))) for name, size_list in sizes_list.items() - ) - print(sizes) - if target_size is None: - return sizes - - total_size = sum(sizes.values()) - correction_factor = target_size / total_size - print( - f"{tag} total:{total_size} target:{target_size} correction:{correction_factor:.2f}x" - ) - return dict( - (name, int(round(correction_factor * size))) for name, size in sizes.items() - ) - - -def prepare(args): - """ - %prog SoChrLen.txt SsChrLen.txt - - Calculate lengths from real sugarcane data. 
- """ - p = OptionParser(prepare.__doc__) - _, args = p.parse_args(args) - if len(args) != 2: - sys.exit(not p.print_help()) - - solist, sslist = args - # The haploid set of LA Purple is 957.2 Mb and haploid set of US56-14-4 is 732.5 Mb - sizes = _get_sizes(solist, 5, "SO", target_size=int(957.2 * 1e6)) - sizes.update(_get_sizes(sslist, 4, "SS", target_size=int(732.5 * 1e6))) - print(sizes) - - -def get_genome_wide_pct(summary: str) -> Dict[tuple, list]: - """Collect genome-wide ungapped percent identity. - Specifically, from file `SS_SR_SO.summary.txt`. - - Args: - summary (str): File that contains per chromosome pct identity info, - collected via `formats.blast.summary()`. - - Returns: - Dict[tuple, list]: Genome pair to list of pct identities. - """ - COLUMNS = "filename, identicals, qry_gapless, qry_gapless_pct, ref_gapless, ref_gapless_pct, qryspan, pct_qryspan, refspan, pct_refspan".split( - ", " - ) - df = pd.read_csv(summary, sep="\t", names=COLUMNS) - data_by_genomes = defaultdict(list) - for _, row in df.iterrows(): - filename = row["filename"] - # e.g. SO_Chr01A.SO_Chr01B.1-1.blast - chr1, chr2 = filename.split(".")[:2] - genome1, chr1 = chr1.split("_") - genome2, chr2 = chr2.split("_") - chr1, chr2 = chr1[:5], chr2[:5] - if ( # Special casing for SS certain chromosomes that are non-collinear with SO/SR - genome1 != "SS" - and genome2 == "SS" - and chr2 not in ("Chr01", "Chr03", "Chr04") - ): - continue - qry_gapless_pct, ref_gapless_pct = ( - row["qry_gapless_pct"], - row["ref_gapless_pct"], - ) - data_by_genomes[(genome1, genome2)] += [qry_gapless_pct, ref_gapless_pct] - return data_by_genomes - - -def get_anchors_pct(filename: str, min_pct: int = 94) -> list: - """Collect CDS-wide ungapped percent identity. - - Args: - filename (str): Input file name, which is a LAST file. - - Returns: - list: List of pct identities from this LAST file. 
- """ - blast = Blast(filename) - pct = [] - for c in blast: - if c.pctid < min_pct: - continue - identicals = c.hitlen - c.nmismatch - c.ngaps - qstart, qstop = c.qstart, c.qstop - sstart, sstop = c.sstart, c.sstop - qrycovered = qstop - qstart + 1 - refcovered = sstop - sstart + 1 - pct.append(identicals * 100 / qrycovered) - pct.append(identicals * 100 / refcovered) - return pct - - -def divergence(args): - """ - %prog divergence SS_SR_SO.summary.txt - - Plot divergence between and within SS/SR/SO genomes. - """ - sns.set_style("white") - - p = OptionParser(divergence.__doc__) - p.add_argument("--title", default="Gapless", help="Plot title") - p.add_argument( - "--xmin", - default=94, - type=int, - help="Minimum percent identity in the histogram", - ) - opts, args, iopts = p.set_image_options(args, figsize="8x8") - if len(args) != 1: - sys.exit(not p.print_help()) - - (summary,) = args - data_by_genomes = get_genome_wide_pct(summary) - # Print summary statistics - print("Genome-wide ungapped percent identity:") - for (genome1, genome2), pct in sorted(data_by_genomes.items()): - print(genome1, genome2, np.mean(pct), np.std(pct)) - - # Plotting genome-wide divergence - fig = plt.figure(figsize=(iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - SPECIES_CONFIG = { - "SS": {"label": "S. spontaneum", "pos": (0.5, 0.67)}, - "SR": {"label": "S. robustum", "pos": (0.2, 0.3)}, - "SO": {"label": "S. 
officinarum", "pos": (0.8, 0.3)}, - } - # Get median for each genome pair - medians = {} - for g1, g2 in product(SPECIES_CONFIG.keys(), repeat=2): - g1, g2 = sorted((g1, g2)) - d = data_by_genomes[(g1, g2)] - medians[(g1, g2)] = np.median(d) - for g, config in SPECIES_CONFIG.items(): - x, y = config["pos"] - text = f'*{config["label"]}*' + f"\n{medians[(g, g)]:.1f} %" - text = markup(text) - root.text(x, y, text, color="darkslategray", ha="center", va="center") - - # Connect lines - PAD, YPAD = 0.09, 0.045 - for g1, g2 in combinations(SPECIES_CONFIG.keys(), 2): - g1, g2 = sorted((g1, g2)) - x1, y1 = SPECIES_CONFIG[g1]["pos"] - x2, y2 = SPECIES_CONFIG[g2]["pos"] - x1, x2 = (x1 + PAD, x2 - PAD) if x1 < x2 else (x1 - PAD, x2 + PAD) - if y1 != y2: - y1, y2 = (y1 + YPAD, y2 - YPAD) if y1 < y2 else (y1 - YPAD, y2 + YPAD) - xmid, ymid = (x1 + x2) / 2, (y1 + y2) / 2 - text = f"{medians[(g1, g2)]:.1f} %" - text = markup(text) - root.text(xmid, ymid, text, ha="center", va="center", backgroundcolor="w") - root.plot([x1, x2], [y1, y2], "-", lw=4, color="darkslategray") - - # Pct identity histograms - PCT_CONFIG = { - ("SS", "SS"): {"pos": (0.5, 0.82)}, - ("SR", "SR"): {"pos": (0.1, 0.2)}, - ("SO", "SO"): {"pos": (0.9, 0.2)}, - ("SR", "SS"): {"pos": (0.3 - PAD, 0.55)}, - ("SO", "SS"): {"pos": (0.7 + PAD, 0.55)}, - ("SO", "SR"): {"pos": (0.5, 0.18)}, - } - HIST_WIDTH = 0.15 - xmin = opts.xmin - for genome_pair, config in PCT_CONFIG.items(): - x, y = config["pos"] - ax = fig.add_axes( - [x - HIST_WIDTH / 2, y - HIST_WIDTH / 2, HIST_WIDTH, HIST_WIDTH] - ) - d = data_by_genomes[genome_pair] - binwidth = (100 - xmin) / 20 - sns.histplot(d, ax=ax, binwidth=binwidth, kde=False) - ax.set_xlim(xmin, 100) - ax.get_yaxis().set_visible(False) - ax.set_xticks([xmin, 100]) - adjust_spines(ax, ["bottom"], outward=True) - ax.spines["bottom"].set_color("lightslategray") - - title = opts.title - italic_title = markup(f"*{title}*") - root.text( - 0.5, - 0.95, - f"{italic_title} identities between 
and within SS/SR/SO genomes", - size=14, - ha="center", - va="center", - ) - normalize_axes(root) - image_name = f"SO_SR_SS.{title}." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def main(): - - actions = ( - ("prepare", "Calculate lengths from real sugarcane data"), - ("simulate", "Run simulation on female restitution"), - # Plotting scripts to illustrate divergence between and within genomes - ("divergence", "Plot divergence between and within SS/SR/SO genomes"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/synfind.py b/jcvi/projects/synfind.py deleted file mode 100644 index 57d76646..00000000 --- a/jcvi/projects/synfind.py +++ /dev/null @@ -1,860 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -SynFind analyses and visualization. -""" -import os.path as op -import sys - -from collections import defaultdict -from copy import deepcopy -from itertools import groupby - -from ..apps.base import ActionDispatcher, OptionParser, logger, mkdir, symlink -from ..apps.grid import MakeManager -from ..formats.base import get_number, must_open -from ..formats.bed import Bed -from ..formats.blast import BlastLine -from ..formats.gff import Gff, load -from ..graphics.base import ( - FancyArrow, - plt, - savefig, - panel_labels, - markup, - normalize_axes, - latex, -) -from ..graphics.glyph import CartoonRegion, RoundRect -from ..utils.cbook import SummaryStats, gene_name, percentage -from ..utils.grouper import Grouper - - -def main(): - - actions = ( - ("cartoon", "generate cartoon illustration of SynFind"), - ("ecoli", "gene presence absence analysis in ecoli"), - ("grass", "validate SynFind pan-grass set against James"), - ("coge", "prepare coge datasets"), - # For benchmarking - ("synfind", "prepare input for SynFind"), - ("iadhore", "prepare input for iADHoRe"), - ("mcscanx", "prepare input for MCScanX"), - ("cyntenator", "prepare input for 
Cyntenator"), - ("athalianatruth", "prepare truth pairs for At alpha/beta/gamma"), - ("yeasttruth", "prepare truth pairs for 14 yeasts"), - ("grasstruth", "prepare truth pairs for 4 grasses"), - ("benchmark", "compare SynFind, MCScanX, iADHoRe and OrthoFinder"), - ("venn", "display benchmark results as Venn diagram"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def grasstruth(args): - """ - %prog grasstruth james-pan-grass.txt - - Prepare truth pairs for 4 grasses. - """ - p = OptionParser(grasstruth.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (james,) = args - fp = open(james) - pairs = set() - for row in fp: - atoms = row.split() - genes = [] - idx = {} - for i, a in enumerate(atoms): - aa = a.split("||") - for ma in aa: - idx[ma] = i - genes.extend(aa) - genes = [x for x in genes if ":" not in x] - Os = [x for x in genes if x.startswith("Os")] - for o in Os: - for g in genes: - if idx[o] == idx[g]: - continue - pairs.add(tuple(sorted((o, g)))) - - for a, b in sorted(pairs): - print("\t".join((a, b))) - - -def synfind(args): - """ - %prog synfind all.last *.bed - - Prepare input for SynFind. 
- """ - p = OptionParser(synfind.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - lastfile = args[0] - bedfiles = args[1:] - fp = open(lastfile) - filteredlast = lastfile + ".filtered" - fw = open(filteredlast, "w") - for row in fp: - b = BlastLine(row) - if b.query == b.subject: - continue - print(b, file=fw) - fw.close() - logger.debug("Filtered LAST file written to `{0}`".format(filteredlast)) - - allbed = "all.bed" - fw = open(allbed, "w") - for i, bedfile in enumerate(bedfiles): - prefix = chr(ord("A") + i) - bed = Bed(bedfile) - for b in bed: - b.seqid = prefix + b.seqid - print(b, file=fw) - fw.close() - logger.debug("Bed file written to `{0}`".format(allbed)) - - -def yeasttruth(args): - """ - %prog yeasttruth Pillars.tab *.gff - - Prepare pairs data for 14 yeasts. - """ - p = OptionParser(yeasttruth.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - pillars = args[0] - gffiles = args[1:] - aliases = {} - pivot = {} - for gffile in gffiles: - is_pivot = op.basename(gffile).startswith("Saccharomyces_cerevisiae") - gff = Gff(gffile) - for g in gff: - if g.type != "gene": - continue - for a in g.attributes["Alias"]: - aliases[a] = g.accn - if is_pivot: - pivot[a] = g.accn - logger.debug("Aliases imported: {0}".format(len(aliases))) - logger.debug("Pivot imported: {0}".format(len(pivot))) - fw = open("yeast.aliases", "w") - for k, v in sorted(aliases.items()): - print("\t".join((k, v)), file=fw) - fw.close() - - fp = open(pillars) - pairs = set() - fw = must_open(opts.outfile, "w") - for row in fp: - atoms = [x for x in row.split() if x != "---"] - pps = [pivot[x] for x in atoms if x in pivot] - atoms = [aliases[x] for x in atoms if x in aliases] - for p in pps: - for a in atoms: - if p == a: - continue - pairs.add(tuple(sorted((p, a)))) - - for a, b in sorted(pairs): - print("\t".join((a, b)), file=fw) - fw.close() - - -def venn(args): - """ - 
%prog venn *.benchmark - - Display benchmark results as Venn diagram. - """ - from matplotlib_venn import venn2 - - p = OptionParser(venn.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="9x9") - - if len(args) < 1: - sys.exit(not p.print_help()) - - bcs = args - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - pad = 0.02 - ystart = 1 - ywidth = 1.0 / len(bcs) - tags = ("Bowers", "YGOB", "Schnable") - for bc, tag in zip(bcs, tags): - fp = open(bc) - data = [] - for row in fp: - prog, pcounts, tcounts, shared = row.split() - pcounts = int(pcounts) - tcounts = int(tcounts) - shared = int(shared) - data.append((prog, pcounts, tcounts, shared)) - xstart = 0 - xwidth = 1.0 / len(data) - for prog, pcounts, tcounts, shared in data: - a, b, c = pcounts - shared, tcounts - shared, shared - ax = fig.add_axes( - [ - xstart + pad, - ystart - ywidth + pad, - xwidth - 2 * pad, - ywidth - 2 * pad, - ] - ) - venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax) - message = "Sn={0} Pu={1}".format( - percentage(shared, tcounts, precision=0, mode=-1), - percentage(shared, pcounts, precision=0, mode=-1), - ) - print(message, file=sys.stderr) - ax.text( - 0.5, - 0.92, - latex(message), - ha="center", - va="center", - transform=ax.transAxes, - color="b", - ) - ax.set_axis_off() - xstart += xwidth - ystart -= ywidth - - panel_labels( - root, - ((0.04, 0.96, "A"), (0.04, 0.96 - ywidth, "B"), (0.04, 0.96 - 2 * ywidth, "C")), - ) - panel_labels( - root, - ( - (0.5, 0.98, "A. thaliana duplicates"), - (0.5, 0.98 - ywidth, "14 Yeast genomes"), - (0.5, 0.98 - 2 * ywidth, "4 Grass genomes"), - ), - ) - normalize_axes(root) - savefig("venn.pdf", dpi=opts.dpi) - - -def coge(args): - """ - %prog coge *.gff - - Prepare coge datasets. 
- """ - p = OptionParser(coge.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - gffs = args - for gff in gffs: - atoms = op.basename(gff).split(".") - gid = atoms[-2] - assert gid.startswith("gid") - gid = get_number(gid) - genomefasta = "genome_{0}.faa.fasta".format(gid) - species = "_".join(atoms[0].split("_")[:2]) - cdsfasta = species + ".cds.fasta" - load( - [ - gff, - genomefasta, - "--id_attribute=Parent", - "--outfile={0}".format(cdsfasta), - ] - ) - - -def calc_sensitivity_specificity(a, truth, tag, fw): - common = a & truth - sensitivity = len(common) * 100.0 / len(truth) - specificity = len(common) * 100.0 / len(a) - logger.debug("{0}: {1} pairs".format(tag, len(a))) - logger.debug( - "{0}: Sensitivity={1:.1f}% Purity={2:.1f}%".format( - tag, sensitivity, specificity - ) - ) - print(tag, len(a), len(truth), len(common), file=fw) - - -def write_pairs(pairs, pairsfile): - fz = open(pairsfile, "w") - for a, b in pairs: - print("\t".join((a, b)), file=fz) - fz.close() - - -def benchmark(args): - """ - %prog benchmark at bedfile - - Compare SynFind, MCScanx, iADHoRe and OrthoFinder against the truth. 
- """ - p = OptionParser(benchmark.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - pf, bedfile = args - truth = pf + ".truth" - synfind = pf + ".synfind" - mcscanx = pf + ".mcscanx" - iadhore = pf + ".iadhore" - orthofinder = pf + ".orthofinder" - pivots = set([x.accn for x in Bed(bedfile)]) - - fp = open(truth) - truth = set() - for row in fp: - a, b = row.strip().split("\t")[:2] - pivots.add(a) - truth.add(tuple(sorted((a, b)))) - logger.debug("Truth: {0} pairs".format(len(truth))) - - fp = open(synfind) - benchmarkfile = pf + ".benchmark" - fw = must_open(benchmarkfile, "w") - synfind = set() - for row in fp: - atoms = row.strip().split("\t") - query, hit, tag = atoms[:3] - if tag != "S": - continue - synfind.add(tuple(sorted((query, hit)))) - calc_sensitivity_specificity(synfind, truth, "SynFind", fw) - - fp = open(mcscanx) - mcscanx = set() - for row in fp: - if row[0] == "#": - continue - atoms = row.strip().split(":")[1].split() - query, hit = atoms[:2] - mcscanx.add(tuple(sorted((query, hit)))) - calc_sensitivity_specificity(mcscanx, truth, "MCScanX", fw) - - fp = open(iadhore) - iadhore = set() - next(fp) - for row in fp: - atoms = row.strip().split("\t") - query, hit = atoms[3:5] - iadhore.add(tuple(sorted((query, hit)))) - calc_sensitivity_specificity(iadhore, truth, "iADHoRe", fw) - - fp = open(orthofinder) - orthofinder = set() - next(fp) - for row in fp: - row = row.replace('"', "") - atoms = row.replace(",", " ").split() - genes = [x.strip() for x in atoms if not x.startswith("OG")] - genes = [gene_name(x) for x in genes] - pps = [x for x in genes if x in pivots] - for p in pps: - for g in genes: - if p == g: - continue - orthofinder.add(tuple(sorted((p, g)))) - # write_pairs(orthofinder, "orthofinder.pairs") - calc_sensitivity_specificity(orthofinder, truth, "OrthoFinder", fw) - fw.close() - - -def write_lst(bedfile): - pf = op.basename(bedfile).split(".")[0] - mkdir(pf) - bed = Bed(bedfile) - 
stanza = [] - for seqid, bs in bed.sub_beds(): - fname = op.join(pf, "{0}.lst".format(seqid)) - fw = open(fname, "w") - for b in bs: - print("{0}{1}".format(b.accn.replace(" ", ""), b.strand), file=fw) - stanza.append((seqid, fname)) - fw.close() - return pf, stanza - - -def write_txt(bedfile): - pf = op.basename(bedfile).split(".")[0][:20] - txtfile = pf + ".txt" - fw = open(txtfile, "w") - print("#genome", file=fw) - bed = Bed(bedfile) - for b in bed: - print( - " ".join(str(x) for x in (b.accn, b.seqid, b.start, b.end, b.strand)), - file=fw, - ) - fw.close() - return txtfile - - -def cyntenator(args): - """ - %prog cyntenator athaliana.athaliana.last athaliana.bed - - Prepare input for Cyntenator. - """ - p = OptionParser(cyntenator.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - lastfile = args[0] - fp = open(lastfile) - filteredlastfile = lastfile + ".blast" - fw = open(filteredlastfile, "w") - for row in fp: - b = BlastLine(row) - if b.query == b.subject: - continue - print("\t".join((b.query, b.subject, str(b.score))), file=fw) - fw.close() - - bedfiles = args[1:] - fp = open(lastfile) - b = BlastLine(next(fp)) - subject = b.subject - txtfiles = [] - for bedfile in bedfiles: - order = Bed(bedfile).order - if subject in order: - db = op.basename(bedfile).split(".")[0][:20] - logger.debug("Found db: {0}".format(db)) - txtfile = write_txt(bedfile) - txtfiles.append(txtfile) - - db += ".txt" - mm = MakeManager() - for txtfile in txtfiles: - outfile = txtfile + ".alignment" - cmd = 'cyntenator -t "({0} {1})" -h blast {2} > {3}'.format( - txtfile, db, filteredlastfile, outfile - ) - mm.add((txtfile, db, filteredlastfile), outfile, cmd) - mm.write() - - -def iadhore(args): - """ - %prog iadhore athaliana.athaliana.last athaliana.bed - - Wrap around iADHoRe. 
- """ - p = OptionParser(iadhore.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - lastfile = args[0] - bedfiles = args[1:] - blast_table = "blast_table.txt" - fp = open(lastfile) - seen = set() - for row in fp: - c = BlastLine(row) - a, b = c.query, c.subject - a, b = gene_name(a), gene_name(b) - if a > b: - a, b = b, a - seen.add((a, b)) - - fw = open(blast_table, "w") - for a, b in seen: - print("\t".join((a, b)), file=fw) - fw.close() - logger.debug("A total of {0} pairs written to `{1}`".format(len(seen), blast_table)) - - fw = open("config.txt", "w") - for bedfile in bedfiles: - pf, stanza = write_lst(bedfile) - print("genome={0}".format(pf), file=fw) - for seqid, fname in stanza: - print(" ".join((seqid, fname)), file=fw) - print(file=fw) - - print("blast_table={0}".format(blast_table), file=fw) - print("cluster_type=colinear", file=fw) - print("tandem_gap=10", file=fw) - print("prob_cutoff=0.001", file=fw) - print("gap_size=20", file=fw) - print("cluster_gap=20", file=fw) - print("q_value=0.9", file=fw) - print("anchor_points=4", file=fw) - print("alignment_method=gg2", file=fw) - print("max_gaps_in_alignment=20", file=fw) - print("output_path=i-adhore_out", file=fw) - print("number_of_threads=4", file=fw) - fw.close() - - -def extract_groups(g, pairs, txtfile): - register = defaultdict(list) - fp = open(txtfile) - next(fp) - for row in fp: - if row[0] != ">": - continue - track, atg, myname, pairname = row.split() - pairname = pairname.rstrip("ab").upper() - register[pairname].append(atg.upper()) - - for pairname, genes in register.items(): - tag = pairname[0] - tag = {"A": "alpha", "B": "beta", "C": "gamma", "S": "others"}[tag] - pairs.add(tuple(sorted(genes) + [tag])) - g.join(*genes) - - -def athalianatruth(args): - """ - %prog athalianatruth J_a.txt J_bc.txt - - Prepare pairs data for At alpha/beta/gamma. 
- """ - p = OptionParser(athalianatruth.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - atxt, bctxt = args - g = Grouper() - pairs = set() - for txt in (atxt, bctxt): - extract_groups(g, pairs, txt) - - fw = open("pairs", "w") - for pair in sorted(pairs): - print("\t".join(pair), file=fw) - fw.close() - - fw = open("groups", "w") - for group in list(g): - print(",".join(group), file=fw) - fw.close() - - -def make_gff(bed, prefix, fw): - bed = Bed(bed) - nfeats = 0 - for b in bed: - seqid = prefix + b.seqid - print("\t".join(str(x) for x in (seqid, b.accn, b.start, b.end)), file=fw) - nfeats += 1 - logger.debug("A total of {0} features converted to `{1}`".format(nfeats, fw.name)) - - -def mcscanx(args): - """ - %prog mcscanx athaliana.athaliana.last athaliana.bed - - Wrap around MCScanX. - """ - p = OptionParser(mcscanx.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - blastfile = args[0] - bedfiles = args[1:] - prefix = "_".join(op.basename(x)[:2] for x in bedfiles) - symlink(blastfile, prefix + ".blast") - allbedfile = prefix + ".gff" - fw = open(allbedfile, "w") - for i, bedfile in enumerate(bedfiles): - prefix = chr(ord("A") + i) - make_gff(bedfile, prefix, fw) - fw.close() - - -def grass(args): - """ - %prog grass coge_master_table.txt james.txt - - Validate SynFind pan-grass set against James. 
This set can be generated: - - https://genomevolution.org/r/fhak - """ - p = OptionParser(grass.__doc__) - p.set_verbose() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - master, james = args - - fp = open(master) - next(fp) - master_store = defaultdict(set) - for row in fp: - atoms = row.split() - s = set() - for x in atoms[1:6]: - m = x.split(",") - s |= set(m) - if "-" in s: - s.remove("-") - - a = atoms[1] - master_store[a] |= set(s) - - fp = open(james) - next(fp) - james_store = {} - tandems = set() - for row in fp: - atoms = row.split() - s = set() - Os = set() - for x in atoms[:-1]: - m = x.split("||") - if m[0].startswith("Os"): - Os |= set(m) - if m[0].startswith("http"): - continue - if m[0].startswith("chr"): - m = ["proxy"] - if "||" in x: - tandems |= set(m) - s |= set(m) - - for x in Os: - james_store[x] = s - - jaccards = [] - corr_jaccards = [] - perfect_matches = 0 - corr_perfect_matches = 0 - for k, v in james_store.items(): - if k not in master_store: - continue - m = master_store[k] - jaccard = len(v & m) * 100 / len(v | m) - jaccards.append(jaccard) - diff = (v ^ m) - tandems - corr_jaccard = 100 - len(diff) * 100 / len(v | m) - corr_jaccards.append(corr_jaccard) - if opts.verbose: - print(k) - print(v) - print(m) - print(diff) - print(jaccard) - if jaccard > 99: - perfect_matches += 1 - if corr_jaccard > 99: - corr_perfect_matches += 1 - - logger.debug("Perfect matches: {0}".format(perfect_matches)) - logger.debug("Perfect matches (corrected): {0}".format(corr_perfect_matches)) - print("Jaccards:", SummaryStats(jaccards)) - print("Corrected Jaccards:", SummaryStats(corr_jaccards)) - - -def ecoli(args): - """ - %prog ecoli coge_master_table.txt query.bed - - Perform gene presence / absence analysis in Ecoli master spreadsheet. 
Ecoli - spresheets can be downloaded below: - - Ecoli K12 MG1655 (K) as query - Regenerate this analysis: https://genomevolution.org/r/fggo - - Ecoli O157:H7 EDL933 (O) as query - Regenerate this analysis: https://genomevolution.org/r/fgt7 - - Shigella flexneri 2a 301 (S) as query - Regenerate this analysis: https://genomevolution.org/r/fgte - - Perform a similar analysis as in: - Jin et al. (2002) Genome sequence of Shigella flexneri 2a: insights - into pathogenicity through comparison with genomes of Escherichia - coli K12 and O157. Nucleic Acid Research. - """ - p = OptionParser(ecoli.__doc__) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - master, querybed = args - fp = open(master) - header = next(fp) - assert header[0] == "#" - qorg = header.strip().split("\t")[1] - qorg = qorg.split(":")[-1].strip() - - store = {} - MISSING = ("proxy", "-") - for row in fp: - a, b, c = row.strip().split("\t")[1:4] - store[a] = b in MISSING and c in MISSING - - bed = Bed(querybed) - tags = [] - for i, b in enumerate(bed): - accn = b.accn - if accn not in store: - logger.warning("missing {0}".format(accn)) - continue - tags.append((store[accn], accn)) - - large = 4 # large segments - II = [] - II_large = [] - for missing, aa in groupby(tags, key=lambda x: x[0]): - aa = list(aa) - if not missing: - continue - glist = list(a for missing, a in aa) - II.append(glist) - size = len(glist) - if size >= large: - II_large.append(glist) - - fw = must_open(opts.outfile, "w") - for a, t in zip((II, II_large), ("", ">=4 ")): - nmissing = sum(len(x) for x in a) - logger.debug( - "A total of {0} {1}-specific {2}islands found with {3} genes.".format( - len(a), qorg, t, nmissing - ) - ) - - for x in II: - print(len(x), ",".join(x), file=fw) - - -def plot_diagram(ax, x, y, A, B, tag, label): - ax.text(x, y + 0.14, "{0}: {1}".format(tag, label), ha="center") - strip = tag != "G" - A.draw(ax, x, y + 0.06, gene_len=0.02, strip=strip) - 
B.draw(ax, x, y, gene_len=0.02, strip=strip) - - -def cartoon(args): - """ - %prog synteny.py - - Generate cartoon illustration of SynFind. - """ - p = OptionParser(cartoon.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="10x7") - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - # Panel A - A = CartoonRegion(41) - A.draw(root, 0.35, 0.85, strip=False, color=False) - x1, x2 = A.x1, A.x2 - lsg = "lightslategray" - pad = 0.01 - xc, yc = 0.35, 0.88 - arrowlen = x2 - xc - pad - arrowprops = dict( - length_includes_head=True, - width=0.01, - fc=lsg, - lw=0, - head_length=arrowlen * 0.15, - head_width=0.03, - ) - p = FancyArrow(xc - pad, yc, -arrowlen, 0, shape="left", **arrowprops) - root.add_patch(p) - p = FancyArrow(xc + pad, yc, arrowlen, 0, shape="right", **arrowprops) - root.add_patch(p) - - yt = yc + 4 * pad - root.text((x1 + xc) / 2, yt, "20 genes upstream", ha="center") - root.text((x2 + xc) / 2, yt, "20 genes downstream", ha="center") - root.plot((xc,), (yc,), "o", mfc="w", mec=lsg, mew=2, lw=2, color=lsg) - root.text(xc, yt, "Query gene", ha="center") - - # Panel B - A.draw(root, 0.35, 0.7, strip=False) - - RoundRect(root, (0.07, 0.49), 0.56, 0.14, fc="y", alpha=0.2) - a = deepcopy(A) - a.evolve(mode="S", target=10) - a.draw(root, 0.35, 0.6) - b = deepcopy(A) - b.evolve(mode="F", target=8) - b.draw(root, 0.35, 0.56) - c = deepcopy(A) - c.evolve(mode="G", target=6) - c.draw(root, 0.35, 0.52) - - for x in (a, b, c): - root.text(0.64, x.y, "Score={0}".format(x.nonwhites), va="center") - - # Panel C - A.truncate_between_flankers() - a.truncate_between_flankers() - b.truncate_between_flankers() - c.truncate_between_flankers(target=6) - - plot_diagram(root, 0.14, 0.2, A, a, "S", "syntenic") - plot_diagram(root, 0.37, 0.2, A, b, "F", "missing, with both flankers") - plot_diagram(root, 0.6, 0.2, A, c, "G", "missing, with one flanker") - - labels = ((0.04, 0.95, "A"), (0.04, 0.75, "B"), (0.04, 0.4, "C")) - 
panel_labels(root, labels) - - # Descriptions - xt = 0.85 - desc = ( - "Extract neighborhood", - "of *window* size", - "Count gene pairs within *window*", - "Find regions above *score* cutoff", - "Identify flankers", - "Annotate syntelog class", - ) - for yt, t in zip((0.88, 0.84, 0.64, 0.6, 0.3, 0.26), desc): - root.text(xt, yt, markup(t), ha="center", va="center") - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "cartoon" - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/tgbs.py b/jcvi/projects/tgbs.py deleted file mode 100644 index 51f64817..00000000 --- a/jcvi/projects/tgbs.py +++ /dev/null @@ -1,696 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Reference-free tGBS related functions. -""" - -import os -import os.path as op -import sys - -from collections import Counter -from pickle import dump, load - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - iglob, - logger, - mkdir, - need_update, - sh, -) -from ..apps.cdhit import deduplicate -from ..apps.gmap import check_index -from ..apps.grid import MakeManager -from ..formats.base import must_open, write_file -from ..formats.fasta import Fasta, SeqIO -from ..formats.fastq import iter_fastq -from ..formats.sam import get_prefix -from ..graphics.base import normalize_axes, plt, savefig - - -speedupsh = r""" -cd {0} - -find *.native | sed 's/\..*//' | sort -u | \ - awk '{{ printf("split_by_chromosome.pl -s %s -o splitted_%s -native %s.*.native -x 5\n", \ - $0, $0, $0); }}' > split.sh -parallel -j {1} < split.sh - -find splitted_* -name "*.native" | \ - awk '{{ printf("SNP_Discovery-short.pl -native %s -o %s.SNPs_Het.txt -a 2 -ac 0.3 -c 0.8\n", \ - $0, $0); }}' > snps.sh -parallel -j {1} < snps.sh - -find splitted_*.log | \ - awk '{{ gsub("splitted_|.log", "", $0); \ - printf("combine_snps_single_file.pl -d splitted_%s -p \"*.txt\" -o 
%s.SNPs_Het.txt\n", \ - $0, $0); }}' > combine.sh -parallel -j {1} < combine.sh - -cd .. -""" - - -def main(): - - actions = ( - ("snpflow", "run SNP calling pipeline from reads to allele_counts"), - ("count", "count the number of reads in all clusters"), - ("snpplot", "illustrate the SNP sites in CDT"), - ("weblogo", "extract base composition for reads"), - ("novo", "reference-free tGBS pipeline v1"), - ("novo2", "reference-free tGBS pipeline v2"), - ("mstmap", "convert LMDs to MSTMAP input"), - ("query", "random access to loci file"), - ("synteny", "plot mst map against reference genome"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def build_index(locifile): - idxfile = locifile + ".idx" - if need_update(locifile, idxfile): - fp = open(locifile) - fw = open(idxfile, "w") - idx = {} - while True: - pos = fp.tell() - line = fp.readline() - if not line: - break - if not line.startswith("//"): - continue - tag, contig = line.split()[:2] - idx[contig] = pos - dump(idx, fw) - fw.close() - return idx - - idx = load(open(idxfile)) - return idx - - -def query(args): - """ - %prog query out.loci contig - - Random access to loci file. This script helps speeding up debugging. - """ - p = OptionParser(query.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - locifile, contig = args - idx = build_index(locifile) - pos = idx[contig] - logger.debug("Contig {0} found at pos {1}".format(contig, pos)) - fp = open(locifile) - fp.seek(pos) - section = [] - while True: - row = fp.readline() - if row.startswith("//") and row.split()[1] != contig: - break - section.append(row) - print("".join(section)) - - -def synteny(args): - """ - %prog synteny mstmap.out novo.final.fasta reference.fasta - - Plot MSTmap against reference genome. 
- """ - from jcvi.assembly.geneticmap import bed as geneticmap_bed - from jcvi.apps.align import blat - from jcvi.formats.blast import bed as blast_bed, best - - p = OptionParser(synteny.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - mstmapout, novo, ref = args - pf = mstmapout.split(".")[0] - rf = ref.split(".")[0] - mstmapbed = geneticmap_bed([mstmapout]) - cmd = "cut -d. -f1 {0}".format(mstmapbed) - tmpbed = mstmapbed + ".tmp" - sh(cmd, outfile=tmpbed) - os.rename(tmpbed, pf + ".bed") - - cmd = "cut -f4 {0} | cut -d. -f1 | sort -u".format(mstmapbed) - idsfile = pf + ".ids" - sh(cmd, outfile=idsfile) - fastafile = pf + ".fasta" - cmd = "faSomeRecords {0} {1} {2}".format(novo, idsfile, fastafile) - sh(cmd) - blastfile = blat([ref, fastafile]) - bestblastfile = best([blastfile]) - blastbed = blast_bed([bestblastfile]) - os.rename(blastbed, rf + ".bed") - - anchorsfile = "{0}.{1}.anchors".format(pf, rf) - cmd = "paste {0} {0}".format(idsfile) - sh(cmd, outfile=anchorsfile) - - -def mstmap(args): - """ - %prog mstmap LMD50.snps.genotype.txt - - Convert LMDs to MSTMAP input. 
- """ - from jcvi.assembly.geneticmap import MSTMatrix - - p = OptionParser(mstmap.__doc__) - p.add_argument( - "--population_type", - default="RIL6", - help="Type of population, possible values are DH and RILd", - ) - p.add_argument( - "--missing_threshold", - default=0.5, - help="Missing threshold, .25 excludes any marker with >25% missing", - ) - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (lmd,) = args - fp = open(lmd) - next(fp) # Header - table = {"0": "-", "1": "A", "2": "B", "3": "X"} - mh = ["locus_name"] + next(fp).split()[4:] - genotypes = [] - for row in fp: - atoms = row.split() - chr, pos, ref, alt = atoms[:4] - locus_name = ".".join((chr, pos)) - codes = [table[x] for x in atoms[4:]] - genotypes.append([locus_name] + codes) - - mm = MSTMatrix(genotypes, mh, opts.population_type, opts.missing_threshold) - mm.write(opts.outfile, header=True) - - -def weblogo(args): - """ - %prog weblogo [fastafile|fastqfile] - - Extract base composition for reads - """ - import numpy as np - from rich.progress import Progress - - p = OptionParser(weblogo.__doc__) - p.add_argument("-N", default=10, type=int, help="Count the first and last N bases") - p.add_argument("--nreads", default=1000000, type=int, help="Parse first N reads") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastqfile,) = args - N = opts.N - nreads = opts.nreads - - pat = "ATCG" - L = np.zeros((4, N), dtype="int32") - R = np.zeros((4, N), dtype="int32") - p = dict((a, i) for (i, a) in enumerate(pat)) - L4, R3 = Counter(), Counter() - - k = 0 - fw_L = open("L.fasta", "w") - fw_R = open("R.fasta", "w") - fastq = fastqfile.endswith(".fastq") - it = iter_fastq(fastqfile) if fastq else SeqIO.parse(must_open(fastqfile), "fasta") - - with Progress() as progress: - progress.add_task("[green] Processing ...", start=False, total=nreads) - for rec in it: - k += 1 - if k > nreads: - break - if rec is 
None: - break - s = str(rec.seq) - for i, a in enumerate(s[:N]): - if a in p: - a = p[a] - L[a][i] += 1 - for j, a in enumerate(s[-N:][::-1]): - if a in p: - a = p[a] - R[a][N - 1 - j] += 1 - l4, r3 = s[:4], s[-3:] - L4[l4] += 1 - R3[r3] += 1 - print(">{0}\n{1}".format(k, s[:N]), file=fw_L) - print(">{0}\n{1}".format(k, s[-N:]), file=fw_R) - - fw_L.close() - fw_R.close() - - cmd = "weblogo -F png -s large -f {0}.fasta -o {0}.png" - cmd += " --color-scheme classic --composition none -U probability" - cmd += " --title {1}" - sh(cmd.format("L", "First_10_bases")) - sh(cmd.format("R", "Last_10_bases")) - - np.savetxt("L.{0}.csv".format(pat), L, delimiter=",", fmt="%d") - np.savetxt("R.{0}.csv".format(pat), R, delimiter=",", fmt="%d") - - fw = open("L4.common", "w") - for p, c in L4.most_common(N): - print("\t".join((p, str(c))), file=fw) - fw.close() - - fw = open("R3.common", "w") - for p, c in R3.most_common(N): - print("\t".join((p, str(c))), file=fw) - fw.close() - - -def count(args): - """ - %prog count cdhit.consensus.fasta - - Scan the headers for the consensus clusters and count the number of reads. 
- """ - from jcvi.graphics.histogram import stem_leaf_plot - from jcvi.utils.cbook import SummaryStats - - p = OptionParser(count.__doc__) - p.add_argument("--csv", help="Write depth per contig to file") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastafile,) = args - csv = open(opts.csv, "w") if opts.csv else None - - f = Fasta(fastafile, lazy=True) - sizes = [] - for desc, rec in f.iterdescriptions_ordered(): - if desc.startswith("singleton"): - sizes.append(1) - continue - - # consensus_for_cluster_0 with 63 sequences - if "with" in desc: - name, w, size, seqs = desc.split() - if csv: - print("\t".join(str(x) for x in (name, size, len(rec))), file=csv) - assert w == "with" - sizes.append(int(size)) - # MRD85:00603:02472;size=167; - else: - name, size, tail = desc.split(";") - sizes.append(int(size.replace("size=", ""))) - - if csv: - csv.close() - logger.debug("File written to `%s`.", opts.csv) - - s = SummaryStats(sizes) - print(s, file=sys.stderr) - stem_leaf_plot(s.data, 0, 100, 20, title="Cluster size") - - -def novo(args): - """ - %prog novo reads.fastq - - Reference-free tGBS pipeline v1. 
- """ - from jcvi.assembly.kmer import jellyfish, histogram - from jcvi.assembly.preprocess import diginorm - from jcvi.formats.fasta import filter as fasta_filter, format - from jcvi.apps.cdhit import filter as cdhit_filter - - p = OptionParser(novo.__doc__) - p.add_argument( - "--technology", - choices=("illumina", "454", "iontorrent"), - default="iontorrent", - help="Sequencing platform", - ) - p.set_depth(depth=50) - p.set_align(pctid=96) - p.set_home("cdhit", default="/usr/local/bin/") - p.set_home("fiona", default="/usr/local/bin/") - p.set_home("jellyfish", default="/usr/local/bin/") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (fastqfile,) = args - cpus = opts.cpus - depth = opts.depth - pf, sf = fastqfile.rsplit(".", 1) - - diginormfile = pf + ".diginorm." + sf - if need_update(fastqfile, diginormfile): - diginorm([fastqfile, "--single", "--depth={0}".format(depth)]) - keepabund = fastqfile + ".keep.abundfilt" - sh("cp -s {0} {1}".format(keepabund, diginormfile)) - - jf = pf + "-K23.histogram" - if need_update(diginormfile, jf): - jellyfish( - [ - diginormfile, - "--prefix={0}".format(pf), - "--cpus={0}".format(cpus), - "--jellyfish_home={0}".format(opts.jellyfish_home), - ] - ) - - genomesize = histogram([jf, pf, "23"]) - fiona = pf + ".fiona.fa" - if need_update(diginormfile, fiona): - cmd = op.join(opts.fiona_home, "fiona") - cmd += " -g {0} -nt {1} --sequencing-technology {2}".format( - genomesize, cpus, opts.technology - ) - cmd += " -vv {0} {1}".format(diginormfile, fiona) - logfile = pf + ".fiona.log" - sh(cmd, outfile=logfile, errfile=logfile) - - dedup = "cdhit" - pctid = opts.pctid - cons = fiona + ".P{0}.{1}.consensus.fasta".format(pctid, dedup) - if need_update(fiona, cons): - deduplicate( - [ - fiona, - "--consensus", - "--reads", - "--pctid={0}".format(pctid), - "--cdhit_home={0}".format(opts.cdhit_home), - ] - ) - - filteredfile = pf + ".filtered.fasta" - if need_update(cons, 
filteredfile): - covfile = pf + ".cov.fasta" - cdhit_filter( - [cons, "--outfile={0}".format(covfile), "--minsize={0}".format(depth / 5)] - ) - fasta_filter([covfile, "50", "--outfile={0}".format(filteredfile)]) - - finalfile = pf + ".final.fasta" - if need_update(filteredfile, finalfile): - format( - [ - filteredfile, - finalfile, - "--sequential=replace", - "--prefix={0}_".format(pf), - ] - ) - - -def scan_read_files(trimmed, patterns): - reads = iglob(trimmed, patterns) - samples = sorted(set(op.basename(x).split(".")[0] for x in reads)) - logger.debug( - "Total {0} read files from {1} samples".format(len(reads), len(samples)) - ) - return reads, samples - - -def novo2(args): - """ - %prog novo2 trimmed projectname - - Reference-free tGBS pipeline v2. - """ - p = OptionParser(novo2.__doc__) - p.set_fastq_names() - p.set_align(pctid=95) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - trimmed, pf = args - pctid = opts.pctid - reads, samples = scan_read_files(trimmed, opts.names) - - # Set up directory structure - clustdir = "uclust" - acdir = "allele_counts" - for d in (clustdir, acdir): - mkdir(d) - - mm = MakeManager() - clustfiles = [] - # Step 0 - clustering within sample - for s in samples: - flist = [x for x in reads if op.basename(x).split(".")[0] == s] - outfile = s + ".P{0}.clustS".format(pctid) - outfile = op.join(clustdir, outfile) - cmd = "python -m jcvi.apps.uclust cluster --cpus=8" - cmd += " {0} {1}".format(s, " ".join(flist)) - cmd += " --outdir={0}".format(clustdir) - cmd += " --pctid={0}".format(pctid) - mm.add(flist, outfile, cmd) - clustfiles.append(outfile) - - # Step 1 - make consensus within sample - allcons = [] - for s, clustfile in zip(samples, clustfiles): - outfile = s + ".P{0}.consensus".format(pctid) - outfile = op.join(clustdir, outfile) - cmd = "python -m jcvi.apps.uclust consensus" - cmd += " {0}".format(clustfile) - mm.add(clustfile, outfile, cmd) - allcons.append(outfile) - - # Step 2 - 
clustering across samples - clustSfile = pf + ".P{0}.clustS".format(pctid) - cmd = "python -m jcvi.apps.uclust mcluster {0}".format(" ".join(allcons)) - cmd += " --prefix={0}".format(pf) - mm.add(allcons, clustSfile, cmd) - - # Step 3 - make consensus across samples - locifile = pf + ".P{0}.loci".format(pctid) - cmd = "python -m jcvi.apps.uclust mconsensus {0}".format(" ".join(allcons)) - cmd += " --prefix={0}".format(pf) - mm.add(allcons + [clustSfile], locifile, cmd) - - mm.write() - - -def snpflow(args): - """ - %prog snpflow trimmed reference.fasta - - Run SNP calling pipeline until allele_counts are generated. This includes - generation of native files, SNP_Het file. Speedup for fragmented genomes - are also supported. - """ - p = OptionParser(snpflow.__doc__) - p.set_fastq_names() - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - trimmed, ref = args - nseqs = len(Fasta(ref)) - supercat = nseqs >= 1000 - if supercat: - logger.debug("Total seqs in ref: {0} (supercat={1})".format(nseqs, supercat)) - - reads, samples = scan_read_files(trimmed, opts.names) - - # Set up directory structure - nativedir, countsdir = "native", "allele_counts" - for d in (nativedir, countsdir): - mkdir(d) - - mm = MakeManager() - # Step 0 - index database - db = op.join(*check_index(ref, supercat=supercat, go=False)) - cmd = "python -m jcvi.apps.gmap index {0}".format(ref) - if supercat: - cmd += " --supercat" - coordsfile = db + ".coords" - supercatfile = ref.rsplit(".", 1)[0] + ".supercat.fasta" - mm.add(ref, (db, coordsfile), cmd) - else: - mm.add(ref, db, cmd) - - # Step 1 - GSNAP alignment and conversion to native file - allnatives = [] - allsamstats = [] - gmapdb = supercatfile if supercat else ref - for f in reads: - prefix = get_prefix(f, ref) - gsnapfile = op.join(nativedir, prefix + ".gsnap") - nativefile = op.join(nativedir, prefix + ".unique.native") - samstatsfile = op.join(nativedir, prefix + ".unique.sam.stats") - 
cmd = "python -m jcvi.apps.gmap align {0} {1}".format(gmapdb, f) - cmd += " --outdir={0} --native --cpus=1".format(nativedir) - mm.add((f, db), nativefile, cmd) - - cmd = "python -m jcvi.apps.gmap bam {0} {1} --cpus=1".format(gsnapfile, gmapdb) - mm.add(nativefile, samstatsfile, cmd) - allnatives.append(nativefile) - allsamstats.append(samstatsfile) - - # Step 2 - call SNP discovery - if supercat: - nativeconverted = nativedir + "-converted" - mkdir(nativeconverted) - allnativesc = [op.join(nativeconverted, op.basename(x)) for x in allnatives] - cmd = "tGBS-Convert_Pseudo_Genome_NATIVE_Coordinates.pl" - cmd += " -i {0}/*.native -o {1}".format(nativedir, nativeconverted) - cmd += " -c {0}".format(coordsfile) - cmds = ["rm -rf {0}".format(nativeconverted), cmd] - mm.add(allnatives + [coordsfile], allnativesc, cmds) - - runfile = "speedup.sh" - write_file(runfile, speedupsh.format(nativeconverted, opts.cpus)) - nativedir = nativeconverted - allsnps = [op.join(nativedir, "{0}.SNPs_Het.txt".format(x)) for x in samples] - mm.add(allnativesc, allsnps, "./{0}".format(runfile)) - else: - for s in samples: - snpfile = op.join(nativedir, "{0}.SNPs_Het.txt".format(s)) - cmd = "SNP_Discovery-short.pl" - cmd += " -native {0}/{1}.*unique.native".format(nativedir, s) - cmd += " -o {0} -a 2 -ac 0.3 -c 0.8".format(snpfile) - flist = [x for x in allnatives if op.basename(x).split(".")[0] == s] - mm.add(flist, snpfile, cmd) - - # Step 3 - generate equal file - allsnps = [op.join(nativedir, "{0}.SNPs_Het.txt".format(x)) for x in samples] - for s in samples: - equalfile = op.join(nativedir, "{0}.equal".format(s)) - cmd = "extract_reference_alleles.pl" - cmd += " --native {0}/{1}.*unique.native".format(nativedir, s) - cmd += " --genotype {0}/{1}.SNPs_Het.txt".format(nativedir, s) - cmd += " --allgenotypes {0}/*.SNPs_Het.txt".format(nativedir) - cmd += " --fasta {0} --output {1}".format(ref, equalfile) - mm.add(allsnps, equalfile, cmd) - - # Step 4 - generate snp matrix - allequals = 
[op.join(nativedir, "{0}.equal".format(x)) for x in samples] - matrix = "snps.matrix.txt" - cmd = "generate_matrix.pl" - cmd += " --tables {0}/*SNPs_Het.txt --equal {0}/*equal".format(nativedir) - cmd += " --fasta {0} --output {1}".format(ref, matrix) - mm.add(allsnps + allequals, matrix, cmd) - - # Step 5 - generate allele counts - allcounts = [] - for s in samples: - allele_counts = op.join(countsdir, "{0}.SNPs_Het.allele_counts".format(s)) - cmd = "count_reads_per_allele.pl -m snps.matrix.txt" - cmd += " -s {0} --native {1}/{0}.*unique.native".format(s, nativedir) - cmd += " -o {0}".format(allele_counts) - mm.add(matrix, allele_counts, cmd) - allcounts.append(allele_counts) - - # Step 6 - generate raw snps - rawsnps = "Genotyping.H3.txt" - cmd = "/home/shared/scripts/delin/SamplesGenotyping.pl --homo 3" - cmd += " -pf allele_counts -f {0} --outfile {1}".format(countsdir, rawsnps) - cmds = ["rm -f {0}".format(rawsnps), cmd] - mm.add(allcounts, rawsnps, cmds) - - # Step 7 - generate alignment report - sam_summary = "sam.summary" - cmd = "/home/shared/scripts/eddyyeh/alignment_stats.pl" - cmd += " -f {0} -o {1}".format(" ".join(allsamstats), sam_summary) - mm.add(allsamstats, sam_summary, cmd) - - native_summary = "native.summary" - cmd = "/home/shared/scripts/eddyyeh/alignment_stats.pl" - cmd += " -n {0} -o {1}".format(" ".join(allnatives), native_summary) - mm.add(allnatives, native_summary, cmd) - - mm.write() - - -def snpplot(args): - """ - %prog counts.cdt - - Illustrate the histogram per SNP site. 
- """ - p = OptionParser(snpplot.__doc__) - opts, args, iopts = p.set_image_options(args, format="png") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (datafile,) = args - # Read in CDT file - fp = open(datafile) - next(fp) - next(fp) - data = [] - for row in fp: - atoms = row.split()[4:] - nval = len(atoms) - values = [float(x) for x in atoms] - # normalize - values = [x * 1.0 / sum(values) for x in values] - data.append(values) - - pf = datafile.rsplit(".", 1)[0] - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - xmin, xmax = 0.1, 0.9 - ymin, ymax = 0.1, 0.9 - yinterval = (ymax - ymin) / len(data) - colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg") - ystart = ymax - for d in data: - xstart = xmin - for dd, c in zip(d, colors): - xend = xstart + (xmax - xmin) * dd - root.plot((xstart, xend), (ystart, ystart), "-", color=c) - xstart = xend - ystart -= yinterval - - root.text( - 0.05, - 0.5, - "{0} LMD50 SNPs".format(len(data)), - ha="center", - va="center", - rotation=90, - color="lightslategray", - ) - - for x, t, c in zip((0.3, 0.5, 0.7), ("REF", "ALT", "HET"), "rbg"): - root.text(x, 0.95, t, color=c, ha="center", va="center") - normalize_axes(root) - - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -if __name__ == "__main__": - main() diff --git a/jcvi/projects/vanilla.py b/jcvi/projects/vanilla.py deleted file mode 100644 index afbc38a0..00000000 --- a/jcvi/projects/vanilla.py +++ /dev/null @@ -1,450 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Plotting scripts for the vanilla genome paper. 
-""" -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger -from ..compara.base import AnchorFile -from ..compara.synteny import check_beds -from ..formats.base import get_number -from ..formats.bed import Bed -from ..graphics.base import normalize_axes, panel_labels, plt, savefig -from ..graphics.chromosome import draw_chromosomes -from ..graphics.glyph import TextCircle -from ..graphics.synteny import Synteny, draw_gene_legend -from ..graphics.tree import LeafInfoFile, WGDInfoFile, draw_tree, parse_tree - - -def main(): - actions = ( - # Chromosome painting since WGD - ("ancestral", "paint 14 chromosomes following alpha WGD (requires data)"), - # main figures in text - ("ploidy", "plot vanilla synteny (requires data)"), - # Composite phylogeny - tree and ks - ("phylogeny", "create a composite figure with tree and ks"), - ("tree", "create a separate figure with tree"), - ("ks", "create a separate figure with ks"), - # Composite synteny - wgd and microsynteny - ("synteny", "create a composite figure with wgd and microsynteny"), - ("wgd", "create separate figures with wgd"), - ("microsynteny", "create separate figures with microsynteny"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def phylogeny(args): - """ - %prog phylogeny treefile ks.layout - - Create a composite figure with (A) tree and (B) ks. 
- """ - from ..compara.ks import Layout, KsPlot, KsFile - - p = OptionParser(phylogeny.__doc__) - _, args, iopts = p.set_image_options(args, figsize="10x12") - - (datafile, layoutfile) = args - - logger.debug("Load tree file `%s`", datafile) - t, hpd = parse_tree(datafile) - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes((0, 0, 1, 1)) - ax1 = fig.add_axes((0, 0.4, 1, 0.6)) - ax2 = fig.add_axes((0.12, 0.065, 0.8, 0.3)) - - margin, rmargin = 0.1, 0.2 # Left and right margin - leafinfo = LeafInfoFile("leafinfo.csv").cache - wgdinfo = WGDInfoFile("wgdinfo.csv").cache - outgroup = "ginkgo" - - # Panel A - draw_tree( - ax1, - t, - hpd=hpd, - margin=margin, - rmargin=rmargin, - supportcolor=None, - internal=False, - outgroup=outgroup, - reroot=False, - leafinfo=leafinfo, - wgdinfo=wgdinfo, - geoscale=True, - ) - # Panel B - ks_min = 0.0 - ks_max = 3.0 - bins = 60 - fill = False - layout = Layout(layoutfile) - print(layout, file=sys.stderr) - - kp = KsPlot(ax2, ks_max, bins, legendp="upper right") - for lo in layout: - data = KsFile(lo.ksfile) - data = [x.ng_ks for x in data] - data = [x for x in data if ks_min <= x <= ks_max] - kp.add_data( - data, - lo.components, - label=lo.label, - color=lo.color, - marker=lo.marker, - fill=fill, - fitted=False, - kde=True, - ) - - kp.draw(filename=None) - - normalize_axes(root, ax1) - labels = ((0.05, 0.95, "A"), (0.05, 0.4, "B")) - panel_labels(root, labels) - - image_name = "phylogeny.pdf" - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def tree(args): - """ - %prog tree treefile - - Create a tree figure. 
- """ - p = OptionParser(tree.__doc__) - _, args, iopts = p.set_image_options(args, figsize="10x8") - - (datafile,) = args - logger.debug("Load tree file `%s`", datafile) - t, hpd = parse_tree(datafile) - - fig = plt.figure(1, (iopts.w, iopts.h)) - ax1 = fig.add_axes((0, 0, 1, 1)) - - margin, rmargin = 0.1, 0.2 # Left and right margin - leafinfo = LeafInfoFile("leafinfo.csv").cache - wgdinfo = WGDInfoFile("wgdinfo.csv").cache - outgroup = "ginkgo" - - # Panel A - draw_tree( - ax1, - t, - hpd=hpd, - margin=margin, - rmargin=rmargin, - supportcolor=None, - internal=False, - outgroup=outgroup, - reroot=False, - leafinfo=leafinfo, - wgdinfo=wgdinfo, - geoscale=True, - ) - - normalize_axes(ax1) - image_name = "tree.pdf" - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def ks(args): - """ - %prog ks ks.layout - - Create a ks figure. - """ - from ..compara.ks import Layout, KsPlot, KsFile - - p = OptionParser(ks.__doc__) - _, args, iopts = p.set_image_options(args, figsize="10x4") - - (layoutfile,) = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - ax2 = fig.add_axes((0.12, 0.12, 0.8, 0.8)) - - # Panel B - ks_min = 0.0 - ks_max = 3.0 - bins = 60 - fill = False - layout = Layout(layoutfile) - print(layout, file=sys.stderr) - - kp = KsPlot(ax2, ks_max, bins, legendp="upper right") - for lo in layout: - data = KsFile(lo.ksfile) - data = [x.ng_ks for x in data] - data = [x for x in data if ks_min <= x <= ks_max] - kp.add_data( - data, - lo.components, - label=lo.label, - color=lo.color, - marker=lo.marker, - fill=fill, - fitted=False, - kde=True, - ) - - kp.draw(filename=None) - - image_name = "ks.pdf" - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def synteny(args): - """ - %prog synteny vplanifoliaA_blocks.bed vplanifoliaA.sizes \ - b1.blocks all.bed b1.layout - - Create a composite figure with (A) wgd and (B) microsynteny. 
- """ - p = OptionParser(synteny.__doc__) - _, args, iopts = p.set_image_options(args, figsize="12x12") - - (bedfile, sizesfile, blocksfile, allbedfile, blockslayout) = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - ax1 = fig.add_axes([0, 0.5, 1, 0.5]) - ax2 = fig.add_axes([0.02, 0, 0.98, 0.5]) - - # Panel A - title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$" - draw_chromosomes( - ax1, - bedfile, - sizes=sizesfile, - iopts=iopts, - mergedist=200000, - winsize=50000, - imagemap=False, - gauge=True, - legend=False, - title=title, - ) - - # Panel B - draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout) - - normalize_axes(root, ax1, ax2) - labels = ((0.05, 0.95, "A"), (0.05, 0.5, "B")) - panel_labels(root, labels) - - image_name = "synteny.pdf" - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def wgd(args): - """ - %prog wgd vplanifoliaA_blocks.bed vplanifoliaA.sizes - - Create a wgd figure. - """ - p = OptionParser(synteny.__doc__) - _, args, iopts = p.set_image_options(args, figsize="8x5") - - (bedfile, sizesfile) = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - ax1 = fig.add_axes((0, 0, 1, 1)) - - title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$" - draw_chromosomes( - ax1, - bedfile, - sizes=sizesfile, - iopts=iopts, - mergedist=200000, - winsize=50000, - imagemap=False, - gauge=True, - legend=False, - title=title, - ) - - normalize_axes(ax1) - - image_name = "wgd.pdf" - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def microsynteny(args): - """ - %prog microsynteny b1.blocks all.bed b1.layout - - Create a microsynteny figure. 
- """ - p = OptionParser(synteny.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="12x6") - - (blocksfile, allbedfile, blockslayout) = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - ax2 = fig.add_axes((0, 0, 1, 1)) - - draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout) - - normalize_axes(ax2) - - image_name = "microsynteny.pdf" - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def ancestral(args): - """ - %prog ancestral vplanifoliaA.vplanifoliaA.anchors > vplanifoliaA_blocks.bed - - Paint 14 chromosomes following alpha WGD. - """ - p = OptionParser(ancestral.__doc__) - p.set_beds() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (anchorsfile,) = args - qbed, sbed, qorder, sorder, is_self = check_beds(anchorsfile, p, opts) - - # We focus on the following chromosome pairs - target_pairs = { - (1, 1), - (1, 6), - (1, 8), - (1, 13), - (2, 4), - (3, 12), - (3, 14), - (5, 6), - (5, 8), - (7, 9), - (7, 11), - (9, 10), - (10, 11), - } - - def get_target(achr, bchr): - if "chr" not in achr and "chr" not in bchr: - return None - achr, bchr = get_number(achr), get_number(bchr) - if achr > bchr: - achr, bchr = bchr, achr - if (achr, bchr) in target_pairs: - return achr, bchr - return None - - def build_bedline(astart, aend, target_pair): - # target_name = "{:02d}-{:02d}".format(*target_pair) - target_name = [str(x) for x in target_pair if x in (1, 2, 3, 5, 7, 10)][0] - return "\t".join( - str(x) for x in (astart.seqid, astart.start, aend.end, target_name) - ) - - # Iterate through the blocks, store any regions that has hits to one of the - # target_pairs - ac = AnchorFile(anchorsfile) - blocks = ac.blocks - outbed = Bed() - for block in blocks: - a, b, _ = zip(*block) - a = [qorder[x] for x in a] - b = [sorder[x] for x in b] - astart, aend = min(a)[1], max(a)[1] - bstart, bend = min(b)[1], max(b)[1] - # Now convert to BED lines with new accn - achr, bchr = astart.seqid, bstart.seqid - target = 
get_target(achr, bchr) - if target is None: - continue - outbed.add(build_bedline(astart, aend, target)) - outbed.add(build_bedline(bstart, bend, target)) - outbed.print_to_file(sorted=True) - - -def ploidy(args): - """ - %prog ploidy b1.blocks all.bed b1.layout - - Build a figure that illustrates the WGD history of the vanilla genome. - """ - p = OptionParser(ploidy.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="12x6") - - if len(args) != 3: - sys.exit(not p.print_help()) - - blocksfile, bedfile, blockslayout = args - - fig = plt.figure(1, (iopts.w, iopts.h)) - root = fig.add_axes([0, 0, 1, 1]) - - draw_ploidy(fig, root, blocksfile, bedfile, blockslayout) - - root.set_xlim(0, 1) - root.set_ylim(0, 1) - root.set_axis_off() - - pf = "vanilla-karyotype" - image_name = pf + "." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def draw_ploidy(fig, root, blocksfile, bedfile, blockslayout): - switchidsfile = "switch.ids" - Synteny( - fig, - root, - blocksfile, - bedfile, - blockslayout, - scalebar=True, - switch=switchidsfile, - ) - - # Legend showing the orientation of the genes - draw_gene_legend(root, 0.2, 0.3, 0.53) - - # WGD labels - radius = 0.025 - tau_color = "#bebada" - alpha_color = "#bc80bd" - label_color = "k" - pad = 0.05 - for y in (0.74 + 1.5 * pad, 0.26 - 1.5 * pad): - TextCircle( - root, - 0.25, - y, - r"$\alpha^{O}$", - radius=radius, - fc=alpha_color, - color=label_color, - fontweight="bold", - ) - TextCircle( - root, - 0.75, - y, - r"$\alpha^{O}$", - radius=radius, - fc=alpha_color, - color=label_color, - fontweight="bold", - ) - for y in (0.74 + 3 * pad, 0.26 - 3 * pad): - TextCircle( - root, 0.5, y, r"$\tau$", radius=radius, fc=tau_color, color=label_color - ) - - -if __name__ == "__main__": - main() diff --git a/jcvi/utils/__init__.py b/jcvi/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/jcvi/utils/__main__.py b/jcvi/utils/__main__.py deleted file mode 100644 index 
f627666a..00000000 --- a/jcvi/utils/__main__.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Assortment of utility scripts implementing recipes from Python cookbooks, such as iterators, sorters, range queries, etc. -""" - -from ..apps.base import dmain - - -if __name__ == "__main__": - dmain(__file__) diff --git a/jcvi/utils/aws.py b/jcvi/utils/aws.py deleted file mode 100644 index 0546df50..00000000 --- a/jcvi/utils/aws.py +++ /dev/null @@ -1,810 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -AWS-related methods. -""" -import fnmatch -import getpass -import json -import os -import os.path as op -import sys -import time - -from configparser import NoOptionError, NoSectionError -from datetime import datetime -from multiprocessing import Pool - -import boto3 - -from botocore.exceptions import ClientError, ParamValidationError - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - datafile, - get_config, - logger, - popen, - sh, -) -from ..formats.base import BaseFile, SetFile, timestamp - -from .console import console - - -AWS_CREDS_PATH = "%s/.aws/credentials" % (op.expanduser("~"),) - - -class InstanceSkeleton(BaseFile): - def __init__(self, filename=datafile("instance.json")): - super().__init__(filename) - self.spec = json.load(open(filename)) - - @property - def launch_spec(self): - return self.spec["LaunchSpec"] - - @property - def instance_id(self): - return self.spec["InstanceId"].strip() - - @property - def private_ip_address(self): - return self.spec["PrivateIpAddress"] - - @property - def availability_zone(self): - return self.spec["AvailabilityZone"] - - @property - def volumes(self): - return self.spec["Volumes"] - - @property - def block_device_mappings(self): - return self.launch_spec["BlockDeviceMappings"] - - @property - def ebs_optimized(self): - return self.launch_spec["EbsOptimized"] - - @property - def image_id(self): - return self.launch_spec["ImageId"] - - @property - def 
instance_type(self): - return self.launch_spec["InstanceType"] - - @property - def key_name(self): - return self.launch_spec["KeyName"] - - @property - def security_group_ids(self): - return self.launch_spec["SecurityGroupIds"] - - @property - def subnet_id(self): - return self.launch_spec["SubnetId"] - - @property - def iam_instance_profile(self): - return self.launch_spec["IamInstanceProfile"] - - def save(self): - fw = open(self.filename, "w") - s = json.dumps(self.spec, indent=4, sort_keys=True) - # Clear the trailing spaces - print("\n".join(x.rstrip() for x in s.splitlines()), file=fw) - fw.close() - - def save_instance_id(self, instance_id, private_id_address): - self.spec["InstanceId"] = instance_id - self.spec["PrivateIpAddress"] = private_id_address - self.save() - - def save_image_id(self, image_id): - self.spec["LaunchSpec"]["ImageId"] = image_id - self.save() - - -def main(): - - actions = ( - ("cp", "copy files with support for wildcards"), - ("ls", "list files with support for wildcards"), - ("rm", "remove files with support for wildcards"), - ("role", "change aws role"), - ("start", "start ec2 instance"), - ("stop", "stop ec2 instance"), - ("ip", "describe current instance"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def ip(args): - """ - %prog ip - - Show current IP address from JSON settings. - """ - p = OptionParser(ip.__doc__) - if len(args) != 0: - sys.exit(not p.print_help()) - - s = InstanceSkeleton() - print("IP address:", s.private_ip_address, file=sys.stderr) - print("Instance type:", s.instance_type, file=sys.stderr) - - -def start(args): - """ - %prog start - - Launch ec2 instance through command line. 
- """ - p = OptionParser(start.__doc__) - p.add_argument( - "--ondemand", - default=False, - action="store_true", - help="Do we want a more expensive on-demand instance", - ) - p.add_argument("--profile", default="mvrad-datasci-role", help="Profile name") - p.add_argument("--price", default=4.0, type=float, help="Spot price") - opts, args = p.parse_args(args) - - if len(args) != 0: - sys.exit(not p.print_help()) - - role(["htang"]) - session = boto3.Session(profile_name=opts.profile) - client = session.client("ec2") - s = InstanceSkeleton() - - # Make sure the instance id is empty - instance_id = s.instance_id - if instance_id != "": - logger.error("Instance exists {}".format(instance_id)) - sys.exit(1) - - launch_spec = s.launch_spec - instance_id = "" - - if opts.ondemand: - # Launch on-demand instance - response = client.run_instances( - BlockDeviceMappings=s.block_device_mappings, - MaxCount=1, - MinCount=1, - ImageId=s.image_id, - InstanceType=s.instance_type, - KeyName=s.key_name, - Placement={"AvailabilityZone": s.availability_zone}, - SecurityGroupIds=s.security_group_ids, - SubnetId=s.subnet_id, - EbsOptimized=s.ebs_optimized, - IamInstanceProfile=s.iam_instance_profile, - ) - instance_id = response["Instances"][0]["InstanceId"] - - else: - # Launch spot instance - response = client.request_spot_instances( - SpotPrice=str(opts.price), - InstanceCount=1, - Type="one-time", - AvailabilityZoneGroup=s.availability_zone, - LaunchSpecification=launch_spec, - ) - - request_id = response["SpotInstanceRequests"][0]["SpotInstanceRequestId"] - print("Request id {}".format(request_id), file=sys.stderr) - - while not instance_id: - response = client.describe_spot_instance_requests( - SpotInstanceRequestIds=[request_id] - ) - if "InstanceId" in response["SpotInstanceRequests"][0]: - instance_id = response["SpotInstanceRequests"][0]["InstanceId"] - else: - logger.debug("Waiting to be fulfilled ...") - time.sleep(10) - - # Check if the instance is running - 
print("Instance id {}".format(instance_id), file=sys.stderr) - status = "" - while status != "running": - logger.debug("Waiting instance to run ...") - time.sleep(3) - response = client.describe_instance_status(InstanceIds=[instance_id]) - if len(response["InstanceStatuses"]) > 0: - status = response["InstanceStatuses"][0]["InstanceState"]["Name"] - - # Tagging - name = "htang-lx-ondemand" if opts.ondemand else "htang-lx-spot" - response = client.create_tags( - Resources=[instance_id], - Tags=[ - {"Key": k, "Value": v} - for k, v in { - "Name": name, - "owner": "htang", - "project": "mv-bioinformatics", - }.items() - ], - ) - - # Attach working volumes - volumes = s.volumes - for volume in volumes: - response = client.attach_volume( - VolumeId=volume["VolumeId"], InstanceId=instance_id, Device=volume["Device"] - ) - - # Save instance id and ip - response = client.describe_instances(InstanceIds=[instance_id]) - ip_address = response["Reservations"][0]["Instances"][0]["PrivateIpAddress"] - print("IP address {}".format(ip_address), file=sys.stderr) - - s.save_instance_id(instance_id, ip_address) - - -def stop(args): - """ - %prog stop - - Stop EC2 instance. 
- """ - p = OptionParser(stop.__doc__) - p.add_argument("--profile", default="mvrad-datasci-role", help="Profile name") - opts, args = p.parse_args(args) - - if len(args) != 0: - sys.exit(not p.print_help()) - - role(["htang"]) - session = boto3.Session(profile_name=opts.profile) - client = session.client("ec2") - s = InstanceSkeleton() - - # Make sure the instance id is NOT empty - instance_id = s.instance_id - if instance_id == "": - logger.error("Cannot find instance_id {}".format(instance_id)) - sys.exit(1) - - block_device_mappings = [] - for volume in s.volumes: - block_device_mappings.append({"DeviceName": volume["Device"], "NoDevice": ""}) - - new_image_name = "htang-dev-{}-{}".format(timestamp(), int(time.time())) - response = client.create_image( - InstanceId=instance_id, - Name=new_image_name, - BlockDeviceMappings=block_device_mappings, - ) - print(response, file=sys.stderr) - new_image_id = response["ImageId"] - - image_status = "" - while image_status != "available": - logger.debug("Waiting for image to be ready") - time.sleep(10) - response = client.describe_images(ImageIds=[new_image_id]) - image_status = response["Images"][0]["State"] - - # Delete old image, snapshot and shut down instance - old_image_id = s.image_id - response = client.describe_images(ImageIds=[old_image_id]) - old_snapshot_id = response["Images"][0]["BlockDeviceMappings"][0]["Ebs"][ - "SnapshotId" - ] - response = client.deregister_image(ImageId=old_image_id) - print(response, file=sys.stderr) - response = client.delete_snapshot(SnapshotId=old_snapshot_id) - print(response, file=sys.stderr) - response = client.terminate_instances(InstanceIds=[instance_id]) - print(response, file=sys.stderr) - - # Save new image id - s.save_image_id(new_image_id) - s.save_instance_id("", "") - - -def glob_s3(store, keys=None, recursive=False): - store, cards = store.rsplit("/", 1) - contents = ls_s3(store, recursive=recursive) - if keys: - filtered = [x for x in contents if 
op.basename(x).split(".")[0] in keys] - else: - filtered = fnmatch.filter(contents, cards) - - if recursive: - store = "s3://" + store.replace("s3://", "").split("/")[0] - - return filtered - - -def rm_s3(store): - cmd = "aws s3 rm {}".format(store) - sh(cmd) - - -def rm(args): - """ - %prog rm "s3://hli-mv-data-science/htang/str/*.csv" - - Remove a bunch of files. - """ - p = OptionParser(rm.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (store,) = args - contents = glob_s3(store) - for c in contents: - rm_s3(c) - - -def worker(work): - c, target, force = work - if force or not op.exists(target): - pull_from_s3(c, target) - - -def cp(args): - """ - %prog cp "s3://hli-mv-data-science/htang/str/*.csv" . - - Copy files to folder. Accepts list of s3 addresses as input. - """ - p = OptionParser(cp.__doc__) - p.add_argument( - "--force", default=False, action="store_true", help="Force overwrite if exists" - ) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - store, folder = args - force = opts.force - cpus = opts.cpus - if op.exists(store): - contents = [x.strip().split(",") for x in open(store)] - else: - contents = glob_s3(store) - - tasks = [] - for c in contents: - if isinstance(c, str): - oc = op.basename(c) - tc = op.join(folder, oc) - else: - if len(c) == 2: - c, tc = c - else: - (c,) = c - tc = op.basename(c) - tasks.append((c, tc, force)) - - worker_pool = Pool(cpus) - worker_pool.map(worker, tasks) - worker_pool.close() - worker_pool.join() - - -def ls(args): - """ - %prog ls "s3://hli-mv-data-science/htang/str/*.vcf.gz" - - List files with support for wildcards. 
- """ - p = OptionParser(ls.__doc__) - p.add_argument("--keys", help="List of keys to include") - p.add_argument( - "--recursive", default=False, action="store_true", help="Recursive search" - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (store,) = args - keys = opts.keys - if keys: - keys = SetFile(keys) - print("\n".join(glob_s3(store, keys=keys, recursive=opts.recursive))) - - -def s3ify(address): - if not address.startswith("s3://"): - address = "s3://" + address.lstrip("/") - return address - - -def push_to_s3(s3_store, obj_name): - cmd = "sync" if op.isdir(obj_name) else "cp" - s3address = "{0}/{1}".format(s3_store, obj_name) - s3address = s3ify(s3address) - cmd = "aws s3 {0} {1} {2} --sse".format(cmd, obj_name, s3address) - sh(cmd) - return s3address - - -def pull_from_s3(s3_store, file_name=None, overwrite=True): - is_dir = s3_store.endswith("/") - if is_dir: - s3_store = s3_store.rstrip("/") - file_name = file_name or s3_store.split("/")[-1] - if not op.exists(file_name): - s3_store = s3ify(s3_store) - if overwrite or (not op.exists(file_name)): - cmd = "aws s3 cp {0} {1} --sse".format(s3_store, file_name) - if is_dir: - cmd += " --recursive" - sh(cmd) - return op.abspath(file_name) - - -def sync_from_s3(s3_store, target_dir=None): - s3_store = s3_store.rstrip("/") - s3_store = s3ify(s3_store) - if target_dir is None: - target_dir = op.basename(s3_store) - cmd = "aws s3 sync {}/ {}/".format(s3_store, target_dir) - sh(cmd) - return target_dir - - -def ls_s3(s3_store_obj_name, recursive=False): - s3_store_obj_name = s3ify(s3_store_obj_name) - cmd = "aws s3 ls {0}/".format(s3_store_obj_name) - contents = [] - for row in popen(cmd): - f = row.split()[-1] - f = op.join(s3_store_obj_name, f) - contents.append(f) - - if recursive: - que = [x for x in contents if x.endswith("/")] - while que: - f = que.pop(0).rstrip("/") - contents += ls_s3(f, recursive=True) - - return contents - - -def 
check_exists_s3(s3_store_obj_name: str, warn=False) -> bool: - """ - Check if s3 object exists. - """ - s3_store_obj_name = s3ify(s3_store_obj_name) - cmd = "aws s3 ls {0} | wc -l".format(s3_store_obj_name) - counts = int(popen(cmd).read()) - exists = counts != 0 - if exists and warn: - logger.debug("{} exists. Skipped.".format(s3_store_obj_name)) - return exists - - -def aws_configure(profile, key, value): - sh("aws configure set profile.{0}.{1} {2}".format(profile, key, value)) - - -def role(args): - """ - %prog role htang - - Change aws role. - """ - ( - src_acct, - src_username, - dst_acct, - dst_role, - ) = "205134639408 htang 114692162163 mvrad-datasci-role".split() - - p = OptionParser(role.__doc__) - p.add_argument("--profile", default="mvrad-datasci-role", help="Profile name") - p.add_argument( - "--device", - default="arn:aws:iam::" + src_acct + ":mfa/" + src_username, - metavar="arn:aws:iam::123456788990:mfa/dudeman", - help="The MFA Device ARN. This value can also be " - "provided via the environment variable 'MFA_DEVICE' or" - " the ~/.aws/credentials variable 'aws_mfa_device'.", - ) - p.add_argument( - "--duration", - type=int, - default=3600, - help="The duration, in seconds, that the temporary " - "credentials should remain valid. Minimum value: " - "900 (15 minutes). Maximum: 129600 (36 hours). " - "Defaults to 43200 (12 hours), or 3600 (one " - "hour) when using '--assume-role'. This value " - "can also be provided via the environment " - "variable 'MFA_STS_DURATION'. ", - ) - p.add_argument( - "--assume-role", - "--assume", - default="arn:aws:iam::" + dst_acct + ":role/" + dst_role, - metavar="arn:aws:iam::123456788990:role/RoleName", - help="The ARN of the AWS IAM Role you would like to " - "assume, if specified. 
This value can also be provided" - " via the environment variable 'MFA_ASSUME_ROLE'", - ) - p.add_argument( - "--role-session-name", - help="Friendly session name required when using --assume-role", - default=getpass.getuser(), - ) - p.add_argument( - "--force", - help="Refresh credentials even if currently valid.", - action="store_true", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - # Use a config to check the expiration of session token - config = get_config(AWS_CREDS_PATH) - validate(opts, config) - - -def validate(args, config): - """Validate if the config file is properly structured""" - profile = args.profile - if not args.profile: - if os.environ.get("AWS_PROFILE"): - args.profile = os.environ.get("AWS_PROFILE") - else: - args.profile = "default" - - if args.assume_role: - role_msg = "with assumed role: %s" % (args.assume_role,) - elif config.has_option(args.profile, "assumed_role_arn"): - role_msg = "with assumed role: %s" % ( - config.get(args.profile, "assumed_role_arn") - ) - else: - role_msg = "" - logger.info("Validating credentials for profile: %s %s" % (profile, role_msg)) - reup_message = "Obtaining credentials for a new role or profile." - - try: - key_id = config.get(profile, "aws_access_key_id") - access_key = config.get(profile, "aws_secret_access_key") - except NoSectionError: - log_error_and_exit( - "Credentials session '[%s]' is missing. 
" - "You must add this section to your credentials file " - "along with your long term 'aws_access_key_id' and " - "'aws_secret_access_key'" % (profile,) - ) - except NoOptionError as e: - log_error_and_exit(e) - - # get device from param, env var or config - if not args.device: - if os.environ.get("MFA_DEVICE"): - args.device = os.environ.get("MFA_DEVICE") - elif config.has_option(profile, "aws_mfa_device"): - args.device = config.get(profile, "aws_mfa_device") - else: - log_error_and_exit( - "You must provide --device or MFA_DEVICE or set " - '"aws_mfa_device" in ".aws/credentials"' - ) - - # get assume_role from param or env var - if not args.assume_role: - if os.environ.get("MFA_ASSUME_ROLE"): - args.assume_role = os.environ.get("MFA_ASSUME_ROLE") - elif config.has_option(profile, "assume_role"): - args.assume_role = config.get(profile, "assume_role") - - # get duration from param, env var or set default - if not args.duration: - if os.environ.get("MFA_STS_DURATION"): - args.duration = int(os.environ.get("MFA_STS_DURATION")) - else: - args.duration = 3600 if args.assume_role else 43200 - - # If this is False, only refresh credentials if expired. Otherwise - # always refresh. - force_refresh = False - - # Validate presence of profile-term section - if not config.has_section(profile): - config.add_section(profile) - force_refresh = True - # Validate option integrity of profile section - else: - required_options = [ - "assumed_role", - "aws_access_key_id", - "aws_secret_access_key", - "aws_session_token", - "aws_security_token", - "expiration", - ] - try: - short_term = {} - for option in required_options: - short_term[option] = config.get(profile, option) - except NoOptionError: - logger.warning( - "Your existing credentials are missing or invalid, " - "obtaining new credentials." 
- ) - force_refresh = True - - try: - current_role = config.get(profile, "assumed_role_arn") - except NoOptionError: - current_role = None - - if args.force: - logger.info("Forcing refresh of credentials.") - force_refresh = True - # There are not credentials for an assumed role, - # but the user is trying to assume one - elif current_role is None and args.assume_role: - logger.info(reup_message) - force_refresh = True - # There are current credentials for a role and - # the role arn being provided is the same. - elif ( - current_role is not None - and args.assume_role - and current_role == args.assume_role - ): - pass - # There are credentials for a current role and the role - # that is attempting to be assumed is different - elif ( - current_role is not None - and args.assume_role - and current_role != args.assume_role - ): - logger.info(reup_message) - force_refresh = True - # There are credentials for a current role and no role arn is - # being supplied - elif current_role is not None and args.assume_role is None: - logger.info(reup_message) - force_refresh = True - - should_refresh = True - - # Unless we're forcing a refresh, check expiration. 
- if not force_refresh: - exp = datetime.strptime(config.get(profile, "expiration"), "%Y-%m-%d %H:%M:%S") - diff = exp - datetime.utcnow() - if diff.total_seconds() <= 0: - logger.info("Your credentials have expired, renewing.") - else: - should_refresh = False - logger.info( - "Your credentials are still valid for %s seconds" - " they will expire at %s" % (diff.total_seconds(), exp) - ) - - if should_refresh: - get_credentials(profile, args, config) - - -def get_credentials(profile, args, config): - mfa_token = console.input( - "Enter AWS MFA code for device [%s] " - "(renewing for %s seconds): " % (args.device, args.duration) - ) - - boto3.setup_default_session(profile_name="default") - client = boto3.client("sts") - - if args.assume_role: - - logger.info( - "Assuming Role - Profile: %s, Role: %s, Duration: %s", - profile, - args.assume_role, - args.duration, - ) - - try: - print((args.assume_role, args.role_session_name, args.device, mfa_token)) - response = client.assume_role( - RoleArn=args.assume_role, - RoleSessionName=args.role_session_name, - SerialNumber=args.device, - TokenCode=mfa_token, - ) - except ClientError as e: - log_error_and_exit( - "An error occured while calling assume role: {}".format(e) - ) - except ParamValidationError: - log_error_and_exit("Token must be six digits") - - config.set( - profile, - "assumed_role", - "True", - ) - config.set( - profile, - "assumed_role_arn", - args.assume_role, - ) - else: - logger.info( - "Fetching Credentials - Profile: %s, Duration: %s", profile, args.duration - ) - try: - response = client.get_session_token( - DurationSeconds=args.duration, - SerialNumber=args.device, - TokenCode=mfa_token, - ) - except ClientError as e: - log_error_and_exit( - "An error occured while calling assume role: {}".format(e) - ) - except ParamValidationError: - log_error_and_exit("Token must be six digits") - - config.set( - profile, - "assumed_role", - "False", - ) - config.remove_option(profile, "assumed_role_arn") - - # 
aws_session_token and aws_security_token are both added - # to support boto and boto3 - options = [ - ("aws_access_key_id", "AccessKeyId"), - ("aws_secret_access_key", "SecretAccessKey"), - ("aws_session_token", "SessionToken"), - ("aws_security_token", "SessionToken"), - ] - - for option, value in options: - config.set(profile, option, response["Credentials"][value]) - # Save expiration individiually, so it can be manipulated - config.set( - profile, - "expiration", - response["Credentials"]["Expiration"].strftime("%Y-%m-%d %H:%M:%S"), - ) - with open(AWS_CREDS_PATH, "w") as configfile: - config.write(configfile) - - logger.info( - "Success! Your credentials will expire in %s seconds at: %s" - % (args.duration, response["Credentials"]["Expiration"]) - ) - - -def log_error_and_exit(message): - """Log an error message and exit with error""" - logger.error(message) - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/jcvi/utils/cbook.py b/jcvi/utils/cbook.py deleted file mode 100644 index 20c7c2c5..00000000 --- a/jcvi/utils/cbook.py +++ /dev/null @@ -1,465 +0,0 @@ -""" -Useful recipes from various internet sources (thanks) -mostly decorator patterns -""" - -import os.path as op -import re -import sys - -from collections import defaultdict -from typing import Optional - -from ..apps.base import logger - - -def inspect(item, maxchar=80): - """ - Inspect the attributes of an item. - """ - for i in dir(item): - try: - member = str(getattr(item, i)) - if maxchar and len(member) > maxchar: - member = member[:maxchar] + "..." - except: - member = "[ERROR]" - print("{}: {}".format(i, member), file=sys.stderr) - - -def timeit(func): - """ - - """ - import time - - def timed(*args, **kw): - ts = time.time() - result = func(*args, **kw) - te = time.time() - - msg = "{0}{1} {2:.2f}s".format(func.__name__, args, te - ts) - logger.debug(msg) - - return result - - return timed - - -def depends(func): - """ - Decorator to perform check on infile and outfile. 
When infile is not present, issue - warning, and when outfile is present, skip function calls. - """ - from jcvi.apps.base import need_update, listify - - infile = "infile" - outfile = "outfile" - - def wrapper(*args, **kwargs): - assert outfile in kwargs, "You need to specify `outfile=` on function call" - if infile in kwargs: - infilename = listify(kwargs[infile]) - for x in infilename: - assert op.exists(x), "The specified infile `{0}` does not exist".format( - x - ) - - outfilename = kwargs[outfile] - if need_update(infilename, outfilename, warn=True): - return func(*args, **kwargs) - - outfilename = listify(outfilename) - - for x in outfilename: - assert op.exists(x), "Something went wrong, `{0}` not found".format(x) - - return outfilename - - return wrapper - - -""" -Functions that make text formatting easier. -""" - - -class Registry(defaultdict): - def __init__(self, *args, **kwargs): - super().__init__(list, *args, **kwargs) - - def iter_tag(self, tag): - for key, ts in self.items(): - if tag in ts: - yield key - - def get_tag(self, tag): - return list(self.iter_tag(tag)) - - def count(self, tag): - return sum(1 for x in self.iter_tag(tag)) - - def update_from(self, filename): - from jcvi.formats.base import DictFile - - d = DictFile(filename) - for k, v in d.items(): - self[k].append(v) - - -class SummaryStats(object): - def __init__(self, a, dtype=None, title=None): - import numpy as np - - self.data = a = np.array(a, dtype=dtype) - self.min = a.min() - self.max = a.max() - self.size = a.size - self.mean = np.mean(a) - self.sd = np.std(a) - self.median = np.median(a) - self.sum = a.sum() - self.title = title - - a.sort() - self.firstq = a[self.size // 4] - self.thirdq = a[self.size * 3 // 4] - self.p1 = a[int(self.size * 0.025)] - self.p2 = a[int(self.size * 0.975)] - - if dtype == "int": - self.mean = int(self.mean) - self.sd = int(self.sd) - self.median = int(self.median) - - def __str__(self): - s = self.title + ": " if self.title else "" - s += 
"Min={} Max={} N={} Mean={:.2f} SD={:.2f} Median={} Sum={}".format( - self.min, self.max, self.size, self.mean, self.sd, self.median, self.sum - ) - return s - - def todict(self, quartile=False): - d = {"Min": self.min, "Max": self.max, "Mean": self.mean, "Median": self.median} - if quartile: - d.update({"1st Quartile": self.firstq, "3rd Quartile": self.thirdq}) - - return d - - def tofile(self, filename): - fw = open(filename, "w") - for x in self.data: - print(x, file=fw) - fw.close() - logger.debug( - "Array of size {0} written to file `{1}`.".format(self.size, filename) - ) - - -class AutoVivification(dict): - """ - Implementation of perl's autovivification feature. - - Thanks to - """ - - def __getitem__(self, item): - try: - return dict.__getitem__(self, item) - except KeyError: - value = self[item] = type(self)() - return value - - -def enumerate_reversed(sequence): - """ - Perform reverse enumeration, returning an iterator with decrementing - index/position values - - Source: http://stackoverflow.com/questions/529424/traverse-a-list-in-reverse-order-in-python - """ - for index in reversed(range(len(sequence))): - yield index, sequence[index] - - -def percentage(a, b, precision=1, mode: Optional[int] = 0): - """ - >>> percentage(100, 200) - '100 of 200 (50.0%)' - """ - _a, _b = a, b - pct = "{0:.{1}f}%".format(a * 100.0 / b, precision) - a, b = thousands(a), thousands(b) - if mode == 0: - return "{0} of {1} ({2})".format(a, b, pct) - elif mode == 1: - return "{0} ({1})".format(a, pct) - elif mode == 2: - return _a * 100.0 / _b - return pct - - -def thousands(x): - """ - >>> thousands(12345) - '12,345' - """ - import locale - - try: - locale.setlocale(locale.LC_ALL, "en_US.utf8") - except Exception: - locale.setlocale(locale.LC_ALL, "en_US.UTF-8") - finally: - s = "%d" % x - groups = [] - while s and s[-1].isdigit(): - groups.append(s[-3:]) - s = s[:-3] - return s + ",".join(reversed(groups)) - - -SUFFIXES = { - 1000: ["", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb", 
"Zb"], - 1024: ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB"], -} - - -def human_size(size, a_kilobyte_is_1024_bytes=False, precision=1, target=None): - """Convert a file size to human-readable form. - - Keyword arguments: - size -- file size in bytes - a_kilobyte_is_1024_bytes -- if True (default), use multiples of 1024 - if False, use multiples of 1000 - - Returns: string - Credit: - - >>> print(human_size(1000000000000, True)) - 931.3GiB - >>> print(human_size(1000000000000)) - 1.0Tb - >>> print(human_size(300)) - 300.0 - """ - if size < 0: - raise ValueError("number must be non-negative") - - multiple = 1024 if a_kilobyte_is_1024_bytes else 1000 - for suffix in SUFFIXES[multiple]: - - if target: - if suffix == target: - break - size /= float(multiple) - else: - if size >= multiple: - size /= float(multiple) - else: - break - - return "{0:.{1}f}{2}".format(size, precision, suffix) - - -def autoscale(bp: int, optimal: int = 6): - """ - Autoscale the basepair length to a more human readable number. - The optimal is the number of ticks we want to see on the axis. - - >>> autoscale(150000000) - 20000000 - >>> autoscale(97352632) - 10000000 - """ - slen = str(bp) - tlen = slen[0:2] if len(slen) > 1 else slen[0] - precision = len(slen) - 2 # how many zeros we need to pad? - bp_len_scaled = int(tlen) # scale bp_len to range (0, 100) - tick_diffs = [(x, abs(bp_len_scaled / x - optimal)) for x in [1, 2, 5, 10]] - best_stride, _ = min(tick_diffs, key=lambda x: x[1]) - - while precision > 0: - best_stride *= 10 - precision -= 1 - - return best_stride - - -def gene_name(st, exclude=("ev",), sep="."): - """ - Helper functions in the BLAST filtering to get rid alternative splicings. - This is ugly, but different annotation groups are inconsistent with respect - to how the alternative splicings are named. Mostly it can be done by removing - the suffix, except for ones in the exclude list. 
- """ - if any(st.startswith(x) for x in exclude): - sep = None - st = st.split("|")[0] - - if sep and sep in st: - name, suffix = st.rsplit(sep, 1) - else: - name, suffix = st, "" - - # We only want to remove suffix that are isoforms, longer suffix would - # suggest that it is part of the right gene name - if len(suffix) != 1: - name = st - - return name - - -def seqid_parse(seqid, sep=["-"], stdpf=True): - """ - This function tries to parse seqid (1st col in bed files) - return prefix, numeric id, and suffix, for example: - - >>> seqid_parse('chr1_random', stdpf=False) - ('chr', '1', '_random') - >>> seqid_parse('chr1_random', stdpf=True) - ('C', '1', '_random') - >>> seqid_parse('AmTr_v1.0_scaffold00001', ["-"], stdpf=False) - ('AmTr_v1.0_scaffold', '00001', '') - >>> seqid_parse('AmTr_v1.0_scaffold00001') - ('Sca', '00001', '') - >>> seqid_parse('PDK_30s1055861') - ('C', '1055861', '') - >>> seqid_parse('PDK_30s1055861', stdpf=False) - ('PDK_30s', '1055861', '') - >>> seqid_parse("AC235758.1", stdpf=False) - ('AC', '235758.1', '') - """ - seqid = seqid.split(";")[0] - if "mito" in seqid or "chloro" in seqid: - return seqid, "", "" - - numbers = re.findall(r"\d+\.*\d*", seqid) - - if not numbers: - return seqid, "", "" - - id = numbers[-1] - lastnumi = seqid.rfind(id) - suffixi = lastnumi + len(id) - suffix = seqid[suffixi:] - - if sep is None: - sep = [""] - elif type(sep) == str: - sep = [sep] - - prefix = seqid[:lastnumi] - if not stdpf: - sep = "|".join(sep) - atoms = re.split(sep, prefix) - if len(atoms) == 1: - prefix = atoms[0] - else: - prefix = atoms[-2] - prefix = prefix.replace("Chromosome", "Chr") - else: # use standard prefix - if re.findall("chr", prefix, re.I): - prefix = "Chr" - if re.findall("lg", prefix, re.I): - prefix = "LG" - elif re.findall("sca", prefix, re.I): - prefix = "Sca" - elif re.findall("supercontig", prefix, re.I): - prefix = "SCg" - elif re.findall("ctg|contig", prefix, re.I): - prefix = "Ctg" - elif re.findall("BAC", prefix, 
re.I): - prefix = "BAC" - else: - prefix = "C" - - return prefix, id, suffix - - -def fixChromName(name, orgn="medicago"): - """ - Convert quirky chromosome names encountered in different - release files, which are very project specific, into a more - general format. - - For example, in Medicago - Convert a seqid like - `Mt3.5.1_Chr1` to `chr1` - `Mt3.5_Chr3` to `chr3` - `chr01_pseudomolecule_IMGAG` to `chr1` - - Some examples from Maize - Convert a seqid like - `chromosome:AGPv2:2:1:237068873:1` to `2` - Special cases - `chromosome:AGPv2:mitochondrion:1:569630:1` to `Mt` - `chromosome:AGPv2:chloroplast:1:140384:1` to `Pt` - """ - mtr_pat1 = re.compile(r"Mt[0-9]+\.[0-9]+[.[0-9]+]*_([a-z]+[0-9]+)") - mtr_pat2 = re.compile(r"([A-z0-9]+)_[A-z]+_[A-z]+") - - zmays_pat = re.compile(r"[a-z]+:[A-z0-9]+:([A-z0-9]+):[0-9]+:[0-9]+:[0-9]+") - zmays_sub = {"mitochondrion": "Mt", "chloroplast": "Pt"} - if orgn == "medicago": - for mtr_pat in (mtr_pat1, mtr_pat2): - match = re.search(mtr_pat, name) - if match: - n = match.group(1) - n = n.replace("0", "") - name = re.sub(mtr_pat, n, name) - elif orgn == "maize": - match = re.search(zmays_pat, name) - if match: - n = match.group(1) - name = re.sub(zmays_pat, n, name) - if name in zmays_sub: - name = zmays_sub[name] - - return name - - -def fill(text, delimiter="", width=70): - """ - Wrap text with width per line - """ - texts = [] - for i in range(0, len(text), width): - t = delimiter.join(text[i : i + width]) - texts.append(t) - return "\n".join(texts) - - -def tile(lt, width=70, gap=1): - """ - Pretty print list of items. - """ - from more_itertools import grouper - - max_len = max(len(x) for x in lt) + gap - items_per_line = max(width // max_len, 1) - lt = [x.rjust(max_len) for x in lt] - g = list(grouper(lt, items_per_line, fillvalue="")) - - return "\n".join("".join(x) for x in g) - - -def uniqify(L): - """ - Uniqify a list, maintains order (the first occurrence will be kept). 
- """ - seen = set() - nL = [] - for a in L: - if a in seen: - continue - nL.append(a) - seen.add(a) - - return nL - - -if __name__ == "__main__": - import doctest - - doctest.testmod() diff --git a/jcvi/utils/console.py b/jcvi/utils/console.py deleted file mode 100644 index 77a52cf2..00000000 --- a/jcvi/utils/console.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- -# -# console.py -# utils -# -# Created by Haibao Tang on 01/09/21 -# Copyright © 2021 Haibao Tang. All rights reserved. -# - -""" -We create a singleton console instance at the module level or as an attribute -of your top-level object. -""" - -from rich.console import Console - -console = Console() -printf = console.print diff --git a/jcvi/utils/data/Airswing.ttf b/jcvi/utils/data/Airswing.ttf deleted file mode 100755 index bb7d3dc0eb7858cfa7fd766e7ac0be35e2a64f80..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16912 zcmeHud7K!jK>W;_^d9K*Wfmf`FianW^9RR8`OP1l0HZ==;1sUd`00 zTXm~$opaB*=X~$ET_lJo0AeFrdirT6+MfTemq_|Ct`47kT5;yJ?>XmABJoa~pMBY}|gylKW2n2$Ab$FyPuUx?$t1yJw<(0Ul`97F=+T zEIb3}3viy7II+h*WV;#XGD&{#yU!&z6r8><&&um<*2Qm z5{o|ZfB&5jvp*(;i2cnCl_G{e5ewJX^NAV`i;xI&(dalqX3(tk?>bRy^=odh~W zi}l~Y2A6&Yd2}-9d|ILZnikM0peN92`cYa)r-Lq{GeA$IGxcB5Npu$I zVj9t(rX{orbSbR{T}Es4U(#|~3wkoG*Po&lbT;TIv;p)~I!FHnokr(^o=zL}C+Q5@ z1bQZI20e?m=ugl}+6p>C=Yg)GZTjQ1nznEmd+D2D_ zZl||{?x1(*Kc)-lYS5i@js7Eg3tbDki{1%(A-zlgAzei82ECZx1G<~utN(!Z(EC6y zq3iTV=u&z==wA8&=wFq`jq|vy^}r-`Y!q`=sxWejj~^z5@DTx&!nh^i_R~K1yE$ z{TSV;-%Hoi0ni)hF3^wDLH!=Ok-hLfBpm)<#px>ll>R+R8(XT+iO-J>w(joda=y&KD(0k}N`WIpx>vz=#Ye7 z1^ofNrr$|f<7(kKz}9t`ptAy1VDc+RM2NcQ2!kLMub3r zD>VJH^sI=0{!T29`rfUsDGNC7fqml6mifOM6>=WdQr51z9bU*C+SZj z1^Ti`gT5j%`X}hmA`AK#k<)LYSH%p_*F-Dm>!MA+k^U;$L63A z?QREda@V=ssJ6R3_;Y(aHXB!>;P6 zsPq_>c;5RsKdE%uFdTlnv5#)l? 
z>r}j|;`Kd`$~ychb#-<-S96~7P z=lz88g zYd*i4MI~>zfg6u-!R@yy@w__QO>TF<9|&OgJM2DJG=N*sz5YPd7YNQqrBUVgI~>PX z;`!iWL%3H7del&( z8qzL6Wyq{_yrB}$YgZ|xC?Vc(pVJxi#KS0fLrO5*6bwhMnyPd;j<3Y?kq`3oVctUF zunW&OYr$~1F06#K=8zWs;NPpn^U<5V z@S9ZP`Pcz|KEj(8iMU*0B^eDzBEE=MizdR+`U6vyZs+lpcpe)gY3w3wA@(MmF#vp= z9PrWvq_x^Y>^S+SHyd!hgPg#TZs11+xUmj+(GM@A0w0Ee3pL=uC~#ms@LwZvUwqoT zl0bQBpt~$kT^?wz6)3J9=&ci|tq8PM21@G!I_m){>jfH{0~BV%5iSbvl<=1TuGxzS zr{h`z$ZG?a^WB2@mh9#$|HijP4sLCic^5E4PB4zB0MLGSWC%932_yLpwfh2an+PeIsb$ zJgWs9jZ4LG+~n_A0R1&zs(5!@A+uN4~Q#QpfMcM$ws4(lj$V*9N|t5bu2k;qmo15_@F zleXN2CS50O*|5KI>dM(_?_K2556vFwQj==7OHB`pJrhS>f8BQTUsny=TJdgGe@(tj zPGRm-pmmkBC>ARv?~HsVtvEzSdnR2U^3sgFoTv|Vw4>$8*Yo>DaeSa)ZDbuMc_`NM zP-GtZMQB#uYPDgX{xj%;_)5=c9jp7|-WaGNM$fl>G&-Vv`H`T(Y@77LEA5ipyz}ktcD+@EKyk)C43Y{b{CN|nW4V`q-`br9{REkQIrG<`suBAx}hk%$^ zE1{fdI-ck0+Q6A5K@$~fB`B~2BIN+T5m*;`yE|uxB&t)jWGO=(rYJpGLr`hlG_Aq$ z!z|Rj6IWcew1Mra0a7hwY2D&=!?@bp`a(s|8Y!2YxPHENUMAYPN=U0ZqnY!xjfd8F zilJd-;~?t{{de-HJRADxz!*!3&dy4K+FDb|NU*InmrW%t-Q=>i=34z=cvV$oifawB zNo94`irV>)l_gR+w#e!*lSgZHcg0f84G3rGZsPOp1BOk0p;n2J^P6E{RV9k~bI<|Y z5iDa~e^TBkuY^9cbZ%va$mJ^e<|vP`mS$6n0lx(6hf*CXluFGiRV{J2$dVSL@L|){ zIw@$bR?wWO(`lkWq4JA;Yboy6@Fl1c}n~c zS0**5U_>=w93}Om@_M-pe6XRC73p*(Lyb`hd%*8;!aJivIECc*2Yfz%ASI%cMtW_* zHRd|z8ksVP26HS1|DmBm%%Yu`VxOk zPW+|MQwplZPBq9QZlmc91hmEeVk7G)iZQp^_8I6XL#q&Hve`-wj?X4%(uw-%J}~ol zd|<683T10W$+9Sa)Bnka|Lcy1H8uLy_Q9dHw!s0}<#W}u)4NwlO+yiv7!$8o?P~qX zj#zrq*C|Cch`tU^T$X|?a}kRI@-vVnNGmHXA{45G75EFA98?37-rVVO2K+*{^kRg} zDU}q=;bbXArckXUAw!bm#!Hu)zt!UVy0J02xytCVr=>5YFw{vx2K&cCk)H)$Nu!Tb zsY+T&1emczEHsU?dJ(G{4B~hVUlwCkmlmT`EoZ4W;Vhb0!)178zla(G0DkT>)(Gyp zPq-q3+r>u=-z+DqJYSR=Kc~R95BZiev4yF-iU|2p)|*sbRO|C z$ygy0>Jrlh|YEM9Jfo$yW#r?J$U`?l0;)XT3Rpc9xEKociMG{gF z{>n3je~N%z@K@Isd>H6H40NiBhpA_+x=cgrRblqgNQ-b9U;w_We++sV+9p1H`4U7I z+ur1{+iWv~s-=%~taG)&S0f4g5b#-=7FS{-lc{9W@uqMv-Bj;$I#T|0Jqe%H`7m!_ zGVbx(5sJZH%T9)^$LQ@`I%xtgIm);!+AN}>C}5ch(7J4oYCBSm#kCVn3-Y1V_Q;ka z>jNu_rCgVD-Tj+w%HClO!0fOz`{E^0r`P9zY&K(abSYecXHCgx|g0vP6RLCI3Tn1&|H?ZN_+audVH 
zP_R@ChM3RIk?pC*!a5QQ@z$`z9Ukr@6KK&YkM;c42ln%-cX55wLF#*Ir;m+2Jgu%juM9 zAiYDPx71ZOlo5$KWWm}fukKh?ZUX;UONlgWO*NHPcdRZqq<3hVknInu%M)AuTEQ0- zK$zWGKIsg3FmYJ8dxV;vIXJ#aw=os*dkXXO$X~CM2r@ z((#`s#U?lG_gnCAh$u*0$o?B81o>d!|>atz_Dno-Vu$)B9Uk^8&O12>2kz@NT80Ju#S zKsdBUDMb1QA_Zm5_+u%tX`Trnu!M@Wu9u{UJP)(gm6fE>v`VDT&bdUN-{DmhuOLiO zWm)nH+zeYYrl-TE+$^&t>!gE}NUI$nni0eU&b%eeBox)`y-`d;STLu( z$1vpXUU9b-swR$2!>0~7(XSgh-sePDvFj~UQLn#DR zB?GgD?ppPy#S^v4<&1bs>evG&xNbNU^b~zsT~cuF0gy><1HR@g z9sAjExS|nO15}T}83)`+KWj0@Tv;Q;~>rBKz4?BdWtz;m)th?Eh7kU z7J+@}H{?Ka{Jp}Z^-GEve`wEQctfMFn!wLB(_&Q8j8TYC4Uy`o<|sQEW+zAO8)KA6 z!=uBe!|P9Ds9%&#uOE`HHiJ~Lq=w37kV-d{S4)C@I)ap(8h@o9s&_~FrZ`2MTD(W< zV%>Uy4T|75WnhCe<|n?U#&9sz81p3&r&3sgPg$1z=GXwRqP7c-iAb;6Z%K>CJPX16 z=h2}?aFWv(aX%s(b96yI$b6c}U`1g4kttyC``wEf7dT>zcT3%3{AI=k(wT-B1}uQx z8GYV{KJSo|h_xqI8V!8Y9FK*Nd`dP4{a$JgvXqk(#ev}}w4ClWgKq|MOdN^MFN!p5 z!Qqs<*uQW{Z4q5SIvfs zu|5PBt$*yfrSs=4op|=S)9nJt<~b2vI&a=GQ;z7wTji@DM-wr=iN`C=O%6myE$meU zMTFNf$#6!@=<3~zW!0wGCvHfIYh1~b47-sjfj)m-&LA(|3WW0?m0<(n%r2MU z9c>jH5!KpKjrB1#;6u*HCMh15JzZ%F+voJnu|+%D5$bH2bh39grKU{G1piShE6Ind zt~m5hZPPWEdNZ^%tEW3(E{Jr>S@sD8xN@`Tn%Nue$`4kNYt)w)BIF@e=9M;rGU3~rJ<9DQ`lO}cqk3>Zv;%!-Xg|s3){`fFo zb&8RujN6?l2xo8B?aCAno^i2|&T6l3T77q0K=nL1dbJQEZPDh{-xTcu)eSE@J%{=M z&JiEPLPCP-+UIKcr9vLRCdgyOKaW-UJXX&0Sl!NJ1v-ya;ygWr{m}NGvF4D+FE;Wdy#-6(X)H_Bp=~!7GV0n7kUMaI9FLesFOKav591gvn&oDc zn~S9~qkEWrxgYrvV?q@TIXV4`?!}l$XQvWEoOiv1Q>{SCLaZ%vsuiOfdDzNa&chxS z%O(@A-Ba1t@rT3|o?ctcyiU)%p2hugfJ)%48G8x5X2D+;cb-3=wc^1bg zuJL&SM?a3`$B}@~;ihQuAO@WiodDscPYvzULM(DAWd@R23>^lN7?0DEtxGR6k)xSw zHIU=9Wb3GjjYg5on@Y4wxzxn6~wAmfEbOF9CphJVz7c3TfqS$bU+YVwNK)`=~*um4 z2##Ngnu5y^T!!E>1eYPW48dgxE<G&zyzdjueKQon&KH0RmG`L7$C$O9bdu(lP zoU`?DbYQC^9D~+*J??9?u4SflU8CbcQ5`i#3LR@BM)ujR_R8xfHH$?mQ?oph#K>5a zm28&8Ra6Pt~TkT$%%!cJ=%M0;umUWhA*3HXy`aanRJJ>`Dt*= z7RG5S=3Ibt8#uQy(}~ zCN+rL`KtesQaPJunAwiWODHcFL_WuOGjG9B6TnC9Lp%r8hW9GA&G)Z!cI8S%o=Eak zJ(OP{j&i$US+0=7OP|8N+gjVYzAy%OQs45W{kaVL8OG9Aa1wF)W7|mO~87 
zA%^9uF)RmZa)@C$BZk@F8YRdiq3OAhtLhqiOt{elc-8}W)&qFf19;X0c-8}W)&qFf z19;X0c-B*eXFY&tJ!rS53eS1~&w31aW<%5S9>-K5TdsDgb^33uqS@`V8eQP^7G19TR z)RfwaOpFM_Jeta@ko<;q+!_*gPpDJ#Z%t&ook)!beFZ*QE-frA>I`gIVq|!bH(9iI z^Rn|#te!2ISh~Lbl(uCdw^Yt;P7f|02)k{DKd;Su*I;0dw2XbLv5KRq8J`S4pMWkC zRb3{a%LH_pfG!izWdgcPK$i*VG67vCpvy#6mkH=H0bM2xT_*4f4t0z_I0W)Ei9?)$ zff@fa7=z8k9mo0k>|}}&$vI9kVj_*{PmeYExz=(z(pQYMx%EpEGjA>}IxcO#QJhuc zrQ`V%*C(ay)GUEPak8Ow7P<=dU7I(**|hH>c+6n0#_=MUXXD^`1Uvr>&eIT(=Xczn zannfSFw!`TG!7$;!${*W(m0GX4kL}jNaHZlc-2VbFw!`TG+s5*IE*xI7%4Z|fu0m4oFNc9B)x+93y*!yKnMRii+th!M=;FVm{|YmXM8u}{Q$4rZ@i z;u{YO$)OEz!y-Nwi^pD&Hc$K{S*Q7mSiNsB=cw|m)Wmf>CPhtNJ#Fea2u(8&L4GfH zW&rQxoHtKhl4Ub#CifaC^DclJ@hCf1EB6x$erwE1Itw!&EnS#A_5$8Bl`l7BMZ1g< zs`0iSi7I|6PeeH0RwpFYxGPNTlr|P^4Pi|V5EjZDA3|O7H3+=y_=RL<#5wR{98?!| zk?b_H_fzck;m27fZVJORQoz74eAR~t7V6?&=||oX%j76A zV^NFqq}g%aDFnlr@JRrZ4(1n3G?@#|oXR&4jMRlrw36XoG5+B0#W4p1n8mwI-A(1g z!J2`)*nOq2v*EDb;`n_t^IZoH&F}-eVF(&Zb1!mM&o9ydHOPB+F`hx{l%cBhns3lZ zZ!4w9P%L(Gz({Wo_&N?}loF@FXL{0yrS4_(j1)JR753AO6!-k4&yDqC{V>uuX7V_d z>uxdai+_GySH*}tXXcrh13t^tF&1UOSdshCurX$c)q54xTb@0ZNBY_ZV>>*R)xJ+o zK(!|&T@!m#V$`nKR2l0C+;!FDHlQBjWo;h&!zM3b{3x5dhY>ydQhU+xXJPoWF#K5< z{wxfC7KT3y!=HuW&%*F$VfeFf)t`kiAj9xy;i^9i!=Hr>e>RmF01PVOhvDo$TQC&( z=(GH|Z<=b-sa(E{Ur^5KEo1Q^5-np@z=S;MWZr_oaS=_MLAoo8^<`r?lv9Hu-xZZs zwUo1cPEq1~cArz)C7msG#YM@uv=1^Ac@Qe5UHp?mRlv9Zbcf^nNww&9+ySY{PGOH|8TD;vV+i=h4tg@Zj#hq5!fv>rrhUE#H8_o8Km#i}Qlsc?3_>@*yW$-Dj zv&!I8`m9w3pVIwS8GOpkvA6;MxWF-jQo};)_SV zFM)3o@Leg*Mc!{0zG2*h;||=Dz~70uV*}dQW?UP^cbcQ9xftggaPL2Ak-uU1$8G-a z-1Cp$#NSrjhJn-zI@9>tq742QqCdLv?J3tx{c$~ihjILykCS%M=#J4{8!j5%m^kOs z#KH@9Ty(+aT^n|88QqoWF7(dby6eI{TX$?uBu>N!@!PiU7(I1#^TpdX>^lCfE? zF@y!4xG{eT1f zrH%|=@qhB2Pd{e5gZQ>m7B+l8eS%?^C*oV_#rQ^gDeQAOzKLFeZ=p}cH_)f!+vhX! 
h&GSlp>%0nIHm#wxw&iCmT`HY;x?}#Sg;J?Z{{zK84!8gS diff --git a/jcvi/utils/data/Collegia.ttf b/jcvi/utils/data/Collegia.ttf deleted file mode 100755 index 3245a69685dd09449c49a6c3c3c017a56627f6b7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 103940 zcmeFa2fSTXwdg(P?!EWk&OW{O^pH+M2?UZ*6G|Gr2Sq?px}ty-E7w91MHGP$QQ)c+ zFDfdaC<1z=*vnNcSI&O_vDV(%M{=*e?|b=vzxVy#+sT+~?zPvNW%N1bSnDXQl!{Q; zRB6S*2Oav#8P3;~zU~&KEGMj4cj(H%H$Q!|QjU2_>F_~^4vyS$@^3#z{dVr!bokN( zHf(X6vQsJN!AjX*I{uW+r_UTzo2yg@zdH^&{@k-mwzc|FrTYKC^|>vlZ#~7a@k5hJ z_5O`E&f0qN`CD#3`2!o38VD)9;lXVuoU+rF>sYCje@Lm+&TS`dKH+Ei3y$U5KJK5` z#tFyotqb^m0^e)fPC0w$lMhw88LNv4jGc7inWsk1jvS!$<*#w>n3GRCe)II!n`%m5 z`Xt|Tr)=JNy7emG;oN^Y_m@uHe9DQh)z1B_(pS-^{e7pOcGlTv)W3b3((iqq>mND& z%o9%^n*Q>OjQ@15f2R^$0sfnh^B)_W_eYhs(vMPKNx!hvI9_q2Zrm|*t@RUDaHQPI zB4_z? zrfNCg?wnEn@)q&}vvS3~?ZWC`=V#5i8#`#H*^f5vaua_>pB<5LNKujuFa z-aIn)n7&gvEg4m|c$C-jl=A6ctB`()_HR>BeW2PbV`e`6mX&HAeJUMP`{{S9oW2*_ z97$IF^Xe;%|6d!g=&RK*^u+Pu_o`8Sk1FUKb3Tmm zr0DAretVyaS&mQ{{T}+g3j7_Rvif}5jj9bC^UOc1XTaUb{PwV_TCB`(0vzW#ehb_N z)e`+-wZ!uK#_uha#vd(C<`Sd7g34Gfpv}h`ZwUUB(C+od9eNx00C4FMmfxtX<#V+A zXEk75$G9$KJOKDNK;O}r(cgrLegRDYEfV^lz>MlO==3Y|dh4UA%zXRwCE)0TjOP(` z2)MpjFJ}%1s1@MpOua&B@b@(5y!y_@pLMwLC-Cot=5d1_8n=g73A}3- zTuNQ&`Ce%7Zs2P3sJ^5s>On;4%Z(qZuQZ;~|H}M7Nt?6bV|>4l^RwY<)nj>&I@Ew` zp@H!5r<8}I@bi@ANOhq86=Pb>e3UxevIAVc(fGXO`R28kHNI=9sRG~QGIvXo`NRO3 zgUqoT9=uKkke{F>!aR2~N3M~hfXIM)t?@_THR@&KKJK-yr>!O)1Q!NA1V_SGts}HH zWK8&6WJ~zFd8Dl>G>(bP2;a7jLObbGWKiS)%K4XSt(PvU&c<(5k5TrjF6w=%yYU<3vxjm(^->P1zQ(WN$bQOU zH9$F{1{=RpqiTq9Obs`Fsm9d^<%AlgoK$0tU!cjQoL1wMbJRrR=W4E+q(puw=d0<) z&(s1nhjO8s3!g7i^C%aq`IP&q1&vqL619+Wsaiz2Of7D_qLv%w3bh~gm1;@jWwlBz zrQBaFYy4ELR?8{Zs1=k4sFjU>R|l$9ln1H(DGyew8$VHN)f&n})B%kjt3%a+lZOSwTE(s)sAREJU?q1I6zsSazrppH_9Qy#6>H-4m!Q5z_aRU0Wc zsUsTCtIg_2%Hxdkcy$zd=LB^$<%#MT$}Q^H#t+n1wTW_@+T3_fZCA%p?oh{5o}^A_ zd|#ccPNY0VZJ|6>ZEbu{ou;-?o^F(9sO^ns)tPDsa#xv?{bu#5S>J-Xz)v1lA z)p_bP%AM+T%JbD3jqj?rs52?ws?KVBM_r)KrhJ=GUZ~D#d|O?l&ZWFqok#h0wX^Y* 
zdWSln@)GqH%6F=_Hom3ar7obnRK2b7O?8>Nkn(bM5#<%?;>I`BmFn%3R~hBi>K%SoUsuK65W%70NeP<~9^ z*!ZgYxVnk*6YA#1SJWrf2PkhO+)ws1H+qN`0j9h`LjKl=9Q+7Rt}4 ze`!3d?ouD4{H*$T;~{mo`UK_Y)F&xFuWoI8S>2;Yl~}>Jjw?%70b&Hg=(@ zrTnV8kMe8k{>J_4QMHTmG4%lD<7#)~KJ|q9BIVcBgOvZKzSOu^J&CouSA9c0)cAt> zrh1t2Tj~+Yr|@d-QQtPo?_dq@QQuWxZG2umt-eP2jCz#vS@l@sbLxBQamw$jCmMIF z=hW9Jf1v)2@`vil#%J*pzd`vU^-anb)VCUUsTYm%CG`~bAFFRSKBIo3zC-!%>bs3k ztDmZ;DPLC4P`;v`ZQQ9|Ro|n0O?{v8XX?4er_|5Y4=8`3e%QD}{Zc(o`75LRwfa%x zcJ&+e0_AVjisD4WMx_Y_sNxY<2DF3WprThyv z^b_i@M){xWXVm|ue%|=FdPDt!az_1y-^^K!7*tS+a;NmJ4?S2(0&J~A3^wrDxO=i{_9LjF7L z7N^5ucN%xeZSC>62qYrZc48d+w9*@Jo2RAyHn@tLK!)~$L+%Bt|rezo=L8LH>qroJC54YRlkpbGJ6Y0tB6Eg6VF;Zki zkjO~EhsSN;gRwAd2b|UZz&7AaP+=5Kr`zdr%73@h>fz|71(^dE$dti{)5&?-pdW`p zJlGn%F$D18_X|Gw!C~OTZZL_f47!x4 z4Y$+dVI-(HW+3>W6{c*nx!sIgcpEljN+8GO=AYotOE=uYX{U=>(-AC?ZWk&*_|3qF;LezoC`ynIcQ`yoU*JP#$<0om z;KS>J_6|qb=W_)FA2LYp03RXH-NFZha|;s-KAaA}*XCnjG7Zp7VSM)BL-50h`8Yft zuiFb!41D-J9=Fdbcgb9YZ%ljyj5ES9v+%(Lz(?4?haHjddLgO4`frQ zh|KbFyMrx!2v_hi_=BdvM>OhIXfQ;=PmfNgSGd&!&D}0=EW`JDnTLmOB2l2pE1kiC z9uH_1{CYih19!p#HW!MLdqrAAOwk=qugB^3KsdWIK;iJAK)_Tq5b%UVie21Ai&UbZ zn=9c8{xNwW3ZsWvg8_ShfqCg$IyP94wgn4<4@P5t+)kg*@AWfgk2mPG2faQ|Kq$%_ zU>R-`e0V$|P8nj&1mt53fWAC#6_0xaA1;XHhs-V?ly&+Ty)YibxBKAhpcg!&hJ1F~ z^m}|>pBF~-2Eh#2^?9K`_!kyH^%%?uFS=cR@WIS{UYFYo`#D@8E(Ix0c*GSC1-%i$ zhoFMtb8FlN1-#rT!s?(8c+rL!yPct+JxG&29|LhSTjsz_gi}DKA=6TokKZ5g1q3O+ zkl!An8nk)6G6z$RL|6NK5l$Fn^UxCHvqMB5|w?WIcOpP8-b{R&+YZOFznul z-{*9Od>(kjor;8gF=0RtR>tr1VYrgu#_NYgz`DmtAEFacr5;x#;)sBGKOY1_3O*(R zP+-EtVALps!B8L+2n2nBXwVT3g!~bk+yxO0zLBf_{GT_Dsdx=K$Iq8Gm z?0%o!>yF2qG58@E48W@X0F3DY0m3Pe03J4QAw>6v!;xSFr1(SekTVtx2V-^`4+gnQ z_(t?uAduulkOml+ai_y>{uuD7d@%?S*~7i=dD1RTzAILthPK6pJC zc7mp8FvLH|P&NdW3m>F8c5UeDgn0R#LBA@MLkbP% zkND#;7m^miDtJRE6I$_yLQZES67eO%AwMz`j<~pu-a=tmG8|5Zf`Z>@$n9gwemDH% z@H^rVF~E!he&HWx7GW?UcR1+wBO`8iEXZ7azH%xN%KA|;0dPcvPM5z3ZUSLM2H_02 z>BATFJ3>K6z?(|BQs|aQBpgOe8BzNKI-GDo^W%%f;*kVM2}M&eS1J;ZBpqNU67dJa 
zfp7r!bRv9NP8ehh(i|Trk|jTe1FCG`!|jg-5^=#t)QRc~oA?L_KBCd6KN;qF8HfuU z$099!q(UJBA7Qs2c@=yh;w^l5g8rC^k7(Ez40|FVIW72z2YqfdPdS|o=UVuHT;QYR zKop?82y?(40Ux0zKGNU=agRnLk$^wKh>Q=4L^K=}2OB{HBj6(uO)}GICzy*zJ>FzE;9)|wTsm3`;t7Pn5hHfFgEc4Uh#?UC3wdHulwr^r2{}Xle9oOk zGUM?Wq8nvIq5y(^ARzujm`W%JQbWOHG8Iq9qidr#GQ6>7i64pLNqsN_aKyi$>znL>Oc2Nu^SuTmmKx zhY|^QC>~BH60t-q97!e$@mK_^q~qR5Gzb@jqi9QaCI$hSQ8b(mhmrSCDjtj`0*R8oD-F($coT7yVb~Rmx+1}H$x{ULsZ=5v z2`3T>rp`z70xpcp1>5j}E280SHkZn$Qt4Q_l<^i**<{g`NF-CqFnp81*@DZG$y$<+ z=w0S0AGbRRQ$^f~uo~!3Dl~dH6D{UFXsQf;OelpiNyNDZ1CYsN!o^fFnuKuBKCde?At0 zN5cKpVzN{45sQ1#4@s{l-0zM=xiT1Kq%nUw84V_)?nK-j4OJ`NN+96RWK!v9G?mU6 z^N)yLh=|pV2`cCn-I321GQ~_Lm&jIgzDlN$uDH|Okd7o%v2-jZb}60iOf#!!ILf#T zEPCB(m@4W?Mb*ey8hrR7`8YH{MsgUEa3+w9rxRS0_WAPpe5{g5$FYx@tS^#^6|>n? zHWiO&GqrRo4tDeDKr9h~C1VMXJLWDYcRXD0@O7Xea=C0a9)ks# zJrjut9-z1<3CkcOd}5_iIbUJSseHZYujNbG4tJItawP^g-U z1+vlTM0ZDaAdY90O#A)0Y|iJ8k9p#WR6c@zV5Gr9HW|qzJei~?8SU=&cLzejLLr|| zCUS*>$UjO!Fd(jo^q5RY5wBEg#g1aJoGtZ~16{>xq00lya7PvtrC?9Lzfc(9L?IbV z#?pBMi+->CnDpn8YR+^)dA-3zIn~(#!^g{5$ygzrN#!zJllS|}<#MvCm`@dxiDJ>8 z$R(?VVz!t~rHh50JUTHJt>i;VcqtW1WxT$mw8}NQ%k@%^w^%HdOQ~E@iPcOKq8Tkx3{l8P_K8FJ4d_2Bb~h+ z!~R;WR`1A`YlT{&Al9T-o33?qbmUUGRIw^eWP|={p<2iVYdN)gOYV7Pl~7fE^_bwU z7%!HC!9rlDQm&LxH|0Wqp^(YNbG`L!r8C)GjTNW6JLB=eYCc-eX4g!O)feTFj#4$k zMC;*5epN7EtaPQ)g>)fRigk6AGPOdWRtc0cbEYCw@npQGr>m=2=!qCu2?`UuDKu6!4f#_uKaQ9@OtE;=GyV%hsd?OmTyL(YLAK~(9r!-NB1m(w4 zq^l_V%A^noSa-9-ET0_TsObve0F09yEmQ<%fB2CgFmV-DVmOx=s0c!bflFtyxqJam ztw7FtCqwG(>mL{#8Xg%P8=sh*nw~Rv-uwj%7cJgz$8X->~tBBab@zm}56>R-d`+vv+^)^Y?t=-uv#~^}y~gKKP|CKlJb;|N51$e(ll6 z9)IHN|Mui@-#Gq+6Sr*L_RViSwSC98PdZtB=etjz^31c}`~Gu3IQ6t2o_@xeXFdO; zvtM}eoO92!{`e>VzVrOIy!C>&U3k$?Uw-A)i~shHOO(3wvdgcy^0lA+{1?Cc)vtf^ z+u!|O{o#+V|LLl$-~FCz-uu35ue<*J*`#;qGwJ1R z^ztaZe1BFi(Ne0EgDa$$={NNfuB4m2oM7~#8V&ZORpT#>-#32U_(kJYcG6YjutvRc zXJby|!;R}2r!_ieexlSHUjtUZG4e+Gjr2?ErL89MUt0Up5ihNJY2Hf*zj)V+_rCD- z3(vjqg#7ZtXJ5GCg)?3_?MJ`-VV1ALI>*WR)`zU3y*}E-g+TkmygfS5{ywWV)8fYF zxz{8N0u?vZsA6 
zd$ONkFZN9K41dmU;7{0dy^B5Y&$0t}H~WIWV8`!Q>euZ4-NdfzAK3Z(Jv)k@XMgr& zcKH6tUf(_J1%H%%^n2NZy^sCY``K+hoxRov*=hX}d#ew#v-${osW-Ee`Ze}ZA7#(< zG4>aMJie11@&C)0|CcZScjQY%LG`WPrQ8Q^_?*`7*|d+0_Dc5Z%-@RyKc2D)hjT`6Cj!oL(7B0VXBQ5M`w!qOiHTVV>+51+P z?zYws-mrGV-4`#--o0q)M*N}Dvb!HXc*ETfFU^)WZsaP*o{=)*w{1@~$LwUxjvjt@ znXMeU;qFD*yVb@kuaq_q-B2yxeeso7X0K!(&F{O^!*4pF)tgT&YMuZSv;iiT?a~(? zOk*6X<*b~jmaApPxp66dyZcrjx?ve3EpO}>ocE%8G#1*@OA?=ze^eggFhs6Zk?aD| zsIK0J$i*Cc(-}KiSF^7{acR=4W}kY9hNnr$s_kir-W;^$?4@fHH{_C?f~F!b2;HT~ zLrarK`XGAhgJh*yBoA2MXDu-zr#cwa4MGIK6{3Ns0QpYc`aE@xHjY+~?&oE4%Qnar z*#~IXv0&qFnsX?tbZuNYG&~wEhdV~Y)rI=JnaBS6S8M5SFDm!%GsA7ZvP9k$W*D zUevr71@2Y;qXo}i_K+gejFs{0=9ukInR5#>y7->)+hzQ0SufbwoRCXSxMV`RRN#3= z;e<#+s7_!KSOKgDwgTq?mjO2dw*mJ9j{*Lpnai_OUI8pe4-H4HRqI&isKZ*RkB?1^ zjwIqS$Gz5*t=65^=~91Rx!6BoIe2E9z9};tedCOQQn`PiR2&3Vf-C(!eSqa-WE3XU z%ez#PIavn{Tv@>tEM?-#3a+f+$_lQm;K~ZFtl-KDuB_n73a+f+$_lQm;L6G8^>6N}*8s%0x#f z!cl+k$ZBz5uuwVbXgvS)V4-B-%hC9qWr^iOU(aZN_^O)l(*X5ZF2X+00`Z2Xnc5JS%I;!xNch@&{+*&`TzWV6GTW3C8xV7-{&+|FznxmSBUtDty z!$S#lHXhP%B}czYAHEx>G-eFB6LCg@gmxupSAupWXjg)EC1_WIb|q+6f_5cnSAupW zXjg)Er55c<(5__AuEYdHZscfTLNSh}{D^{ysS?s4%`IX4(tLR2mA=@WV||F z?;KBzj!aIBk97`c{=x&n=t1HLOPA^?(k8kf7?#O1+^|cc$bL`~O{?>Aq<+E^P zo{ZZuHP5noH(JJ|X-3U8Xgi2X7q;}F|C?IU2HtHgyxYLL4ZPdHyA8bCz`G5++rYaG zyxYLL4ZPdHyA8bCOoHD>4)LP^xJp9R3~ZBuZ8ES;2DZt-HW}C^1KVU^n+$A|fo(F% zE86WDDlY>DrX7qq0n~vBU^#F&umw05xD>bnxD~h$coZ;nL^nkWvZiev)j0yog56p zD2&qk6k3Mk0%55HilGarv=qc_gXSXVSr9)>rglpo44XD#7#1AHCJZz9VQj)MHenc> zFpNzY#wHA76Na$~!`OsjY{IZP-)E`30+{nvYS^lzoj+?V}QAq(^z!b0& z*Z^z;b^@0JHvzW;yMV_5bG8XJPP;?H_DW|{dnPeBPKP$WkB*q%CP_|mc1}!2Cpwdk zsKM8xBjaNl{x*((Tt(!nuxkH8B2kz*Y5zjFxT3;#O}v?uUvnmTPCuvd^)g}rvEplLm8FjlR_STz`{2BXv< zb`8d=!B{mIs|I7$V5}O9RfDl=Fjfu5s)1@aXOC8zZO-;`jHZ!v;hzX}uA6kOL+5&n z&UNTqht75AT!+qe=v;@+b?97&&UNTqht75AT!+qeIv*M?SL>aQYDLWBWL*f2l{6{q zOe81JRxxM7I@VNIlM_*{=_DaRsB1ardP`>Y8Qz-I?(vi%zWc9JabQmsqg2XV8D(Atn8pp8jgZ 
z>5r9WK9ZOy4di0E`WnMd;kT&IKv%CibvMon-mht&#EvHJg5o}s=|Y+@R*|W43(Dw@hvU-f*%=6r&zK{w7^O0cI;Sp?a_T7|KSfm-uKm?ykd@}OTSAm!8Z@X z@|a>{2HYtgzE?2oo#iq4<%Hifz~Y~J5h`)ITQ!nlo9E=INb^@&YnZiQvj&-x&Q53N zq;ovkIhmYHIw#|=PA#w2mQNmeNb@|j$`p^4!AAWfED(_m~{$1$F*k0^jT>|0#@x6~zI>L-D}cz1dLA~YPE3j~)6^6ft?!QII{L~T z$=Jr)nC{J&`|}y3S2M8(bYADdaka_OvXt^P`9iqL4Jd}~q}fRnz<6S=9+`~SU%GfQ zvQ&(N`3vsQa?U9k=uXM zhgrVG{Jh{xaOFnowAF#&VPH`Mdgz{vb!#kquQRwMW?A;e%Lj#qYGam;{G8F6$e@L% ztNx&0vD}6Xmen6kk0eh+73L?5D~;x<2k^^%eE0EP*5Yx#%Yvl3hku$&*!G&X?bDb` z`-o|SBG{ma$;>V)DWD6O0#*VWfNj7|;Bw$5;C5ga@Hij_P&goMqB#wk)1WyGn$w^; z4Vu%SISrcApg9ei)1WyGn$w^;-9mF3G^Y(T+d=cBvD!-}oNOhMid%8U9ZAy@pB(F~ zYuC1O^_=y=pgPGEK59-7K>7h@m`AD!QPe3o5#xq6;dzprQ*Zx}c&9D!QPe z3nGYd=0SJEH4=KpK!+^El5p_o_(T-Tfph2_Zz_$TW?d97hrck^SDHF__3E`FQwLmk zYj^U*nD(x&7c0Z()VuoDEUp+7?jiGX(x1Y!qa$;zzSeJ zuoXBDxD2=vxDB`;cnoN&4ktV4WJF(?&- zQZXnMgHkal6@yYSgHnm+2xXGySVcLpR&C1280*$q^b)V%@ceHXT^a1Hx<|_0b5|YL zUmcvBSiRrYvEv5kdvfQKcZm#5kJ^s%tjZ5m&Q5PyvSzUVJqJ&(q6_p=1tNZ(SY3p` z&WCriG(&%5P(|-b(8k75EU#e{s7s(kjIW#*iIVH09HX2QzfA%r@>_BbP4?pSX0XTz zD#qbX=}vByP=xpUeZCJerv7bdKt;$C7c|1-3u@ zc4yL6zURDiKVn_qpD*@qJY8QtbC$L}!giYQ=>YooI_6nYzuL{hcHh~2q?!R&_RKdR zJQ$e8gLyfTH-o4NW4_`h8_)%hfhAFecj}j>~O+^GDiP2Aj_Oad)=@9o&q`Wg)j)O&KQ`lKwbk+w}^h_;E zqz4C5%Zj=y)_ZxdP+ULSf5C|zwR~u2ZnAHHe1b_m?+ojW?!|N0op`Prg<|1(<3H)| z;!~znr@mfdyw#Q$RmF>{nu6e_k_NhgXSp+E#MF(Lx)D=1 zV(Lar-H53hF?A!RZp74$n7R>DalA}_uET7tgVs7)t#!~^2d#C`S_iFl&{_wrbacA_Kw!H%+aCv|99b8U&w|0GDZWSwA{TlPljfzqfnrl-%%=1IDK2 z%-ghe!PEs7TO!*#l8zpmoU2z~u=%aYa|<6RP0!nW;(p6_Y}y(Z%Mxr^f-Ot1WeK({!ImZ1vIJX}V9OG0 zS%NJ~uw^s&{4AAMfIZIkq#4g>8Zi7oGbq(M{$nD#teEsp0>(Ab@eEG_i8#Zio zowDW9V&T}sk3IJA&AH;GTTZd8K5X5(!)E^Tv>iK6Lmw5*3(QXuW$(xEOk0Q!5Rzt1j;f_3t zD32n_qloe-qCAQyk0Q#Wi1H|+Jc=mKeDWxwJTqoJgGY`*Fe2krS|KMVro%8EMr>(9 ze`jjNrStkkUkoi>bMiTxwk?=?o3Yw=&+kt)kYze}{MqI+cO-B7zHmoAT!=ia2XrAU`&P_#r=?12ORlr7I zJ8(X51#mNP2k-#!1Rz#R!jr9)W^1dlw5e*ZA10ZzfleKXpxRJ{mp-FtJX|>rBvYdz|h{NIHQ#zQaO_uvil&;1* z;(i^U*58}Cg#@qulb<~As;ka}BcQ>U`l2OfaiacZ_geI;xjmx|hLL;AaXT=kUz#cD 
zm-G#1obkgm&X9XG(ze+W+|#caDU^+KUTJ3ZJ2~F3$L;Ycy@`Imw^=%U@??JRZ~R3c zgYII3Mqj;4buwU|xo+AAJ8-R?e%TaBVq;{#t?(2;FSX|vC&5Y$4Ov4&*3ghOG-M48 zSwlnC(2zAWWDN~jLqpcg&7u3KJPM#~#Awz~lk$b;X}TRWkBPX&W|^<7oWMSdvnG3v zK~r@FQC-9hP1O}dbp=sfL68|lbp=sfK~z@|)fGf_1yNl=R96tf22ou>cw4;Btm1(L ze5fq#mQB7a!L_WFODuvthFbh;PLsiYSzp1XF%F&!OmE%g&a8W_+kbE)9c(R#LP)slR= z(0NAg&=pf7eKVK-eI7uOw}4$^`5JG?*MaAE znf&(S&%fc|Wh3JUE*-4rGXve3ktq-XK|tFPN}g4RbJymR{jYxRoxQ`3hmowMH}!CR0*@`l6+Y-ss8T9D5Bul6jj z+8$`r@oDbN7`DM?@T%$Fd!I7n03mFA1j1;DA&x%(;?-nzY*1H0&g5uk@b2qR9ZPjy z<%`{w4D5IAl;wjnn|E9r8eY40H1M|35X&AQ6wOW1VoX5C=w7w0-{Jj)X*YXjzNQIvZnUU(q^{R zW|lUyw3(&NENy0KGfSIU+RV~smNv7rnWfDvZDwf`B(WF|?`gLP%Oqs~!tf&Z^6zJR zktt+JrWocZ{(UpNf&v$BvWUU#z+datgeXZpGPed3onwSZSnemANB!KuoD;uqvpss~ z_|(Er5(zpy6SJ8k{8$xYEsBH*!GZo<(DlY>T zWB*KooqPGu&2_wC8pNwrrfE=^2Kdr64ZKK{FI~VCuoBn+Yy)-zmjgEew*$L?$AQ*j zovi{l9P{^E1-7FXN3jb3e80eLdU>t=0tu-44aa^r=rC(I&wHd8Nv~GIt;M%4E$pmZ zUP?N9^ooHumaIBsa{gS)=;}rL|ITHNUzE^WF4(jqxl_V@$e{4~AK-HzGyNi77#7+O zNCdm>v9)7x;@!6P3ZyQfT~SKyD05R>+w$9vy@4Yx4V!olgXeGy&tZBGgXb`K4uj_~ zcn*W-FnA7w=P-B34yTq+$;h;f4?49Xj#s zk)eMOXONVduyKpT8zdD@T%guAfXqJ1IY~}RNDRUdBqKbA1hLg+NYZ3{h{5h3AAiTO zy{TAz+q-*8=~4aXWs%dZAF`fm{gOn%&2gT=I5DykyJ~MqwnCb+z7Nuo9L;cMO9xm) z!&+u(*k-DMp-|n#5oO9kR0m@fB8y`6GT!J|=id)jBBSGDHoZ78cK(S2iCE{Uws+Vp z^Ot7Kp?-UAA-~v=?br2d$aYa*MD!f)iUDb~l)_gt!{g$cwMJf&uIW-7Xy2YILgHaL zSQICKIxqn&2M!0e0Otai0yhA+0`~!r0!59OxTvInE?^2+ z32Xqi0Xu=qft!HafnC7kfULJW438jeY}cBJXC&rKkVcxvYvjzypZ#*DZ@=ELvvWgkiv`+Fp+sEpVNv~uNn6>* zm8@P_1Blr$DQPlNE0Ql1ZaGLku5Ag3L|%nwB@8e17?$IJW7gW zPWt6si-J;bQ*f_^K~6|YM&KSds66-H-~&xNb>>htABDLR2uGMQeY+I2x7_ zY>pEBW(QBdS}xlzvE7GQ@c%xl`leG}9!zFNUVLF9nHsPJ2U6+&FMW1C58y$5Q?Au6 z@aa_tNTj0OZ7bWjEs!P-+B?<%Krgj)jk&3ae!1Lc%PiF|?jK7oo5?RtnSFbRXRV=c zTou_ako-?`s_pFNu%InG3vJC#4J^5E#>^19$CL>VYShDW$is5TgW>bA9P+Rn@~|B8 zupIKR9P+Rn@~|B8upDY8<~&Q~6+nbl*uW2qgur?T9i&CgH$z73lu8WOp;xR~cx->Y zW8kz?r^9;6ZkadprFSly_Me-$MR!}zkXP;*cJ7DFEg^Gj?Ku5+{3$qYSvk2Th({R` zYmP{^v}iN3(`KN(9E`mj*EWlpJA;xzC=HWEPmTPm`k=Yn)Z!w1_EBq8ukpp+Vsg;= 
zDgRa9{cg46`(K}zDNgF2ElXynZ~0ucH1pKV`%M~(J-?cH`W0)`+5FR5lZ#@X%}b_y zwr2GYcnEfb4klV=^OIPvF=6lb_2uhR`o#sM>b#j@YjdxbR0PX1#*-B9B87~~uAY%W z4?!gZK@7N!x{Z2@x=CmZ4xRZqFWyLq^QMm2jlsh}8LCl)upMdJD_<@lteTj`s=AENFPCaTFX5L#TwplN-B`%r2;fAv; z+h){$_0GjVnwio1#d+Pe#TGq&%hUR7;lTuN3V+V>VKn!*7&-HjFwtzf2p+`pm7(UWpCZM#{Qcz$bVR8l}wTq8J_dfP+WYj<=n)a}K&?PnMy zI>kHx4bEl#*2Ir}f`EmUd=Gz zhX>Z^Urf$C{N1@@qw{7~tnCo~Wp9{0)8CV`5@az$&WdRD3?Mu4tw^HmeTvl(MJx$K z5`h)BN~}bFkIio9?}6htXFG^fghL@#HH1|azre7nA!rlAs)n#@A*^Z$s~W5F%4y-@1~i?*Ds z^+^lUA?>^Jtgmgo^2)8bJ5E_{WT||7ykpg6_ZavTeegBpPr_B-Gw~@LA^2=rWAXJ{ z5-5miX$PsBI5y%vDORQ_6H&5<$xEYInWk8ordXM#Sed3+nWk8ordXM#SecTR0z3db z0SKZ*z9rj8_%@18jloRjmbBy>wQ2VcR;dh$OU?aeUL0!Z%}Gh=5zdoUsga#vt%i;8 zS!eyHBifGfz2S^2m(JUno!dLoS^ZG9x;)i0re9c|e%q$w)-KoEXP!w6j?bAYH`he) zAaM!aCC|#AXTYl9OK?`DuN($%#rht{e24EYo6Sy3>3Ai2D1w0N29b|E` zueR4@uI4ClFG;|Zh|_#+c$knP9di#zO7cZFk z^Qw6^t+$?}^^QRJ+{!VX-S0W;k*!xQU%EB-$z`ddhf3v3?z!`-kBiPR*Q!q%HoT-A zcz4i5eDyN3`G3gEd3d=QH*`};1Kq$hunO1+YzNK1t(grV{N2vi_=o&$a6!|VFx)CK3JgX!Sw9JPrJH9+wFv6JXMOhBtDO#O zl@viE1LLQ|T01VW*={`j%0uV+BbSaQhO&B2-|(Bn;FQDf?x^46Cx2y+C}`dm|0n$( z_Psmwqoj$0X|X+ZX43NAGs2Pgv4`*|fC`ns{{QQzKH8>Gd_K!l01H4Xnk!=nlBj)C zH5uy24GEh`6}%rGl(-?G8xp!9Az9eKcHn&A3gBko4&VXc2|!5rW>>lbY7_icG5YJ= zBeeg(vojqzbIa9kI=yy$9Xn319jDih(`(1+wd3^KaeD1Iy>^^lJ5H}1r`L|t%j;at zXpbGIm-j;Bw@Ff#*``DuQl{Leko%_om83lR*9A-gD}fEbHee@kIdBtjJFp9Q9FXOo zk=SLJl06S05yso|1QID4@=(Y7r&p{VA1rs<-4iS3jecd>l7%Z)4fXde&ZgHac4sf{ ziX^)8fn;T%^Wf=E7SHaQ=q}`QJ%hdTwvWpn+QH8u9HBpfoCeie*@}|5sj2f#E7S5d z_tJ&4SJk5MBpFN;uDCs|7=*4iy$mybqppuw!K!Kne*cG~mVudp74hs6{gn0e(oT#>Sx z_VS>a^=>G7C@Qwft0la=be; zGIP(zCu)+Mr&)JGG4SP4i&}KrYi@gmL)!kLJk3W7hFLCGt)sDImT>W?KIpp7UFqL) zR^U9#2SdX%Tl5D;#CfDYA6I{a{-VThAc#0>d#yqCdSKw6JFIPA-`otfGa^|J@rayJ zlisB!550JGbD*8A;aV15blAF!uYQMX<)O)od>fWKFVTn0+-3P-vS;QBebiBrK@R8D zNkr0kfJ*<^82^#2@$ZG6kgP;PvsNN)d^M;1=9Qtueq@`RzXN3sl9i!s3r4M@iK1>U z3pct$s}2a+-*x*fms!Khmxb(?+;V53e8UGyS-noLh*hq>x|p2#^2}`po|&+InsJ8I zj{iy~%{ZZ{UZrDFP3gEbqvm+q%)HkW+oCIX+xQDnf?I8_mgei$u=Ufov0(bdTO5IP 
zhXm}zVb8s9y3#TId3{yA^7!LbV-29;1*NZNtU-3$nh5%*-n=|e-U>B|KQ7BhbIHhB zm7HT!T{x@=Mm!q%xsmP_eeJ&9?6$Q24EbKUnRQlqPGfW9PF=G6mPt%X)V&>lnY~VH zdB|etWhLT}*s$?LFcAu#loCfYUg-?e*JhjTp+sMz@37)V%j(VL?bbIgvu>Z2-!D%F zGe^U?a<8#j$A;Y`tII_4H`-fDA2b<`ZFPn}AB~J#`!8JiquWMnqoqY}{B)`${MXOl zH~6J7-i4B;qYYgvYehkb<|s&!?=8<&>VD4o2kF;oSXc=j(>FQ4Vd5!fjHo}+7w><_ zW;<-Vwph-$Zm%Bx#_r>c=L$4`=c;D$T0VuSr)fhL?uJsOEy-9g^A0pp#m<|)dfT>( z@89xyUDJ-4zv@e8-XhmDGWrjVw_@AHJk4I5xg;E?3FpU2chG-$W%Fj4<9-&WdeXv_ z3|twDW{F1gkk8HQ51wOjzO!F~9~S;j&7bu){V*;_r~aV%6sA1W(4w4}6!D%!=_KJ# zuHa0IX^b6w2b=H-pbks`%YnmzEx@_JrN9lqt-yW2qd+sh+|JwzgBZ!&GLsWB;96F& zh6hhw5*FIorOmixJDGGZ)i7I=dGI~OmRf0pJPOHvsbM#2*o_)?qlVq6VK-{njT&~N zhTW)PH)_gr^kOG6E$?-xVQ_c~(sVg$7!t{&5TsaDjX_K7nZFoj3}1rs`!Zp|Y9vgc za>r$nAY0OSALAs18jk`bh(RY?Szl~mPmF6dI5D+isDEiGzqEJZ>b|3vTThLra?xZy zny+{CA2fGmPq)jnFtsJNcGGwI!VmBU)m$vyfnK)omxA8VA3=vCR9Sx%-x~_Z&O?PM zwJ4rJLRy}HgY%|$!tc~ukrr_}Jx>ALYs4L%GMvv6&SyzHUdeqZAqFL!&l1jO3Fotf z^I5|AEa7~Xa6U^opCz2n63%A{=TkBi84WE*;YQJxrmxq2-excL+DwP~pJEdjo~CoA zi;;ukIVhfkRdTRO4pzy*Dmhps2dm^@l^m>+gH>{{O0MN%T*ULJ4tyZ%nAaz2JFM0-EdzZA_1C+GjxE2>X z7#R;E<6&exjEskoaoKHz?9Whn8L;fxQ*MUE{wuPGGUiPzh8#&yz;EKs58nJOy!pkW z18;ut<_B+n@a6|^e(>f8Z+`IR2XB7x<_B;5y)PI?s7tg-c9;J?4X(Mn{LgdYn!C%b zTsV1l$Q-Icik4s+S=_B;lDOlA_2Z+%;~PfCyLtz*$yBy~XlSl;zlFyfv)_vRj#^iB zWVqVdb#_r?uZrwlW5`}r-;Aab?FY1EO*Eks*=xz0#F<+1Ccm}htwz1&ZOeJt?UD76 z5xH<6MhTz}OaRM)!+|Zpxxl5s4Zy9yeZZqYlZx%PQb^D3+~@z(qz><7w2rSbSw;2M z(ZU9?qbvwzgT-KjC&x{>>l#@x)Z0CHe6dm~bn#}`_$W}72F(d*;miD$dFw8F%2>0viwV!GMqq$6D=7|Aj1h{IDrf&kl_R}oIr*X$Z!G~ zP9VbxWH^BgCy-&%H)J2SDvGU*XH(^MR-B`CeclVM_0zrc29KW@>m6EmNMH9S=Gfk9 zd2mzyq~wx?JC0bebZUG<<*GIENIW`D*4in{zwijbtDb?QrO$enHRwqUW@<-umNS<5rPx4mvt{O2qsio`j%w@X z?PDwbTVxrC7uxXa_v%M5;iGDqs=ke9HY%K+8m?J zG1?rX%`w^>qs=kH7)T7-)T_864uc1$(1}y%#3^**6gqJVoj8R~oI)o~p%bUjiBssr zDRkmgOD9fY*`^qZAiAcej8RH#RaQ5mhfRsxP5jvWRo)ZSx=te8^WPi^Y4c5&uv(XD zpJ@8L`1;Zw&o9!O`Q9O^nO^ybDQf0lm~C#(j38uD;0(>d8JdGLGzVvB4$jaVoS``i%P3E| z%~4t$<{PQp2HX!c9p*W>MRULtYg>_Dc@~nC3dy(>pQWib$3=JY!k?xkbsBF!5zjVi 
zygXxcM3;va1~RS#=6CtSUd9O}Py+46imIWs}dfvFS7sM8m8)fYxR#-H-)N{1cTHnZyQai%E zR}81Qu4!|@?0A^51Z+M?Ywm>57)EzyxgHB@`}wxcKK)SPv(u5#+&6wR5VrP)UoY1S{g+kdrI%c> ze*SXtcW`>tr+9xB&zIB+%Jv~T7cqvD`i8m}~Xf(Sgz-Z0iKO`vGsW#YPN&CEqJ zUPJ*t1$Qlc%dWh!KWRieBud%DR2!GG^9}aZ-ZBokM0Uqz5k&SKp_$HEXuN4~WNkSb z*48tv6M{vcaL}B2=kxam^z}W-Xy3bA$TaoBFnZx~!!N&4_Fmhut(GnjT`4+3j^>W0 zaI@WbtQV#FUq@&6Ld0GnjM)$Kv_r5G-86%-@|2|+j5RYNB?2WePABi(F*0I?S^HU+ z2Ayxe^ty9w(W9)fAxpyGj8~6{hCV&>%V6#9n++b(ydP7k4>QOAFY~8*WjK2V@y4%> zuJOetVbX2*yaN(_8btc);8OO@*eA2{y3LH36)S%Vv{^a5RT(lW@RWI-uXSCsayl^? zS4s}++(DB208rkmE%1a*FsHy2bHlk8~A_ZoFsok_i$dN=hF^&aXyg!6iF zCCtq?St$+u_cp#;Y<%}D8{aP{`e&VJF0R`9+)Z(5pQy?$Rr66D3*GIj$P9?(-0AziA%8-`8B$INNJ zxew(9*ODjM+PM~Ay=9+e|EZNx5aOEFDp>M(T6@e=Z`op5oS1f4JgP)<$ztPddxvT- z1KhN?Z3WhTuA(XG?M-Mi>aee37&1sQudO^f;_pLQ>?Y@cYZ zR@*0LYhqa_?5k%qWWYRwgKd8^Sk$5JaE8~pHTB%TV zoe|0ZN6R0ruphb@c`55#%pEX!i&BNbn)|$`z=BHE+E{@%#wE|wZcZT^0Gv6-Z+11Z1J2*u)e;#K6lZ?oXO5? zN9L&Uepl|S&SE0$ijN*Rzg);2&fg*9Z?}yfw5o%!gjPZPp@=0!EXXD%AZ5fg<<)#< ztStgZ$X<+k%FI-iv;WQU#w_GXp_cdD+DQ;+sUVLDl9yq;(+lHdhTrG&td0>tHgneRT5r$#di#p$ zjYyJ1ug$x{6xWS%f}Zj)Ow~#>S=y!jas4&*{9516ZtJiJ;!=1kG3;o6mmsr&w8xs zxwo~`UcvjeTtEv6U(9|nf}$~2TjCF}B@1){lfVjKJ+Kux54a4t5x5PwA9xIC`U64y z0ePc>3{o{$S}?T{RU37X`)fRt&WrzAwqe}y#6k6f;obX#UAe05Y>VZT0o(HFqj2sM zD=(W)7jf_VEa8EfZ_X9>o_tggekr)#J2$O8^QJ@9KGB}}?A)}uI$9;ZhpbiKf7&}v z^Us|MoTDF%4*cK;V-lCpEXb8*4SmJXPfe6HbDY{u?`0I4d1>t{+MPGsXs3Hi%7vLM zBQFzZ)nr4$OrR-uIwdVlvc1Mz2dh4N!JHjC&#?^+InT5Y4*O0!_0?DPgTtZ69?RB~ zJ&!#$=AT(g3p6b`RF^RRsCsi=T6=VRZK&-v-2d6Uv`HR^G4s-#N$s;oyLy6-otM1p zTx+<$KWsbilD9|umafc{fAnHHfAGPT*o!|h#+YoB^hJy@sOAff5f+0r{{fz7qe`r! ziK--Tt8QhOC3yw;+Wl;vQ_r$_M@H?gvv2j=UV16ep|1!IJpJutw|?U@1B72>1*-Qq z`hIhcSsOpI(ILj|pX8VsFRhg~iivK?MsD2DlhdWh2TtkE9F=~-cFK{t7p>ckdDj|? 
z^$C_o2zIQNhd#~scFHE0#MnhsN$#l#rRWDaUq&~i31Z~AR`eX$lD|O_GgJa{VXUO> zaWnHC3&P)ClhDTG#9SS(uscEOps_!Z&?nT6IxL!5upkrd?K!HpwCoEnSQzq^k2?OK zBM(?s?YACh?XNC7;K=U!@%00b?VRrUAMCvgoMcsX=w16f>s{4duYPyeqpSM;s;;W; z>7Ms64+cRH21byl0%Huy!x8jGh!_D)@DWAOTw^pBA#XK7Akmn_AS9ZMNr33h%OrB6 zUQME3VoaQx`(OK<)73LQjNVJ<=KH>17*^M=Q|Iin_gZVOwf5R;uRYnSU3qR5d|h&) zFZ5f+mvBapc$(Fm^jo26=A(*v43odEdeNXyw9}>>2yn(JkloANrNCiM==PJAcp3{kSI; zd3ndl8}>nJKc61TCOC~lUvzbx@twF=QQbtpffiYl4oZ^ZJIIFZmGExmYMFAzt&t|U z?y5if)bf8_{uD;_)-9vzFK-_|{z2o)@!Kc9roJxiD4cjq9ai4~F`AqAf(V9b0Mv;PVlV^W{Y63$kkE*D)iU1_ow|G z+IM7+qz*08JRSr=Vphm!Kpt`Vx?B^}Udob@MXw$vXmrP(;)yofE0L(31M^>dWe@ z>N;@AmLTNn-WZ?3!WTXNyIIu}B(<^qQkc3j^#%7q)(U66*saGg6Il?zZkcuVMU zxqv{DMRb#b1e{93ilFq8wbC_frQcX9{pMQfJ3&dUtd+K}m40!p^qgNp!sJtGQ6~XT zD!1zL$8pg&{3C0%>^f=lgl?*0W4D{&A^OuIRmGpAI96BW!Km+ z`{5;=m@y&l)8d@W_aj7aAgW6Fa(t+)&+M;s3i>*xk{OIPT1JWuo zB)Cqs+^jaTOcv0o^15nED_1w`%sbSml~7YOf^g$%OaGAu<71URJGq4dZ{TwJ6*QHwVWF%CXJ}gftL}Z%Tb1DP)z5Pn>B1FVs%?# z+Y0k)o0=@gw7UCDlZ4SJ4qFY!BjJQHY%3B`PCT7=!?qJO+z5B6ST=9lDJS9-q}FIO zoUl26JFdFjOjjgqS%ztLhgFzlc)OxB%*+p&1vAV(&akp9+sfER!r;t#&RfMo44bBM zoT#H*Hxlip2a%+TSS&v~arxbdILb7QFim$X%TbY3f<`(Dt8!;8hf_yrVi(0M`chsO6wxhj9&H|fJ%Z$41XOq!zBs?~xoDmv) zCpb*yP0Nj^!q6sMaEkeODwcE8Mz@h1s@X-AbbIq)rMqM#l>yfJ5-QT}>sF?%45iY_ zsoJhVHC7yq#wnY&V^;RW98G(g6EmuVYL z(Xs9DP{dFX8IyUVas%I&e8B<-eG(8fr`4a;?*25cUW$5eOBwv%S05J~f(YsL#I9L}WDeZ%=M zFCOf2`s_%rtLy~NyQY~kMpDql&PY=^E!c{hnS^DTap4TQ9&zkQBI9Ob^tZ=ObY;}% z4L9XFU=Yg7mR9VLvCx-QQhgS4fP6!Kx9TGt;M6g35 z*dY<@kO+221Utk(pW|r?Ujf8bCT>Zxg9Iiwn?={f4!nPE_wG61%la=R+7dnSB4fai zj1=2FKSp2=iVq?%IAp(^SZdwkeGu#1#+*s{frqjXY06nBU1j}GA9L2MvzOz!>(0OS z@+;?BN7OFk;BmufA2-yu!dIVv!Qz%zYdoCzTlFaUC?^zoPb4j>7=LW85W7tLlL`Ej ze&94+#XL;lpG@GNOyHkP;Gaz3pG@GNOyHkP;Gaz3pG@GNOyHlCz-cj@6aKyo+l#-m zsZ%ywYM-Y>pW4)EQ>RUxHg($6X;Y_7oi=sa)M--RHNgvC{m@&O6}o^lrlqeg3rjb&l4T&o|tG_i)4sGn(DTpt{t!7v3*~ ze&1WC-Q061WoN$3sdZX^ad0`zJMuskXaakIL%_AbQQ$4WeZYgjBft~DhIxvND`KjG zapg_bB~VDFL*2st>&6*#^0lxhRqQDidwNQxPZmn00-wfUv8PZh7UZ(`DR?>YE&lu& 
zF)TyS@k3(%dn+7bdW&a3W@N)_k-VKrqCb`B%pCj+>oW?7=U1i@Ix-jz73@fz;^+1F z7&=f6JdtAJ%lfKk@=F%PkU!-dp(Zb=t|-eqF6ndGbKc1hRtvJ3DHrQU{b=O(R^L@x z_?^M5x^KtW%lFL8$19cdZX0X0f9ch|3zc1a?yrA+;pOVbw`{+~eZaZJus-eHVw67D zRb($AbMO=YbmBQ<%J^02F~sh!pO*=T#MWrpP-PrfgK=DC99J2~RmO3Zaa?5_R~g4u z#&MN#TxA?r8OK$|ag}jg)#F%v(_{2CSQ#N=N4Vt&-^?EDg7>*4ILa{a-Fh)L!^b=y zu~1O&0h7coa*W04=YX9}l@N+YHfy;-4owOB=>&)Uk{8HO^1Q$$24o2bNC_`+2`_L7 zFK`Jja0xGP2`_L7FK`Jja0xGP2`_NT8zj$Acn*+BaMiC@<_o#Bn8I>0V6~CPDwWfV zaJh?Fov9W~ZP+%l>bc@SzjRAi?&`iN3M+};s~XRpGtgyyc4*g=!}S;UCym~#U)lco znZD}2{w>>nLLHjPP3&88FK%2@t*OlT&$PcWc6t3(L`sR*M*W-dQCjjrSx)m3@`(d4 zp;xE6bQHEA!7HK1Bt%5Q3N#)3>&@ljI$D)eAPeHDlToo6N5J)EH z;`5iX=u7zwd?j*kH`ltUVSwqYoOlXyWHzi9By=>A_>uB;rU^1J08jdyrwi@*#^ScF z$lJn+r7g|*tBM2vC0#2O%cY6jYinx1dy9M7*{f%5yTZN2`Sh;p%C-y6I&1H~i?;4q z78zSq_k_NUEa@YDA}`}tpF$$MJ&YtAhEBss!eJ!gFp_W>NjQuo97YljBMFC*gu_U} zVI-k{-ow)rz5)o5In_8ctbKLD$dc(<)>k`t3j6ykC8|a6AiH>EU8BHfCrYA|n@u#n zce29zosOOQOkaNsE;mbAvL@7En&!u9pJqFvJ}2O#gQd#!@W5oQdvc&QJFsQSI4hl) z7@Sb|=KJT$lYJPL)#TRr+?Lz>qOZ1w`>TTcKJ_-{M(>9O3+h~t$t0Vd$X5d_7@qXX zBE2MPMq44|L*&aiunZgk4g*Jk+kkt32Y`ox$AG5*u|fnNk}FE&3L!y4-8j^ZL)|#k zjYHiy)Qv;kIMj_p-8j^ZL)|#kjYD0D0z#NRP2nqm*pIRmL-sui)smDY{HnltdFZ7A3sM7bBhsVe$0dTljA<%G_>nFydgwzwK`R zd%7=OE~oqYQk6=Ig&o=RkvZ`%#$|YACdv4@MSbmYju8ATsFXE16(%1WdmF`0$eu0rJTL$c^KsgRnK|n#b8QM;;hfn z63;XPj%UEJ=;C?G^OVKax4O2@w_%CZKj&x7`2*u|A+l3R&Eiaj!u)bzAr-^0i zy=yBer@q}2f6+pv8qIW>&eHOWw!Ub*Vun`-UzM4fS#4f4G(25*?C!4}Joup5niwi+ z8|V0m=hW}1KSBhQLX!%g{1ok;@T9~9Qeq;I5)<4qfs~j)N=zUnCXf;nNQnug!~{}e z0x2!A z=ZBPb7`C&*h$Y117+wcCQ!j)`zLt$4t48c(*>{-TH`k>m%N+k9fB};@$d) zck3hGt&g|$5%1PVcV$Bn=_g@1nGJa*&qC+CTv_R^!hBV@sEUnKh54#5Ulr!7!hBVj zuL|>3VZJKNSB3e+PNZqiPWNY|G3U%7o3thb}TXR2IJ z^t?NB`Ma}|<+|jHl=B;U)#d7U=xbG#;BF8sv5~^%1*jni-#ks=X<%(@Z^9YJBI}0W z#SOse~Ad)fFiHjNX#s&C}lRovPqOPjjlm>@9;m z$vq1ks&Mx1!JeV&YX&deyHuZ4ua^@+W`;+IN`o%5CrY6w)n{O^Oz8Gt`ATE1ixDiF z$Kk#Rg=K zn}%e~j07AK<-Wd$yUbTv#;xWSjy92Izgs=fzOHXBW4wLsXb5%U;S+bNzczk9G#a{s 
zS#GyBfkh#DYoR0~(@H}Gd9d;nr5psdNI3|*k#Z19B=-kFEm9uTGi3-O#Hf~JKkdy?8W*0TV{s0?S0cow)?9 zygsZx{pPJ_8Lu(xW9R>p^Ll09Te&KKRNNuZW-PQ%?NIN-brXX+Vt~QY5LR8Y>)&xp?+UwjRYEl%xCLJ&oZFzMBmPIc}iR`gAbt zt=A*bq-o=xRvSmFjic4Z(Q4ypwQ;oCI9hEStu~HU8%L{+qt(XIYU60N@j$DMqt)2A zkC0*AJv6RSB0`0=wmsb2H_Q^&;yJCOTd2M65x8^R^V>ne&8YCQQ%2Hgja%5 z=cUETQE#B6#Hr;;3qLQ+TKFRwfC3j%@adg+z#5iTsT|(>)`j!$T%Dd>z2p44+P`z* zr8~yAG;4dWU1_x&XZLrd`wt%MPpA9m8m~U|_8&j=>V@fB+y~vG?#FR0wJut`>gt7y zE~;O3b^RiGTsZMvl1N4)J5PfHE|PK}>U2cjF|mWR zRaJvWYVb%69;v}2HF%^3kJR9i8az^iM{4j$4IZh%Bff3>G=;AKGXIj=JA1Bt*63JH z8-mR?*!~^cN~Cv>ZoW(rNhB(2wI5#T=Ux5qND`kVDf?b1DIyWsg_N@QQyJXP80{Fa|6E`+-Y<>w#N=yMgP_790ra6w*mJ64*(AX zj{#2sLI&~Xcrwuof%0D$oS>0*8QWfuq1%fct<4fk%KR0AC{Z z!|lRKfyY@6ndF#FEq=uBFuXSR^_jU7A>+rm>I)9HZq`<1PkD;tTP8}PG<&qAqIr;H zi$mmS4_Gqzs=9}>-OJ~cyL*O5A05ebFCQ#rdPb`1+1~uft}TNzs?sx9?UV;<+8zmL zmte2j4@rctcoG?5UqT*z4e)J66WSq?fo0$Ta2PlO+y>kOJODflJO(@k2<;+hH!(6K zNR4{wMx5?Q9v<1LFEf07Lb#{V3ec|r{R+^p0R0NkuK@iD(60df3ec|r{R+^p0R8-J zKu=Tn3Lv5&iEZj-Rg=}iz~~e7ZP?G&p<&1M=grMFkA}h#5^gPY6z7%bG6^p0)3jVO zJ6N_L1|%>-_@->=HaJpQK3Kko(!j_lC8Ja=3|40$Hq%bln9C3Cj;D~?^PP9+_gZg>n zmucoOnoMQ`7Bew9$x(9Y$wbgA;Wdx|l$xxT)e4R}`qE!;5?92>|8d;BDrwYyq5WXG zIOm?PzIZ`=PSQDA?BV$cXXi{R$pWUW#+umWwRP*r>kpug@+L7`OwEd(Ltyhpl~``< zbQ0?!$+%C_j3m}W66+y}^^nARNMb!Cu^y6G4@sp>2n(bhu}>p_mOk(niD zcxv$^qcgAwPn|Q49P`9ii2&#c{|T^&YtEH$vH~C4Ak>Fo)mVni>-;Na9V3OgfJWYS zxbmLZyOnWwd!R&MD3NlXAzTHz$>; z!{)K}9JR?#s0h7_0-_L$!^`Y(=>5V#Nr$0RrihH9{X zNhTFz;BJ(AMiG0GnQ&AO>;k_?jw*>9Ums&2*CnPvo(w$3!cP-io8T={_M&HGvrZ_M?R4fTJE@3|Ip81D62T z1GfTq1NQ?D0gnPt0%D5=eg)Cqa{fh{UJ8H7+%d@E;Q4)+1d|)5>N@o^utaMC!6%2S zoNH#sm)#9qkiPk5Q}UCjeN&eQFdi-v7<- z&H!+NBej)_!-lbB3qXXwN`(rnp`DFS&MqRb8n2->pk9oluAjFU{B4LJN zk;R$Qbnqbam_RbiR86A1WYu7^Vs~V3dDhQ=BRKEOW7gwb4;OzQvUZiqRBJN{;mu#f z=q-rL^dzD%VOd8a)$h@W>C2yVs5Lb0Q%hzq>j>wPE9D=|pT%Z%@hMAU=oy6C4m0z3 zXvJRx&-;fQ*L`1@Fv+}O(nIDm z6rKY-uVW}Qsm%)+?#rH(a6j-6@F?&k;31W; z8(ps~qt4TAF?Jd<>ilq%qW`J<&(=wExvCc^&{!N@D=8g1&{#AF?!N1u_65YEw5RU5 
z=k8B2>r2Y4Z^FDPp{DK1`1pAHOWw!ZD4QSi|5FD(_R)`h^vfqr3#08r2d}&Cx~`c1 zt3E*aIx*QoqK^J6dy!(HSBTHRiy9RRE3n<9Y|||{sz3ya#Hh)6d{Pz*P1L`XH=HSX zI;&~@8GzZsluqAwO3FUkv`ZaVJB<4oMJaIY&4I+AWCmAbg{)psg+mujob&Q?)$uDY zFCRTB{n%~vsKds)v{dp|=wz}ZE1MV?@R!W~z%Y|&G!uScrqQANu6XZYd9t3_86W5^ z&l<(jWNYj2=t`6NE>Pc4HyaI9M~wAMv2Uf03=0ewsG|DD{{39L!}y$fpZQ{jX+QV4 z`W|o5#{eUkhk?g{rvQ;e;$o21OHrdbDS>cK9;gCMU@ve8 zxE44Hyal)qco295cmnXXLJIjSNeMiz?va@7ZcSN<-In7`PwqH7&2(%4;Tk_TzGcg} zKr)j}Bs1#U-nWV6H)j)xTsFyKxC-5LVvp(%%~R*N*d-Av^xB;i$tgQBC6#0sd>e(M zBy9+u0=;t%|M9FONMajVEB|k9ZGL<)7IjjoWq%)!ImJG&B5Eb83INu-sd6 zR^!s)eEiq|M<~M4_t8Je${^X$}4%dI`J?3 zk$jN@p|{CQ$D^Uhz+lcMWtp$ZXRzua=nX7SDT@!L2ja*^{D3|i4h{3^qZMb63PogE zlzzwo5K@I#qP4C*5ZsHbuQj)`bA#lcgNi+h1?OwEcT5kqAF_I5)8Vlq=jjZkr@PdZ z{eNn{rn>Z=uIMhaKmYe~3hPg)Z2Bz8EkQrO!=FL=*%!K5`g!V^T%M>CN4czX%iL5} z2fTjDnX86X3}R7`Sxe%kgsBaIKKHhsyk6Ci`HfR;*rdGQ$pnl8|C!e~NC z`4rEN3`kjA)N#su20fMEC03%yJ!0#3PH5S%E8owW0Ub}~XVDYdQNrWG9BDq~KvrR# z&2k!f4HV<7#2e&OjJ8oaB^{_zuA*5b4_kgk^UR2x3^vITT&*%3B}pM7&MQ}69!(`Kj&!Rn#)|sA{e|`;4qiu$701j#BY9=o>updp>se107Gz~W%vZ*Siwmy+)PNSS54ae(4tO1K7w}%-Bfuws&jS9k z&B>=zdUZ;`?*Hx6DSbtLrb8+do|6AQXH^=1{LmFwfF7I=tg-De9)Rb^pr+?2ZdBWd4*jPQ-SR1qC`TTuys=P)=xuiOf-L5_# zy<^9g?W?Q(iG`O<)wb_m-nwOcurPVm(rn!0A<3>cluy`$L3X|Vd?HrR+4a)f>!?^(IzuW*8IFQ%_i&RjtCwrz{?> zZ+Y>?lNPt?O}IuVNe-^N@d2i=rgkD)WVvh2qW0g}nf?0#i(C*pFVJcj<^ERCQ!{bN z8GUE(wPH8lX2u&0Cvw{_C9Np!`Kwbiav6Uy@E+V3TrjFE9=)0|$V^z!Bg!;2z)s;9=k~;3+^9sCbnnaKz>_ zB^2W`SO*m48q5LYdO_R$tdHnX5#vfHC*S(%n3mJN5uxoy;tlmYNG!O#gj;w9u^-&# z_~)5t{8P-UOZJ^s&yl_799|+T$D6G8^si3N?=${vJX@&lT;6u=4Hq@eYAwi-=EvGk z7iJn;8oRVy8)uKeL)ghZoZ$QsF%7Y6JimM|c43i;QL4wx&+BtxELw2MM`3?Qy(#fOT87`8vQmejA*wyMVqbcbIiko7ytTyR* zPH&GQ1|88vyd!eT%Rw*mBoSVDS6lA>iFoRvsreT*CJ5wb&Qxh!G(UN3DmPv#SBj(g z?q|>6b)fg7%U$UK4Ebv+`s4Q!tKlP7(n@#rOeAD<6xv0c~91p!!41QSye4H=d zUP4n`M4D~HrjUKI^4afy%2cj|UKl;;u1T%7p^D5Y*Vbj%3y~p4qP#_t>HFDSsdarq zu0mPW6_dU)egiKhvv?4^bP#&O8=5esw;(@ouKJ5}kAK}<+S-RFSM80yYIvqJJ3c=* 
zRGK;b^4?^(@dxc25|!^wI*ZA2`wO*L`w&zmz<{`hMI5HUN+45Gd=F*-dCEbowannx*3Bsg`|9PM zkz?Kre-wFAr9nOVZSspZr#}^=om~zR^F5%&^7@rUSqc!ZfP7Z?tT4geh*x{v9_|Jk zjU+nRt)Gll!;B>p#@XgizGHvV&iv56eH)Pad1JA^JJZ+L^R;gpx8MrL*EOvs$Vm7b z7!9n;#TXzu@1mHe5=)dhiMUq>=qH zj`6|JBG{6tP-5NDm4WmQg8|Wm2|hi|=N%L)t5M#Q%*bijBEOGA#X&x2xPJ!TZg%eA z>Jq<7ZCe8#*Q0#>zWaHmJ<_Hk0zn*x^W5E;s7;0DHB}{4Z2fsBK|F=TSOmPK_pu)H zYN*12GdOIkx|dDAZCvL}K*kD$*Z=<^8rJc2%t zpwA=d^9cGpfEb-=^d#SbGmwWzQa|w}aIYIW|>#25Ab5P0?JQKiafz>FfV!7Rz7ev9{9yX^^ZT$fsvxh>8vQ z@yT*k=Wmfq!envZ9V*jVZ$mrnejY(tJXJEP*{!Dt!abs6FFA2F`BL8n0^LwqMpVb~ zEz(Y~>;(BuEg8uqO*M7m)FVgKLA9^_Si2y9g$EW@nni%OQpu_7SBvU%D*gJ`bI)?< zRph*w;SSl^CKkNt4Kc>T)IKWC8*FjCv+Wmu@+Z%I*SpTue_s_8_(~IH9Ux>_=8e=- zKpak}9iFFQGvm^|W!1nA+`RmuqAfb^lT;|DNBIO@@?zj4{N9Nv^Y_wsY>JK|@U4zv znJ$wsqvPn65#gP{E%vEsDbZLW6GhgQ`4r7DhUS^T=$1^~dZ{u9FUPQ3izaqVHfPpz zUnFAsNso{X9Xx{VVUjVX>$7ws`iJ3dvnZm&kvyE?1D`3_2cS zYIoHu-<6&}|NMILdw;o(h98865=$|z_INRMV&YBEK9hF-e__2Y(CRW)1h-}M2;l`f zxwzO?rjDKrY($@Y9eNgod>lQnmI}eozva_YmjA?d5{(}$3wrbudrT~h8g!KXbYelA zOviN#h3Yh9OiXNDot)gdbuyXFB~sb;U!Fon;|t!;ldEsdl3+fYEPwC6Z6G6Kx^QAn z-9zm6m>QDp?E17j@o#U!$;Tt~PYika^k!0jtzOtg-dm`EH6qO4?coID1|)08`gO%Q3ZaYLkuOrIk) z;@P>zo35Db$yTrbiHVViaIo~&)xVt8{TlP9%p1&4ny)dw)_sX~-|Z zG+1*D)6a%it7p9PANX#b8eH;H>sPJYt)AOIW4+O|K5|N=iP~A~3#)Sbcec;ZoTL6} zy8Y=tURkWKw)dSsYTCugrL7~QTbjHS-C8>F|EPaNx61jIwNPEXTzJQ`=!gVI4rw3O zFt(heSHfozq0kGA1Ixex;4p9mxDB`mcmQ}9cno+7kU*whKE3#BIjL3FI-|(4N!}$3 z&7MY|_gSv7<{tLfusT@pAo+1N1b9;;S*=QYt5s=MtI}RWIVBs8ViN+e1ndVc0j>vb z1?~p!2Oa_*1)cErdfSw)ZB{-dwF?nk!nxbBCPdgYFuuC7xu_r>3O^Y32w z*0)|a@bC}sJ=@s-^KbufWAwmXAOFm$`Wd>Tw4wXILU%c{Yf;U}!nn7$qlzf^sXhYL zM*^ykK*15HJ_6N8p!x_@AA#y4P<;fdk3jVis6GPKN1(bKZ7+MJMl_RqBU$WVS^1V_ zXm51NQc_x5MP6o^1?~{x(_G_#4G|O@#+~l~);d&XePVTsOkfpAR+oB2UBcTv(60yj z^{~3sgCD1d)ukR*mwH%T>S1-Mht;JXR+oBcI86Txh35c?)DcT+{21Dv&spdPmDq(Q z2lk3ztlBW;J70x_4TkDzjVtv!=N?O~?)ae}-oC1=yKNz1MvN;GFaCyaWEn3T?Jd+s z+mF3!-`NND-k@HU8}D0afBeJE$^q3rEgidAC%RncwH#tU*84mxivQB0z2d(RTuW8~ 
z8$5e^Sf{uW%%3+adw}py)F0nsrws9S$=)w-7n@10>C8J?uuC+*ZyR;|`0Kr+TH^X+ zU(+?oWK-xWLw(Gn>lkz$gRW!HRdzLvLDwXDidUdZrV24Rvxm}C$p8H7m&VUj_ZWDq79 zgo#XbG6<85=KmaT5I-1`Db%8Q=B#dh8klC|_ee=PIon*?H#fa!WN^>q_JdQG?KN-6 z^$cVRgPFmqPFH&TwWHJLt?Zu|4@bB6T$?@r%0HUwdMpx84P<*q#r=kZ?>~9t|9`yk z`xn1>C2~gIc=Pmoj{o@&#vAuzxfjVT@t}7$uq^%Aj8=)o70Xd%wYc@9ET^c2;cD*` zwOOtN>(F{UIt-0GPz9R6Uf>XLEpQZg3veIsAn*wA1mH(RjuDIJjYmYU9~mHv#Ny!; z6HMMN^B=jUb5H1q-E{{>b!13KWcVi7djDM)>&zlU#6!1vf!|@|o&c2rQTJURR>R0px!KY$igvzd*yqbvhe)QHM?X=rt(wk_JwZn)pU+C3xe{* zWF_Pq9z4}oBOJ5p>9tk#+A4Z&71_0lURy=4t)kaf(QB*dwN>=mDtc`dy|#*8TMhKu zDtc{I_r4cnepT~ih4*=jgQE4xaVWb7Yv?se=c4sm4gRVz{%Yv88hWjUUaO(kYUs5Z zdaZ_DtD)Cw=(QSpO_IV0w)#U;TCY7{V(IUb!E{a6spKgRrq`Z~aabcn((l@j?Ly7kpma@PN4E_X3B2Yk{M{TY&q32Z2X` zCjg)S=Hb72O##lQ_tyjT+CA@*NujS&_mxYkd28?OEu32s}0+m_(ACAe)V!H{Y$*%1#W(jg|wXStd@l|FSJ#Zl6O_20)zL;^k{}5 zHV>0ZqFDJZLUe}D9czNq-!9)_)D2Eif7|{^PSGL^wcaTzOG7Ijo36m7E3oMbY`OxQ zuE3@%u;~hHx&oW7z@{s(=?ZMR0-O3<>z<+T93bkVhk+nl>!i1fenzlK-auYX5tvm3 zrli;0DH8#Sgsal)DtNCVFslg6Dgv{Lz^o!Ls|d_00<+2pts*cbNvB|`Pn!vvP1oj! z>zuYfsJ#3Q&yl-2JIY)v9* zP2^y7?TW09njl<97zDdiv|Q`V6FvW}w|2f+VJ;GzARc$}9%%B{{1WT08~nHa!6E)> z$CBoQHCS?X{=GdqQ(K5eoMdt_@9sW#zH!Myb9{29ucxx(bnwQsO<_#4qB7$JQpzN22F=kdLq+d7Jb1V@8_M45RS0=pb6sbk*WWARD!@ZFo| z3w8<4*27=?Cj;CiT25xOI{s^I-tA`;=nTJ3jzvlTAjyI}zpv|!F8t)oVQWAO*aut; zTnD@kxC?kM@Dbn>z-Iy9ed02@nEYzOle}9uElYDYE1Rx2rOxO?E0Q>MEvAwsaGizi zGx(e@_>1tx%ag0K*EAO==PtZpYW!!nSZ^@&{JA}U<=_t$c5i>>OSkP=YFtu!+d1cK zphK=hhYqe!rOA5S|0mw-)!)4M#arI+Ja}hr+J4~q@h_}>rY)7oYPjurk-@mSR3f6_D~ zrzG=Y3)lx-3|t4i4!8?=FYpoI6ToKypZ}xqe^m2-9P3lIDtU|lnBI+%0K zbC;K4UCe-tVmlF9h~^1@|^=y&Q}+P+Kkl+D^? 
zZ=jt&`5)fSOAej4@=Cv*+P!vaJ5T&~+WAGjr@T9KzW9?nyEY{zLvkE;Rz19OR`9ka zmo_8^>iDt*Z+c()`b0~=;*IQI*D00Oyf~+{%RKQk?fQm)o>f^@#7{4$$DH14BWIPJ zK6@j(IS5(dM{$@)n8Pw~05}XB0hp_ZnfQW_vXUvO7bfGaSqB0`?EOIyDJGGWC3$qP z$WY|F7mNYJQtl*_-f$L0$4L|TWKQ+PoNS<VBO^U`p-w_La##(P^4pJqA!7!6LOWwMVqd5Dk{qK7)S$gdcAT=B=p& z=n=seprA^*LS5qLlJYoZSyPvLJYRyWy9V`(Nv1b&o#ai}a18snNa68yVtK=?&Ch?O z^{AH{>S()_d^n|^B61-I#)!(C=eubiR5-tC+pwVrFLE}7~NRG8{Z zN`}(Rf%B(Ety?)aR2?Xm&09?KhUre0hO5-R)x-Df=_%~H>&D_gcTJjIGpfENU4eu; zH?z9SmrKLyO`feH7Pny8i%ifH@J-tNA?xL0*KqoY46`h=u`DFaG8@Y>8_O~q%Q73w zG8@Y>8_O~q%Q73wG8@Y>8XwK6Vz5>XaW}MoJCrNJKSAIIl?K#aiZ;jpJ`HS%UPg=jgc#8i3T0hGSWXox3Ez0|u z?@w!)hjfQZGOoqxC3A6cX2p(qe%yjL3SV%BV$3FL^v~RS!7(=8yj0?)D<@eNe&kYT zTz4!B&#>F9FM7Jc*KC;}K=ah+o`a5ygc`=w`^7zp(FF5}-lFgywJRttVCa@Pc1Y8Ro1>OQ6 z+2kPIlb1@7iz2=|OQk6q=xuloCR=kf)e9<~^8)ba@cqttGnEL1USJ$p1`Ys+fg`|e zz&*eNz{9{}z*B(4LdgcYPB4}qtlD9u?{7_ctF{$y>QrIsR0*a|6@+|+4po>sRhT+e zm^xLMI#rlDRhT+em^xLMI#n=|D@>g#x&(Z_>QrYjNX=Dpn1*WOMG!>Yp0@GArnhH!)foUo*O$DZ@z%;^j;OQ9(&jCKuoV?6* zCT2M`2iQwb$1I&Sr9XWhCSl%i;(O{|ZEuzoUP;kP5<4$iX)O-+n5Uh@PF%lZO}2#w zJJX~*$e`}yUI_^n|4ra4lixBfF5tbuM}SWNp9OqsWT1vbW=TUW9YESK040X9 z?hsI^Z9jHLHN0(x<%aX;*nqD+uR5en{zPhQplsb@7(YDY*?>3hUA_!+KfnL3<=$Zo z{wbqtru_#iV(@$TzhC3OqNbT*`uHEEGXeeu=Ye-#aA_U`xKHR2SPtS?4EzEAJ9H^g zm+Z%s#kdY#swdg+{|9vOlNd8++ibd&e_*=g3sB~pSZr6HN*UWSUk7c(X8Yo4X;a9j zQZgO$>2t64zl_TGka@Hmpeys=0QX67D1qz}8!Gvq10R;?l#~7Tj0^4+SA)#vJlsJmVwJ_{cLp@{Er>@SNO?8cM?dDrk;%^=Zm9=xhGl8HYsPm+mw-?7Vhs5_Gk>&Mc z-l?18+_gw~k?|x67`?k>H)khvyf3girp~lbXnLIj>)1ZNjH}BX_4R)0Q(a3Q3vJ#( zqYao)!br~GQARhNiP&>N2)X|`a2l$g#+LcZGcka{4!sYmtBpU^`}Ka`8;7E+Wc+wC zQCl7_I5gqkGcTjwi}?fQWvo82$ zg}D_Ab1V03#lqZ*g}D_Ab1N3+RxHe|SeRR}Ft=i1ZpFgfsx3_MRq4#XT3D{%0ush# z3>JbmE8brcKdxU)-snp*{bVSzj-hv5lfb83b86+B&SbzLa5vp~!P#eDaHse8hO@S9 zJL`rU&f32HtT&7(>mt*<>cC5z)o}dck<@S(=lS#&%%ex{zWc~g@2~OhYc9Fzro-1< zbC|zZx3BHa&!qZGsxgy3f7`yLn|gX~4!3_RIaI1v@@yA2PvWbFx}(EY6+gP`|9#w~ 
zIlBU9SK#aloLzylD{yuN&aS}O6*#*BXIJ3t3Y=YmvwdUyX$oHfM7@pqoFUZ7^JJ;UTW)szw@avym~V>DEqT;rNO1(_!&;{|?Qd9ON-puNK$oLR>F8((3 z9p$K`DyRWfQLs;ktE>TdO*`jq;z`g<~fT1LhgG0Mi2vC}xmIM2Ai zIAmN61wLyWGoCg6%J^I3TXe-WBW9O5&M_%_%$Jx)%zMoLWvya#S$}VR*Us4ud$)bH{RaCT_RrcMwg15Wvi%+B zu=7fHuY0|Fqx*XIe)rehXWegy^WkQATln(uP2v9%enlB&=t}P6!Isc;k zlyBu*{9y@)f;{i!vm9KVb@|Ie4pNp}tC`hy>jxo<~&enx^ry7E5kD-SK88=D>Q4| z*5HyepD%2K{5)HAhgDLrPv%Z8Y=sW`U$i^e<04s8w=2k8`?@O{hY`)kRIhLbi}~XI06ma!0jAwa}zm z%X7f6Zf=&ZdFarD_E~p9a6ql)Mj76K5kNT@$?_gvUkCqLaLG-o#tAS&E*>uDz!z*I z9cPam^r4lC2PiDIWKP~}ErP5TxCRZBNCg3oG^Ys>os_{Jvf_a|UI@u)tNdFi$h+Vu z7W(S23Cz@-2VlY$l`4?K;!mMTTN^a1;PBER5L7GE=!Q^=?n@pG=tBG1kS%DhLQmMI z32m!9KwKYA1ZXIXFWdQAFulCKyr6Wqg~pH(oH<~dB(`M3TA=Zeu2rqm?!rQoyzDLN zqFwSPm1FH5iSi&hvaqRqMi*G6t!%45ApUdEUqg)72Uy zfx#i9u?NkmD#$`Ijlv&BVT0JEYv z8O(fSA*Vbdbap`kZ-OJ}mPVbF=X9-A(&WJBR8w$3B?At5)y(H0Vr{O>O9bIis8%X3 z@}?Rr!cP2bjkY1^5G4{_2;-u%o4k3Oq1)oL6A?OhZNw6WWK}+U% zjmHQeI^SsJK|NBgmRB;&^X0|z;`{<*-s5dF4?SLF$fAGpYEIAv0R_EODMBO3F|*P^ z+P;v{tSbY7HoBmFak~K~;mmK}#AFawk zG}se^nTC@vZMfL!acwTyj$xXvVL7fc z9XskoY$x37b}QSB^$^UGQsHPMsgky`Ol24X`hVUMjuhOeVaxlJ;nD(Smn4_W^zvFg9C1{o9sxI}2iba)?s?aX53D87SIaw98|sDMbcJ2oKR-oa(1jJE56v>&VfRz zbJd?SPf>Qpa>>79SauF9Ii};7NfkF7t6hrNV8M22rezeOHoPN`fB`6FwO8q*AWCzi z6*bLp-muiOX40~pF+r0sQp9yYNy@U!@Dw~?nsIt#gUWW(q}>+W8|E-sc*1cwq~v2f z1RsRqrfymU>JwI%;iSxrVWgb)Ibp_(#jv2mR@5>eFEx&ssR*@#Abz1ev}&izv?yvY z8fYn~F|9<*qFSe5hj$sS3B_I70T07#4!uYShiKks#6d!Ml%~RODw2jDl;be29HR=} z;0jt9Q;wZ7Y>O#g7(BR!?TqjvmAHxl1I=B?Dm6-LySZeTQk}~+upF*XQ3Qj|w(WRC zeUeU@@jlxeaX~PM%-O>1AjmMCsOhG%6UkmGR*Xi=a1(jgaNV#Q$tve!9*cmzL@$HJ zNy&f<=k4yOvNBo2)cvZ8nl|lHW;|_Gquj=bg`|dqBuRr8Ml|iEr&%jv(giRSapIO0H&vHm7urub zkz~w74p2W2M&b114s`kgiMmU^y)8S;l?LaoujWFEz#>-_pI^FBU6FtRPH^fC? 
zFbtuhNLRr^`?w&w+vzflVy@3NoOo-7;goWcFnzd~=&RTBRTZ;il@hX(=bQ*~)pT7I zwX8@u_F*^8vj|rg87IRefq0}J)N02~JIT<9WMDQ3XvDht2?_`c3EMGz5HxW}G#C^0 zmW{(~pb*^i$3f2Y8sTo_oE3-XVJ$cUvca72tw=36Q_pYAl+a%8Nu8_ zmv{}A?YNaevO;y0mO<|#8fe!*n6bhg4mH6WJdXHuRT`1@emgw`FDJv$%}lF4JEg7$ z{m9s)yxxk)K*PpFfS7jFZvUo|@z!NI@jOkAC8M-8oJmBiTsor0xJy^Wt-J)v!Bav{ z+f0SsWc!$v8i1Qt%d{=>`JA${dv%tU|U zP6U%{BuppkKvomRza)cB0X;N9MrW8>(-CF^UEzck4@XSL*diJxEQGZak**#mowS@- z7KvvivdMl1pGx$qZEh67W4=R6Ak(Ng;LC9ThQZ>f?4ZSK9reRu^blCrJSw;bOH_?S zGvI9~ZpGk`CF211B8|cm$jXQv>k*EYL2al+q0Dnt%k9@}ZV$>3QnB86{>`KDUX**p zon6F;h;^xQm|7miySE-TK$PeInIGU#AcA{sf zp~m2yI2;q{GPs7&KLuhv-?{MayOo{bEX>A(1j>Mz9voh$)b=O3D~^!fu># z3c>8CYeVlw)QH9qIUHN0Rt zVWc&Kz#K+0g%c4o9PUC0TT$ek9WzvX3uA?@GpJISMGkm$W49!Fqo^AfmP)#jT$pA_ zcNk}P!clGtZbWvvh*V^xvfEp@fEEQm7zvCM$LJPj2}fqYIl$#OgF6 zA&$63;5tzhwHdM7MH@vL#mkDjKF=GQ8VKqdNxg#O;U?gDV^t(SYm$6Gk>J z=)jWVfe74Vg6CKy!dvZRdleSv_2!ou^gOXggsdWz0tvHX9FOpurF#8eI$4w5_wL3lg~U9uZ?^miEaj zQKhIKjgPoFMmKnWEMg(FjRFdYdq5(z_a3bxR17YM{q$?-(kwQ^Mqv344FEwZlx>nJ`@`}3Y z1iTkZxo-E6b$tX2*o-C_{vwG{-mZydTW3sLF{GJ^EgeSqMZ$wL-oj#b%_o1lk`8z0L4vSQJYlE) zIJGoaG~I6cVRU)amZ=SLBg?7PPQpz(aXXsmaSzQ@Qaz?q$z#nzI~BnXt`ID- zzS&GQ9UdQx*20se>WN~9I=v^oD4Iu>dgd3 zJncrKCVN#ntz65^&}4MB*~QYJnAcX)Oc_cvUl?+`2D2rj(1#{BF(L;MZx$vIN(}o{ z-9rLZcJn0YX;iQF`t z3?eHmmb$Jul5*one)u)qZE6eRCE-|(Sw+O|XBb3PDjIG7vSP~8o z9#bhqH=@W$#>A+gZOodTaD+xXk%Y)9WLhjGlOlfC)q}|Pw~;?c)JLh*F=UPgQzGDH zk{ZqP#HW+YFD9%+4;E0r6-)h;N@EeA@FG7EP0?T4NGoCrWoJkB@FE5aR)&iJ7vd@e zSp>t6VHO5aaU;CL&5KAtNSaP0$0Xj)p>5)Qu}F75-eWO?fe1Zn72_dcpBIaUyBM?K zw;{=y-F`iq1k0Hoeu`VKsJJfbj0On3&`xMSbTMasiX;L95#~Hd=Ut#NE#_2`B`nte zmQS33EO)Yk7A2;4-9Eie4ASGQoO)TxWTDL~5w#{qfHX#P3B_VlF}JLqZGTW**`83( zsDrQi-0R-!(?be5Mmo;ziGUktIQtSCmK}S)i2YI?4K}S2g9J z54{?^VVB*8Q#(4*1^y!&f*IEznwa(iwbHaJO$bMtK&>=^T4@5c(gbRy3DimxsFfyA zD@~wQnn0~Ifm)JjQiqkM3DlDP6?5}N7MO6#l?WnWE!E2oyH&BE`k_>Opl5Jff9j4q zu4(ia?pP^zr*?eFxaPO-pR6qXaeIeHgZ_|GUohTDc*Uq1ll{4pxk2Jc%6j=$*2F{7 zR)?SRKoyV#_;#>}pkAu&b~4g3P|**VW6-y?0(z 
zpIzGe((R4P!suY7UQus&aOm>vz-!KV{Tr_Sk*%$BFW9;7HG3vY^9LqNQ>dN`6Ri8?{{HAvMpKcQR zs?ebmzi52fcpvp#=N|bz(2m$mKla9hMnS#e>qE|U&UGr%ewVu8&>_L$iI1JQS?wdnpik2xr5i3xCd@{i zRhU84ldu7
>q|!66NEsFhOx94@%TNbTD3T5KXelCaIZ+dJ`!u`y$|=ib|HdwlPO z>DTtm=N8q~KfHKIlK5-dw~gO5emgX*5)xG{n|*aelto086+}dF3q`7YCn0?D=)tE zz7Krj>g$bL7LHzeO(R1T$V;#L$-|3(b*nPolu_@kUw&)*=Z#}RdxK(VrX=t+6KqoP!c250T9V8rx%>Fohmq3|Ip81D62T z1GfTq1NQ?D0gnPt0+NSFj$lt|H9WxkBsH8D%si}nrr>>V80hmvSlDiLoqDNdxT^K*0LgM~dixlk)*NA-nsR@DBb1KDKdhLrUSS{@s{>er{>ZB z+MB2E``*8uk~%+V?=3p^vlP0C_MWsqX2a>l9d+pQPG`S$XTwa^XPM5%8L@i$>4og- z9-$W{F*Q8WYQpNZF&%n#x*lykB@k?|tu-Ev%_AwQ`eN2+= zdy3Y0k-uK%4=?*S*SMZR%8=XFk06!2Ai;ihVSK%lNH(%d5Vz(N-=;i$M)2UUB#EKN zG?Ov2c^SbaBEORn{0B`2?i(FJGH~f^NFTfu+F;GJ>5K5eATFWhxm zvsGVzwb~D~PpHp5=a<#rRQvm?RepN>HLv-7`IPVU$p{Uc_yT_Sf2DUW($kj&L=Rf> ztCn!`c5g7-X1^lbrKPg}@-N^0?yFz@>RI(Z_4f9SAN}ZKoNHtHXHVX%u~Spy>~4ec zBron5EEVjO`d{sx3w%`7nZVDT872vj2?-&unS{wCWJpLRVe*cVM}&|FA|R-|lmHPT zk6;LI>e_8(!79EfsI}H7Xlq;RTIjmgwk``@+p?~uthK(jQcG>MsI+w}+5dOWy)%&n zv_G@E-FB1TcjwHVJ9p0g&i6Xsch2|qU)F48XT{fc!7%IsJKuB?jVz!Dr~_JnIY2wG z5!epw2KEB`fkS}U1(GXQDpMzNUz3YR&d7X75=Cn6`j9@fMPq!QLwplaeC(vErkT1) zRZTN>6GL1!&B%Jt5IQ}Br3dZ`R)}CVWf1=(8DlKV9Xch}KE3khOnc{n&VWuP^xjoC zKaiR-J#(Ma2Q5#jZ+aEf#XeXIop18iOZt#l&bmUVuTcM2b8G@G2Ce|s0b7Bcz=OaO zzyZLf=R)XN^8HC!DJOrum;o+~7$-Z9h?oH&pl9_|J&I|JqNG@0vLzYsvVbC>4rl@9 z0PVm=U^}oI*bD3j4goTXlHep>BGh@Sxx_(Xewa);W)p;aegSm56Ai8I6oZchvWQk9Lf&Z4(tZ@0{ekOfSAQ$)QG7= z5{A!9;dA+|FDqo?~2AlB6e>pDBhZD|B+9KC0=Qfj4`TgU&=03ORMU zPNNHVuQFYb2oL>dUN`72vfZEdBrdVAoxZCVm;;C{L(VOk+K7a3fl?!xjUD262F1eH zxyp1kqmLL>XGF~2VY{Z$Hjk$Yg3g|3y?lOJ#-vj@6EjT}BUkr(^ynS;X8NM@e|?<3 z==@(&szckgXOSZx?u(rZ3<97#sYbAUs7JO0aCg5UP6+r(>gMIEN7%>BkIC#nGETW( zoZwbp17#KpM{!b;VfRFxw50}=!*GH=2bJ51Tn-nWVZQUIR26gZx>@5j zBzTpK34+)BkAYX6??4=Of5~_?#!@fT`ixm|2JGIa?Iom!>i)o8=ni3ICb7v7QUzPu zQzmjXZUWgTw1ohM9HX6|gy$?TCC_pfevOS_DUY47b(?kbAAjl|Y5RDL zKjHs*<4mQ(`sR(LNSZT@-lL*9U3TrSJDE8J)ln=RA#}irrYSf?Jo%3F(O%TU#v&TZ ztEeooz*BjZX`smdVZ7JXV|X7Dt4UI3Y3}(`>UUulnb=My8jHQ{Kn?J5^1<0|7N=$x zvjNEha>g26;?nD*9#sLl4)Xx~3SXZMqi-0q7~0IB z?$>trUGNaTE^&iN##bHt^)L(f;M(xu;q%}T@!%2h;1TiQ5%J&=@!%2h;1TiQ5%J&= 
zkzkYd@OkivNXQD#j#U!1>xsnY?>!@TGCd>UEFM@z)Pkvosh*(VXGPRP5w%c6Efi4; zMbttOwNOMY6j2LB)B+0)I0%SCWC(Zj8qwh8vAl=|FQUPVXz&vD^%C~=687~H_Vp6> z^%C~=687~H_Vt=aI6~tXAWj`3U%C{Dc(_apaY5`aR+2S86f4!LWo$5Bj5YFssn05z zi(M758m!h8rYP&a!0o1{spOQKL%yIal_O)20`}}NNa0sg7=skXAcZkVVGL3jgA~Rf zg)vBB3{n__6viNhF-UR7AcZkVF=G((KG;5#xc@G$dQF%hpj|)`)EHt$V+f-$#2Jkt zj0PpQfLfp#xD;3hYyh?ayMR5wKHwlAqajSCOJw(pKRkmmmR{mRjD_FXWhTK`2@m4^ z9`2R-*Q@hut1D~gSJf6r$^%*1fwGF|xfe1D360|}`*1~t5iXw@h8Qp00MX=HRsG8g zCRMTco@}m=gh5*8{Z(*AI6qvGQBz^{!5kw$J+EcvtmZ!W`VOZZ+>bXel% zVHKD4_PQzUnjMITL0rymE#ypcnEsRf64LY;+3$mmeRRWz?Drx2eaL@=Tb;D^5y0S^6L*gM_j|IGBs=Z)hD+ zk)SuZ^CE(YPzTZRF#AT+t7HX3ehu3T3E3gOtg&+nZXa};<=U3C-oMxs@{atdu+HNt zwyeCwytJ_sFQd%2tK!(LOH+i&&mmI-!S!$*R8Q25$+EwSuy*W7 zrigqbeZEt(UW z4rl@90PVm=U^}oI*bD3j4goSE68CoISK=Nvb1TWBmZ@u%n=6XweF?oUDRX$VPr`;JYOFE(yL%g71>x zyCnE73BF5$?~>rVB={}~zOw_vM`#=a#O4|X7fGTuDY?$*Xse`R3FULG?e4;vO-KFmyu@!fb8kw^Hui1UJIhd$2YiS3h>O{JHQ6V#BEXz2oFj_bB}Aj8;m2GM3L zdWA4Xgpw0AhC?Nh(WiO+Luu(6snsQuC|%-G1>kiPb;>-d*jjHaF(KZ3o!7`jP#u3p z+~cP?G!aa*z|>$m5lkn7sYEcH2&NOkbRw8e1k;INIuT4Kg6Tvsod~8AHK`5a)np!& zdkfA@epMmve3hZee3adCuy5OhaZ9~tY1%?oXBiDjeGPc8e`im!G?Q$)vVJ81Ckyoj z;N$?D9DtJpaB=`n4#3F)I5_|(2jJuYoE+eJ!P*fT#{l8vT;@*_Dw9GPB=<1<6<;~4 zSJK)=xx|7uBYs56Zsn9j@;tFhTy9;##yhlB54p8?>eLUOuPzxIj926eP`bt%kaC_Q z(@m^uJCBgBzT;?7%hX9c}1yFw)U_wZ;_OigPQzFT^ocbi<_cA=<@PnOt zHf~Y&DmfB2JtWG{gTWXwk#!qVCT}*+iXerQ-H@^yQg%biZb;b;DZ3$MH>B)_l--cB z+aYB)r0mwDJODm66$!S>8M7W5)}t@-UDFy!Vy=f~vIr%EE>Y#m9h$kB4>KuyVnP|rYJ zM$}g3E|u0@qrM7X4N_t2LeV)RHBFABkJ?c3aw=VT4yXp2fQx}EfOWuDU?=b(@C0xG zu>EV<*pS(pCVkx1)X9}FwJds<+EG4!!@-HP#pmwWh+?QYHGw2Z>tw(U$L=Io6veQ3 zrZxF-wA$;GT*ZgV7!7Sg&Viz$*}C9TeqJyu zDQ!gl=t!|^phM0doa|Y1DWj@~-Cz;>sIlIM&PQ8ClKoKjE|}WjMn0zVUekR)7Nu$CI#gY9ZgBAAiA$^%!;7+` zr@nDtbwP_YZ)8?Rc;u=5S2VRGWE9)nCbn~wvD(EN`iOOz$Rsm82^-neFV*IShfyJV zjKr;NB$BN9bIyw6*@@T4s=N>*R&dzFdj!cPv`hG=w98{0)=QQ3s+`el6zw&Aah5BP zxmtlqCtYk0t8jOJMabkSlk~1CXMihYl601Q(J`TE%^Dpkvm%wfl10}DI}Xcbn49y1 
zqt1{d+Ga?MH52#A#;y#iS7gqDKmi=PUEIu{I1Z_~>Q9NEiIpTZx{cGKzp(vL@kmxc z_Z3L{ot1aCkk8@fW|<2`sSUBqRYk``R+d@mAi7iLCZ5A)!DRxk5}p_5)*xuusaYUv zLx=O3aYM9$K4 z+5Zw`$+DnbhMl!VGFy?PwaLh?WMo$|vMU+cm5l64Ms_75yONPz$;hr`WLGk>E7_4< z$;dAD&oEobfpuiRi-|6rB^SEv!#mWAqE|*9vGP1;pyYy4)^BA5L{?!NP@m&oRc*cg zEfz$WxpNhyjXp`nJyeQiP>N+xie*rWWl)M`P>N+x>Qs3z#XKm*JSfFHD8)P|#XKl= zs=SwC9+c|mN#fR}T3|_zTvJ5bIU_~1#n3hbD$ymRt z*aPeX4gzA7*Yqk6F2b`)X1kINmvnYgE?m2U^Fw-@8%g_yr@ikDk147gL|Uih>U8hW zl)A}Ns>=cw zoO!IhPO^s;XV&LU%q}R%7U;QV@(%^s{(=I3c7gDY#DY#*Z@8z!|HanlK{92&q*Or4Kx8416Kg+fUUqz;6dOC-~eFT;6q_D zaow8TQ$3}iV_l29la=%m=#zFyOyi$*N&ZM=n@BbhzWrsD32Hnl6>gMO=B&apGJ#(p zRw7Y%A2co@4^87fXxs;l`=D_jH131OebBfM8uvltK4{zrjr*W+A2cq36SD*{j7*XJ z9g-Yc!s692lCFqWRkOqfA1XOC!|qVHLWsYDj0E=%$sb+&`|Gz&s~bQ0*7dEHdy|LU zEYEc2G!3h)9XiQ7y*6`LhBfbjN28rJzkGAU;KtFFU4e3P$GD#TY-MHDh4N&X-9xOK zTx;Op3U!WZa*g{IT{luSfilW={3C);vx_k&(Z~XdfI6TBm;ZeTC4A2zGZMToKQxR$fvgGXN!xkl<9ykP7o0n4 zNJ`#~`=^c>yC4({W(6w7OdPXf+_}D#+;MlznsMm`(>nu`Y+|?eW-cAz%}5)6d6IYJ zkRihcq_^Jb54Fr1;Tk@=*5?`&E-r5_FMV)QQ_k>=Q`Hq!m9FjAR#jC^7ul%5y!n65 zy3%^V^|YsPKxDv{0Z)cFjoB7-XjUpZu*?|k3U zh4`ElhiS_`u=6ProPqrHrW=PiH1(?#K#e9n4+e0Mq@l6l;JI7J&Kh_3_{S$)GvT$yX^owYJI|XqasH$wlXgtn*R*r;hRN?Y zZ)x7svbtq!%U$QcHs$El=+sS9-@o9u0(Yv#=FP?kJrb~`q8sz`Loag2|H#azU(>yhA&%9^n?^#f};Ij)JUhw3CS1$YR zNuUfLD?Yb-OU3#j$;mV!MXRb(J@!ZP0R$aI1rH*LF)vKGXD((!f zN$(oEcKW(S>qF~LY}mJP-=>M1S8us`%X6O!TzB<#U%T$L&vjlu_WDKF-+zPWhK3tX z?8w|vzvIA;<2#Pu*m&dPH*LM??VHnYK7Pwnw`SkE<<{qK+i}~Qw?}W^vNLh#w4LvN z{^8HRxNF<);@yYu*l=h4ou}^l{ug%Lz3}e$?-_f~@h|=S-VOKNa(}@C-UnWM@Y#n} z?}_g1+_Y1I1diUFKUwZdRjv3wWa13xJi(C|M zajenpbJ($v&8I)$SkV0n$BEtVa%}4U701c?OtWs!?EWohX7R0nYT%xf&ZAvGW|`MG z)_1?jv7!4Yf-0kX563zDVxgMP-HNe_PjXD>4#iyaYea2E_c4xH^rVdTXE+AvNg4M% z%(1TfCmiebnTGCf)1Je3%hX)%Q^q|{pbds|{|dhQo@fSrY~XwiXP)6$(0!caM15@P z{sryH-OqDu?tY!)`TG2HeSU^M&eUfv(x1-aS!&d5_D$4~Z{wF7FYSJr;~ag>T(D82 z=J6CY^hEBwfa~k{-CuAl00VUzhjrlaB<;!i*h~-W`SdlknDqG^?sk%+;HQBz-{dIR zXwY|P;0~|SZUPSt+~E+%X71cT&wtKwX7@`RXLY~G@nZVZz}-%8oC{7G=-ay#%n>|H 
z0`n(0W`n)+!T-Bz8t0Fx*( zW4FX!^Tpn|)pWdp=Fh`fovehn1Nhag_IWSo4-2j8zwc2^!?Z~pVm)NHT`Ix)y4`lO zpOcjX`f3l?ykfTpklE(A-S(=1E;gR%Yf@lw*;bzR0*;G$?NBQ@ zzeHaZM7ejURb1V{_d2;^HF~d&RuJ6uyxfEY1Bl5{57_&XDm>oofot&?(A6Ewxqo^*wnGAt7B*{4~ox#e;m`GKRpNN0)?BA#> zi;gKPuZdqCZ}o*zhISFwVNgwB(Ayz{WwaY@@>#?3i=bK_`(!-rQm)wuk+eY=i!~|9 zHB&f0%RVn8)yd~=+_{}&kU=fyU&yExQd<%`Q^P%i_P_k*8m^X4g&>2vFY;|6=`Q|- zDBJmDDd&WoJJeeHZqjSHawX^1@L4-Olkd0d@5&Rla^I!;8exo}n#yPE^c`389bufl zNj#`7;$F*Q*Eqdhk6pK(-p6qxzaf2GW8)=4@0LT#f}_*fvMDMixk-8xS)Vr;_qh+B*if8zsd!x^b%@k<$`rc*%RYdoESDL0 zkkvFH;_C%!Bw9R-1z&{j9u3)+G7KU|#z1@(NU|y>GBsHDb?CQq&_3s?v1*(e&upO) zwwQ?aX~Jx8X3}&%#5@&QIZa)NzLLztFv!pMmyVX7FOX^;AAM>TX>S6T=I~c#N9#h{?kE_3D{`5`t zr20qol=`-MT75@-SA9=CqyC9G)lbw>ro=C(pQ;zt&(urmW%Y`BRlTPEgDJ~#G}7zp zSL#j6t$wY3V|kcA{6f8}-c!F-C)GRZgnC20rQU|*7en%G%;lCszU}Hth;aoBvI=rs zt*(N7)(!0wR&|@Yo$1RiCXaWh zJJnt4i|T&$0F#+L>dWdY>QVJoW<~qSp79S%Yz{K5{J#1Z?7JVRAF3arOaE0pr~XYn zPvVUu>H~F3bz6%3gOZDtOtvIs97J~IA*82FCVgRw`cErW{a*dSGi~~-*Gp_?DE$QfLUA(w$RoB3_STmu;zN+1Roo&Ca zb>6QO3A`#{t$7{T68owUss#*$*2s>wqP1%k6e@W-RCl-j#9H3gDd)^}tVPu{Z*j)5 zPUS9J?EBL${oPCRNKCb>~O|zC+tE?>~+ketJU_EO+XT4$_v)<;;Vi{Z5 zJ#WJsVi}8FELO1_o98WU(TXjZ$oU^&iM^sNIkAPs5*9mHtYEQ$Eo}pP^f$x;7W-GM zU#a4~2W#vdZP^j#<~PLd6{}Y~T*tJP$3i-6Ua@$^-W6+CY+bQ*#m*HgS8QC5wsEz! zs6Racw~AFKwyjvUV%LgQD>kiIv|`VSH7mBPSh8ZriWMt1tXQyOzl!xLwyRjKVz;u5 zf|gjVV)u*1D)y>as}^Tp)6au-sC)D-l0z(2u~W@AUey+=*r#Hhift;Eso14rm5NO& z7OB{yg865_fY_sAjmj99v$6<7tWmK=#S)cVc_+D2EK#vTEp3Nd$R|mUVF_UK|~|yF>=t1yXQ{U7tNUW4=8yHa98c}nx>*(mwrMd z#t}JAEU9T}ip5ZqT)0g`mNY!Pc=4jW`*1yk=%(A3*3}qq)LtA#G`#}%iK1K;t66}2_wyX(h9x2?zZ$II%P8_nOEw-A*#p?>M|nzkmf zOl-k5>XWyzW_jJ*ZJ&LCYtZK&*0iFf^)^@9HljK2sBXH+-c^@dOqkRy}Px-krdFp=RlLx7TUZ)?^vvh#Y z&>Qp%+E1Mp9->$2=kzn$OWSA<{glqp4*DhiJN-o7e}w*xen`*J7W!ZGBK?ZC(pmZ! 
zdWm+^k1*=n={)^2{ea$~cj;5wMc<>>=reko{*_*#!}J^a-}Exo(jqcwG1bu$+DS|4 zL8_-^)Ig22oK{d1Md%@Fp=N5ORkV^0(f6s1zDf_%*XV!HoAf`yv379jamd^ndY-;c z-=MYh1g)cQ(v$QQeT#lg>*;CwC;B#Rqz$x>UZ7`a6K$sNP?($=OV&LYD;LIOHO0hK zahsScYDAk@EjDSt(tfS|hxU8NX2&kaKF0y?7;maK%bV@>dxPF7-f7-hKEE&M*Zgk( zXn(Rl%b)F^=)c{+&|l~Oe8QXOH93l2+77H`qEajb)^=e1rNsJv0@f7oSnoKmS7N=X z57s1qW*n;_u~IB1W#K^Vnb@LOUCbG~KXxsVnPYm1dMbKuynMFjy5n?wry9TG>yK|f z{*B{1{_xlzetzu3W5nZ2Sj(7jxkY`kekk-Sk(;{RKG_4$8Ukx|}Q9==YHF*XRU& zLVu!Dvf~1yuJJ51_qdL_j?x6u>1HrkzeN-V{e?FRJuRs~PfvCGiwh?Te_B#11&fOU zeyymG3Py>v)O1uSO2WMfdNH1Gr>1L98Tf?5(Qw!Z>tVw%-wf@IMx!Dx(%un9p%E7I zeKTj?Hgl#k%h0*5dBlhq`0tt3X&xy%nlDmNJ;msXM7j*1i*nuDW=bU%+bZ%v=MaKU zOAi(nGi`z4LQ?8T^>fpPp)=Ork2cHktYKVUS}!p;v$TB@LlRMP5C}$zfPcJwA|KW9 z5%U=dpE-9kapAE1Ws(O3 z|Gn++oi`R#iyVA)9^P{3bmM}0^GLSH*=`;QWr_`kHqQ{tGtHOfm(la0Vcw^TS<+*F za#Xr5Pzgq5k|-)HF32g=(>w+GwIbDDR2amlxG@YF*%%CWAOmG3K0iiJS7TF*Dq`2s z&aU>%aAp_tBot~ljMMug9Vjr*a!GqQWN15cx3`Ded31OzxLQch%-_9Vm5p((#t4IB#H@jY7DGh!)B-(~B{<;oN?Ijl*9*zW zI3H0tb)pa(A`vv8K4z17BpQu$baaRmk;lc77QX55D!CBz`O4g3L?d_*q7s&J&CHbA z%ITI;JCr3($!xHMpD;SB+nl@kB=S<;YB|~uif(r^jU;mVwWKl0AXy$nH>MD#6h7jh z`ZLVw<_`Q#@B7>sJvnKN<}XwS`zd!bYRf+ZPtV8SK}FeW@s@i!>`O_AJ zB{u5N_Bob8E-0zUmsF7COY%@-WB6N9t%E5qV6r`8kde0$1#r+{+c_37C@K zF7b+ww0+vGtfzoCP1^@dhU_)80WiP=voQqZgHo;V8J>=r<|mrr{x(1Dfz)Z5dtpzs z$Fn}`Vk^&PcuuDvEiB9AlguuVtjp1piwZTUy0p|}#eJKXY7Fv3o;V1W9>L#qLu`P` z+F&+Cqwt{wXd;IfSkoA1p_D(U(*nI1`ZP6NPYr-47NwS)Y3Fbq4qrN{sE*T>i@8-; z;Pa-Bal70ecYzOjmPIz;!|X39o-kdEv2=jHC^O6w8r@?4;?Twl$O%wIdRB!4YsB&` zEs-$hz2PbkN6j6F+bcR6WqV=0x@&(~Sd5jEo_T8VBL}Pt_=6?F2c^ym1uds~1k-W7 zNK`Omf@mm|T_Nr^p=FJ*w$ISb=^!Ht#_T`|dKHN{D$C2n&GNzsm^D#zdQ|C|h?oy% zX|WSlyP4#*cyoG3;Q>>xVA9pgr)8#+dQK=&X4 zmgbD;19}(dYC8D|9ZVWStY`vLS-hY%lhabQKvAOF#C$;;+_gx=h_K{!w6}M(LkGw0tTdEtEYT?nS$u>vv)sT8fL#HW6;X^nVl0b7VQpSVsON_zQ1RNW zAvV8KFv6wnmrl0m|sZ0Zrb7k%T9Li4u1)6z9+u`xcH<&?eF|uzU;6b~3;b zd@R>OSHb|V;<1`sVA?Y z@x2ohPJ!L88FMpR5eDoI;T9bf{A_40}F4-LmzV)B#5&@W2Jd$1tFgcnC_1LMM? 
zc(X{)arhi=CJV=)=V*DGUMhA4!U3@>zom`)S{118*;gMZD>d>(_n~~fYNK*Try=7bU!)St+Q1XdshJ$J27YvQ*Dyq`35@DlC!p_~8f#HbGT8x=dzmZ)T%E(Y|nqS_Sk009uCqFGk=ga67Wkt z!3O)TI2K?Y3m~g___TwL2$+U{3v=FYFV3yv4g!TNbr;}ZnAy0`HTr4 zNv=9|tW`%?cmpb(zN9!1EVLas^jCC;LlwsQ{h?g(R#|-r4dzw_%r8|N zV${0)7Q=NEjY>+H9aZbPwuj10t>XJU`Tw)A{?O5iVU3AWHMaj4B#5rfUL~p)dnIm` zPYa9lgqjO-Bn$M6lysPq0u~c{_=4;cxN{sqo)~Hj*W^d_axr1b{q5m3#*#0&O3llq zp#^1!S%Zb0-|aAZ?&{pD?QFD$5JDa9*}2};q21AWz9$)#EGgQJwy`d@+ZO3+G-UGu ze~z%4WHAY8PeJ?3vz}Fa#?4%c!98JLIC5!C*l>zaSYcP%gRv`5UCs+NJCyQJo_fMO zfIg3Fk4VIx0sSj7sv}ESL7l9XOUAH6hBek~FaRF<60m0YlS8)8*HcHSPP_#nLkh=IJf^Vk42G%o2@4#A7cv4aRr*pu4;lp($uhmNQk&H zntHekVx-IjtPdO3u#BRGFCT|@0@`DLg5~6x2KFLE0ko2E^HeJ4Rwjxd6clDSI#cL) zn%xicKDZy|eR20g%+J@w{p^6aj#h2Fgq5FjlXN~V&t9|6u_uHca0~9=p@k_4Ee!9| zjGvoHaSYO`>twvu{rd+ty|MEulxQP+{$kM+VsgZ{M0bDMPGAD@8XYST=#}K5P{LRe z2K*T*#U+IS6{bXKI&+{v&k>r3V?D4Nqs9H^wm?1NUS)dM;cT!xY`DxFV*Y~t+YMtu ziFv9^S{HaSOSQ+UhZ!Mr`q4wEX1-${>G{TXxWb>BU!Z`;fT?%~RKO}*W00sM5{Cl{ zH$@JIcCoa&tkirbms`e={6eobHcInwp|mU-D9dN#u%>5GX*G)jlu31|_PzRcScaaz zG&X7@d1MG5$2PRk-ZE@`3Q*;Mwp9jST#GSD68E6L+Or5(K4_vCi7yB>Fm4xWZP*EZ@XHRsY z<~Bc3@s&Q9GkWbi1Q$xhw(L?xKnOSz7LJaWcz^F)D1PHQ&(R_6R#AMNNS`b`f&4AH zz9ruXZ*eYcJD;0-p|ND|UPb?nj#II%ykbqmy2o(}GhdeXM&44=-PXz^+*aL*meAhn zyqBbkPA%8|^Apd?MFqzP)v5x+Xa#ZxEF|nUFp_B; z1rR#%s$aVHN?UOJAtTW1-6OCPQTByZDDwv2IPGv|sfE8~|FM)YdRECOfb28mWi5Jw zejuL3E9CxK7R|wyNl#uywyY7h#sR%jt7FL<8HGm+wg)e*q}79jT>@qj8tb6aSry|J@^1Um=N!QHzj?v<__K}ri&aavOGv9*6CHJ+pF}d_NrXH zti4}v%r~p__2qh%UaqgNYES4DWE72dV_dK^CH)p?5gE*eLwGHwIy8;tCox0RP~e0= zJlJRlXMmm$ag;XJY?88+!E0ropVSxthqYv6?gw%T)W(U7UWo39;*eZ?O1otbw&D+h zS-b|~511rF7LsnM7qLzh_EH$`Igg66+Hn`ASHO6{6~VlrA*q+L<;=qBv58Nk2+GK= z#Kp_<8?<@u8Hf~1F2!S-z-%$6Z=l0s6#GNaHUJrp3}d+gMU={gHm|-A2Z47KuK@b_ z?sl*|5O?CCweiu}m)C+ggL4R9-mOfv;ytAQ2N=sM`1De95A^Aa2l(`luus3HJwI1$ zD6OcLK7FqF&1z$R;{cc5oL;qVV_9aIxuz3$69>#i2=J(F%Q^ZN*G^uCwO-yDb)`bzTZe=vS$=KmUUQz910m#qssznISWaJ zzA7nqW_B)iLDPJU0AUX5URoC379P{ z$aw9DJI@Xo0qwJIB=1> 
zML)+qz9k7>z-3*X7*S=NnmC(N$lG@|!}H*M(ksnmHGz3>!`M0EaATO4H1gAD4r}MY z4M;z@oUqXvpSy6qfl6+rGLeWvWU+$4b}8!{ESBtbpQF-}l`HtxYKw4yZ_T@dAdu;@ z_XfMnpYtVe42pwtV^FH3giS$pEjI*hDKAHa41MUgidNem(%Ps90e69s!~SOB3(F(! z#5^v*2YUfZudyv=ZyJ`@_NLk0LYOzv=wKtDSMnGwd}(lc1D`^Vu_^<0UgQC<^5B5? z3SM-A+m_E>^egPjzO1*kE$HL2IkT!Z_Dq2uez%{e#`s;Y1OJtM=pYv3w~7QGI^L%9 zOuWtY>jyMkvGIyzh^gi~eTezKnA(>xtLHrDlm}fr!S>*Y=M#o-!Vtu zLF+*bLV;Ypusy;og+VXAg5xZ?kP!KKqsaT|@2kan?ex0M`iL%Y7e z!*l+mI?;2^sFt3yV=dg}vYroZ&$$d+)##^`Rd~+4W1Ef@nqMpB_*kis+qP+`0r=|4 zIz$xWhvk|BTtWTehdm@=wM5-uemUrxVL%o2vd9H69ioZdH|^O)Qt~s`7JcJY@YW ziT_5cT~Mp2cP5rwVM1B7Uo! z`J>7uo$RkVPRZ4w+!s!<)7H7B z2_3>aZMs@_z=eWcDjGAs&!!$0w+S0$aF^|%08AT4Huz7TV&%zaavO*DjPQ}GIRqzE z`_i^4S55{BEHA$j6ckYz2x5=4L}p#}k44DDHP4_?-yF~roGfHqm~+@#)<^6ZFdWl% zRnY6H4y!VZ#%hqZi#^t|Dg=h(UTb-_j;L|JvO_xOxMj+3wzQ&Kbj$sJ8e?t#`@i$% zcfM0sr>#4*dGn!XlxHfV_*etec#aXs-M5J{2Qpyquke~crXfVNATP3E^qFPaP7G5- z?w>eM=$Ep&Fd82%-W!+$~o5woIf& z<5hO8twaO%oOw&nA#})UySB_HUcx9IEY9ZGy^}!fv0O;2EO3 zLocl^MWVEIG#3XD!7K~O@7#L5ejT)}NC_a8wjTU3&u+|zIx#BbH%pty)Ak{AD2MmJ z#ccQ|V?f83?Nu)%4Yt5qeaWEaM0%Nmz`ENl@529x-$jC*x@%6ynsqhite@#UAU#sf zYm{Y998m`D!soJM$74A2V9?mH8yr?7<6E7uF?H-=M#;`V&)$hM#&|~TqAq6v?|dXf zKCna{Ei$@F!nwKO5?57~*p(U8^=M{D#WXIV_IXuXgCvg@?psBUD_+}K&|91P9NX$j zh2I_IogTL&5)HFFyj%+`5Uy7+K11C$+L22pgvGG*PuBzoIJ!E+R-!4ImoEy9ei_#McIErZYHue6_`i=tpWnmdG{kj zs(qCn$V+i7y|1EoRMHq~MR;VKSy-PBu5vgVoMU?NZ*W&XIE>#KU!HP?f_p$PJ>D{(f0Gmbe& z+G%?*TKr_6IlZ)^v@|!jR2?{p`u)nAZ?3#q^vHwd-&ncxJ})75r?cZ&jAX3@GpfmK zHy9z#LgO$_dY{~|eHgK`nJng+oQ|w=#)0t6OazawylbOhcgeF}FCcQ#8{* zF!x+-Gw6WiA@Ui$C|%N9$~7>TWeq$}vA5kb`uxh_R?e>dzxkzu@+$ywem=24X<+ic_!A6G4`2&rZ z{;)DBwqQU~BlGzam@0Y&Ai6x)`*(FkU$@q|0?U!&W2d81!xnQZVFKFp1+mtMG z7OMtsSHel+Jw7CI4cxJmZOeVpOy0BPq>^~_p8E(5B*%X66e6iEIStjyB2sy7*5(_$ z51B`#Iw{8idyCM}*Km|-5DUt-{VLlY@y7b@e7I-uje)x#u-DKmYaL$JGOSk|YdP~- zYKq?9dXx+m8AF9&%?fAZ_KgB zD`gU8-Lec?Y(N(jp>}BKW*o}-a_UHemX2u{jt#WsX$qB>if?7QM^y}aDZ7&mpTx!yMtBFkc@dd$BM*$W zqksyq=A#g$9HMgC{kZ|k}6ITh1!DIN%R~9Ew89(Bmf87aa*RdkUQcst^ 
zyu+okN!mPE{=ab#HDRCG;s8Z$4h)Rmv?uxSvshoI)&KkX!tVqKN2z#{4EY zS8&L#6D8V1y$t`Xm*L}eKf#soR<@_a0v+l z5_D~|s$ zK)b29Uaf%78K5!nvqqFX37CVjmjG=FxGr#n5}!e)KjX~w%p)E3%bj1Qd{}oR#sl`z zG#qtp6_v4{$~4Jm>u@Xpo;m=I=(jG<(Le1Ru3@j89=@bADIUhg!pLpzyDG8ou9XX`_9l0U;G;U zl>O$uTLAoM1HToON8FcVJjk&>POB~PI;{eq)mZU(f-ig>r}YD>buqk7#*ic`@l3PE z675JnG9KppPW7DgyHa+@)k1FIA@Mv_02VmL(t|9&{Jf_@(FdK0v`PhWnPb^b#hAZK z6@Uezf~Lmrp)O{$;y38z@qs+yhq_i7_Ypi|y6_Avn4(D@e@Imt!?f53^xN2{mMj?B zBdE76)+6qvT)_3>k2J!yin1KpbRFPYoR4$uq*0Cnx)yb3i$?s)=P$@%_%!x2Z6u8n z$H}41p>b#<#9)sD%Z&CAEdm|4vJ8l&_yzw@@$0YGs8$>yFMhle;JVgSe-Xz-_!OMO zE`xXu)FDS*dkZ!OyFrd80S5pV;cb-zS^?h!>;W7FU?<5{0oVi$jx~aN4FEI8vk>qm zUSBa5@GMl26EF(^d_xZcP-mDQfaivf13ZP-J!pW%fc-=xLBq(Wi9-xD>T3YdFzT;R zjB5eFI|j6jc^;kuo=I8+_!MhcJe&M6{N@>e?RcF8?xlj(wDEwg_~j;WrB4G?1JG_d zaHhWx+ktu+cs2ul9-9k5yJJoGo_7Jz&n!OxG-RROYvuvMMB`v1#x($5h4}&f*W&)Q z2Z^$$1MqzINuujO<8{Z0yz>FSgAX+g@EG7Z0LuNq;RnzCvj7hOz+3-%0Q%+M2RH&a zLlkfV&_+N9+ztS~0O|%%H-NeU)D56+0CmTs?s(K4UkI23FaVDLHUnM;ya6~0=*Aww z2*7wiDc~MJ17Iy+E1(nb7T{yRWujmzAP+DL@Bp9{upaO{U?1QJ;0zY}PQW+|P%r0Q zqTEXO=P17(yuE%k(GB3|4L>HDcn_eNC~qv_Fi}4Eoev)7A0?XfAOJj@)J-%w84v`3 ze-hB{B(aUcxI@vH&`u834S+z*7vo#gLs64FJBxVHVEO@Be482Dm^&3EH|5 ze7bQCV5wGAUbwsW6Vf3^!**YH%A7Q(7 zK7#ztlVuGJ-rHBKSXO6vtD3yKymMP?npf2~F3B(S&Tee2Yi_NnZ(Ls2*gDCpPnn`m z^4_zwzQwzEMPsWM$AvN4 zb>2B#y|!+ecWG;D)3nKxjh2Qh8)};Sluf%L znR(vanb%8F@qp@ARnxt7%`NpS8og5|P090qspett)SL49*Xe_w`&Q#^u3J*yg8ra? 
zt<5z?-SV2|W!$qXvIu*24fyk7Cwc|`mf?Lj2Ck~`h8r*5jN_%bc*9K%-gC1G@4sop zN-3WTQ92vvt*FcIzp0UTmg9=cCgHA5Q}CzD^Ly~_oO-+|$BTFAtdKRl)^!7pweqPJ ztJRgbZ%C|OU|NbZK5{F|C59Ei(R*hW?z8~M5G6_JAfJ_pRUYLrs419 zW*(l&McnFo9BrPDw!~Cg19p+U%T3mtaNmBZjL*^2D&Gq=X8K|`q@0I$R zluaIETWaAZLi^(rdjMlS*ykcNxQ1E|Xl56*um{#;D6GkF8UbrD3K5hsh@K{6|0)%+ zlyu~#j)hgoLdu6i3A4zBauehA!Td0mNd|0?FRdRNyYcu&`AdXyfc$MMRLv9t#7 z>ROApb@98po}_QecXh3&4I+~^iY&aND~$IqZNnSGcHpgG&*QCKyYN=Af5rR4cH`Y) zKc;`9PP~DP-$(W{ym@Rt-bZ#2Z!qIGm;Dm9_CM(q-pKSR-bHo}?;AUh>>TwLrwe$G z(+T<`oy6OkzHPk)>{+~_jNf0j74H)JKHgRKBHnbij}G9iPKU)cVw|{EWQ*&BSNP~X z`iPF=jZc4|wwrvfBFQ+Lg-}H`KMc%UbIjj5_yh zb!T?(ouRXhdKiwD`WE*c>ghXFojVfh+!3#Hx2kius&jX59rxX;g(_8{N>!*zsBpi$ zGxYvAu7_2@!*+pIQ|DgM(ooZ~)TQn;*VQygJi~g?46$pwWP6_4I-|a!p~k(urnY$n HJkb9GD73?q diff --git a/jcvi/utils/data/Humor-Sans.ttf b/jcvi/utils/data/Humor-Sans.ttf deleted file mode 100644 index d642b12b821a16d313b4035b3640eaa54a45b9e8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 25832 zcmd6ve~_H@ec!*&-tF$8M`(8~ZiPYC?O7m!IdmW)4zSF@8DkkEkcBa4Vq*zeLaN0{ zkpx%`rEYpnYnKTbYEL|>Cvk|=Fs_?X7>6=xta{p7lTJLOX(*{PYHPci&^pdELuu-9 zi>|Nt`}2LCTRBeqPbU*;ckkKf$M^aCcz=F=zx$*jrL-b_DvhMUz4uLT{^@^tWGbcN zD)!#9_t8B^Zr}dGmXt>LUOf58p%eSJy!<Y-CUU;@=_fIaJ)2Zt#vq z8cVk!m&9lO*O)DSE;S+Gw;G>e*E8|A_(s}XjOdi0C7(R>=k`O#k1@(%@D@ju6>p__iyay&3NfwEpPjLou^|uSMTbc`m6mK zi*Y4!kEL@e8sMSW&igp;@A7VOth!#;f9rsF~&*N#Iu17|eh3Ju7xmPWBbYj#9LZPuMGJ4jsr$BR8@NV(WIxiY*=so<8 zgCnEOR(tHC73Io{S9Q9*x2(Qoyuaqsw_bMn+PAHn7)-u>{S{Yk*m%{|@3>~_+D+5f zUB7wD4O?%Vx#{Lx-ns2vx6a;n``o+lc+Z{h-TuD2-hcOwd+y!&fnE1~@cs`yF#q8P zf8rw_eduGm_YB|dFW7!~?*#|;UGR-BAHQJ7{&euKJ#xToed2-xKkiE~Kbqz__?FbQ z?LC!#8NcxBgVlq#4&FBS$`S^c2I;xH@6y3q?OXcKOJ84lY3ZxqzVF-jetXA%`scM6 zhwp+km!40rH=arp2}@tyUO!!IogUjd+t@PE9G_^6w^w(H_0x^Kqy3AM1RoZjR7D&dP;} zhtG!O$L5I0=9E!=V`F9uF~#AEj18)*fKW`eykSQeKp1RKZnS&T`TajV5Hx)I<1Q}RKm{qAC~KQ&{y zO#v(o3%~%DL?!7-ry(>1IwO2a^vHS=SxxmOy9*jHjHOqWijW}jC1TuIx-xA~yV4^m 
z&1?jA6UA(?rZc*Jx-E-nkGE$!D*@f?O5;j^1ez1Y%$jV=7}Dr?`>CM3+n<3tfrq=9 z%(-l@F!v&Rc0dkz%t(RpUU#n5Jdv@~QDTT>L7+;TT}5Oj82`Vsjaw z>T${r-FGT5?<<>7a6%(M)OTtbmk1w88g3F30$1c%dcJtMu?;+3o<0VerW>}Z0$2eS z*dE;?zJ(UKHKA&DOwO${s$;S}UoqZ3E6cL~3hT$3V3yDBYe;6Mdj^$cMNZnLd=!Gs z?v$8?f7Citic4$@f@ko8FU5Es`~0s<`C-1?>lsi1%n+L4Fb{L{)rd77ra(f7A4Sc` zJi8)A*ivw8rnBi37PTRJORPkmCmiIMH2sT)J0e5K;;DXRuh1%92=xnW;sFL~WTHw4 z4j}4oORp5`8oSc1+`6-3d}5^xatw(1jzV0!xEOf>7;qf6BurSb70l-1AarnuPlF%M zTdl?*$b`SLdrFXG%j8!W0vnUPMLduMk5LM0lAdT(j9dCvv72~y8@{^J0s-qc%E8V; zVw1=weRs#(#l5CrQN9TpQP+H?B^w};n(-ZnZW4+@{;!&n#{i(JDkf#%^WPsOl6u13k1jFX5Bgl&?p4O zzVTAupIi064!Lq<;w*fu!W5f95K6XyjFMNY5E_MUr^FI(xa$aTcj0Ag1fSUVj{+R7mYG;78*nS4$Uau=N}@|Y&;2dYqSF!NS0lA{u@5txvH1W* zHs(s%KSsw3h_bz-1zl*VxFaAR76)ACRvfCIqUBpOVZ)S1J2( z;0X*!e35#T2nR`r4I!`@6jR5c@MSa5d{P+RZsAFN;%o+y>1}uHzux8=a20?>y(mfA zK_`JT>0`zY!axqoY;+slv|E-mY8IPjh+J7FrHnfKq%1Z>Fx!}v4J9Zp#E%9s>#S zis&G(4H)K^3hmn7BarQaV-%Z-d>1*g>-h*35S0S1gx+BV%T7H}przFXMPm zcD|4#3IiQ%WQx6&2}T3d0g8Y-8=nO=c*ml1k%Exx#rVv5mE2?{uIXYDWKFC3w)4P2 zHvV>{N$!g>Lc{Ut(p$_7zbvAbqI)K73g7WEJlJ|jszp1T>!|<8{f0X9SFn>A2nIuP zaD}5W>^QjJREZ}+peoN@@jQ!%+>pk}pY}WUHi3iPK(Pj}bSP{<3NXQEQ1gU!ls2G- z{lep7O`}DM+ed#>6s!>E?uei=C+}6Bc!o9&JQy9pI_01?s+?EVib5q75rKgN3d47q z8?1!5DXW%KeXR~75quPIC!g^p23ReC9hS)j`8e5tdP`6XEiQW^&p+2VU@wt-1LF#P z_(uBz#fAtEvJnSEHL})s#v~kUg}SKt{UIeYxVi(@uj@1EY<`*b1sqz~KBP>-l`|6I zIr2IQg5xUw>>wVrXOqJs+at-#cIhK#>ZrHVW1fcZz_lhF_uRdOV}dgJ3X@hS#mVZ>#l# z<;n`l%Rx3i?YKpd&bq6|U$BF;&~5i&^`R)NH2_@cBvgnm^<2l2s{eDTlcv(&QhXPd z6ti8~)At;60~ERFMd`2BUf2?+w#TupiKZb}C<|ES+NwmY^o;{vhRJ6X!orQn^s78W z)`i;Do)j8UHLl1e;rv+AP~`E2?r7QvWd!1b?D!q39h4;d?^{^3rX9{McULAN7NbN2 z(5>)mxEH4m_&w-xTgxO%sthC`#-`kG;7iST+^X9%%N&(l8&&8|a^@9jZTd+hxplg& z=%-*PX-0KMZYV(M!yOjPGlvLmL^{wG!f+zOWbt$`+nQ3tqDli?^XxSn8BK^!{!=H1 z*c~}U`0xvfi4VCuSg4M)PPztR=M*9luwoVPZ3^@ zNUFA<@b+A*dp5fc(Xr_82myEy32Hd&6s+XY-Am3Hk*0uP+kgm>149*@RUsZ23fmEq zhCLlghT4T0rViVP`urboo4Rl(|E~hNES}pA=@V5DT5jNG5s*!v%VE*taYbFAKS1gX 
zi9OlB-qLuOY8>1(I~U0ea}CmqsR$Y(QG8>=^gfU)AS=cj59ATiy7ZH85KP(%GDzU2 z>TyK9%6^cSlb{8267?Bp0uA*Yg#-6Ol#xu)T@WA6GXi*(Md?fpYOTOH*r|oaMWH@K zM=r-#f~OFw3{1R7*oe^MSWd+>RXQ8kPuq#;4yNoCk8_aAi{B1Ws4wBhkmU{7)K88>eCjKBtsOK~iMO#L#<3Mu>4{Si=O91N z%;^u)PRXHR7regHHXgFpnFA%ZH6;-8H@AJA$WE$r`jYM!Y*&%UDzYOK5QrhR2oVjT z(r%_IzB$g*UlWfncmhWZ;V^A!g+N&cy&1@B5FrqR^gOaRb$Ih4Tm5Jh#aW$ z7d8;O9ClXLX5Y51c9c5UR$+~0c*lmhi#xmmh5Q2g6SMJxnEaBjUfwr{^N0F6SDfS0 z2wDsm#oiP%I3X?diZ`VD@s)@{;nxz03SG)~`I6%@p*Wk141nL~X^1>5p zeyG;F8LP+>L)KOa6XXx#i0>g)TTGu*}pAIj3(57xTr z!{JS2MwL^Q0O^U1vb;cZnQj4Qtm%&RDz zlCLe+Sv9sjgTdE@ji^#^Kz24m%>5O1*h`$IkGGB4dUx#c0d@mDfuZOrM~&F0nHr|= z&Su$Vs}*J~^e7b#ldwdD*VtfKo?lxw!2bmIW`d5 za!Sryh(jy3NT_4&#sf|pLVC7uS3icnV}Eu5a`Ls3O{ek*i&id5XVE>;j&KIF!vM4! z4zIKK43Y$0avz*Y$3sGll+YFuEmyT&IT3|smldX`WLi7zqBfHYw_+xd0=mkVN zTlN2BiapBTi&bUW1z|Bg`(c%-7{RD^Fa7cMM1vphv7Ct8X7utx8zZV=JJY$*^VZiLaBJj;UdYFSoNu3OHm>W**PVO(R`b`;m{i|UE}M9``C!xxgHG^ z*D|L9_?ClmDz(O7e0gu3L2D@W%a*Z%bDrdT)eaU}3?DD5%ScNN9CYt=mTrs+n^Qv+ z3M(OR@@ zatyeahy?>91S46yGOTLi01@;idM@G^k~KKk zcUXb$HK%%2OoyiqEtETetU@oRYIzc`xmt@^f4bK7L|o4&#Dg4bw{_N9rMs57a7<;8 zxllgx6N)eg%`tY%8CPN8m78Mz2*>b2}fcd(m^B9CwA>?DH$IhuSxgg|^!xNQF2&>84qJZayFqXt`Hh|#sQsKxr=3>Igtx#<9`L4_@a5X zh(w4Ob3#CXB4+~*Bn}%x>pQYRS`Erqn@jgMHc+=*tBi~Kq#%ykd`6sxAQv`VovE|k z7`OeNbmAB*yTVRXQmX4;1%P_euzUWC$cE`LaBqjfgfn&mNGQjoQ(L&IjOg2t`CZvz zMJYs(Kacg41yNA-R9sYx+N3!5`#r<6WR|ETDBk7PppZz51?e7$G$h6Cr;uFK{=-Hj zP1I8WBIs2OqxmAz)C%G@^x`>%dK)VBr3E2ViPlBT@CqAapm&2@WI+=X+?E>5i1?K`JXl^(++>Q8(J1P+Bk+e~FT%CV}zC2-|@ zWf^DToFpq3%SS>K!D`KlSO>+hUNu={vN>fAZ<>pRuyv_KQecq?<{gOF+4-WpsrH~} zH(7JSZ!fxumE{Hb4Jc@L#`{a{=o z4;P4yB0bw~F4|C>U5IefT%xN&b@e`12Ka6-R-bKq792h!q(=zEZ#TA?CSb2z4q~td zZE)X6@cAx-FlX{iC#)_=iCM{0qH00s0%lAGS-~O{N;0G+GhE4^Nf}Dui#4&5A~$)4 zH-I?Bm#QPAEM;5zpfKK1FxH~>>7J^ziHm3H8px4cV*oqUX`mB!BW&1xg@Pgw>JuAP zDraq>FzL+dF>`jcWFT({tP!iVaYA3ctQb-EJqIPsA@O)gQO-sk-F024#Oz84@KsqX zUH1obUE@xl&_)w9Pd-N93Mh6icKlF?@r0xeHF|Z>5+=X`Y_wtrq|Y7P=cMjTYbmNO zQ-vUY&st;yuPRvCq>L^(wau1F$f>G5A`tMo!?KdZgz69~68XJ4U8pC2ZTT)o<(p2Y 
zvv8{UO`@SwM{LYiIzofnKHU>wCo*K4u_ZH~JR~aO!5HrVFUPA%Gsn>nD3iQNj zc@RKMhm!27No_xo=8NZ;=eUSFOltA(q}C)qjc^hCZb+JM%8?^vd|UeSx8%NYy&Gx~9j zezu?`OXOC5Ii_cmp)G(r@YEHSRP z7<(Djc@_eZTzjNPs%MS=?SW->%L|coF+R7(Tv?SXYpuM2+=r=(eSm2>+L&vK?pV7Z3p8jv&j#hgIW)IrzCV;$>ATMyne*{zhoJuG%>?j$ zRzwSEcPdv^zXnfKXAJ2CAD7G003Q%KrNOqPN5nzCn`oO7O{4hfmeP!n-_a!%jnqbau zO$HwVOKk(8cR@R2Zq-DT}xA9XiEcR8msa(-=h1 zR#uF+!rgipxRH9Yjrv;+lBkANRm9W`at@gF6~)vQd5P5~@0AyU8|5bESFSiZBm{Ej z#`d*JOOOWBO}`2xTqy|;%&2Gzz>&rQE;=}nNY2RZYIQ?~ltvGUAFte9`)8U;YU2|3 z9>w3CnxVgWgKP!>arLhA5>&I~_$tkzR@Du{$j z=zoGoL9%KBXm6+n{++}wyEv#ciKHP}vz)0P$oUd^NJkYrUrHcL<`azp`3ag;kC)YE zni(=)@qr`jH#&eK33WN7azv&AQN1sLmnk=x2x~UCUagCAyjO(go1%HaGxsTSLHH_b zn9-xSTMW|7ldJw%^r_wc$&&l`p;@Ix((Ea296apzZ?(uL^~+XcvdVw#ve2bkmep^z zWERVzTh=2o6*pQe$dWC3eX|3R6uKP7pj0 z>AgVo4`bA4RNhlz3?c3*rUAcldQ{CcHKOsruBN;UsWDs1NcCcpH-5<`N}vIx2@Tng88F(z?Y-N2(b6h7 z3e15Smk7bEb>Vq@LC?#okablO8&F7}AK0Kh`EeDT#1&McJ||%-B!yRVPg6|meSUjr zXBJ<*E8T->XKRiQo$O0YRvu}UW}TF$VB4croo@jQN2Z(!)Ivof={Gkvx@#=m25W4GeWD%MH#Pv352SugYl4LX~LfkgZB_v-10dW6i7VWX5$H^)r22GIP2idA*yk?s)}GJXJGN;?n^~ z6AeSYi_Vt~U(+m%FDN>1s+FKv9DWM$xF@~W`a|ZpVDUZUM8e4gh7{rowc2h32FWh^ z2WKr7W21tFByvj9b7zmU5gM0hv1;YzR6=qB^EHv`s2>rTzWxf&Dx2kod}CB2AXC;# zPWwF{e}&fi8V3qSO|N2*%27NEe6cjd&_+t5dH_Pz6p2K&{tTQgSSa zIRREPpxHWU5|ACbc&!3#xY}Wxs%&d6->9a=bJMB?mDxW_Oh1qgDKoY+5|WhpS}t3s zOZ`Mnv*AUjJK<`$_*$-jp;1B5i|oUWq&{4_Wgzzu>-S_82$2crT##AK*+u#+*&84b zrlfMr5zfjDbrx#`DpkVp4J zUBRfx5m%-0#U=2~FCbY2~T@M?F| zXBAc<0JY1Z*!lQ;oxA+&V4-x74j`yHCvdXaK}vjV76$9XJE`%pT?qZfg@@ z1M(Ee>Rsly{;FKT;tHYFpPH{)76(MD?OFVcxs?9p_{6cYx2oF9xE|scUe(9-*+z|7 z*0p*~#kw_4Qq;A7*3aw%C8t42*DKCF^Sqp)GW$X4+U)Rq58PzDt*t%LzbRV1iIp?CmQ78RTRi}hT1-jB%3<%MSVFv%f#)na5L?E`w#_LH`myk07zhI9k# zRi0MW$2+l}4QWqUkC#1(Dd}A(DfJ^Jmkij=h}MSkvmw~8EN0j+oPDB~430Q7bb^x6 zs&{7fVIYP;IG6Bvn2ULg1Tk#pjO-;Ah*oh+p}>r0!^s+pl|_<~u7u-2Nt`U8RFUm; z<0-h-%qDe1r#UGXp$S|xihIp*KdRQIS9)qrqw#1!k-;foxCvyPbXYN1WZHQeO-PDW zgcPuZ80V#T{tG{e@ZQ49q$;bu4MF&U4o9 z)J-23=)oYU!=M&1Se;U=yN;Dz{S#)6UQU;&H 
zA(}+;6rPYnk7q?<1?x=w%BJC|>RByvmNNCBc&RFw87KT3jXl=}{A$t~Lkof13>@qP zl~zKLXpB+E^1i0(b6S>9kp-R!S7`9cG)^=1YQ) zU`u1w!~@URz?Zfg2kQ2Qe4Kv;7Kwo)@Uj4Z6k<6k$*D<991J@h*-a%9sl}?Pn=*En z*r}Bn4#FYB(hA!v9%qOU4!(`Zn|q}8r@5>R1jJ{MTDvAmn+YD@Ht2{iN-gSyN{*0M zw0+EBZm(CE^9c}Pn6Q8cJe+^6@v}S^bR@M|`Mxf_JzYh1dL2h@O55mM-J9+sR(+ml z7XE7b$LXJ@Z=`>nemDKQ^sV$C(s$Bp>5tPJ=?Ce57pWL6Rurp>ONvX2wZ)*=P`snq zRBSG8EN&@oE#`_li@S<@3Z8K~vvte3-Xj|^BIO6P^^`8Os{=C|XST5BkyF~dSL@XL z*Ozg2TaU;Y!71sWPwF(Z2OMqHBj|*q^=$%+p(10|*;{6GD}iLSC&RFPw(8TWJ#o>6pO?cyS<+KhEEmhNxPkLtOwIIUm&FE>*h1XxCpCqD9J{^q-waceAd1~(`8(z>K8xqdGARAD z_d-~4zxY)Y5Vh=RKn#Cabq6UVuf>Q@h1|1zS;qi6ppjV3%PwSailrlFsJN>XLG0p% z`=qu$&2O}CR0g6alHni0uv_LdED247@JPV0H+&~vQw?*~dHkAOExU-W{gNyb;mL=n z`-hux(9Hhm8T%H(jQ_$uG9w}RY}MFjdDsFei?PO-;g;%vr2yaW@@sVciN{rVVmrzs zbCLS2E%}drDu5PQXd=k1axFb_>0rWHWFmTzdpwgM;(G4nI}CaaQn5}Bl$g|M2v$zvQR*{Uts#kRDk2tMx?JzgqOHB!cS6+Fxch(naZFp8qzU z-o{Fzt66{ip7cU7PCiOZBKoW!-!jfyf7j^mMi9P+ps}&jSOdcIAw97-*Gx*XM+p$> z&a(3&!jf{%tuhUTK%Oc{iJ@q)Q~jxReLUS1W&||9bDmJbhhkV6Mi>b1P`do$5}g>0}aEm&DHfD zZQx;+U*uf=X@H$Q4z_B#v1k?qy;TY~+|sJ}SI(w4l&U3_r5PDGpYHK5Gq&92%$Mm>ktRl+rQ@q(*-p?| z%S~&y|Du1-;*8%;l7588U3`V-w|||w_Ftsbx{mk1NvVB2r7^w#ETxO?e|b(zX96rA^Q^y+5VvZb<3+b-Yiew0Q;ZBfS4D@AD~b zxih63pnL1{JhuNA$W0KPZbyrHW@Nyeo;P&~H=AirC zyHmP@bKi3+r8~LTdv~U^owc^_ga5mJHl_D3rgZmRDeYLno9pjE2KT&_($2Y*KG@>@ z&6MuHgZJO#jf_78-yeD-r3bj~!CMmo^OMxs;NNd)Re=8$=@LD0-TxYCRWX~lN7C-% zALi{*&Y|z*f6cUkXTQh4R(eb0U50w{>2;YzS%Kg4# zkTz3MU6*d=X-@<0aFpYFIKGdubj=AqpYZ=pX+NVaaO@yk2RY*?*X(8AqrUbr#@$0? 
zxt{wUgwAWJ)Apywq2v0rb^BwF9zHs_Z_mOp>gvb1;xJbasNY2Ov0eKfdF;@hqcE`# z#vWsUJ?ZHG{gOK?Q4Kb4x_NNb^)s8V{_`Rn42^N;pCewZZ-Q|N@%l8wHwp6p&FtXE zvGAdNM~@vmj9#H;^G(xRrmwp>eaJF8hBPF%<=bthex2rbnA4KGN&FGPWU&b64SkSB ze?jXrf6Tu_>?=5HgeRakQ;SM@j99;dr&g~d;;-UaKwaYgTX;s&B}D%|Pt>}UXVYHB zGp*N>3#{Ym`~zl^-_FxkuOKtn09LL-4Da9xa8qb-lWDvj(QHARTbaL~f%cozE$N+9 z;P2vzRkP`~bbFcuId_m#+{qfM?d1A*rT6oT*&VE3xHs)gA3(A9@e76fk;ntA7yK~m n`+p*Rgr^ig#FMpmqwK}>chZ;AKS(d67pddFntmgFEv5ehM(RRK diff --git a/jcvi/utils/data/TREDs.meta.csv b/jcvi/utils/data/TREDs.meta.csv deleted file mode 100644 index 9e8a4ef8..00000000 --- a/jcvi/utils/data/TREDs.meta.csv +++ /dev/null @@ -1,33 +0,0 @@ -abbreviation,title,variant_type,gene_name,gene_location,id,motif,repeat,repeat_location,repeat_location.hg19,prefix,suffix,gene_part,inheritance,omim_id,url,src,mutation_nature,cutoff_prerisk,cutoff_risk,normal,prerisk,risk,symptom,allele_freq -DM1,Myotonic dystrophy 1,short tandem repeats,DMPK,chr19:45769708-45782556:-,chr19_45770205_CAG,CTG,CAG,chr19:45770205-45770264,chr19:46273463-46273522,AATGGTCTGTGATCCCCC,CATTCCCGGCTACAAGGA,3' UTR,AD,160900,http://en.wikipedia.org/wiki/Myotonic_dystrophy,wikipedia,increase,35,50,5-34 repeats,35-49 repeats,50+ repeats,"Myotonic dystrophy (DM) is a chronic, slowly progressing, highly variable, inherited multisystemic disease. In DM1, the affected gene is called DMPK, which codes for myotonic dystrophy protein kinase, a protein expressed predominantly in skeletal muscle. In DM1, there is an expansion of the cytosine-thymine-guanine (CTG) triplet repeat in the DMPK gene. 
","{3:1,4:2,5:8666,6:59,7:122,8:227,9:41,10:517,11:3263,12:3742,13:4178,14:1566,15:274,16:234,17:96,18:44,19:91,20:345,21:459,22:309,23:194,24:197,25:145,26:122,27:93,28:77,29:43,30:40,31:37,32:15,33:16,34:4,35:4,36:5,37:2,39:3,40:2,41:3,42:1,43:1,45:3,46:2,53:1,54:3,55:2,57:1,58:1,60:3,62:1,67:2,73:1}" -DM2,Myotonic dystrophy 2,short tandem repeats,ZNF9,chr3:129167814-129183966:-,chr3_129172577_CAGG,CCTG,CAGG,chr3:129172577-129172656,chr3:128891420-128891499,GGGACAAAGTGAGACAGA,CAGACAGACAGACAGACA,intron 1,AD,602668,http://en.wikipedia.org/wiki/Myotonic_dystrophy,wikipedia,increase,27,75,11-26 repeats,27-74 repeats,75+ repeats,"Myotonic dystrophy (DM) is a multisystem disorder and the most common form of muscular dystrophy in adults. Individuals with DM2 have muscle pain and stiffness, progressive muscle weakness, myotonia, male hypogonadism, cardiac arrhythmias, diabetes, and early cataracts. ","{1:1,3:2,4:5,5:5,6:39,7:125,8:38,9:29,10:115,11:127,12:193,13:63,14:405,15:15176,16:4884,17:1462,18:633,19:946,20:330,21:128,22:112,23:43,24:40,25:52,26:176,27:38,28:13,29:6,30:6,31:14,32:7,33:3,34:1,40:1,41:4,42:2,43:5,44:2,45:2,46:3,47:1,48:1,49:3,50:3,51:1,52:1,53:3,54:2,55:1,57:1,62:1,64:1,68:1,71:1,72:1}" -DRPLA,Dentatorubro-pallidoluysian atrophy,short tandem repeats,ATN1,chr12:6924462-6942320:+,chr12_6936729_CAG,CAG,CAG,chr12:6936729-6936773,chr12:7045892-7045936,CACCACCAGCAACAGCAA,CATCACGGAAACTCTGGG,coding region,AD,125370,http://en.wikipedia.org/wiki/Dentatorubral-pallidoluysian_atrophy,wikipedia,increase,36,48,6-35 repeats,36-47 repeats,48+ repeats,"Dentatorubro-pallidoluysian atrophy (DRPLA) can be juvenile-onset (< 20 years), early adult-onset (20-40 years), or late adult-onset (> 40 years). Early adult-onset DRPLA also includes seizures and myoclonus. Late adult-onset DRPLA is characterized by ataxia, choreoathetosis and dementia. 
DRPLA is an autosomal dominant spinocerebellar degeneration caused by an expansion of a CAG repeat encoding a polyglutamine tract in the atrophin-1 protein.","{5:13,6:18,7:64,8:2685,9:386,10:2265,11:375,12:1370,13:1595,14:1330,15:8475,16:3796,17:1467,18:488,19:295,20:313,21:130,22:58,23:76,24:27,25:17,26:6,27:1,28:2,29:2,30:1,34:2,39:1}" -FXTAS,Fragile X-associated tremor/ataxia syndrome,short tandem repeats,FMR1,chrX:147911950-147951126:+,chrX_147912051_CGG,CGG,CGG,chrX:147912051-147912110,chrX:146993569-146993628,AGGGGGCGTGCGGCAGCG,CTGGGCCTCGAGCGCCCG,5' UTR,XLD,300623,http://en.wikipedia.org/wiki/Fragile_X-associated_tremor/ataxia_syndrome,wikipedia,increase,55,55,5-44 repeats,-,55-200 repeats,"Fragile X-associated tremor/ataxia syndrome (FXTAS) is a late onset neurodegenerative disorder associated with problems of movement, memory, and the autonomic nervous system. It is related to the disease fragile X syndrome, although FXTAS is a clinically distinct syndrome. In fragile X syndrome (FXS), the fragile X mental retardation 1 gene, FMR1, is silenced; in FXTAS FMR1 is overexpressed and interferes with brain function. Both FXS and FXTAS are caused by a trinucleotide repeat expansion in FMR1. This CGG repeat expansion is smaller in FXTAS: the disease only occurs in individuals with a Fragile X permutation. It most often occurs in men, but can present in women. 
There is no cure for FXTAS, but several of the symptoms can be managed with medication.","{4:201,5:430,6:517,7:642,8:730,9:934,10:2266,11:525,12:844,13:1461,14:1585,15:1633,16:1483,17:1210,18:750,19:583,20:894,21:192,22:225,23:332,24:179,25:139,26:130,27:85,28:93,29:264,30:378,31:135,32:69,33:24,34:6,35:8,36:20,37:8,38:16,39:11,40:3,41:2,43:1,59:1,60:1}" -FXS,Fragile X syndrome,short tandem repeats,FMR1,chrX:147911950-147951126:+,chrX_147912051_CGG,CGG,CGG,chrX:147912051-147912110,chrX:146993569-146993628,AGGGGGCGTGCGGCAGCG,CTGGGCCTCGAGCGCCCG,5' UTR,XLD,300624,http://en.wikipedia.org/wiki/Fragile_X_syndrome,wikipedia,increase,200,200,5-44 repeats,-,200+ repeats,"Fragile X syndrome (FXS) results in a spectrum of intellectual disabilities ranging from mild to severe as well as physical characteristics such as an elongated face, large or protruding ears, and large testes (macroorchidism), and behavioral characteristics such as stereotypic movements (e.g. hand-flapping), and social anxiety. Fragile X syndrome is associated with the expansion of the CGG trinucleotide repeat affecting the Fragile X mental retardation 1 (FMR1) gene on the X chromosome, resulting in a failure to express the fragile X mental retardation protein (FMRP), which is required for normal neural development. Fragile X-associated tremor/ataxia syndrome (FXTAS) is a late onset neurodegenerative disorder associated with problems of movement, memory, and the autonomic nervous system. It is related to the disease fragile X syndrome, although FXTAS is a clinically distinct syndrome. Both FXS and FXTAS are caused by a trinucleotide repeat expansion in FMR1. 
This CGG repeat expansion is smaller in FXTAS: the disease only occurs in individuals with a Fragile X premutation.","{4:201,5:432,6:517,7:642,8:730,9:934,10:2266,11:525,12:844,13:1461,14:1587,15:1633,16:1483,17:1210,18:750,19:583,20:894,21:192,22:225,23:332,24:179,25:139,26:130,27:85,28:93,29:264,30:378,31:135,32:69,33:24,34:6,35:8,36:20,37:8,38:16,39:11,40:3,41:2,43:1,59:1,60:1}" -FRAXE,"Mental retardation, FRAXE type",short tandem repeats,FMR2,chrX:148500618-149000662:+,chrX_148500639_CCG,GCC,GCC,chrX:148500638-148500682,chrX:147582158-147582202,GCCGCCTGTGCAGCCGCT,GCTGCCGCCCCGGCTGCC,5' UTR,XLR,309548,http://en.wikipedia.org/wiki/Fragile_mental_retardation_2,wikipedia,increase,26,200,6-25 repeats,26-200 repeats,200+ repeats,Fragile XE mental retardation (FRAXE) is one of the most common forms of non-syndromic X-linked mental retardation. The most common mutation giving rise to this syndrome is a triplet expansion of CCG in the 5' untranslated region which leads to a silencing of the FMR2 gene.,"{3:5,4:723,5:887,6:1141,7:1402,8:1431,9:1535,10:1657,11:1705,12:1562,13:1115,14:759,15:1478,16:552,17:264,18:373,19:128,20:76,21:59,22:37,23:29,24:25,25:10,26:3,27:13,28:6,29:10,30:6,31:1,32:3,35:2,36:3,38:3,39:3,41:7,42:2,43:1,44:2}" -FRDA,Friedreich ataxia,short tandem repeats,FXN,chr9:69035562-69100177:+,chr9_69037287_GAA,GAA,GAA,chr9:69037287-69037304,chr9:71652203-71652220,ACAAAAAAAAAAAAAAAA,AATAAAGAAAAGTTAGCC,intron 1,AR,229300,http://en.wikipedia.org/wiki/Friedreich%27s_ataxia,wikipedia,increase,23,66,7-22 repeats,23-65 repeats,66+ repeats,"Friedreich's ataxia (FRDA) is an autosomal recessive inherited disease that causes progressive damage to the nervous system. It manifests in initial symptoms of poor coordination such as gait disturbance; it can also lead to scoliosis, heart disease and diabetes, but does not affect cognitive function. 
The particular genetic mutation (expansion of an intronic GAA triplet repeat in the FXN gene) leads to reduced expression of the mitochondrial protein frataxin. ","{2:1,3:1,5:246,6:333,7:152,8:7600,9:12407,10:219,11:29,12:163,13:147,14:151,15:117,16:247,17:460,18:649,19:543,20:421,21:306,22:200,23:199,24:122,25:129,26:98,27:50,28:52,29:26,30:21,31:19,32:8,33:7,34:3,35:4,36:4,38:2,40:2,41:1,43:3,44:1,45:1,46:1,52:1,53:1,54:4,55:1,56:4,57:7,58:6,59:7,60:8,61:7,62:9,63:8,64:5,65:9,66:1,67:5,68:5,69:3,70:3,71:5,72:4,73:1,74:1,75:1,77:3,78:1,79:1,80:1,83:1,84:1,86:1}" -HD,Huntington disease,short tandem repeats,HTT,chr4:3074509-3243959:+,chr4_3074877_CAG,CAG,CAG,chr4:3074877-3074933,chr4:3076604-3076660,GAGTCCCTCAAGTCCTTC,CAACAGCCGCCACCGCCG,coding region,AD,143100,http://en.wikipedia.org/wiki/Huntington%27s_disease,wikipedia,increase,36,40,<26 repeats,36-39 repeats,40+ repeats,Huntington's disease (HD) is a neurodegenerative genetic disorder that affects muscle coordination and leads to mental decline and behavioral symptoms. HD is one of several trinucleotide repeat disorders which are caused by the length of a repeated section of a gene exceeding a normal range.,"{3:1,4:1,8:1,9:106,10:140,11:35,12:148,13:56,14:234,15:2837,16:1383,17:8329,18:3441,19:2280,20:1650,21:1064,22:753,23:770,24:570,25:370,26:283,27:196,28:176,29:124,30:75,31:52,32:58,33:43,34:30,35:20,36:16,37:5,38:4,39:4,40:3,41:1,44:1}" -HDL,Huntington disease-like 2,short tandem repeats,JPH3,chr16:87601834-87698155:+,chr16_87604288_CTG,CTG,CTG,chr16:87604288-87604329,chr16:87637894-87637935,CGGAAGCCAGGGAGCTGC,TAAGATGGTTTCTGTGCA,3' UTR,AD,606438,http://www.omim.org/entry/606438,omim,increase,29,40,6-28 repeats,29-39 repeats,40+ repeats,"Huntington disease-like 2 (HDL2) is clinically similar to Huntington disease but arose from a CAG expansion in a different gene. 
The disorder is characterized by onset in the fourth decade, involuntary movements and abnormalities of voluntary movements, psychiatric symptoms, weight loss, dementia, and relentless course with death about 20 years after disease onset. ","{5:10,6:1,7:19,8:82,9:18,10:39,11:606,12:113,13:645,14:12231,15:2128,16:6958,17:1739,18:315,19:185,20:15,21:2,22:11,23:17,24:6,25:24,26:38,27:43,28:14,29:2,33:1}" -ULD,"Epilepsy, progressive myoclonic 1A/Unverricht-Lundborg Disease",short tandem repeats,CSTB,chr21:43773664-43776374:-,chr21_43776444_CGCGGGGCGGGG,CCCCGCCCCGCG,CGCGGGGCGGGG,chr21:43776444-43776479,chr21:45196325-45196360,GCCCCGCAAGAAGGGACG,AACCTGGCCACCACTCGC,5' UTR/Promoter,AR,254800,http://www.omim.org/entry/254800,omim,increase,30,30,2-3 repeats,-,30+ repeats,"Myoclonic epilepsy of Unverricht and Lundborg (ULD) is an autosomal recessive disorder characterized by onset of neurodegeneration between 6 and 13 years of age. Although it is considered a progressive myoclonic epilepsy, it differs from other forms in that is appears to be progressive only in adolescence, with dramatic worsening of myoclonus and ataxia in the first 6 years after onset. ","{1:6,2:9403,3:15740,4:61,5:30,6:11,10:1}" -OPMD,Oculopharyngeal muscular dystrophy,short tandem repeats,PABPN1,chr14:23320187-23326184:+,chr14_23321473_GCN,GCN,GCN,chr14:23321473-23321502,chr14:23790682-23790711,CCAGTCTGAGCGGCGATG,GGGGCTGCGGGCGGTCGG,coding region,AD,164300,http://www.ncbi.nlm.nih.gov/books/NBK1126/,omim,increase,11,12,2-10 repeats,11 repeats,12-17 repeats,Oculopharyngeal muscular dystrophy (OPMD) is an autosomal dominant disorder presenting in late life and characterized by dysphagia and progressive ptosis of the eyelids. 
,"{6:1,7:1,8:4,9:98,10:25073,11:73,12:2,13:6}" -SBMA,Spinal and bulbar muscular atrophy of Kennedy,short tandem repeats,AR,chrX:67544031-67730618:+,chrX_67545318_CAG,CAG,CAG,chrX:67545318-67545383,chrX:66765160-66765225,GCCAGTTTGCTGCTGCTG,CAAGAGACTAGCCCCAGG,exon 1,XLR,313200,http://en.wikipedia.org/wiki/Spinal_and_bulbar_muscular_atrophy,wikipedia,increase,36,36,<34 repeats,-,36+ repeats,"Spinal and bulbar muscular atrophy (SBMA) is a debilitating neurodegenerative disorder resulting in muscle cramps and progressive weakness due to degeneration of motor neurons in the brain stem and spinal cord. SBMA is caused by expansion of a CAG repeat in the first exon of the androgen receptor gene (trinucleotide repeats). The greater the expansion of the CAG repeat, the earlier the disease onset and more severe the disease manifestations. ","{1:1,2:3,4:2,5:2,6:1,7:24,8:13,9:8,10:5,11:11,12:30,13:76,14:183,15:159,16:251,17:678,18:1456,19:1964,20:2114,21:3202,22:2191,23:2098,24:2013,25:1465,26:859,27:477,28:281,29:242,30:86,31:53,32:19,33:15,34:7,35:6,36:10,37:3,38:1,40:2,41:1}" -SCA1,Spinocerebellar ataxia 1,short tandem repeats,ATXN1,chr6:16299111-16761489:-,chr6_16327636_CTG,CAG,CTG,chr6:16327636-16327722,chr6:16327867-16327953,CGGAGCCCTGCTGAGGTG,CTCAGCCTTGTGTCCCGG,coding region,AD,164400,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,36,39,6-35 repeats,36-38 repeats,39+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by an expanded (CAG)n trinucleotide repeat in the ataxin-1 gene (ATXN1). 
SCA1 patient experiences hypermetric and slow saccades.","{2:2,3:2,4:1,5:2,6:2,7:3,8:5,9:1,10:3,11:2,12:5,13:1,14:2,15:2,16:8,17:2,18:4,19:53,20:51,21:25,22:40,23:63,24:35,25:64,26:842,27:589,28:1933,29:7441,30:8337,31:2190,32:1995,33:711,34:252,35:226,36:291,37:32,38:17,39:10,40:2,41:2,42:1,43:3,44:1,45:2,55:2,66:1,68:1,76:1}" -SCA2,Spinocerebellar ataxia 2,short tandem repeats,ATXN2,chr12:111452213-111599675:-,chr12_111598951_CTG,CAG,CTG,chr12:111598951-111599019,chr12:112036755-112036823,GGCAGCCGCGGGCGGCGG,GGGCTTCAGCGACATGGT,coding region,AD,183090,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,32,33,<31 repeats,32 repeats,33+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by an expanded (CAG)n trinucleotide repeat in the gene encoding ataxin-2 (ATXN2). SCA2 patient experiences diminished velocity saccades and areflexia (absense of neurologic reflexes).","{8:2,9:4,10:5,11:2,12:10,13:20,14:6,15:21,16:18,17:108,18:14,19:89,20:88,21:978,22:20300,23:2877,24:181,25:75,26:24,27:274,28:22,29:66,30:35,31:29,32:10,33:1,36:1,39:1,60:1}" -SCA3,Spinocerebellar ataxia 3,short tandem repeats,ATXN3,chr14:92058551-92106620:-,chr14_92071011_CTG,CAG,CTG,chr14:92071011-92071034,chr14:92537355-92537378,CTGTCCTGATAGGTCCCC,TTGCTGCTTTTGCTGCTG,coding region,AD,109150,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,45,60,12-44 repeats,45 repeats,~60+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by a (CAG)n trinucleotide repeat expansion encoding glutamine repeats in the ataxin-3 gene (ATXN3). 
SCA3 patient experiences Gaze-evoked nystagmus (a rapid, involuntary, oscillatory motion of the eyeball) and slow saccades.","{2:1,3:1,6:5,7:9,8:5377,9:41,10:28,11:15,12:171,13:270,14:1191,15:1990,16:1126,17:6820,18:1129,19:219,20:423,21:3156,22:1233,23:465,24:412,25:233,26:196,27:218,28:205,29:125,30:90,31:46,32:23,33:12,34:12,35:10,36:2,37:2,38:2,42:1,55:1}" -SCA6,Spinocerebellar ataxia 6,short tandem repeats,CACNA1A,chr19:13206441-13506459:-,chr19_13207859_CTG,CAG,CTG,chr19:13207859-13207897,chr19:13318673-13318711,GCCCGGCCTGGCCACCGC,CGGGGGCCCCGAGCCGCC,coding region,AD,183086,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,19,20,<18 repeats,19 repeats,20+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease. SCA6 patient experiences fownbeating nystagmus and positional vertigo.","{3:1,4:223,5:2,6:10,7:2068,8:124,9:18,10:194,11:9596,12:4467,13:7557,14:878,15:75,16:22,17:10,18:12,19:3,20:2}" -SCA7,Spinocerebellar ataxia 7,short tandem repeats,ATXN7,chr3:63864556-64003461:+,chr3_63912686_CAG,CAG,CAG,chr3:63912686-63912715,chr3:63898362-63898391,GCGGCCGCGGCCGCCCGG,CCGCCGCCTCCGCAGCCC,coding region,AD,164500,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,28,34,<19 repeats,28-33 repeats,34+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by an expanded trinucleotide repeat in the gene encoding ataxin-7 (ATXN7). 
SCA7 patient experiences macular degeneration, upper motor neuron and slow saccades.","{4:2,5:12,6:18,7:340,8:66,9:488,10:17886,11:1697,12:3183,13:1272,14:138,15:48,16:26,17:14,18:9,19:13,20:4,23:3,32:1}" -SCA8,Spinocerebellar ataxia 8,short tandem repeats,ATXN8OS/ATXN8,chr13:70107213-70139429:+,chr13_70139384_CTG,CTG/CAG,CTG,chr13:70139384-70139428,chr13:70713516-70713560,CTACTACTACTACTACTA,CATTTTTTAAAAATATAT,"untranslated RNA, coding region",AD,603680,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,80,80,15-50 repeats,-,80+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by a CTG/CAG trinucleotide repeat expansion. SCA8 patient experiences horizontal nystagmus (a rapid, involuntary, oscillatory motion of the eyeball), instability and lack of coordination.","{4:2,5:5,6:111,7:32,8:123,9:3757,10:226,11:755,12:5301,13:546,14:1226,15:5791,16:2824,17:1647,18:627,19:566,20:663,21:353,22:120,23:116,24:58,25:29,26:23,27:25,28:17,29:6,30:7,31:4,32:12,33:18,34:11,35:11,36:31,37:18,38:13,39:5,40:4,41:6,42:2,43:6,44:3,45:3,46:3,47:1,51:1,52:6,53:10,54:8,55:13,56:14,57:9,58:5,59:11,60:7,61:7,62:8,63:9,64:5,65:5,66:5,67:5,68:1,69:2,70:3,71:4,72:3,73:1,74:2,75:2,76:2,78:1,80:1,89:1,92:1,100:1}" -SCA10,Spinocerebellar ataxia 10,short tandem repeats,ATXN10,chr22:45671797-45845306:+,chr22_45795355_ATTCT,ATTCT,ATTCT,chr22:45795355-45795424,chr22:46191235-46191304,AAAAGACTACTAGAATGG,TTTTGAGATGAAGTCTCT,intron 9,AD,603516,http://www.omim.org/entry/603516,omim,increase,800,800,10-32 repeats,-,800+ repeats,"Spinocerebellar ataxia type 10 (SCA10) is a progressive, genetic neurodegenerative disease, caused by a ATTCT repeat expansion. The autosomal dominant cerebellar ataxias (ADCAs) are a clinically and genetically heterogeneous group of disorders characterized by ataxia, dysarthria, dysmetria, and intention tremor. 
","{7:58,8:11,9:18,10:89,11:457,12:3296,13:6573,14:8078,15:3069,16:1826,17:875,18:488,19:183,20:132,21:47,22:21,23:8,24:2,25:5,31:1,34:2,35:1,36:3,37:5,38:2,39:2,40:1,41:2,42:1,44:3,45:1,46:1,47:1}" -SCA12,Spinocerebellar ataxia 12,short tandem repeats,PPP2R2B,chr5:146589503-147081519:-,chr5_146878729_CTG,CAG,CTG,chr5:146878729-146878758,chr5:146258292-146258321,ACACGCGCGCACTCGCAG,CAGGAGGCTGGAGGCGGC,5' UTR/Promoter,AD,604326,http://www.omim.org/entry/604326,omim,increase,51,51,8-23 repeats,-,51+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease, caused by a CTG/CAG trinucleotide repeat expansion. Spinocerebellar ataxia type 12 (SCA12) is a prototypic phenotype was that of a classic spinocerebellar ataxia, and the disease resembled the spinocerebellar ataxias more closely than any other form of neurodegenerative disorder. ","{2:1,4:2,5:2,6:9,7:8,8:76,9:1482,10:13389,11:232,12:40,13:3734,14:2041,15:2703,16:687,17:301,18:337,19:92,20:35,21:19,22:8,23:18,24:12,25:10,26:8,27:5,28:4,31:2,33:2,35:1}" -SCA17,Spinocerebellar ataxia 17,short tandem repeats,TBP,chr6:170554332-170572869:+,chr6_170561908_CAG,CAG,CAG,chr6:170561908-170562021,chr6:170870996-170871109,TTGGAAGAGCAACAAAGG,GCAGTGGCAGCTGCAGCC,coding region,AD,607136,http://en.wikipedia.org/wiki/Spinocerebellar_ataxia,wikipedia,increase,43,43,25-40 repeats,-,43+ repeats,"Spinocerebellar ataxia (SCA) is a progressive, genetic neurodegenerative disease. SCA17 patient experiences mental retardation. 
SCA17 can be caused by heterozygous expansion of a trinucleotide repeat encoding glutamine (CAG or CAA) in the TATA box-binding protein TBP.","{5:1,7:1,11:1,12:2,14:1,15:4,16:2,17:7,18:2,19:5,20:6,21:4,22:1,23:3,24:5,25:11,26:10,27:24,28:26,29:140,30:178,31:79,32:446,33:546,34:730,35:2429,36:4571,37:6039,38:7589,39:1762,40:400,41:128,42:48,43:15,44:26,45:7,46:4,52:1,53:2}" -SCA36,Spinocerebellar ataxia 36,short tandem repeats,NOP56,chr20:2652531-2658392:+,chr20_2652734_GGCCTG,GGCCTG,GGCCTG,chr20:2652734-2652757,chr20:2633380-2633403,CGTTCGGGCCGCAGACAG,CGCCTGCGCCTGCGCCTG,intron 1,AD,614153,http://www.omim.org/entry/614153,omim,increase,650,650,3-14 repeats,-,650+ repeats,"Spinocerebellar ataxia type 36 (SCA36) is a slowly progressive neurodegenerative disorder characterized by adult-onset gait ataxia, eye movement abnormalities, tongue fasciculations, and variable upper motor neuron signs. ","{3:44,4:8914,5:1250,6:833,7:10748,8:809,9:2222,10:227,11:165,12:35,13:8,14:3}" -EIEE1,"Epileptic encephalopathy, early infantile, 1",short tandem repeats,ARX,chrX:25003693-25015947:-,chrX_25013662_CGC,GCG,CGC,chrX:25013662-25013691,chrX:25031779-25031808,GGCCGTGGCGGCCGCTGC,TGCCGCACCCTGAAGGAG,coding region,XLR,308350,http://www.omim.org/entry/308350,omim,increase,20,20,<12 repeats,-,20+ repeats,"Early infantile epileptic encephalopathy (EIEE1) is characterized by frequent tonic seizures or spasms beginning in infancy with a specific EEG finding of suppression-burst patterns, characterized by high-voltage bursts alternating with almost flat suppression phases. 
","{3:1,4:55,5:72,6:119,7:146,8:158,9:584,10:18651,11:25,12:18,13:2,14:2,15:1}" -BPES,"Blepharophimosis, epicanthus inversus, and ptosis",short tandem repeats,FOXL2,chr3:138944223-138947139:-,chr3_138946021_NGC,GCN,NGC,chr3:138946021-138946062,chr3:138664863-138664904,GCCAGGGCTACCGGGGCC,CATCTGGCAGGAGGCATA,coding region,AD,110100,http://www.omim.org/entry/110100,omim,increase,19,19,14 repeats,-,19-24 repeats,"Blepharophimosis, epicanthus inversus, and ptosis (BPES) patients show dysplasia of the eyelids. In addition to small palpebral fissures, features include epicanthus inversus (fold curving in the mediolateral direction, inferior to the inner canthus), low nasal bridge, and ptosis of the eyelids.","{4:77,5:1,8:1,9:6,12:1,13:31,14:25129,15:7,16:2,19:1}" -CCD,Cleidocranial dysplasia,short tandem repeats,RUNX2,chr6:45327799-45664031:+,chr6_45422751_GCN,GCN,GCN,chr6:45422751-45422801,chr6:45390488-45390538,CAGCAGCAGCAGCAGGAG,GTGCCCCGGTTGCGGCCG,coding region,AD,119600,http://www.omim.org/entry/119600,omim,increase,27,27,17 repeats,-,27 repeats,"Cleidocranial dysplasia (CCD) patients show persistently open skull sutures with bulging calvaria, hypoplasia or aplasia of the clavicles permitting abnormal facility in apposing the shoulders, wide pubic symphysis, short middle phalanx of the fifth fingers, dental anomalies, and often vertebral malformation.","{6:1,10:4,11:1337,12:5,14:16,16:73,17:23694,18:111,19:2,20:2,21:2,22:2,23:3,24:1,29:1,30:2,36:1,39:1}" -CCHS,Central hypoventilation syndrome,short tandem repeats,PHOX2B,chr4:41744081-41748969:-,chr4_41745972_NGC,GCN,NGC,chr4:41745972-41746031,chr4:41747989-41748048,AGCCGCAGCCAGGCCTCC,GCCGCCCTTGCCGGGTTC,coding region,AD,209880,http://www.omim.org/entry/209880,omim,increase,24,24,20 repeats,-,24 repeats,"Central hypoventilation syndrome (CCHS), also known as 'Ondine's curse', is a rare disorder characterized by abnormal control of respiration in the absence of neuromuscular, lung or cardiac disease, or an identifiable 
brainstem lesion. ","{6:6,7:41,8:18,9:24,10:25,11:39,12:39,13:156,14:353,15:227,16:85,17:115,18:124,19:210,20:23733,21:9,22:15,23:2,24:1,25:5,26:1,27:4}" -HFG,Hand-foot-uterus syndrome,short tandem repeats,HOXA13,chr7:27193418-27200265:-,chr7_27199679_NGC,GCN,NGC,chr7:27199925-27199966,chr7:27239544-27239585,GCCCCCGCCCCCGGCCCC,CCCTTCCATGTTCTTGTT,exon 1,AD,140000,http://www.omim.org/entry/140000,omim,increase,22,22,14/12/12 repeats,-,22/18/18 repeats,"Hand-foot-uterus (HFG) patients show small feet with unusually short great toes and abnormal thumbs. Females with the disorder have duplication of the genital tract, including longitudinal vaginal septum ","{4:3,5:248,6:17,7:7,8:10,9:17,10:12,11:30,12:35,13:102,14:24620,15:16,16:16,17:5,18:2,20:1,21:1,22:1,23:1}" -HPE5,Holoprosencephaly-5,short tandem repeats,ZIC2,chr13:99981771-99986764:+,chr13_99985449_GCN,GCN,GCN,chr13:99985449-99985493,chr13:100637703-100637747,AGCTCCAACCTGTCCCCA,GTGTCCGCGGTGCACCGG,coding region,AD,609637,http://www.omim.org/entry/609637,omim,increase,25,25,15 repeats,-,25 repeats,Holoprosencephaly (HPE5) is the most common structural anomaly of the human brain and is one of the anomalies seen in patients with deletions and duplications of chromosome 13. ,"{3:1,5:16,6:11,7:3,8:7,9:8,10:55,11:18,12:24,13:14,14:435,15:24561,16:44,17:30,18:11,19:5,20:12,21:1}" -SD5,Syndactyly,short tandem repeats,HOXD13,chr2:176087504-176095937:+,chr2_176093059_GCN,GCN,GCN,chr2:176093059-176093103,chr2:176957787-176957831,GGGACGCATTCGGGGCGG,TCCGGCTTTGCGTACCCC,coding region,AD,186300,http://www.omim.org/entry/186300,omim,increase,22,22,15 repeats,-,22 repeats,Syndactyly (SD5) patients show the characteristic of the presence of an associated metacarpal and metatarsal fusion. 
,"{6:8,7:5,8:4,9:1,11:8,12:75,13:2,14:67,15:25047,16:18,17:19,18:5,24:1}" -XLMR,"Mental retardation, X-linked, with isolated growth hormone deficiency",short tandem repeats,SOX3,chrX:140502986-140505059:-,chrX_140504317_NGC,GCN,NGC,chrX:140504317-140504361,chrX:139586482-139586526,CACGCCCACCGGACTGCT,ACCGGGAGGCAGGAGGCC,coding region,XLR,300123,http://www.omim.org/entry/300123,omim,increase,22,22,15 repeats,-,22 repeats,"Mental retardation, X-linked, with hormone deficiencies (XLMR) is caused by defects of SOX3, which encodes a transcription factor that regulates embryonic development and determines cell fate.","{6:3,7:1,8:34,9:37,10:1,11:1,13:3,14:8,15:19874,16:34,17:15,23:1}" -AR,Susceptibility to prostate cancer due to Androgen Receptor expression,short tandem repeats,AR,chrX:67544032-67730619:+,chrX_67545318_CAG,CAG,CAG,chrX:67545318-67545383,chrX:66765160-66765225,GCCAGTTTGCTGCTGCTG,CAAGAGACTAGCCCCAGG,coding region,XLR,176807,http://www.omim.org/entry/176807,omim,decrease,18,8,22 repeats,<=18 repeats,<=8 repeats,"The length of a polymorphic CAG repeat sequence, occurring in the androgen receptor gene, is inversely correlated with transcriptional activity by the androgen receptor. Because heightened androgenic stimulation may increase risk of prostate cancer development and progression, we examined whether shorter CAG repeats in the androgen receptor gene are related to higher risk of prostate cancer. An association existed between fewer androgen receptor gene CAG repeats and higher risk of total prostate cancer [relative risk (RR) = 1.52; 95% confidence interval (CI) = 0.92-2.49; P trend = 0.04; for men with CAG repeat lengths <=18 relative to >=26 repeats]. In particular, a shorter CAG repeat sequence was associated with cancers characterized by extraprostatic extension or distant metastases (stage C or D) or high histologic grade (RR = 2.14; CI = 1.14-4.01; P trend = 0.001). 
This association was observed individually both for high stage (RR = 2.23) and high grade prostate cancer (RR = 1.89). Men with shorter repeats were at particularly high risk for distant metastatic and fatal prostate cancer. Variability in the CAG repeat length was not associated with low grade or low stage disease. These results demonstrate that a shorter CAG repeat sequence in the androgen receptor gene predicts higher grade and advanced stage of prostate cancer at diagnosis, and metastasis and mortality from the disease.","{1:1,2:3,4:2,5:2,6:1,7:24,8:13,9:8,10:5,11:11,12:30,13:76,14:183,15:159,16:252,17:677,18:1458,19:1964,20:2114,21:3202,22:2191,23:2098,24:2013,25:1465,26:859,27:477,28:281,29:242,30:86,31:53,32:19,33:15,34:7,35:6,36:10,37:3,38:1,40:2,41:1}" -ALS,"Amyotrophic lateral sclerosis, or familial frontotemporal dementia",short tandem repeats,C9orf72,chr9:27546546-27573866:-,chr9_27573529_GGCCCC,GGGGCC,GGCCCC,chr9:27573529-27573546,chr9:27573527-27573544,CCGCCCCGACCACGCCCC,TAGCGCGCGACTCCTGAG,intron 1,AD,105550,http://www.omim.org/entry/105550,omim,increase,31,31,<31 repeats,-,31+ repeats,The hexamer G4C2 repeat expansion in the C9orf72 locus is a major cause of both ALS and frontotemporal dementia. 
The pathogenic repeat length (>30 repeats) is present in ~10% of all ALS patients including ~40% of familial ALS cases and ~6-8% of sporadic ALS cases in some populations.,"{1:12,2:13703,3:307,4:1005,5:2987,6:1830,7:1194,8:2548,9:407,10:662,11:285,12:181,13:70,14:36,15:12,16:9,17:5,18:1,19:2}" \ No newline at end of file diff --git a/jcvi/utils/data/__init__.py b/jcvi/utils/data/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/jcvi/utils/data/adapters.fasta b/jcvi/utils/data/adapters.fasta deleted file mode 100644 index bf72c1e0..00000000 --- a/jcvi/utils/data/adapters.fasta +++ /dev/null @@ -1,38 +0,0 @@ ->PrefixNX/1 -AGATGTGTATAAGAGACAG ->Trans1 -TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG ->Trans1_rc -CTGTCTCTTATACACATCTGACGCTGCCGACGA ->Trans2 -GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG ->Trans2_rc -CTGTCTCTTATACACATCTCCGAGCCCACGAGAC ->PrefixPE/1 -AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT ->PrefixPE/2 -CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATC -T ->PCR_Primer1_rc -AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT ->PCR_Primer2_rc -AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTT -G ->FlowCell1 -TTTTTTTTTTAATGATACGGCGACCACCGAGATCTACAC ->FlowCell2 -TTTTTTTTTTCAAGCAGAAGACGGCATACGA ->TruSeq2_SE -AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG ->TruSeq2_PE_f -AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT ->TruSeq2_PE_r -AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG ->PE1 -TACACTCTTTCCCTACACGACGCTCTTCCGATCT ->PE1_rc -AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA ->PE2 -GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT ->PE2_rc -AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC diff --git a/jcvi/utils/data/blosum80.mat b/jcvi/utils/data/blosum80.mat deleted file mode 100644 index e6008c23..00000000 --- a/jcvi/utils/data/blosum80.mat +++ /dev/null @@ -1,40 +0,0 @@ -# Blosum80 -# Matrix made by matblas from blosum80.iij -# * column uses minimum score -# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units -# Blocks Database = /data/blocks_5.0/blocks.dat -# Cluster Percentage: >= 80 -# 
Entropy = 0.9868, Expected = -0.7442 -GAP-PENALTIES=12 6 6 - A R N D C Q E G H I L K M F P S T W Y V B Z X ? a g t c u ] n -A 7 -3 -3 -3 -1 -2 -2 0 -3 -3 -3 -1 -2 -4 -1 2 0 -5 -4 -1 -3 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9 -R -3 9 -1 -3 -6 1 -1 -4 0 -5 -4 3 -3 -5 -3 -2 -2 -5 -4 -4 -2 0 -2 -9 -9 -9 -9 -9 -9 -9 -9 -N -3 -1 9 2 -5 0 -1 -1 1 -6 -6 0 -4 -6 -4 1 0 -7 -4 -5 5 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9 -D -3 -3 2 10 -7 -1 2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6 6 1 -3 -9 -9 -9 -9 -9 -9 -9 -9 -C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4 -9 -9 -9 -9 -9 -9 -9 -9 -Q -2 1 0 -1 -5 9 3 -4 1 -5 -4 2 -1 -5 -3 -1 -1 -4 -3 -4 -1 5 -2 -9 -9 -9 -9 -9 -9 -9 -9 -E -2 -1 -1 2 -7 3 8 -4 0 -6 -6 1 -4 -6 -2 -1 -2 -6 -5 -4 1 6 -2 -9 -9 -9 -9 -9 -9 -9 -9 -G 0 -4 -1 -3 -6 -4 -4 9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9 -H -3 0 1 -2 -7 1 0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4 3 -5 -1 0 -2 -9 -9 -9 -9 -9 -9 -9 -9 -I -3 -5 -6 -7 -2 -5 -6 -7 -6 7 2 -5 2 -1 -5 -4 -2 -5 -3 4 -6 -6 -2 -9 -9 -9 -9 -9 -9 -9 -9 -L -3 -4 -6 -7 -3 -4 -6 -7 -5 2 6 -4 3 0 -5 -4 -3 -4 -2 1 -7 -5 -2 -9 -9 -9 -9 -9 -9 -9 -9 -K -1 3 0 -2 -6 2 1 -3 -1 -5 -4 8 -3 -5 -2 -1 -1 -6 -4 -4 -1 1 -2 -9 -9 -9 -9 -9 -9 -9 -9 -M -2 -3 -4 -6 -3 -1 -4 -5 -4 2 3 -3 9 0 -4 -3 -1 -3 -3 1 -5 -3 -2 -9 -9 -9 -9 -9 -9 -9 -9 -F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1 0 -5 0 10 -6 -4 -4 0 4 -2 -6 -6 -3 -9 -9 -9 -9 -9 -9 -9 -9 -P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3 -9 -9 -9 -9 -9 -9 -9 -9 -S 2 -2 1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2 7 2 -6 -3 -3 0 -1 -1 -9 -9 -9 -9 -9 -9 -9 -9 -T 0 -2 0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3 2 8 -5 -3 0 -1 -2 -1 -9 -9 -9 -9 -9 -9 -9 -9 -W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3 0 -7 -6 -5 16 3 -5 -8 -5 -5 -9 -9 -9 -9 -9 -9 -9 -9 -Y -4 -4 -4 -6 -5 -3 -5 -6 3 -3 -2 -4 -3 4 -6 -3 -3 3 11 -3 -5 -4 -3 -9 -9 -9 -9 -9 -9 -9 -9 -V -1 -4 -5 -6 -2 -4 -4 -6 -5 4 1 -4 1 -2 -4 -3 0 -5 -3 7 -6 -4 -2 -9 -9 -9 -9 -9 -9 -9 -9 -B -3 -2 5 6 -6 -1 1 -2 -1 -6 -7 -1 -5 -6 
-4 0 -1 -8 -5 -6 6 0 -3 -9 -9 -9 -9 -9 -9 -9 -9 -Z -2 0 -1 1 -7 5 6 -4 0 -6 -5 1 -3 -6 -2 -1 -2 -5 -4 -4 0 6 -1 -9 -9 -9 -9 -9 -9 -9 -9 -X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2 -9 -9 -9 -9 -9 -9 -9 -9 -? -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -a -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 4 -2 -2 -2 -2 -9 0 -g -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 4 -2 -2 -2 -9 0 -t -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 4 -2 4 -9 0 -c -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 -2 4 -2 -9 0 -u -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -2 -2 4 -2 4 -9 0 -] -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -n -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 0 0 0 0 0 -9 0 diff --git a/jcvi/utils/data/chrY.hg38.unique_ccn.gc b/jcvi/utils/data/chrY.hg38.unique_ccn.gc deleted file mode 100644 index 6f771a4e..00000000 --- a/jcvi/utils/data/chrY.hg38.unique_ccn.gc +++ /dev/null @@ -1,300 +0,0 @@ -chrY 2784557 2791188 39 -chrY 2801260 2807624 38 -chrY 3001290 3010033 46 -chrY 4208387 4214156 44 -chrY 6532336 6537578 32 -chrY 6556575 6562243 34 -chrY 6749492 6759172 41 -chrY 6780750 6788302 40 -chrY 6807376 6813277 37 -chrY 6824953 6830628 37 -chrY 6840559 6848076 40 -chrY 6858179 6866161 39 -chrY 6890149 6895762 44 -chrY 6948842 6956239 38 -chrY 6958596 6965916 41 -chrY 7021311 7026777 40 -chrY 7129616 7136420 41 -chrY 7168704 7184007 43 -chrY 7192517 7204933 43 -chrY 7475993 7481505 41 -chrY 7499729 7505044 46 -chrY 7550843 7557901 44 -chrY 7654904 7661272 48 -chrY 7706983 7712325 42 -chrY 7719870 7726971 45 -chrY 7760061 7767544 40 -chrY 7828110 7844640 42 -chrY 7884997 7892945 40 -chrY 7894240 7899526 35 -chrY 7909385 7914546 39 -chrY 7952588 7961745 40 -chrY 7985485 
7999389 39 -chrY 8096558 8111236 35 -chrY 8121937 8127095 42 -chrY 8127642 8133073 37 -chrY 8195288 8200644 38 -chrY 8202366 8213685 42 -chrY 8233698 8238943 31 -chrY 8250681 8255940 51 -chrY 8263716 8269483 35 -chrY 8398026 8404720 35 -chrY 8475928 8482558 38 -chrY 8535415 8540967 46 -chrY 8645758 8653464 43 -chrY 8712894 8718966 44 -chrY 8803075 8812356 44 -chrY 8863076 8870962 45 -chrY 8943669 8952269 36 -chrY 8978916 8990553 44 -chrY 8995434 9001730 42 -chrY 9217975 9225816 35 -chrY 9238021 9245857 40 -chrY 9272802 9278977 43 -chrY 9280714 9289444 44 -chrY 9571046 9576635 41 -chrY 9920843 9926256 42 -chrY 9971808 9980413 41 -chrY 10042785 10053869 41 -chrY 10055746 10062704 39 -chrY 10093144 10098264 38 -chrY 10108505 10114835 38 -chrY 10158930 10166993 38 -chrY 11071908 11077229 49 -chrY 11490469 11500385 37 -chrY 11647705 11652919 37 -chrY 11653098 11660229 38 -chrY 11664384 11669803 37 -chrY 11674119 11721364 38 -chrY 11863344 11869166 42 -chrY 11888418 11900677 44 -chrY 11967783 11975761 44 -chrY 11998168 12004009 40 -chrY 12006921 12013128 39 -chrY 12056971 12063288 40 -chrY 12066290 12073346 40 -chrY 12148758 12153839 40 -chrY 12155881 12162156 44 -chrY 12166502 12172555 37 -chrY 12180152 12185912 41 -chrY 12212568 12217982 47 -chrY 12259952 12266900 43 -chrY 12321134 12328048 44 -chrY 12353997 12361755 48 -chrY 12471583 12477750 41 -chrY 12556061 12566816 43 -chrY 12567189 12574918 41 -chrY 12574969 12581385 37 -chrY 12593138 12599159 45 -chrY 12599753 12608733 45 -chrY 12623423 12630790 39 -chrY 12920390 12926991 44 -chrY 12993164 12999140 34 -chrY 13004137 13009294 41 -chrY 13039768 13045527 36 -chrY 13051834 13056895 40 -chrY 13115138 13120449 43 -chrY 13167104 13175874 40 -chrY 13185822 13191485 40 -chrY 13252351 13260053 41 -chrY 13263004 13269401 39 -chrY 13280964 13289829 39 -chrY 13328822 13337861 38 -chrY 13338803 13347288 46 -chrY 13424970 13430518 35 -chrY 13437155 13442971 41 -chrY 13484161 13492046 39 -chrY 13514966 13525010 42 -chrY 
13529624 13540690 41 -chrY 13565815 13571309 37 -chrY 13573461 13578852 36 -chrY 13590814 13599779 41 -chrY 13600364 13621396 40 -chrY 13627105 13638297 44 -chrY 13644651 13652259 40 -chrY 13657736 13666461 43 -chrY 13670624 13679488 38 -chrY 13806885 13812979 40 -chrY 13887283 13897986 48 -chrY 13971831 13979428 40 -chrY 14082459 14087728 39 -chrY 14098242 14105696 40 -chrY 14157891 14165038 43 -chrY 14205230 14211091 40 -chrY 14233882 14245283 37 -chrY 14307467 14316895 46 -chrY 14339301 14346615 42 -chrY 14347799 14362566 43 -chrY 14374247 14382286 41 -chrY 14461230 14469256 38 -chrY 14504544 14513770 49 -chrY 14560376 14567068 39 -chrY 14572407 14578086 41 -chrY 14590607 14597155 46 -chrY 14762319 14775326 37 -chrY 14813142 14824402 42 -chrY 14862780 14876511 38 -chrY 14885221 14891091 39 -chrY 14984297 14992792 43 -chrY 15007186 15015808 39 -chrY 15035858 15044755 36 -chrY 15059569 15066537 36 -chrY 15098478 15103894 43 -chrY 15105936 15111341 40 -chrY 15113514 15120079 37 -chrY 15133822 15139556 39 -chrY 15254586 15270417 47 -chrY 15314346 15324591 40 -chrY 15336790 15342225 38 -chrY 15368318 15374523 40 -chrY 15384959 15394356 44 -chrY 15394940 15403276 42 -chrY 15500660 15508764 38 -chrY 15509090 15518502 43 -chrY 15607733 15621522 40 -chrY 15626346 15632174 40 -chrY 15662806 15669452 37 -chrY 15682681 15690135 36 -chrY 15731661 15736805 35 -chrY 15741948 15749918 41 -chrY 15815883 15824613 40 -chrY 15849529 15859619 38 -chrY 15941092 15948139 39 -chrY 15964963 15970803 38 -chrY 15973607 15983695 39 -chrY 16019020 16025198 41 -chrY 16057227 16062612 33 -chrY 16068290 16083002 42 -chrY 16098947 16105913 39 -chrY 16128439 16134046 36 -chrY 16292454 16302067 37 -chrY 16694618 16703847 37 -chrY 16746896 16753868 39 -chrY 16760739 16770576 39 -chrY 16832687 16839381 43 -chrY 16874803 16880861 37 -chrY 16948322 16961811 41 -chrY 17002365 17009878 40 -chrY 17042309 17048817 47 -chrY 17129821 17138633 43 -chrY 17138980 17146209 42 -chrY 17166159 17171544 40 -chrY 
17183940 17195160 47 -chrY 17212703 17219558 41 -chrY 17235070 17241735 38 -chrY 17285494 17291488 34 -chrY 17301431 17306508 41 -chrY 17340923 17347005 45 -chrY 17347434 17356449 42 -chrY 17437924 17445856 38 -chrY 18664679 18675178 42 -chrY 18882369 18888650 33 -chrY 18943921 18950373 43 -chrY 18995361 19004635 41 -chrY 19039184 19044596 47 -chrY 19101931 19108189 42 -chrY 19111372 19120891 41 -chrY 19126528 19140151 41 -chrY 19145280 19151619 40 -chrY 19168180 19183890 40 -chrY 19185299 19191204 40 -chrY 19230146 19235234 44 -chrY 19244362 19249681 38 -chrY 19253056 19258881 41 -chrY 19258931 19273153 39 -chrY 19277079 19283223 32 -chrY 19289712 19300316 40 -chrY 19307035 19314959 38 -chrY 19321316 19328613 38 -chrY 19359504 19366637 38 -chrY 19378363 19385822 37 -chrY 19393798 19400012 33 -chrY 19401507 19412484 41 -chrY 19418100 19430919 44 -chrY 19431245 19441997 39 -chrY 19442560 19453698 40 -chrY 19468938 19476722 42 -chrY 19505168 19512242 40 -chrY 19514508 19520050 41 -chrY 19520649 19525914 36 -chrY 19568898 19575669 35 -chrY 19577857 19582872 39 -chrY 19610784 19617219 39 -chrY 19617552 19624111 39 -chrY 19624483 19632100 37 -chrY 19632154 19638440 37 -chrY 19640429 19647772 38 -chrY 19652361 19666665 37 -chrY 19685100 19690709 40 -chrY 19706127 19711247 48 -chrY 19743571 19751093 36 -chrY 19792080 19809048 35 -chrY 19827253 19834589 37 -chrY 19867154 19878600 36 -chrY 19887017 19893244 43 -chrY 19893835 19905779 34 -chrY 19906368 19915050 40 -chrY 19915636 19926525 43 -chrY 19945933 19951015 33 -chrY 19953983 19959945 43 -chrY 19960558 19966518 38 -chrY 19967063 19978729 34 -chrY 19990375 19995406 41 -chrY 20006275 20012407 44 -chrY 20012734 20019861 37 -chrY 20047396 20053206 34 -chrY 20299012 20306346 43 -chrY 20389947 20396652 31 -chrY 20400867 20407806 39 -chrY 20443875 20449892 39 -chrY 20459613 20465100 41 -chrY 20476363 20483543 40 -chrY 20483894 20495078 34 -chrY 20513862 20519704 38 -chrY 20541890 20550721 39 -chrY 20562467 20570492 35 -chrY 
20595959 20607549 34 -chrY 20615642 20628183 37 -chrY 20628911 20638868 35 -chrY 20642520 20650592 45 -chrY 20658635 20663699 42 -chrY 20672133 20680570 38 -chrY 20682288 20687454 36 -chrY 20693591 20702020 43 -chrY 20703710 20712554 33 -chrY 20742660 20747871 46 -chrY 20763657 20768831 43 -chrY 20793884 20799934 43 -chrY 20800596 20805931 40 -chrY 20815851 20825197 40 -chrY 20841153 20856303 38 -chrY 20856442 20863199 40 -chrY 20891499 20901661 41 -chrY 20920061 20926313 40 -chrY 20926855 20935649 37 -chrY 20937111 20944700 39 -chrY 20963842 20978947 38 -chrY 20979543 20986990 38 -chrY 20989470 20995932 42 -chrY 21021445 21030068 43 -chrY 21035642 21045925 44 -chrY 21086840 21092451 41 -chrY 21123674 21131429 39 -chrY 21142994 21149100 34 -chrY 21159742 21165259 40 -chrY 21173822 21181450 43 -chrY 21189214 21195774 37 -chrY 21203550 21210182 36 -chrY 21235593 21249194 41 -chrY 21281787 21290096 38 -chrY 21293897 21305309 40 -chrY 21447788 21455746 43 -chrY 21571245 21578854 44 -chrY 21623157 21629316 38 -chrY 21844530 21850799 43 -chrY 21851973 21858034 44 -chrY 22229489 22234807 38 -chrY 22247024 22264166 42 -chrY 22292662 22298501 47 -chrY 22346580 22352845 32 -chrY 26623418 26628476 45 diff --git a/jcvi/utils/data/colorchecker.txt b/jcvi/utils/data/colorchecker.txt deleted file mode 100644 index ad8999d1..00000000 --- a/jcvi/utils/data/colorchecker.txt +++ /dev/null @@ -1,4 +0,0 @@ -115,82,68 194,150,130 98,122,157 87,108,67 133,128,177 103,189,170 -214,126,44 80,91,166 193,90,99 94,60,108 157,188,64 224,163,46 -56,61,150 70,148,73 175,54,60 231,199,31 187,86,149 8,133,161 -243,243,242 200,200,200 160,160,160 122,122,121 85,85,85 52,52,52 diff --git a/jcvi/utils/data/hg38.band.txt b/jcvi/utils/data/hg38.band.txt deleted file mode 100644 index 1f52ac81..00000000 --- a/jcvi/utils/data/hg38.band.txt +++ /dev/null @@ -1,1294 +0,0 @@ -#chrom chromStart chromEnd name gieStain -chr1 0 2300000 p36.33 gneg -chr1 2300000 5300000 p36.32 gpos25 -chr1 5300000 7100000 p36.31 
gneg -chr1 7100000 9100000 p36.23 gpos25 -chr1 9100000 12500000 p36.22 gneg -chr1 12500000 15900000 p36.21 gpos50 -chr1 15900000 20100000 p36.13 gneg -chr1 20100000 23600000 p36.12 gpos25 -chr1 23600000 27600000 p36.11 gneg -chr1 27600000 29900000 p35.3 gpos25 -chr1 29900000 32300000 p35.2 gneg -chr1 32300000 34300000 p35.1 gpos25 -chr1 34300000 39600000 p34.3 gneg -chr1 39600000 43700000 p34.2 gpos25 -chr1 43700000 46300000 p34.1 gneg -chr1 46300000 50200000 p33 gpos75 -chr1 50200000 55600000 p32.3 gneg -chr1 55600000 58500000 p32.2 gpos50 -chr1 58500000 60800000 p32.1 gneg -chr1 60800000 68500000 p31.3 gpos50 -chr1 68500000 69300000 p31.2 gneg -chr1 69300000 84400000 p31.1 gpos100 -chr1 84400000 87900000 p22.3 gneg -chr1 87900000 91500000 p22.2 gpos75 -chr1 91500000 94300000 p22.1 gneg -chr1 94300000 99300000 p21.3 gpos75 -chr1 99300000 101800000 p21.2 gneg -chr1 101800000 106700000 p21.1 gpos100 -chr1 106700000 111200000 p13.3 gneg -chr1 111200000 115500000 p13.2 gpos50 -chr1 115500000 117200000 p13.1 gneg -chr1 117200000 120400000 p12 gpos50 -chr1 120400000 121700000 p11.2 gneg -chr1 121700000 123400000 p11.1 acen -chr1 123400000 125100000 q11 acen -chr1 125100000 143200000 q12 gvar -chr1 143200000 147500000 q21.1 gneg -chr1 147500000 150600000 q21.2 gpos50 -chr1 150600000 155100000 q21.3 gneg -chr1 155100000 156600000 q22 gpos50 -chr1 156600000 159100000 q23.1 gneg -chr1 159100000 160500000 q23.2 gpos50 -chr1 160500000 165500000 q23.3 gneg -chr1 165500000 167200000 q24.1 gpos50 -chr1 167200000 170900000 q24.2 gneg -chr1 170900000 173000000 q24.3 gpos75 -chr1 173000000 176100000 q25.1 gneg -chr1 176100000 180300000 q25.2 gpos50 -chr1 180300000 185800000 q25.3 gneg -chr1 185800000 190800000 q31.1 gpos100 -chr1 190800000 193800000 q31.2 gneg -chr1 193800000 198700000 q31.3 gpos100 -chr1 198700000 207100000 q32.1 gneg -chr1 207100000 211300000 q32.2 gpos25 -chr1 211300000 214400000 q32.3 gneg -chr1 214400000 223900000 q41 gpos100 -chr1 223900000 224400000 q42.11 
gneg -chr1 224400000 226800000 q42.12 gpos25 -chr1 226800000 230500000 q42.13 gneg -chr1 230500000 234600000 q42.2 gpos50 -chr1 234600000 236400000 q42.3 gneg -chr1 236400000 243500000 q43 gpos75 -chr1 243500000 248956422 q44 gneg -chr2 0 4400000 p25.3 gneg -chr2 4400000 6900000 p25.2 gpos50 -chr2 6900000 12000000 p25.1 gneg -chr2 12000000 16500000 p24.3 gpos75 -chr2 16500000 19000000 p24.2 gneg -chr2 19000000 23800000 p24.1 gpos75 -chr2 23800000 27700000 p23.3 gneg -chr2 27700000 29800000 p23.2 gpos25 -chr2 29800000 31800000 p23.1 gneg -chr2 31800000 36300000 p22.3 gpos75 -chr2 36300000 38300000 p22.2 gneg -chr2 38300000 41500000 p22.1 gpos50 -chr2 41500000 47500000 p21 gneg -chr2 47500000 52600000 p16.3 gpos100 -chr2 52600000 54700000 p16.2 gneg -chr2 54700000 61000000 p16.1 gpos100 -chr2 61000000 63900000 p15 gneg -chr2 63900000 68400000 p14 gpos50 -chr2 68400000 71300000 p13.3 gneg -chr2 71300000 73300000 p13.2 gpos50 -chr2 73300000 74800000 p13.1 gneg -chr2 74800000 83100000 p12 gpos100 -chr2 83100000 91800000 p11.2 gneg -chr2 91800000 93900000 p11.1 acen -chr2 93900000 96000000 q11.1 acen -chr2 96000000 102100000 q11.2 gneg -chr2 102100000 105300000 q12.1 gpos50 -chr2 105300000 106700000 q12.2 gneg -chr2 106700000 108700000 q12.3 gpos25 -chr2 108700000 112200000 q13 gneg -chr2 112200000 118100000 q14.1 gpos50 -chr2 118100000 121600000 q14.2 gneg -chr2 121600000 129100000 q14.3 gpos50 -chr2 129100000 131700000 q21.1 gneg -chr2 131700000 134300000 q21.2 gpos25 -chr2 134300000 136100000 q21.3 gneg -chr2 136100000 141500000 q22.1 gpos100 -chr2 141500000 143400000 q22.2 gneg -chr2 143400000 147900000 q22.3 gpos100 -chr2 147900000 149000000 q23.1 gneg -chr2 149000000 149600000 q23.2 gpos25 -chr2 149600000 154000000 q23.3 gneg -chr2 154000000 158900000 q24.1 gpos75 -chr2 158900000 162900000 q24.2 gneg -chr2 162900000 168900000 q24.3 gpos75 -chr2 168900000 177100000 q31.1 gneg -chr2 177100000 179700000 q31.2 gpos50 -chr2 179700000 182100000 q31.3 gneg -chr2 182100000 
188500000 q32.1 gpos75 -chr2 188500000 191100000 q32.2 gneg -chr2 191100000 196600000 q32.3 gpos75 -chr2 196600000 202500000 q33.1 gneg -chr2 202500000 204100000 q33.2 gpos50 -chr2 204100000 208200000 q33.3 gneg -chr2 208200000 214500000 q34 gpos100 -chr2 214500000 220700000 q35 gneg -chr2 220700000 224300000 q36.1 gpos75 -chr2 224300000 225200000 q36.2 gneg -chr2 225200000 230100000 q36.3 gpos100 -chr2 230100000 234700000 q37.1 gneg -chr2 234700000 236400000 q37.2 gpos50 -chr2 236400000 242193529 q37.3 gneg -chr3 0 2800000 p26.3 gpos50 -chr3 2800000 4000000 p26.2 gneg -chr3 4000000 8100000 p26.1 gpos50 -chr3 8100000 11600000 p25.3 gneg -chr3 11600000 13200000 p25.2 gpos25 -chr3 13200000 16300000 p25.1 gneg -chr3 16300000 23800000 p24.3 gpos100 -chr3 23800000 26300000 p24.2 gneg -chr3 26300000 30800000 p24.1 gpos75 -chr3 30800000 32000000 p23 gneg -chr3 32000000 36400000 p22.3 gpos50 -chr3 36400000 39300000 p22.2 gneg -chr3 39300000 43600000 p22.1 gpos75 -chr3 43600000 44100000 p21.33 gneg -chr3 44100000 44200000 p21.32 gpos50 -chr3 44200000 50600000 p21.31 gneg -chr3 50600000 52300000 p21.2 gpos25 -chr3 52300000 54400000 p21.1 gneg -chr3 54400000 58600000 p14.3 gpos50 -chr3 58600000 63800000 p14.2 gneg -chr3 63800000 69700000 p14.1 gpos50 -chr3 69700000 74100000 p13 gneg -chr3 74100000 79800000 p12.3 gpos75 -chr3 79800000 83500000 p12.2 gneg -chr3 83500000 87100000 p12.1 gpos75 -chr3 87100000 87800000 p11.2 gneg -chr3 87800000 90900000 p11.1 acen -chr3 90900000 94000000 q11.1 acen -chr3 94000000 98600000 q11.2 gvar -chr3 98600000 100300000 q12.1 gneg -chr3 100300000 101200000 q12.2 gpos25 -chr3 101200000 103100000 q12.3 gneg -chr3 103100000 106500000 q13.11 gpos75 -chr3 106500000 108200000 q13.12 gneg -chr3 108200000 111600000 q13.13 gpos50 -chr3 111600000 113700000 q13.2 gneg -chr3 113700000 117600000 q13.31 gpos75 -chr3 117600000 119300000 q13.32 gneg -chr3 119300000 122200000 q13.33 gpos75 -chr3 122200000 124100000 q21.1 gneg -chr3 124100000 126100000 q21.2 
gpos25 -chr3 126100000 129500000 q21.3 gneg -chr3 129500000 134000000 q22.1 gpos25 -chr3 134000000 136000000 q22.2 gneg -chr3 136000000 139000000 q22.3 gpos25 -chr3 139000000 143100000 q23 gneg -chr3 143100000 149200000 q24 gpos100 -chr3 149200000 152300000 q25.1 gneg -chr3 152300000 155300000 q25.2 gpos50 -chr3 155300000 157300000 q25.31 gneg -chr3 157300000 159300000 q25.32 gpos50 -chr3 159300000 161000000 q25.33 gneg -chr3 161000000 167900000 q26.1 gpos100 -chr3 167900000 171200000 q26.2 gneg -chr3 171200000 176000000 q26.31 gpos75 -chr3 176000000 179300000 q26.32 gneg -chr3 179300000 183000000 q26.33 gpos75 -chr3 183000000 184800000 q27.1 gneg -chr3 184800000 186300000 q27.2 gpos25 -chr3 186300000 188200000 q27.3 gneg -chr3 188200000 192600000 q28 gpos75 -chr3 192600000 198295559 q29 gneg -chr4 0 4500000 p16.3 gneg -chr4 4500000 6000000 p16.2 gpos25 -chr4 6000000 11300000 p16.1 gneg -chr4 11300000 15000000 p15.33 gpos50 -chr4 15000000 17700000 p15.32 gneg -chr4 17700000 21300000 p15.31 gpos75 -chr4 21300000 27700000 p15.2 gneg -chr4 27700000 35800000 p15.1 gpos100 -chr4 35800000 41200000 p14 gneg -chr4 41200000 44600000 p13 gpos50 -chr4 44600000 48200000 p12 gneg -chr4 48200000 50000000 p11 acen -chr4 50000000 51800000 q11 acen -chr4 51800000 58500000 q12 gneg -chr4 58500000 65500000 q13.1 gpos100 -chr4 65500000 69400000 q13.2 gneg -chr4 69400000 75300000 q13.3 gpos75 -chr4 75300000 78000000 q21.1 gneg -chr4 78000000 81500000 q21.21 gpos50 -chr4 81500000 83200000 q21.22 gneg -chr4 83200000 86000000 q21.23 gpos25 -chr4 86000000 87100000 q21.3 gneg -chr4 87100000 92800000 q22.1 gpos75 -chr4 92800000 94200000 q22.2 gneg -chr4 94200000 97900000 q22.3 gpos75 -chr4 97900000 100100000 q23 gneg -chr4 100100000 106700000 q24 gpos50 -chr4 106700000 113200000 q25 gneg -chr4 113200000 119900000 q26 gpos75 -chr4 119900000 122800000 q27 gneg -chr4 122800000 127900000 q28.1 gpos50 -chr4 127900000 130100000 q28.2 gneg -chr4 130100000 138500000 q28.3 gpos100 -chr4 138500000 
140600000 q31.1 gneg -chr4 140600000 145900000 q31.21 gpos25 -chr4 145900000 147500000 q31.22 gneg -chr4 147500000 150200000 q31.23 gpos25 -chr4 150200000 154600000 q31.3 gneg -chr4 154600000 160800000 q32.1 gpos100 -chr4 160800000 163600000 q32.2 gneg -chr4 163600000 169200000 q32.3 gpos100 -chr4 169200000 171000000 q33 gneg -chr4 171000000 175400000 q34.1 gpos75 -chr4 175400000 176600000 q34.2 gneg -chr4 176600000 182300000 q34.3 gpos100 -chr4 182300000 186200000 q35.1 gneg -chr4 186200000 190214555 q35.2 gpos25 -chr5 0 4400000 p15.33 gneg -chr5 4400000 6300000 p15.32 gpos25 -chr5 6300000 9900000 p15.31 gneg -chr5 9900000 15000000 p15.2 gpos50 -chr5 15000000 18400000 p15.1 gneg -chr5 18400000 23300000 p14.3 gpos100 -chr5 23300000 24600000 p14.2 gneg -chr5 24600000 28900000 p14.1 gpos100 -chr5 28900000 33800000 p13.3 gneg -chr5 33800000 38400000 p13.2 gpos25 -chr5 38400000 42500000 p13.1 gneg -chr5 42500000 46100000 p12 gpos50 -chr5 46100000 48800000 p11 acen -chr5 48800000 51400000 q11.1 acen -chr5 51400000 59600000 q11.2 gneg -chr5 59600000 63600000 q12.1 gpos75 -chr5 63600000 63900000 q12.2 gneg -chr5 63900000 67400000 q12.3 gpos75 -chr5 67400000 69100000 q13.1 gneg -chr5 69100000 74000000 q13.2 gpos50 -chr5 74000000 77600000 q13.3 gneg -chr5 77600000 82100000 q14.1 gpos50 -chr5 82100000 83500000 q14.2 gneg -chr5 83500000 93000000 q14.3 gpos100 -chr5 93000000 98900000 q15 gneg -chr5 98900000 103400000 q21.1 gpos100 -chr5 103400000 105100000 q21.2 gneg -chr5 105100000 110200000 q21.3 gpos100 -chr5 110200000 112200000 q22.1 gneg -chr5 112200000 113800000 q22.2 gpos50 -chr5 113800000 115900000 q22.3 gneg -chr5 115900000 122100000 q23.1 gpos100 -chr5 122100000 127900000 q23.2 gneg -chr5 127900000 131200000 q23.3 gpos100 -chr5 131200000 136900000 q31.1 gneg -chr5 136900000 140100000 q31.2 gpos25 -chr5 140100000 145100000 q31.3 gneg -chr5 145100000 150400000 q32 gpos75 -chr5 150400000 153300000 q33.1 gneg -chr5 153300000 156300000 q33.2 gpos50 -chr5 156300000 
160500000 q33.3 gneg -chr5 160500000 169000000 q34 gpos100 -chr5 169000000 173300000 q35.1 gneg -chr5 173300000 177100000 q35.2 gpos25 -chr5 177100000 181538259 q35.3 gneg -chr6 0 2300000 p25.3 gneg -chr6 2300000 4200000 p25.2 gpos25 -chr6 4200000 7100000 p25.1 gneg -chr6 7100000 10600000 p24.3 gpos50 -chr6 10600000 11600000 p24.2 gneg -chr6 11600000 13400000 p24.1 gpos25 -chr6 13400000 15200000 p23 gneg -chr6 15200000 25200000 p22.3 gpos75 -chr6 25200000 27100000 p22.2 gneg -chr6 27100000 30500000 p22.1 gpos50 -chr6 30500000 32100000 p21.33 gneg -chr6 32100000 33500000 p21.32 gpos25 -chr6 33500000 36600000 p21.31 gneg -chr6 36600000 40500000 p21.2 gpos25 -chr6 40500000 46200000 p21.1 gneg -chr6 46200000 51800000 p12.3 gpos100 -chr6 51800000 53000000 p12.2 gneg -chr6 53000000 57200000 p12.1 gpos100 -chr6 57200000 58500000 p11.2 gneg -chr6 58500000 59800000 p11.1 acen -chr6 59800000 62600000 q11.1 acen -chr6 62600000 62700000 q11.2 gneg -chr6 62700000 69200000 q12 gpos100 -chr6 69200000 75200000 q13 gneg -chr6 75200000 83200000 q14.1 gpos50 -chr6 83200000 84200000 q14.2 gneg -chr6 84200000 87300000 q14.3 gpos50 -chr6 87300000 92500000 q15 gneg -chr6 92500000 98900000 q16.1 gpos100 -chr6 98900000 100000000 q16.2 gneg -chr6 100000000 105000000 q16.3 gpos100 -chr6 105000000 114200000 q21 gneg -chr6 114200000 117900000 q22.1 gpos75 -chr6 117900000 118100000 q22.2 gneg -chr6 118100000 125800000 q22.31 gpos100 -chr6 125800000 126800000 q22.32 gneg -chr6 126800000 130000000 q22.33 gpos75 -chr6 130000000 130900000 q23.1 gneg -chr6 130900000 134700000 q23.2 gpos50 -chr6 134700000 138300000 q23.3 gneg -chr6 138300000 142200000 q24.1 gpos75 -chr6 142200000 145100000 q24.2 gneg -chr6 145100000 148500000 q24.3 gpos75 -chr6 148500000 152100000 q25.1 gneg -chr6 152100000 155200000 q25.2 gpos50 -chr6 155200000 160600000 q25.3 gneg -chr6 160600000 164100000 q26 gpos50 -chr6 164100000 170805979 q27 gneg -chr7 0 2800000 p22.3 gneg -chr7 2800000 4500000 p22.2 gpos25 -chr7 4500000 
7200000 p22.1 gneg -chr7 7200000 13700000 p21.3 gpos100 -chr7 13700000 16500000 p21.2 gneg -chr7 16500000 20900000 p21.1 gpos100 -chr7 20900000 25500000 p15.3 gneg -chr7 25500000 27900000 p15.2 gpos50 -chr7 27900000 28800000 p15.1 gneg -chr7 28800000 34900000 p14.3 gpos75 -chr7 34900000 37100000 p14.2 gneg -chr7 37100000 43300000 p14.1 gpos75 -chr7 43300000 45400000 p13 gneg -chr7 45400000 49000000 p12.3 gpos75 -chr7 49000000 50500000 p12.2 gneg -chr7 50500000 53900000 p12.1 gpos75 -chr7 53900000 58100000 p11.2 gneg -chr7 58100000 60100000 p11.1 acen -chr7 60100000 62100000 q11.1 acen -chr7 62100000 67500000 q11.21 gneg -chr7 67500000 72700000 q11.22 gpos50 -chr7 72700000 77900000 q11.23 gneg -chr7 77900000 86700000 q21.11 gpos100 -chr7 86700000 88500000 q21.12 gneg -chr7 88500000 91500000 q21.13 gpos75 -chr7 91500000 93300000 q21.2 gneg -chr7 93300000 98400000 q21.3 gpos75 -chr7 98400000 104200000 q22.1 gneg -chr7 104200000 104900000 q22.2 gpos50 -chr7 104900000 107800000 q22.3 gneg -chr7 107800000 115000000 q31.1 gpos75 -chr7 115000000 117700000 q31.2 gneg -chr7 117700000 121400000 q31.31 gpos75 -chr7 121400000 124100000 q31.32 gneg -chr7 124100000 127500000 q31.33 gpos75 -chr7 127500000 129600000 q32.1 gneg -chr7 129600000 130800000 q32.2 gpos25 -chr7 130800000 132900000 q32.3 gneg -chr7 132900000 138500000 q33 gpos50 -chr7 138500000 143400000 q34 gneg -chr7 143400000 148200000 q35 gpos75 -chr7 148200000 152800000 q36.1 gneg -chr7 152800000 155200000 q36.2 gpos25 -chr7 155200000 159345973 q36.3 gneg -chr8 0 2300000 p23.3 gneg -chr8 2300000 6300000 p23.2 gpos75 -chr8 6300000 12800000 p23.1 gneg -chr8 12800000 19200000 p22 gpos100 -chr8 19200000 23500000 p21.3 gneg -chr8 23500000 27500000 p21.2 gpos50 -chr8 27500000 29000000 p21.1 gneg -chr8 29000000 36700000 p12 gpos75 -chr8 36700000 38500000 p11.23 gneg -chr8 38500000 39900000 p11.22 gpos25 -chr8 39900000 43200000 p11.21 gneg -chr8 43200000 45200000 p11.1 acen -chr8 45200000 47200000 q11.1 acen -chr8 47200000 
51300000 q11.21 gneg -chr8 51300000 51700000 q11.22 gpos75 -chr8 51700000 54600000 q11.23 gneg -chr8 54600000 60600000 q12.1 gpos50 -chr8 60600000 61300000 q12.2 gneg -chr8 61300000 65100000 q12.3 gpos50 -chr8 65100000 67100000 q13.1 gneg -chr8 67100000 69600000 q13.2 gpos50 -chr8 69600000 72000000 q13.3 gneg -chr8 72000000 74600000 q21.11 gpos100 -chr8 74600000 74700000 q21.12 gneg -chr8 74700000 83500000 q21.13 gpos75 -chr8 83500000 85900000 q21.2 gneg -chr8 85900000 92300000 q21.3 gpos100 -chr8 92300000 97900000 q22.1 gneg -chr8 97900000 100500000 q22.2 gpos25 -chr8 100500000 105100000 q22.3 gneg -chr8 105100000 109500000 q23.1 gpos75 -chr8 109500000 111100000 q23.2 gneg -chr8 111100000 116700000 q23.3 gpos100 -chr8 116700000 118300000 q24.11 gneg -chr8 118300000 121500000 q24.12 gpos50 -chr8 121500000 126300000 q24.13 gneg -chr8 126300000 130400000 q24.21 gpos50 -chr8 130400000 135400000 q24.22 gneg -chr8 135400000 138900000 q24.23 gpos75 -chr8 138900000 145138636 q24.3 gneg -chr9 0 2200000 p24.3 gneg -chr9 2200000 4600000 p24.2 gpos25 -chr9 4600000 9000000 p24.1 gneg -chr9 9000000 14200000 p23 gpos75 -chr9 14200000 16600000 p22.3 gneg -chr9 16600000 18500000 p22.2 gpos25 -chr9 18500000 19900000 p22.1 gneg -chr9 19900000 25600000 p21.3 gpos100 -chr9 25600000 28000000 p21.2 gneg -chr9 28000000 33200000 p21.1 gpos100 -chr9 33200000 36300000 p13.3 gneg -chr9 36300000 37900000 p13.2 gpos25 -chr9 37900000 39000000 p13.1 gneg -chr9 39000000 40000000 p12 gpos50 -chr9 40000000 42200000 p11.2 gneg -chr9 42200000 43000000 p11.1 acen -chr9 43000000 45500000 q11 acen -chr9 45500000 61500000 q12 gvar -chr9 61500000 65000000 q13 gneg -chr9 65000000 69300000 q21.11 gpos25 -chr9 69300000 71300000 q21.12 gneg -chr9 71300000 76600000 q21.13 gpos50 -chr9 76600000 78500000 q21.2 gneg -chr9 78500000 81500000 q21.31 gpos50 -chr9 81500000 84300000 q21.32 gneg -chr9 84300000 87800000 q21.33 gpos50 -chr9 87800000 89200000 q22.1 gneg -chr9 89200000 91200000 q22.2 gpos25 -chr9 91200000 
93900000 q22.31 gneg -chr9 93900000 96500000 q22.32 gpos25 -chr9 96500000 99800000 q22.33 gneg -chr9 99800000 105400000 q31.1 gpos100 -chr9 105400000 108500000 q31.2 gneg -chr9 108500000 112100000 q31.3 gpos25 -chr9 112100000 114900000 q32 gneg -chr9 114900000 119800000 q33.1 gpos75 -chr9 119800000 123100000 q33.2 gneg -chr9 123100000 127500000 q33.3 gpos25 -chr9 127500000 130600000 q34.11 gneg -chr9 130600000 131100000 q34.12 gpos25 -chr9 131100000 133100000 q34.13 gneg -chr9 133100000 134500000 q34.2 gpos25 -chr9 134500000 138394717 q34.3 gneg -chrM 0 16569 gneg -chrX 0 4400000 p22.33 gneg -chrX 4400000 6100000 p22.32 gpos50 -chrX 6100000 9600000 p22.31 gneg -chrX 9600000 17400000 p22.2 gpos50 -chrX 17400000 19200000 p22.13 gneg -chrX 19200000 21900000 p22.12 gpos50 -chrX 21900000 24900000 p22.11 gneg -chrX 24900000 29300000 p21.3 gpos100 -chrX 29300000 31500000 p21.2 gneg -chrX 31500000 37800000 p21.1 gpos100 -chrX 37800000 42500000 p11.4 gneg -chrX 42500000 47600000 p11.3 gpos75 -chrX 47600000 50100000 p11.23 gneg -chrX 50100000 54800000 p11.22 gpos25 -chrX 54800000 58100000 p11.21 gneg -chrX 58100000 61000000 p11.1 acen -chrX 61000000 63800000 q11.1 acen -chrX 63800000 65400000 q11.2 gneg -chrX 65400000 68500000 q12 gpos50 -chrX 68500000 73000000 q13.1 gneg -chrX 73000000 74700000 q13.2 gpos50 -chrX 74700000 76800000 q13.3 gneg -chrX 76800000 85400000 q21.1 gpos100 -chrX 85400000 87000000 q21.2 gneg -chrX 87000000 92700000 q21.31 gpos100 -chrX 92700000 94300000 q21.32 gneg -chrX 94300000 99100000 q21.33 gpos75 -chrX 99100000 103300000 q22.1 gneg -chrX 103300000 104500000 q22.2 gpos50 -chrX 104500000 109400000 q22.3 gneg -chrX 109400000 117400000 q23 gpos75 -chrX 117400000 121800000 q24 gneg -chrX 121800000 129500000 q25 gpos100 -chrX 129500000 131300000 q26.1 gneg -chrX 131300000 134500000 q26.2 gpos25 -chrX 134500000 138900000 q26.3 gneg -chrX 138900000 141200000 q27.1 gpos75 -chrX 141200000 143000000 q27.2 gneg -chrX 143000000 148000000 q27.3 gpos100 -chrX 
148000000 156040895 q28 gneg -chrY 0 300000 p11.32 gneg -chrY 300000 600000 p11.31 gpos50 -chrY 600000 10300000 p11.2 gneg -chrY 10300000 10400000 p11.1 acen -chrY 10400000 10600000 q11.1 acen -chrY 10600000 12400000 q11.21 gneg -chrY 12400000 17100000 q11.221 gpos50 -chrY 17100000 19600000 q11.222 gneg -chrY 19600000 23800000 q11.223 gpos50 -chrY 23800000 26600000 q11.23 gneg -chrY 26600000 57227415 q12 gvar -chr10 0 3000000 p15.3 gneg -chr10 3000000 3800000 p15.2 gpos25 -chr10 3800000 6600000 p15.1 gneg -chr10 6600000 12200000 p14 gpos75 -chr10 12200000 17300000 p13 gneg -chr10 17300000 18300000 p12.33 gpos75 -chr10 18300000 18400000 p12.32 gneg -chr10 18400000 22300000 p12.31 gpos75 -chr10 22300000 24300000 p12.2 gneg -chr10 24300000 29300000 p12.1 gpos50 -chr10 29300000 31100000 p11.23 gneg -chr10 31100000 34200000 p11.22 gpos25 -chr10 34200000 38000000 p11.21 gneg -chr10 38000000 39800000 p11.1 acen -chr10 39800000 41600000 q11.1 acen -chr10 41600000 45500000 q11.21 gneg -chr10 45500000 48600000 q11.22 gpos25 -chr10 48600000 51100000 q11.23 gneg -chr10 51100000 59400000 q21.1 gpos100 -chr10 59400000 62800000 q21.2 gneg -chr10 62800000 68800000 q21.3 gpos100 -chr10 68800000 73100000 q22.1 gneg -chr10 73100000 75900000 q22.2 gpos50 -chr10 75900000 80300000 q22.3 gneg -chr10 80300000 86100000 q23.1 gpos100 -chr10 86100000 87700000 q23.2 gneg -chr10 87700000 91100000 q23.31 gpos75 -chr10 91100000 92300000 q23.32 gneg -chr10 92300000 95300000 q23.33 gpos50 -chr10 95300000 97500000 q24.1 gneg -chr10 97500000 100100000 q24.2 gpos50 -chr10 100100000 101200000 q24.31 gneg -chr10 101200000 103100000 q24.32 gpos25 -chr10 103100000 104000000 q24.33 gneg -chr10 104000000 110100000 q25.1 gpos100 -chr10 110100000 113100000 q25.2 gneg -chr10 113100000 117300000 q25.3 gpos75 -chr10 117300000 119900000 q26.11 gneg -chr10 119900000 121400000 q26.12 gpos50 -chr10 121400000 125700000 q26.13 gneg -chr10 125700000 128800000 q26.2 gpos50 -chr10 128800000 133797422 q26.3 gneg -chr11 0 
2800000 p15.5 gneg -chr11 2800000 11700000 p15.4 gpos50 -chr11 11700000 13800000 p15.3 gneg -chr11 13800000 16900000 p15.2 gpos50 -chr11 16900000 22000000 p15.1 gneg -chr11 22000000 26200000 p14.3 gpos100 -chr11 26200000 27200000 p14.2 gneg -chr11 27200000 31000000 p14.1 gpos75 -chr11 31000000 36400000 p13 gneg -chr11 36400000 43400000 p12 gpos100 -chr11 43400000 48800000 p11.2 gneg -chr11 48800000 51000000 p11.12 gpos75 -chr11 51000000 53400000 p11.11 acen -chr11 53400000 55800000 q11 acen -chr11 55800000 60100000 q12.1 gpos75 -chr11 60100000 61900000 q12.2 gneg -chr11 61900000 63600000 q12.3 gpos25 -chr11 63600000 66100000 q13.1 gneg -chr11 66100000 68700000 q13.2 gpos25 -chr11 68700000 70500000 q13.3 gneg -chr11 70500000 75500000 q13.4 gpos50 -chr11 75500000 77400000 q13.5 gneg -chr11 77400000 85900000 q14.1 gpos100 -chr11 85900000 88600000 q14.2 gneg -chr11 88600000 93000000 q14.3 gpos100 -chr11 93000000 97400000 q21 gneg -chr11 97400000 102300000 q22.1 gpos100 -chr11 102300000 103000000 q22.2 gneg -chr11 103000000 110600000 q22.3 gpos100 -chr11 110600000 112700000 q23.1 gneg -chr11 112700000 114600000 q23.2 gpos50 -chr11 114600000 121300000 q23.3 gneg -chr11 121300000 124000000 q24.1 gpos50 -chr11 124000000 127900000 q24.2 gneg -chr11 127900000 130900000 q24.3 gpos50 -chr11 130900000 135086622 q25 gneg -chr12 0 3200000 p13.33 gneg -chr12 3200000 5300000 p13.32 gpos25 -chr12 5300000 10000000 p13.31 gneg -chr12 10000000 12600000 p13.2 gpos75 -chr12 12600000 14600000 p13.1 gneg -chr12 14600000 19800000 p12.3 gpos100 -chr12 19800000 21100000 p12.2 gneg -chr12 21100000 26300000 p12.1 gpos100 -chr12 26300000 27600000 p11.23 gneg -chr12 27600000 30500000 p11.22 gpos50 -chr12 30500000 33200000 p11.21 gneg -chr12 33200000 35500000 p11.1 acen -chr12 35500000 37800000 q11 acen -chr12 37800000 46000000 q12 gpos100 -chr12 46000000 48700000 q13.11 gneg -chr12 48700000 51100000 q13.12 gpos25 -chr12 51100000 54500000 q13.13 gneg -chr12 54500000 56200000 q13.2 gpos25 -chr12 
56200000 57700000 q13.3 gneg -chr12 57700000 62700000 q14.1 gpos75 -chr12 62700000 64700000 q14.2 gneg -chr12 64700000 67300000 q14.3 gpos50 -chr12 67300000 71100000 q15 gneg -chr12 71100000 75300000 q21.1 gpos75 -chr12 75300000 79900000 q21.2 gneg -chr12 79900000 86300000 q21.31 gpos100 -chr12 86300000 88600000 q21.32 gneg -chr12 88600000 92200000 q21.33 gpos100 -chr12 92200000 95800000 q22 gneg -chr12 95800000 101200000 q23.1 gpos75 -chr12 101200000 103500000 q23.2 gneg -chr12 103500000 108600000 q23.3 gpos50 -chr12 108600000 111300000 q24.11 gneg -chr12 111300000 111900000 q24.12 gpos25 -chr12 111900000 113900000 q24.13 gneg -chr12 113900000 116400000 q24.21 gpos50 -chr12 116400000 117700000 q24.22 gneg -chr12 117700000 120300000 q24.23 gpos50 -chr12 120300000 125400000 q24.31 gneg -chr12 125400000 128700000 q24.32 gpos50 -chr12 128700000 133275309 q24.33 gneg -chr13 0 4600000 p13 gvar -chr13 4600000 10100000 p12 stalk -chr13 10100000 16500000 p11.2 gvar -chr13 16500000 17700000 p11.1 acen -chr13 17700000 18900000 q11 acen -chr13 18900000 22600000 q12.11 gneg -chr13 22600000 24900000 q12.12 gpos25 -chr13 24900000 27200000 q12.13 gneg -chr13 27200000 28300000 q12.2 gpos25 -chr13 28300000 31600000 q12.3 gneg -chr13 31600000 33400000 q13.1 gpos50 -chr13 33400000 34900000 q13.2 gneg -chr13 34900000 39500000 q13.3 gpos75 -chr13 39500000 44600000 q14.11 gneg -chr13 44600000 45200000 q14.12 gpos25 -chr13 45200000 46700000 q14.13 gneg -chr13 46700000 50300000 q14.2 gpos50 -chr13 50300000 54700000 q14.3 gneg -chr13 54700000 59000000 q21.1 gpos100 -chr13 59000000 61800000 q21.2 gneg -chr13 61800000 65200000 q21.31 gpos75 -chr13 65200000 68100000 q21.32 gneg -chr13 68100000 72800000 q21.33 gpos100 -chr13 72800000 74900000 q22.1 gneg -chr13 74900000 76700000 q22.2 gpos50 -chr13 76700000 78500000 q22.3 gneg -chr13 78500000 87100000 q31.1 gpos100 -chr13 87100000 89400000 q31.2 gneg -chr13 89400000 94400000 q31.3 gpos100 -chr13 94400000 97500000 q32.1 gneg -chr13 97500000 
98700000 q32.2 gpos25 -chr13 98700000 101100000 q32.3 gneg -chr13 101100000 104200000 q33.1 gpos100 -chr13 104200000 106400000 q33.2 gneg -chr13 106400000 109600000 q33.3 gpos100 -chr13 109600000 114364328 q34 gneg -chr14 0 3600000 p13 gvar -chr14 3600000 8000000 p12 stalk -chr14 8000000 16100000 p11.2 gvar -chr14 16100000 17200000 p11.1 acen -chr14 17200000 18200000 q11.1 acen -chr14 18200000 24100000 q11.2 gneg -chr14 24100000 32900000 q12 gpos100 -chr14 32900000 34800000 q13.1 gneg -chr14 34800000 36100000 q13.2 gpos50 -chr14 36100000 37400000 q13.3 gneg -chr14 37400000 43000000 q21.1 gpos100 -chr14 43000000 46700000 q21.2 gneg -chr14 46700000 50400000 q21.3 gpos100 -chr14 50400000 53600000 q22.1 gneg -chr14 53600000 55000000 q22.2 gpos25 -chr14 55000000 57600000 q22.3 gneg -chr14 57600000 61600000 q23.1 gpos75 -chr14 61600000 64300000 q23.2 gneg -chr14 64300000 67400000 q23.3 gpos50 -chr14 67400000 69800000 q24.1 gneg -chr14 69800000 73300000 q24.2 gpos50 -chr14 73300000 78800000 q24.3 gneg -chr14 78800000 83100000 q31.1 gpos100 -chr14 83100000 84400000 q31.2 gneg -chr14 84400000 89300000 q31.3 gpos100 -chr14 89300000 91400000 q32.11 gneg -chr14 91400000 94200000 q32.12 gpos25 -chr14 94200000 95800000 q32.13 gneg -chr14 95800000 100900000 q32.2 gpos50 -chr14 100900000 102700000 q32.31 gneg -chr14 102700000 103500000 q32.32 gpos50 -chr14 103500000 107043718 q32.33 gneg -chr15 0 4200000 p13 gvar -chr15 4200000 9700000 p12 stalk -chr15 9700000 17500000 p11.2 gvar -chr15 17500000 19000000 p11.1 acen -chr15 19000000 20500000 q11.1 acen -chr15 20500000 25500000 q11.2 gneg -chr15 25500000 27800000 q12 gpos50 -chr15 27800000 30000000 q13.1 gneg -chr15 30000000 30900000 q13.2 gpos50 -chr15 30900000 33400000 q13.3 gneg -chr15 33400000 39800000 q14 gpos75 -chr15 39800000 42500000 q15.1 gneg -chr15 42500000 43300000 q15.2 gpos25 -chr15 43300000 44500000 q15.3 gneg -chr15 44500000 49200000 q21.1 gpos75 -chr15 49200000 52600000 q21.2 gneg -chr15 52600000 58800000 q21.3 
gpos75 -chr15 58800000 59000000 q22.1 gneg -chr15 59000000 63400000 q22.2 gpos25 -chr15 63400000 66900000 q22.31 gneg -chr15 66900000 67000000 q22.32 gpos25 -chr15 67000000 67200000 q22.33 gneg -chr15 67200000 72400000 q23 gpos25 -chr15 72400000 74900000 q24.1 gneg -chr15 74900000 76300000 q24.2 gpos25 -chr15 76300000 78000000 q24.3 gneg -chr15 78000000 81400000 q25.1 gpos50 -chr15 81400000 84700000 q25.2 gneg -chr15 84700000 88500000 q25.3 gpos50 -chr15 88500000 93800000 q26.1 gneg -chr15 93800000 98000000 q26.2 gpos50 -chr15 98000000 101991189 q26.3 gneg -chr16 0 7800000 p13.3 gneg -chr16 7800000 10400000 p13.2 gpos50 -chr16 10400000 12500000 p13.13 gneg -chr16 12500000 14700000 p13.12 gpos50 -chr16 14700000 16700000 p13.11 gneg -chr16 16700000 21200000 p12.3 gpos50 -chr16 21200000 24200000 p12.2 gneg -chr16 24200000 28500000 p12.1 gpos50 -chr16 28500000 35300000 p11.2 gneg -chr16 35300000 36800000 p11.1 acen -chr16 36800000 38400000 q11.1 acen -chr16 38400000 47000000 q11.2 gvar -chr16 47000000 52600000 q12.1 gneg -chr16 52600000 56000000 q12.2 gpos50 -chr16 56000000 57300000 q13 gneg -chr16 57300000 66600000 q21 gpos100 -chr16 66600000 70800000 q22.1 gneg -chr16 70800000 72800000 q22.2 gpos50 -chr16 72800000 74100000 q22.3 gneg -chr16 74100000 79200000 q23.1 gpos75 -chr16 79200000 81600000 q23.2 gneg -chr16 81600000 84100000 q23.3 gpos50 -chr16 84100000 87000000 q24.1 gneg -chr16 87000000 88700000 q24.2 gpos25 -chr16 88700000 90338345 q24.3 gneg -chr17 0 3400000 p13.3 gneg -chr17 3400000 6500000 p13.2 gpos50 -chr17 6500000 10800000 p13.1 gneg -chr17 10800000 16100000 p12 gpos75 -chr17 16100000 22700000 p11.2 gneg -chr17 22700000 25100000 p11.1 acen -chr17 25100000 27400000 q11.1 acen -chr17 27400000 33500000 q11.2 gneg -chr17 33500000 39800000 q12 gpos50 -chr17 39800000 40200000 q21.1 gneg -chr17 40200000 42800000 q21.2 gpos25 -chr17 42800000 46800000 q21.31 gneg -chr17 46800000 49300000 q21.32 gpos25 -chr17 49300000 52100000 q21.33 gneg -chr17 52100000 
59500000 q22 gpos75 -chr17 59500000 60200000 q23.1 gneg -chr17 60200000 63100000 q23.2 gpos75 -chr17 63100000 64600000 q23.3 gneg -chr17 64600000 66200000 q24.1 gpos50 -chr17 66200000 69100000 q24.2 gneg -chr17 69100000 72900000 q24.3 gpos75 -chr17 72900000 76800000 q25.1 gneg -chr17 76800000 77200000 q25.2 gpos25 -chr17 77200000 83257441 q25.3 gneg -chr18 0 2900000 p11.32 gneg -chr18 2900000 7200000 p11.31 gpos50 -chr18 7200000 8500000 p11.23 gneg -chr18 8500000 10900000 p11.22 gpos25 -chr18 10900000 15400000 p11.21 gneg -chr18 15400000 18500000 p11.1 acen -chr18 18500000 21500000 q11.1 acen -chr18 21500000 27500000 q11.2 gneg -chr18 27500000 35100000 q12.1 gpos100 -chr18 35100000 39500000 q12.2 gneg -chr18 39500000 45900000 q12.3 gpos75 -chr18 45900000 50700000 q21.1 gneg -chr18 50700000 56200000 q21.2 gpos75 -chr18 56200000 58600000 q21.31 gneg -chr18 58600000 61300000 q21.32 gpos50 -chr18 61300000 63900000 q21.33 gneg -chr18 63900000 69100000 q22.1 gpos100 -chr18 69100000 71000000 q22.2 gneg -chr18 71000000 75400000 q22.3 gpos25 -chr18 75400000 80373285 q23 gneg -chr19 0 6900000 p13.3 gneg -chr19 6900000 12600000 p13.2 gpos25 -chr19 12600000 13800000 p13.13 gneg -chr19 13800000 16100000 p13.12 gpos25 -chr19 16100000 19900000 p13.11 gneg -chr19 19900000 24200000 p12 gvar -chr19 24200000 26200000 p11 acen -chr19 26200000 28100000 q11 acen -chr19 28100000 31900000 q12 gvar -chr19 31900000 35100000 q13.11 gneg -chr19 35100000 37800000 q13.12 gpos25 -chr19 37800000 38200000 q13.13 gneg -chr19 38200000 42900000 q13.2 gpos25 -chr19 42900000 44700000 q13.31 gneg -chr19 44700000 47500000 q13.32 gpos25 -chr19 47500000 50900000 q13.33 gneg -chr19 50900000 53100000 q13.41 gpos25 -chr19 53100000 55800000 q13.42 gneg -chr19 55800000 58617616 q13.43 gpos25 -chr20 0 5100000 p13 gneg -chr20 5100000 9200000 p12.3 gpos75 -chr20 9200000 12000000 p12.2 gneg -chr20 12000000 17900000 p12.1 gpos75 -chr20 17900000 21300000 p11.23 gneg -chr20 21300000 22300000 p11.22 gpos25 -chr20 
22300000 25700000 p11.21 gneg -chr20 25700000 28100000 p11.1 acen -chr20 28100000 30400000 q11.1 acen -chr20 30400000 33500000 q11.21 gneg -chr20 33500000 35800000 q11.22 gpos25 -chr20 35800000 39000000 q11.23 gneg -chr20 39000000 43100000 q12 gpos75 -chr20 43100000 43500000 q13.11 gneg -chr20 43500000 47800000 q13.12 gpos25 -chr20 47800000 51200000 q13.13 gneg -chr20 51200000 56400000 q13.2 gpos75 -chr20 56400000 57800000 q13.31 gneg -chr20 57800000 59700000 q13.32 gpos50 -chr20 59700000 64444167 q13.33 gneg -chr21 0 3100000 p13 gvar -chr21 3100000 7000000 p12 stalk -chr21 7000000 10900000 p11.2 gvar -chr21 10900000 12000000 p11.1 acen -chr21 12000000 13000000 q11.1 acen -chr21 13000000 15000000 q11.2 gneg -chr21 15000000 22600000 q21.1 gpos100 -chr21 22600000 25500000 q21.2 gneg -chr21 25500000 30200000 q21.3 gpos75 -chr21 30200000 34400000 q22.11 gneg -chr21 34400000 36400000 q22.12 gpos50 -chr21 36400000 38300000 q22.13 gneg -chr21 38300000 41200000 q22.2 gpos50 -chr21 41200000 46709983 q22.3 gneg -chr22 0 4300000 p13 gvar -chr22 4300000 9400000 p12 stalk -chr22 9400000 13700000 p11.2 gvar -chr22 13700000 15000000 p11.1 acen -chr22 15000000 17400000 q11.1 acen -chr22 17400000 21700000 q11.21 gneg -chr22 21700000 23100000 q11.22 gpos25 -chr22 23100000 25500000 q11.23 gneg -chr22 25500000 29200000 q12.1 gpos50 -chr22 29200000 31800000 q12.2 gneg -chr22 31800000 37200000 q12.3 gpos50 -chr22 37200000 40600000 q13.1 gneg -chr22 40600000 43800000 q13.2 gpos50 -chr22 43800000 48100000 q13.31 gneg -chr22 48100000 49100000 q13.32 gpos50 -chr22 49100000 50818468 q13.33 gneg -chrUn_GL000195v1 0 182896 gneg -chrUn_GL000213v1 0 164239 gneg -chrUn_GL000214v1 0 137718 gneg -chrUn_GL000216v2 0 176608 gneg -chrUn_GL000218v1 0 161147 gneg -chrUn_GL000219v1 0 179198 gneg -chrUn_GL000220v1 0 161802 gneg -chrUn_GL000224v1 0 179693 gneg -chrUn_GL000226v1 0 15008 gneg -chrUn_KI270302v1 0 2274 gneg -chrUn_KI270303v1 0 1942 gneg -chrUn_KI270304v1 0 2165 gneg -chrUn_KI270305v1 0 1472 
gneg -chrUn_KI270310v1 0 1201 gneg -chrUn_KI270311v1 0 12399 gneg -chrUn_KI270312v1 0 998 gneg -chrUn_KI270315v1 0 2276 gneg -chrUn_KI270316v1 0 1444 gneg -chrUn_KI270317v1 0 37690 gneg -chrUn_KI270320v1 0 4416 gneg -chrUn_KI270322v1 0 21476 gneg -chrUn_KI270329v1 0 1040 gneg -chrUn_KI270330v1 0 1652 gneg -chrUn_KI270333v1 0 2699 gneg -chrUn_KI270334v1 0 1368 gneg -chrUn_KI270335v1 0 1048 gneg -chrUn_KI270336v1 0 1026 gneg -chrUn_KI270337v1 0 1121 gneg -chrUn_KI270338v1 0 1428 gneg -chrUn_KI270340v1 0 1428 gneg -chrUn_KI270362v1 0 3530 gneg -chrUn_KI270363v1 0 1803 gneg -chrUn_KI270364v1 0 2855 gneg -chrUn_KI270366v1 0 8320 gneg -chrUn_KI270371v1 0 2805 gneg -chrUn_KI270372v1 0 1650 gneg -chrUn_KI270373v1 0 1451 gneg -chrUn_KI270374v1 0 2656 gneg -chrUn_KI270375v1 0 2378 gneg -chrUn_KI270376v1 0 1136 gneg -chrUn_KI270378v1 0 1048 gneg -chrUn_KI270379v1 0 1045 gneg -chrUn_KI270381v1 0 1930 gneg -chrUn_KI270382v1 0 4215 gneg -chrUn_KI270383v1 0 1750 gneg -chrUn_KI270384v1 0 1658 gneg -chrUn_KI270385v1 0 990 gneg -chrUn_KI270386v1 0 1788 gneg -chrUn_KI270387v1 0 1537 gneg -chrUn_KI270388v1 0 1216 gneg -chrUn_KI270389v1 0 1298 gneg -chrUn_KI270390v1 0 2387 gneg -chrUn_KI270391v1 0 1484 gneg -chrUn_KI270392v1 0 971 gneg -chrUn_KI270393v1 0 1308 gneg -chrUn_KI270394v1 0 970 gneg -chrUn_KI270395v1 0 1143 gneg -chrUn_KI270396v1 0 1880 gneg -chrUn_KI270411v1 0 2646 gneg -chrUn_KI270412v1 0 1179 gneg -chrUn_KI270414v1 0 2489 gneg -chrUn_KI270417v1 0 2043 gneg -chrUn_KI270418v1 0 2145 gneg -chrUn_KI270419v1 0 1029 gneg -chrUn_KI270420v1 0 2321 gneg -chrUn_KI270422v1 0 1445 gneg -chrUn_KI270423v1 0 981 gneg -chrUn_KI270424v1 0 2140 gneg -chrUn_KI270425v1 0 1884 gneg -chrUn_KI270429v1 0 1361 gneg -chrUn_KI270435v1 0 92983 gneg -chrUn_KI270438v1 0 112505 gneg -chrUn_KI270442v1 0 392061 gneg -chrUn_KI270448v1 0 7992 gneg -chrUn_KI270465v1 0 1774 gneg -chrUn_KI270466v1 0 1233 gneg -chrUn_KI270467v1 0 3920 gneg -chrUn_KI270468v1 0 4055 gneg -chrUn_KI270507v1 0 5353 gneg 
-chrUn_KI270508v1 0 1951 gneg -chrUn_KI270509v1 0 2318 gneg -chrUn_KI270510v1 0 2415 gneg -chrUn_KI270511v1 0 8127 gneg -chrUn_KI270512v1 0 22689 gneg -chrUn_KI270515v1 0 6361 gneg -chrUn_KI270516v1 0 1300 gneg -chrUn_KI270517v1 0 3253 gneg -chrUn_KI270518v1 0 2186 gneg -chrUn_KI270519v1 0 138126 gneg -chrUn_KI270521v1 0 7642 gneg -chrUn_KI270522v1 0 5674 gneg -chrUn_KI270528v1 0 2983 gneg -chrUn_KI270529v1 0 1899 gneg -chrUn_KI270530v1 0 2168 gneg -chrUn_KI270538v1 0 91309 gneg -chrUn_KI270539v1 0 993 gneg -chrUn_KI270544v1 0 1202 gneg -chrUn_KI270548v1 0 1599 gneg -chrUn_KI270579v1 0 31033 gneg -chrUn_KI270580v1 0 1553 gneg -chrUn_KI270581v1 0 7046 gneg -chrUn_KI270582v1 0 6504 gneg -chrUn_KI270583v1 0 1400 gneg -chrUn_KI270584v1 0 4513 gneg -chrUn_KI270587v1 0 2969 gneg -chrUn_KI270588v1 0 6158 gneg -chrUn_KI270589v1 0 44474 gneg -chrUn_KI270590v1 0 4685 gneg -chrUn_KI270591v1 0 5796 gneg -chrUn_KI270593v1 0 3041 gneg -chrUn_KI270741v1 0 157432 gneg -chrUn_KI270742v1 0 186739 gneg -chrUn_KI270743v1 0 210658 gneg -chrUn_KI270744v1 0 168472 gneg -chrUn_KI270745v1 0 41891 gneg -chrUn_KI270746v1 0 66486 gneg -chrUn_KI270747v1 0 198735 gneg -chrUn_KI270748v1 0 93321 gneg -chrUn_KI270749v1 0 158759 gneg -chrUn_KI270750v1 0 148850 gneg -chrUn_KI270751v1 0 150742 gneg -chrUn_KI270752v1 0 27745 gneg -chrUn_KI270753v1 0 62944 gneg -chrUn_KI270754v1 0 40191 gneg -chrUn_KI270755v1 0 36723 gneg -chrUn_KI270756v1 0 79590 gneg -chrUn_KI270757v1 0 71251 gneg -chr1_GL383518v1_alt 0 182439 gneg -chr1_GL383519v1_alt 0 110268 gneg -chr1_GL383520v2_alt 0 366580 gneg -chr1_KI270759v1_alt 0 425601 gneg -chr1_KI270760v1_alt 0 109528 gneg -chr1_KI270761v1_alt 0 165834 gneg -chr1_KI270762v1_alt 0 354444 gneg -chr1_KI270763v1_alt 0 911658 gneg -chr1_KI270764v1_alt 0 50258 gneg -chr1_KI270765v1_alt 0 185285 gneg -chr1_KI270766v1_alt 0 256271 gneg -chr1_KI270892v1_alt 0 162212 gneg -chr2_GL383521v1_alt 0 143390 gneg -chr2_GL383522v1_alt 0 123821 gneg -chr2_GL582966v2_alt 0 96131 gneg 
-chr2_KI270767v1_alt 0 161578 gneg -chr2_KI270768v1_alt 0 110099 gneg -chr2_KI270769v1_alt 0 120616 gneg -chr2_KI270770v1_alt 0 136240 gneg -chr2_KI270771v1_alt 0 110395 gneg -chr2_KI270772v1_alt 0 133041 gneg -chr2_KI270773v1_alt 0 70887 gneg -chr2_KI270774v1_alt 0 223625 gneg -chr2_KI270775v1_alt 0 138019 gneg -chr2_KI270776v1_alt 0 174166 gneg -chr2_KI270893v1_alt 0 161218 gneg -chr2_KI270894v1_alt 0 214158 gneg -chr3_GL383526v1_alt 0 180671 gneg -chr3_JH636055v2_alt 0 173151 gneg -chr3_KI270777v1_alt 0 173649 gneg -chr3_KI270778v1_alt 0 248252 gneg -chr3_KI270779v1_alt 0 205312 gneg -chr3_KI270780v1_alt 0 224108 gneg -chr3_KI270781v1_alt 0 113034 gneg -chr3_KI270782v1_alt 0 162429 gneg -chr3_KI270783v1_alt 0 109187 gneg -chr3_KI270784v1_alt 0 184404 gneg -chr3_KI270895v1_alt 0 162896 gneg -chr3_KI270924v1_alt 0 166540 gneg -chr3_KI270934v1_alt 0 163458 gneg -chr3_KI270935v1_alt 0 197351 gneg -chr3_KI270936v1_alt 0 164170 gneg -chr3_KI270937v1_alt 0 165607 gneg -chr4_GL000257v2_alt 0 586476 gneg -chr4_GL383527v1_alt 0 164536 gneg -chr4_GL383528v1_alt 0 376187 gneg -chr4_KI270785v1_alt 0 119912 gneg -chr4_KI270786v1_alt 0 244096 gneg -chr4_KI270787v1_alt 0 111943 gneg -chr4_KI270788v1_alt 0 158965 gneg -chr4_KI270789v1_alt 0 205944 gneg -chr4_KI270790v1_alt 0 220246 gneg -chr4_KI270896v1_alt 0 378547 gneg -chr4_KI270925v1_alt 0 555799 gneg -chr5_GL339449v2_alt 0 1612928 gneg -chr5_GL383530v1_alt 0 101241 gneg -chr5_GL383531v1_alt 0 173459 gneg -chr5_GL383532v1_alt 0 82728 gneg -chr5_GL949742v1_alt 0 226852 gneg -chr5_KI270791v1_alt 0 195710 gneg -chr5_KI270792v1_alt 0 179043 gneg -chr5_KI270793v1_alt 0 126136 gneg -chr5_KI270794v1_alt 0 164558 gneg -chr5_KI270795v1_alt 0 131892 gneg -chr5_KI270796v1_alt 0 172708 gneg -chr5_KI270897v1_alt 0 1144418 gneg -chr5_KI270898v1_alt 0 130957 gneg -chr6_GL000250v2_alt 0 4672374 gneg -chr6_GL000251v2_alt 0 4795265 gneg -chr6_GL000252v2_alt 0 4604811 gneg -chr6_GL000253v2_alt 0 4677643 gneg -chr6_GL000254v2_alt 0 4827813 gneg 
-chr6_GL000255v2_alt 0 4606388 gneg -chr6_GL000256v2_alt 0 4929269 gneg -chr6_GL383533v1_alt 0 124736 gneg -chr6_KB021644v2_alt 0 185823 gneg -chr6_KI270758v1_alt 0 76752 gneg -chr6_KI270797v1_alt 0 197536 gneg -chr6_KI270798v1_alt 0 271782 gneg -chr6_KI270799v1_alt 0 152148 gneg -chr6_KI270800v1_alt 0 175808 gneg -chr6_KI270801v1_alt 0 870480 gneg -chr6_KI270802v1_alt 0 75005 gneg -chr7_GL383534v2_alt 0 119183 gneg -chr7_KI270803v1_alt 0 1111570 gneg -chr7_KI270804v1_alt 0 157952 gneg -chr7_KI270805v1_alt 0 209988 gneg -chr7_KI270806v1_alt 0 158166 gneg -chr7_KI270807v1_alt 0 126434 gneg -chr7_KI270808v1_alt 0 271455 gneg -chr7_KI270809v1_alt 0 209586 gneg -chr7_KI270899v1_alt 0 190869 gneg -chr8_KI270810v1_alt 0 374415 gneg -chr8_KI270811v1_alt 0 292436 gneg -chr8_KI270812v1_alt 0 282736 gneg -chr8_KI270813v1_alt 0 300230 gneg -chr8_KI270814v1_alt 0 141812 gneg -chr8_KI270815v1_alt 0 132244 gneg -chr8_KI270816v1_alt 0 305841 gneg -chr8_KI270817v1_alt 0 158983 gneg -chr8_KI270818v1_alt 0 145606 gneg -chr8_KI270819v1_alt 0 133535 gneg -chr8_KI270820v1_alt 0 36640 gneg -chr8_KI270821v1_alt 0 985506 gneg -chr8_KI270822v1_alt 0 624492 gneg -chr8_KI270900v1_alt 0 318687 gneg -chr8_KI270901v1_alt 0 136959 gneg -chr8_KI270926v1_alt 0 229282 gneg -chr9_GL383539v1_alt 0 162988 gneg -chr9_GL383540v1_alt 0 71551 gneg -chr9_GL383541v1_alt 0 171286 gneg -chr9_GL383542v1_alt 0 60032 gneg -chr9_KI270823v1_alt 0 439082 gneg -chrX_KI270880v1_alt 0 284869 gneg -chrX_KI270881v1_alt 0 144206 gneg -chrX_KI270913v1_alt 0 274009 gneg -chr10_GL383545v1_alt 0 179254 gneg -chr10_GL383546v1_alt 0 309802 gneg -chr10_KI270824v1_alt 0 181496 gneg -chr10_KI270825v1_alt 0 188315 gneg -chr11_GL383547v1_alt 0 154407 gneg -chr11_JH159136v1_alt 0 200998 gneg -chr11_JH159137v1_alt 0 191409 gneg -chr11_KI270826v1_alt 0 186169 gneg -chr11_KI270827v1_alt 0 67707 gneg -chr11_KI270829v1_alt 0 204059 gneg -chr11_KI270830v1_alt 0 177092 gneg -chr11_KI270831v1_alt 0 296895 gneg -chr11_KI270832v1_alt 0 210133 
gneg -chr11_KI270902v1_alt 0 106711 gneg -chr11_KI270903v1_alt 0 214625 gneg -chr11_KI270927v1_alt 0 218612 gneg -chr12_GL383549v1_alt 0 120804 gneg -chr12_GL383550v2_alt 0 169178 gneg -chr12_GL383551v1_alt 0 184319 gneg -chr12_GL383552v1_alt 0 138655 gneg -chr12_GL383553v2_alt 0 152874 gneg -chr12_GL877875v1_alt 0 167313 gneg -chr12_GL877876v1_alt 0 408271 gneg -chr12_KI270833v1_alt 0 76061 gneg -chr12_KI270834v1_alt 0 119498 gneg -chr12_KI270835v1_alt 0 238139 gneg -chr12_KI270836v1_alt 0 56134 gneg -chr12_KI270837v1_alt 0 40090 gneg -chr12_KI270904v1_alt 0 572349 gneg -chr13_KI270838v1_alt 0 306913 gneg -chr13_KI270839v1_alt 0 180306 gneg -chr13_KI270840v1_alt 0 191684 gneg -chr13_KI270841v1_alt 0 169134 gneg -chr13_KI270842v1_alt 0 37287 gneg -chr13_KI270843v1_alt 0 103832 gneg -chr14_KI270844v1_alt 0 322166 gneg -chr14_KI270845v1_alt 0 180703 gneg -chr14_KI270846v1_alt 0 1351393 gneg -chr14_KI270847v1_alt 0 1511111 gneg -chr15_GL383554v1_alt 0 296527 gneg -chr15_GL383555v2_alt 0 388773 gneg -chr15_KI270848v1_alt 0 327382 gneg -chr15_KI270849v1_alt 0 244917 gneg -chr15_KI270850v1_alt 0 430880 gneg -chr15_KI270851v1_alt 0 263054 gneg -chr15_KI270852v1_alt 0 478999 gneg -chr15_KI270905v1_alt 0 5161414 gneg -chr15_KI270906v1_alt 0 196384 gneg -chr16_GL383556v1_alt 0 192462 gneg -chr16_GL383557v1_alt 0 89672 gneg -chr16_KI270853v1_alt 0 2659700 gneg -chr16_KI270854v1_alt 0 134193 gneg -chr16_KI270855v1_alt 0 232857 gneg -chr16_KI270856v1_alt 0 63982 gneg -chr17_GL000258v2_alt 0 1821992 gneg -chr17_GL383563v3_alt 0 375691 gneg -chr17_GL383564v2_alt 0 133151 gneg -chr17_GL383565v1_alt 0 223995 gneg -chr17_GL383566v1_alt 0 90219 gneg -chr17_JH159146v1_alt 0 278131 gneg -chr17_JH159147v1_alt 0 70345 gneg -chr17_JH159148v1_alt 0 88070 gneg -chr17_KI270857v1_alt 0 2877074 gneg -chr17_KI270858v1_alt 0 235827 gneg -chr17_KI270859v1_alt 0 108763 gneg -chr17_KI270860v1_alt 0 178921 gneg -chr17_KI270861v1_alt 0 196688 gneg -chr17_KI270862v1_alt 0 391357 gneg 
-chr17_KI270907v1_alt 0 137721 gneg -chr17_KI270908v1_alt 0 1423190 gneg -chr17_KI270909v1_alt 0 325800 gneg -chr17_KI270910v1_alt 0 157099 gneg -chr18_GL383567v1_alt 0 289831 gneg -chr18_GL383568v1_alt 0 104552 gneg -chr18_GL383569v1_alt 0 167950 gneg -chr18_GL383570v1_alt 0 164789 gneg -chr18_GL383571v1_alt 0 198278 gneg -chr18_GL383572v1_alt 0 159547 gneg -chr18_KI270863v1_alt 0 167999 gneg -chr18_KI270864v1_alt 0 111737 gneg -chr18_KI270911v1_alt 0 157710 gneg -chr18_KI270912v1_alt 0 174061 gneg -chr19_GL000209v2_alt 0 177381 gneg -chr19_GL383573v1_alt 0 385657 gneg -chr19_GL383574v1_alt 0 155864 gneg -chr19_GL383575v2_alt 0 170222 gneg -chr19_GL383576v1_alt 0 188024 gneg -chr19_GL949746v1_alt 0 987716 gneg -chr19_GL949747v2_alt 0 729520 gneg -chr19_GL949748v2_alt 0 1064304 gneg -chr19_GL949749v2_alt 0 1091841 gneg -chr19_GL949750v2_alt 0 1066390 gneg -chr19_GL949751v2_alt 0 1002683 gneg -chr19_GL949752v1_alt 0 987100 gneg -chr19_GL949753v2_alt 0 796479 gneg -chr19_KI270865v1_alt 0 52969 gneg -chr19_KI270866v1_alt 0 43156 gneg -chr19_KI270867v1_alt 0 233762 gneg -chr19_KI270868v1_alt 0 61734 gneg -chr19_KI270882v1_alt 0 248807 gneg -chr19_KI270883v1_alt 0 170399 gneg -chr19_KI270884v1_alt 0 157053 gneg -chr19_KI270885v1_alt 0 171027 gneg -chr19_KI270886v1_alt 0 204239 gneg -chr19_KI270887v1_alt 0 209512 gneg -chr19_KI270888v1_alt 0 155532 gneg -chr19_KI270889v1_alt 0 170698 gneg -chr19_KI270890v1_alt 0 184499 gneg -chr19_KI270891v1_alt 0 170680 gneg -chr19_KI270914v1_alt 0 205194 gneg -chr19_KI270915v1_alt 0 170665 gneg -chr19_KI270916v1_alt 0 184516 gneg -chr19_KI270917v1_alt 0 190932 gneg -chr19_KI270918v1_alt 0 123111 gneg -chr19_KI270919v1_alt 0 170701 gneg -chr19_KI270920v1_alt 0 198005 gneg -chr19_KI270921v1_alt 0 282224 gneg -chr19_KI270922v1_alt 0 187935 gneg -chr19_KI270923v1_alt 0 189352 gneg -chr19_KI270929v1_alt 0 186203 gneg -chr19_KI270930v1_alt 0 200773 gneg -chr19_KI270931v1_alt 0 170148 gneg -chr19_KI270932v1_alt 0 215732 gneg 
-chr19_KI270933v1_alt 0 170537 gneg -chr19_KI270938v1_alt 0 1066800 gneg -chr20_GL383577v2_alt 0 128386 gneg -chr20_KI270869v1_alt 0 118774 gneg -chr20_KI270870v1_alt 0 183433 gneg -chr20_KI270871v1_alt 0 58661 gneg -chr21_GL383578v2_alt 0 63917 gneg -chr21_GL383579v2_alt 0 201197 gneg -chr21_GL383580v2_alt 0 74653 gneg -chr21_GL383581v2_alt 0 116689 gneg -chr21_KI270872v1_alt 0 82692 gneg -chr21_KI270873v1_alt 0 143900 gneg -chr21_KI270874v1_alt 0 166743 gneg -chr22_GL383582v2_alt 0 162811 gneg -chr22_GL383583v2_alt 0 96924 gneg -chr22_KB663609v1_alt 0 74013 gneg -chr22_KI270875v1_alt 0 259914 gneg -chr22_KI270876v1_alt 0 263666 gneg -chr22_KI270877v1_alt 0 101331 gneg -chr22_KI270878v1_alt 0 186262 gneg -chr22_KI270879v1_alt 0 304135 gneg -chr22_KI270928v1_alt 0 176103 gneg -chr1_KI270706v1_random 0 175055 gneg -chr1_KI270707v1_random 0 32032 gneg -chr1_KI270708v1_random 0 127682 gneg -chr1_KI270709v1_random 0 66860 gneg -chr1_KI270710v1_random 0 40176 gneg -chr1_KI270711v1_random 0 42210 gneg -chr1_KI270712v1_random 0 176043 gneg -chr1_KI270713v1_random 0 40745 gneg -chr1_KI270714v1_random 0 41717 gneg -chr2_KI270715v1_random 0 161471 gneg -chr2_KI270716v1_random 0 153799 gneg -chr3_GL000221v1_random 0 155397 gneg -chr4_GL000008v2_random 0 209709 gneg -chr5_GL000208v1_random 0 92689 gneg -chr9_KI270717v1_random 0 40062 gneg -chr9_KI270718v1_random 0 38054 gneg -chr9_KI270719v1_random 0 176845 gneg -chr9_KI270720v1_random 0 39050 gneg -chrY_KI270740v1_random 0 37240 gneg -chr11_KI270721v1_random 0 100316 gneg -chr14_GL000009v2_random 0 201709 gneg -chr14_GL000194v1_random 0 191469 gneg -chr14_GL000225v1_random 0 211173 gneg -chr14_KI270722v1_random 0 194050 gneg -chr14_KI270723v1_random 0 38115 gneg -chr14_KI270724v1_random 0 39555 gneg -chr14_KI270725v1_random 0 172810 gneg -chr14_KI270726v1_random 0 43739 gneg -chr15_KI270727v1_random 0 448248 gneg -chr16_KI270728v1_random 0 1872759 gneg -chr17_GL000205v2_random 0 185591 gneg -chr17_KI270729v1_random 0 280839 
gneg -chr17_KI270730v1_random 0 112551 gneg -chr22_KI270731v1_random 0 150754 gneg -chr22_KI270732v1_random 0 41543 gneg -chr22_KI270733v1_random 0 179772 gneg -chr22_KI270734v1_random 0 165050 gneg -chr22_KI270735v1_random 0 42811 gneg -chr22_KI270736v1_random 0 181920 gneg -chr22_KI270737v1_random 0 103838 gneg -chr22_KI270738v1_random 0 99375 gneg -chr22_KI270739v1_random 0 73985 gneg diff --git a/jcvi/utils/data/hg38.chrom.sizes b/jcvi/utils/data/hg38.chrom.sizes deleted file mode 100644 index 39a3ef9a..00000000 --- a/jcvi/utils/data/hg38.chrom.sizes +++ /dev/null @@ -1,455 +0,0 @@ -chr1 248956422 -chr2 242193529 -chr3 198295559 -chr4 190214555 -chr5 181538259 -chr6 170805979 -chr7 159345973 -chrX 156040895 -chr8 145138636 -chr9 138394717 -chr11 135086622 -chr10 133797422 -chr12 133275309 -chr13 114364328 -chr14 107043718 -chr15 101991189 -chr16 90338345 -chr17 83257441 -chr18 80373285 -chr20 64444167 -chr19 58617616 -chrY 57227415 -chr22 50818468 -chr21 46709983 -chr15_KI270905v1_alt 5161414 -chr6_GL000256v2_alt 4929269 -chr6_GL000254v2_alt 4827813 -chr6_GL000251v2_alt 4795265 -chr6_GL000253v2_alt 4677643 -chr6_GL000250v2_alt 4672374 -chr6_GL000255v2_alt 4606388 -chr6_GL000252v2_alt 4604811 -chr17_KI270857v1_alt 2877074 -chr16_KI270853v1_alt 2659700 -chr16_KI270728v1_random 1872759 -chr17_GL000258v2_alt 1821992 -chr5_GL339449v2_alt 1612928 -chr14_KI270847v1_alt 1511111 -chr17_KI270908v1_alt 1423190 -chr14_KI270846v1_alt 1351393 -chr5_KI270897v1_alt 1144418 -chr7_KI270803v1_alt 1111570 -chr19_GL949749v2_alt 1091841 -chr19_KI270938v1_alt 1066800 -chr19_GL949750v2_alt 1066390 -chr19_GL949748v2_alt 1064304 -chr19_GL949751v2_alt 1002683 -chr19_GL949746v1_alt 987716 -chr19_GL949752v1_alt 987100 -chr8_KI270821v1_alt 985506 -chr1_KI270763v1_alt 911658 -chr6_KI270801v1_alt 870480 -chr19_GL949753v2_alt 796479 -chr19_GL949747v2_alt 729520 -chr8_KI270822v1_alt 624492 -chr4_GL000257v2_alt 586476 -chr12_KI270904v1_alt 572349 -chr4_KI270925v1_alt 555799 -chr15_KI270852v1_alt 
478999 -chr15_KI270727v1_random 448248 -chr9_KI270823v1_alt 439082 -chr15_KI270850v1_alt 430880 -chr1_KI270759v1_alt 425601 -chr12_GL877876v1_alt 408271 -chrUn_KI270442v1 392061 -chr17_KI270862v1_alt 391357 -chr15_GL383555v2_alt 388773 -chr19_GL383573v1_alt 385657 -chr4_KI270896v1_alt 378547 -chr4_GL383528v1_alt 376187 -chr17_GL383563v3_alt 375691 -chr8_KI270810v1_alt 374415 -chr1_GL383520v2_alt 366580 -chr1_KI270762v1_alt 354444 -chr15_KI270848v1_alt 327382 -chr17_KI270909v1_alt 325800 -chr14_KI270844v1_alt 322166 -chr8_KI270900v1_alt 318687 -chr10_GL383546v1_alt 309802 -chr13_KI270838v1_alt 306913 -chr8_KI270816v1_alt 305841 -chr22_KI270879v1_alt 304135 -chr8_KI270813v1_alt 300230 -chr11_KI270831v1_alt 296895 -chr15_GL383554v1_alt 296527 -chr8_KI270811v1_alt 292436 -chr18_GL383567v1_alt 289831 -chrX_KI270880v1_alt 284869 -chr8_KI270812v1_alt 282736 -chr19_KI270921v1_alt 282224 -chr17_KI270729v1_random 280839 -chr17_JH159146v1_alt 278131 -chrX_KI270913v1_alt 274009 -chr6_KI270798v1_alt 271782 -chr7_KI270808v1_alt 271455 -chr22_KI270876v1_alt 263666 -chr15_KI270851v1_alt 263054 -chr22_KI270875v1_alt 259914 -chr1_KI270766v1_alt 256271 -chr19_KI270882v1_alt 248807 -chr3_KI270778v1_alt 248252 -chr15_KI270849v1_alt 244917 -chr4_KI270786v1_alt 244096 -chr12_KI270835v1_alt 238139 -chr17_KI270858v1_alt 235827 -chr19_KI270867v1_alt 233762 -chr16_KI270855v1_alt 232857 -chr8_KI270926v1_alt 229282 -chr5_GL949742v1_alt 226852 -chr3_KI270780v1_alt 224108 -chr17_GL383565v1_alt 223995 -chr2_KI270774v1_alt 223625 -chr4_KI270790v1_alt 220246 -chr11_KI270927v1_alt 218612 -chr19_KI270932v1_alt 215732 -chr11_KI270903v1_alt 214625 -chr2_KI270894v1_alt 214158 -chr14_GL000225v1_random 211173 -chrUn_KI270743v1 210658 -chr11_KI270832v1_alt 210133 -chr7_KI270805v1_alt 209988 -chr4_GL000008v2_random 209709 -chr7_KI270809v1_alt 209586 -chr19_KI270887v1_alt 209512 -chr4_KI270789v1_alt 205944 -chr3_KI270779v1_alt 205312 -chr19_KI270914v1_alt 205194 -chr19_KI270886v1_alt 204239 
-chr11_KI270829v1_alt 204059 -chr14_GL000009v2_random 201709 -chr21_GL383579v2_alt 201197 -chr11_JH159136v1_alt 200998 -chr19_KI270930v1_alt 200773 -chrUn_KI270747v1 198735 -chr18_GL383571v1_alt 198278 -chr19_KI270920v1_alt 198005 -chr6_KI270797v1_alt 197536 -chr3_KI270935v1_alt 197351 -chr17_KI270861v1_alt 196688 -chr15_KI270906v1_alt 196384 -chr5_KI270791v1_alt 195710 -chr14_KI270722v1_random 194050 -chr16_GL383556v1_alt 192462 -chr13_KI270840v1_alt 191684 -chr14_GL000194v1_random 191469 -chr11_JH159137v1_alt 191409 -chr19_KI270917v1_alt 190932 -chr7_KI270899v1_alt 190869 -chr19_KI270923v1_alt 189352 -chr10_KI270825v1_alt 188315 -chr19_GL383576v1_alt 188024 -chr19_KI270922v1_alt 187935 -chrUn_KI270742v1 186739 -chr22_KI270878v1_alt 186262 -chr19_KI270929v1_alt 186203 -chr11_KI270826v1_alt 186169 -chr6_KB021644v2_alt 185823 -chr17_GL000205v2_random 185591 -chr1_KI270765v1_alt 185285 -chr19_KI270916v1_alt 184516 -chr19_KI270890v1_alt 184499 -chr3_KI270784v1_alt 184404 -chr12_GL383551v1_alt 184319 -chr20_KI270870v1_alt 183433 -chrUn_GL000195v1 182896 -chr1_GL383518v1_alt 182439 -chr22_KI270736v1_random 181920 -chr10_KI270824v1_alt 181496 -chr14_KI270845v1_alt 180703 -chr3_GL383526v1_alt 180671 -chr13_KI270839v1_alt 180306 -chr22_KI270733v1_random 179772 -chrUn_GL000224v1 179693 -chr10_GL383545v1_alt 179254 -chrUn_GL000219v1 179198 -chr5_KI270792v1_alt 179043 -chr17_KI270860v1_alt 178921 -chr19_GL000209v2_alt 177381 -chr11_KI270830v1_alt 177092 -chr9_KI270719v1_random 176845 -chrUn_GL000216v2 176608 -chr22_KI270928v1_alt 176103 -chr1_KI270712v1_random 176043 -chr6_KI270800v1_alt 175808 -chr1_KI270706v1_random 175055 -chr2_KI270776v1_alt 174166 -chr18_KI270912v1_alt 174061 -chr3_KI270777v1_alt 173649 -chr5_GL383531v1_alt 173459 -chr3_JH636055v2_alt 173151 -chr14_KI270725v1_random 172810 -chr5_KI270796v1_alt 172708 -chr9_GL383541v1_alt 171286 -chr19_KI270885v1_alt 171027 -chr19_KI270919v1_alt 170701 -chr19_KI270889v1_alt 170698 -chr19_KI270891v1_alt 170680 
-chr19_KI270915v1_alt 170665 -chr19_KI270933v1_alt 170537 -chr19_KI270883v1_alt 170399 -chr19_GL383575v2_alt 170222 -chr19_KI270931v1_alt 170148 -chr12_GL383550v2_alt 169178 -chr13_KI270841v1_alt 169134 -chrUn_KI270744v1 168472 -chr18_KI270863v1_alt 167999 -chr18_GL383569v1_alt 167950 -chr12_GL877875v1_alt 167313 -chr21_KI270874v1_alt 166743 -chr3_KI270924v1_alt 166540 -chr1_KI270761v1_alt 165834 -chr3_KI270937v1_alt 165607 -chr22_KI270734v1_random 165050 -chr18_GL383570v1_alt 164789 -chr5_KI270794v1_alt 164558 -chr4_GL383527v1_alt 164536 -chrUn_GL000213v1 164239 -chr3_KI270936v1_alt 164170 -chr3_KI270934v1_alt 163458 -chr9_GL383539v1_alt 162988 -chr3_KI270895v1_alt 162896 -chr22_GL383582v2_alt 162811 -chr3_KI270782v1_alt 162429 -chr1_KI270892v1_alt 162212 -chrUn_GL000220v1 161802 -chr2_KI270767v1_alt 161578 -chr2_KI270715v1_random 161471 -chr2_KI270893v1_alt 161218 -chrUn_GL000218v1 161147 -chr18_GL383572v1_alt 159547 -chr8_KI270817v1_alt 158983 -chr4_KI270788v1_alt 158965 -chrUn_KI270749v1 158759 -chr7_KI270806v1_alt 158166 -chr7_KI270804v1_alt 157952 -chr18_KI270911v1_alt 157710 -chrUn_KI270741v1 157432 -chr17_KI270910v1_alt 157099 -chr19_KI270884v1_alt 157053 -chr19_GL383574v1_alt 155864 -chr19_KI270888v1_alt 155532 -chr3_GL000221v1_random 155397 -chr11_GL383547v1_alt 154407 -chr2_KI270716v1_random 153799 -chr12_GL383553v2_alt 152874 -chr6_KI270799v1_alt 152148 -chr22_KI270731v1_random 150754 -chrUn_KI270751v1 150742 -chrUn_KI270750v1 148850 -chr8_KI270818v1_alt 145606 -chrX_KI270881v1_alt 144206 -chr21_KI270873v1_alt 143900 -chr2_GL383521v1_alt 143390 -chr8_KI270814v1_alt 141812 -chr12_GL383552v1_alt 138655 -chrUn_KI270519v1 138126 -chr2_KI270775v1_alt 138019 -chr17_KI270907v1_alt 137721 -chrUn_GL000214v1 137718 -chr8_KI270901v1_alt 136959 -chr2_KI270770v1_alt 136240 -chr16_KI270854v1_alt 134193 -chr8_KI270819v1_alt 133535 -chr17_GL383564v2_alt 133151 -chr2_KI270772v1_alt 133041 -chr8_KI270815v1_alt 132244 -chr5_KI270795v1_alt 131892 -chr5_KI270898v1_alt 
130957 -chr20_GL383577v2_alt 128386 -chr1_KI270708v1_random 127682 -chr7_KI270807v1_alt 126434 -chr5_KI270793v1_alt 126136 -chr6_GL383533v1_alt 124736 -chr2_GL383522v1_alt 123821 -chr19_KI270918v1_alt 123111 -chr12_GL383549v1_alt 120804 -chr2_KI270769v1_alt 120616 -chr4_KI270785v1_alt 119912 -chr12_KI270834v1_alt 119498 -chr7_GL383534v2_alt 119183 -chr20_KI270869v1_alt 118774 -chr21_GL383581v2_alt 116689 -chr3_KI270781v1_alt 113034 -chr17_KI270730v1_random 112551 -chrUn_KI270438v1 112505 -chr4_KI270787v1_alt 111943 -chr18_KI270864v1_alt 111737 -chr2_KI270771v1_alt 110395 -chr1_GL383519v1_alt 110268 -chr2_KI270768v1_alt 110099 -chr1_KI270760v1_alt 109528 -chr3_KI270783v1_alt 109187 -chr17_KI270859v1_alt 108763 -chr11_KI270902v1_alt 106711 -chr18_GL383568v1_alt 104552 -chr22_KI270737v1_random 103838 -chr13_KI270843v1_alt 103832 -chr22_KI270877v1_alt 101331 -chr5_GL383530v1_alt 101241 -chr11_KI270721v1_random 100316 -chr22_KI270738v1_random 99375 -chr22_GL383583v2_alt 96924 -chr2_GL582966v2_alt 96131 -chrUn_KI270748v1 93321 -chrUn_KI270435v1 92983 -chr5_GL000208v1_random 92689 -chrUn_KI270538v1 91309 -chr17_GL383566v1_alt 90219 -chr16_GL383557v1_alt 89672 -chr17_JH159148v1_alt 88070 -chr5_GL383532v1_alt 82728 -chr21_KI270872v1_alt 82692 -chrUn_KI270756v1 79590 -chr6_KI270758v1_alt 76752 -chr12_KI270833v1_alt 76061 -chr6_KI270802v1_alt 75005 -chr21_GL383580v2_alt 74653 -chr22_KB663609v1_alt 74013 -chr22_KI270739v1_random 73985 -chr9_GL383540v1_alt 71551 -chrUn_KI270757v1 71251 -chr2_KI270773v1_alt 70887 -chr17_JH159147v1_alt 70345 -chr11_KI270827v1_alt 67707 -chr1_KI270709v1_random 66860 -chrUn_KI270746v1 66486 -chr16_KI270856v1_alt 63982 -chr21_GL383578v2_alt 63917 -chrUn_KI270753v1 62944 -chr19_KI270868v1_alt 61734 -chr9_GL383542v1_alt 60032 -chr20_KI270871v1_alt 58661 -chr12_KI270836v1_alt 56134 -chr19_KI270865v1_alt 52969 -chr1_KI270764v1_alt 50258 -chrUn_KI270589v1 44474 -chr14_KI270726v1_random 43739 -chr19_KI270866v1_alt 43156 -chr22_KI270735v1_random 42811 
-chr1_KI270711v1_random 42210 -chrUn_KI270745v1 41891 -chr1_KI270714v1_random 41717 -chr22_KI270732v1_random 41543 -chr1_KI270713v1_random 40745 -chrUn_KI270754v1 40191 -chr1_KI270710v1_random 40176 -chr12_KI270837v1_alt 40090 -chr9_KI270717v1_random 40062 -chr14_KI270724v1_random 39555 -chr9_KI270720v1_random 39050 -chr14_KI270723v1_random 38115 -chr9_KI270718v1_random 38054 -chrUn_KI270317v1 37690 -chr13_KI270842v1_alt 37287 -chrY_KI270740v1_random 37240 -chrUn_KI270755v1 36723 -chr8_KI270820v1_alt 36640 -chr1_KI270707v1_random 32032 -chrUn_KI270579v1 31033 -chrUn_KI270752v1 27745 -chrUn_KI270512v1 22689 -chrUn_KI270322v1 21476 -chrM 16569 -chrUn_GL000226v1 15008 -chrUn_KI270311v1 12399 -chrUn_KI270366v1 8320 -chrUn_KI270511v1 8127 -chrUn_KI270448v1 7992 -chrUn_KI270521v1 7642 -chrUn_KI270581v1 7046 -chrUn_KI270582v1 6504 -chrUn_KI270515v1 6361 -chrUn_KI270588v1 6158 -chrUn_KI270591v1 5796 -chrUn_KI270522v1 5674 -chrUn_KI270507v1 5353 -chrUn_KI270590v1 4685 -chrUn_KI270584v1 4513 -chrUn_KI270320v1 4416 -chrUn_KI270382v1 4215 -chrUn_KI270468v1 4055 -chrUn_KI270467v1 3920 -chrUn_KI270362v1 3530 -chrUn_KI270517v1 3253 -chrUn_KI270593v1 3041 -chrUn_KI270528v1 2983 -chrUn_KI270587v1 2969 -chrUn_KI270364v1 2855 -chrUn_KI270371v1 2805 -chrUn_KI270333v1 2699 -chrUn_KI270374v1 2656 -chrUn_KI270411v1 2646 -chrUn_KI270414v1 2489 -chrUn_KI270510v1 2415 -chrUn_KI270390v1 2387 -chrUn_KI270375v1 2378 -chrUn_KI270420v1 2321 -chrUn_KI270509v1 2318 -chrUn_KI270315v1 2276 -chrUn_KI270302v1 2274 -chrUn_KI270518v1 2186 -chrUn_KI270530v1 2168 -chrUn_KI270304v1 2165 -chrUn_KI270418v1 2145 -chrUn_KI270424v1 2140 -chrUn_KI270417v1 2043 -chrUn_KI270508v1 1951 -chrUn_KI270303v1 1942 -chrUn_KI270381v1 1930 -chrUn_KI270529v1 1899 -chrUn_KI270425v1 1884 -chrUn_KI270396v1 1880 -chrUn_KI270363v1 1803 -chrUn_KI270386v1 1788 -chrUn_KI270465v1 1774 -chrUn_KI270383v1 1750 -chrUn_KI270384v1 1658 -chrUn_KI270330v1 1652 -chrUn_KI270372v1 1650 -chrUn_KI270548v1 1599 -chrUn_KI270580v1 1553 
-chrUn_KI270387v1 1537 -chrUn_KI270391v1 1484 -chrUn_KI270305v1 1472 -chrUn_KI270373v1 1451 -chrUn_KI270422v1 1445 -chrUn_KI270316v1 1444 -chrUn_KI270338v1 1428 -chrUn_KI270340v1 1428 -chrUn_KI270583v1 1400 -chrUn_KI270334v1 1368 -chrUn_KI270429v1 1361 -chrUn_KI270393v1 1308 -chrUn_KI270516v1 1300 -chrUn_KI270389v1 1298 -chrUn_KI270466v1 1233 -chrUn_KI270388v1 1216 -chrUn_KI270544v1 1202 -chrUn_KI270310v1 1201 -chrUn_KI270412v1 1179 -chrUn_KI270395v1 1143 -chrUn_KI270376v1 1136 -chrUn_KI270337v1 1121 -chrUn_KI270335v1 1048 -chrUn_KI270378v1 1048 -chrUn_KI270379v1 1045 -chrUn_KI270329v1 1040 -chrUn_KI270419v1 1029 -chrUn_KI270336v1 1026 -chrUn_KI270312v1 998 -chrUn_KI270539v1 993 -chrUn_KI270385v1 990 -chrUn_KI270423v1 981 -chrUn_KI270392v1 971 -chrUn_KI270394v1 970 diff --git a/jcvi/utils/data/instance.json b/jcvi/utils/data/instance.json deleted file mode 100644 index 32c35876..00000000 --- a/jcvi/utils/data/instance.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "AvailabilityZone": "us-west-2b", - "InstanceId": "", - "LaunchSpec": { - "BlockDeviceMappings": [ - { - "DeviceName": "/dev/sda1", - "Ebs": { - "VolumeSize": 80, - "VolumeType": "gp2" - } - } - ], - "EbsOptimized": true, - "IamInstanceProfile": { - "Arn": "", - "Name": "" - }, - "ImageId": "ami-1bc98663", - "InstanceType": "c4.8xlarge", - "KeyName": "mvrad-pdx-htang", - "Monitoring": { - "Enabled": false - }, - "SecurityGroupIds": [ - "sg-31982956", - "sg-76bd4f11" - ], - "SubnetId": "subnet-123ab865" - }, - "PrivateIpAddress": "", - "Volumes": [ - { - "Device": "/dev/sdf", - "VolumeId": "vol-aad57e1f" - }, - { - "Device": "/dev/sdg", - "VolumeId": "vol-0fee51beb98eee8c5" - } - ] -} diff --git a/jcvi/utils/db.py b/jcvi/utils/db.py deleted file mode 100644 index 6e2e4b94..00000000 --- a/jcvi/utils/db.py +++ /dev/null @@ -1,334 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Connect to databases (Sybase, MySQL and PostgreSQL database backends) -""" -import os.path as op -import re -import sys - 
-from ..apps.base import ActionDispatcher, OptionParser, getusername, logger, sh -from ..formats.base import must_open -from ..utils.cbook import AutoVivification - - -# set up valid database connection params -valid_dbconn = AutoVivification() -for dbconn, port, module, host in zip( - ("Sybase", "MySQL", "PostgreSQL", "Oracle"), - (2025, 3306, 5432, 1521), - ("Sybase", "MySQLdb", "psycopg2", "cx_Oracle"), - ("SYBPROD", "mysql-lan-dev", "pgsql-lan-dev", "DBNAME.tacc.utexas.edu"), -): - valid_dbconn[dbconn]["port"] = port - valid_dbconn[dbconn]["module"] = module - valid_dbconn[dbconn]["hostname"] = host - - -def db_defaults(connector="Sybase"): - """ - JCVI legacy Sybase, MySQL and PostgreSQL database connection defaults - """ - return valid_dbconn[connector]["hostname"], "access", "access" - - -def get_profile( - sqshrc="~/.sqshrc", connector="Sybase", hostname=None, username=None, password=None -): - """ - get database, username, password from .sqshrc file e.g. - \set username="user" - """ - if connector == "Sybase": - shost, suser, spass = None, None, None - _ = lambda x: x.split("=")[-1].translate(None, "\"'").strip() - sqshrc = op.expanduser(sqshrc) - if op.exists(sqshrc): - for row in open(sqshrc): - row = row.strip() - if not row.startswith("\\set") or "prompt" in row: - continue - if "password" in row: - spass = _(row) - if "hostname" in row: - shost = _(row) - if "username" in row: - suser = _(row) - else: - print("[warning] file `{0}` not found".format(sqshrc), file=sys.stderr) - - if suser and spass: - username, password = suser, spass - if shost: - hostname = shost - - dhost, duser, dpass = db_defaults(connector=connector) - if not password: - username, password = duser, dpass - elif not username: - username = getusername() - - if not hostname: - hostname = dhost - - return hostname, username, password - - -def connect( - dbname, connector="Sybase", hostname=None, username=None, password=None, port=None -): - if None in (hostname, username, password): - 
hostname, username, password = get_profile( - hostname=hostname, username=username, password=password - ) - if port is None: - port = valid_dbconn[connector]["port"] - - dbconn = __import__(valid_dbconn[connector]["module"]) - if connector == "PostgreSQL": - dsn = "host={0} user={1} password={2} dbname={3} port={4}".format( - hostname, username, password, dbname, port - ) - dbh = dbconn.connect(dsn) - elif connector == "Oracle": - dsn = dbconn.makedsn(hostname, port, dbname) - dbh = dbconn.connect(username, password, dsn) - else: - dbh = dbconn.connect(hostname, username, password, dbname, port) - - cur = dbh.cursor() - return dbh, cur - - -def fetchall(cur, sql, connector=None): - cur.execute(sql) - return cur if connector == "Oracle" else cur.fetchall() - - -def execute(cur, sql): - cur.execute(sql) - - -def commit(dbh): - return dbh.commit() - - -def main(): - - actions = ( - ("libs", "get list of lib_ids to to run by pull"), - ("pull", "pull the sequences from the TIGR database"), - ("query", "run query using input from datafile"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def libs(args): - """ - %prog libs libfile - - Get list of lib_ids to be run by pull(). 
The SQL commands: - - select library.lib_id, library.name from library join bac on - library.bac_id=bac.id where bac.lib_name="Medicago"; - select seq_name from sequence where seq_name like 'MBE%' - and trash is null; - """ - p = OptionParser(libs.__doc__) - p.set_db_opts(dbname="track", credentials=None) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (libfile,) = args - - sqlcmd = ( - "select library.lib_id, library.name, bac.gb# from library join bac on " - + "library.bac_id=bac.id where bac.lib_name='Medicago'" - ) - cur = connect(opts.dbname) - results = fetchall(cur, sqlcmd) - - fw = open(libfile, "w") - for lib_id, name, gb in results: - name = name.translate(None, "\n") - if not gb: - gb = "None" - - print("|".join((lib_id, name, gb)), file=fw) - fw.close() - - -def pull(args): - """ - %prog pull libfile - - Pull the sequences using the first column in the libfile. - """ - p = OptionParser(pull.__doc__) - p.set_db_opts(dbname="mtg2", credentials=None) - p.add_argument( - "--frag", - default=False, - action="store_true", - help="The command to pull sequences from db", - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (libfile,) = args - - dbname = opts.dbname - frag = opts.frag - fp = open(libfile) - hostname, username, password = get_profile() - - for row in fp: - lib_id, name = row.split("|", 1) - sqlfile = lib_id + ".sql" - - if not op.exists(sqlfile): - fw = open(sqlfile, "w") - print( - "select seq_name from sequence where seq_name like" - + " '{0}%' and trash is null".format(lib_id), - file=fw, - ) - fw.close() - - if frag: - cmd = "pullfrag -D {0} -n {1}.sql -o {1} -q -S {2}".format( - dbname, lib_id, hostname - ) - cmd += " -U {0} -P {1}".format(username, password) - else: - cmd = "pullseq -D {0} -n {1}.sql -o {1} -q".format(dbname, lib_id) - sh(cmd) - - -to_commit_re = re.compile( - "|".join("^{0}".format(x) for x in ("update", "insert", "delete")), re.I -) - - 
-def to_commit(query): - """ - check if query needs to be committed (only if "update", "insert" or "delete") - """ - if re.search(to_commit_re, query): - return True - return None - - -def query(args): - """ - %prog query "SELECT feat_name FROM asm_feature WHERE feat_type = \\"{0}\\" AND end5 <= \\"{1}\\" AND end3 >= \\"{2}\\"" ::: datafile1 .... - - Script takes the data from tab-delimited datafile(s) and replaces the placeholders - in the query which is then executed. Depending upon the type of query, results are - either printed out (when running `select`) or not (when running `insert`, `update` - or `delete`) - - If the query contains quotes around field values, then these need to be escaped with \\ - """ - p = OptionParser(query.__doc__) - p.set_db_opts() - p.add_argument( - "--dryrun", - default=False, - action="store_true", - help="Don't commit to database. Just print queries", - ) - p.set_sep(help="Specify output field separator") - p.set_verbose(help="Print out all the queries") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) == 0: - sys.exit(not p.print_help()) - - fieldsep = opts.sep - - sep = ":::" - files = None - if sep in args: - sepidx = args.index(sep) - files = args[sepidx + 1 :] - args = args[:sepidx] - if not files: - files = [""] - - qrys = [] - qry = " ".join(args) - if ";" in qry: - for q in qry.split(";"): - if len(q.strip()) > 0: - qrys.append(q) - else: - qrys.append(qry) - - queries = set() - if files: - for datafile in files: - datafile = datafile.strip() - fp = must_open(datafile) - for row in fp: - for qry in qrys: - qry = qry.strip() - m = re.findall(r"{\d+}", qry) - if m: - mi = [int(x.strip("{}")) for x in m] - atoms = row.strip().split("\t") - assert max(mi) <= len( - atoms - ), "Number of columns in `datafile`({0})".format( - len(atoms) - ) + " != number of `placeholders`({0})".format( - len(m) - ) - natoms = [atoms[x] for x in mi] - for idx, (match, atom) in enumerate(zip(m, natoms)): - qry = 
qry.replace(match, atom) - queries.add(qry) - else: - for qry in qrys: - if re.search(r"{\d+}", qry): - logger.error( - "Query `%s` contains placeholders, no datafile(s) specified", qry - ) - sys.exit() - queries.add(qry) - - if not opts.dryrun: - fw = must_open(opts.outfile, "w") - dbh, cur = connect( - opts.dbname, - connector=opts.dbconn, - hostname=opts.hostname, - username=opts.username, - password=opts.password, - port=opts.port, - ) - cflag = None - for qry in queries: - if opts.dryrun or opts.verbose: - print(qry) - if not opts.dryrun: - if to_commit(qry): - execute(cur, qry) - cflag = True - else: - results = fetchall(cur, qry, connector=opts.dbconn) - for result in results: - print(fieldsep.join([str(x) for x in result]), file=fw) - if not opts.dryrun and cflag: - commit(dbh) - - -if __name__ == "__main__": - main() diff --git a/jcvi/utils/ez_setup.py b/jcvi/utils/ez_setup.py deleted file mode 100644 index 3dfd4d51..00000000 --- a/jcvi/utils/ez_setup.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- -# -# ez_setup.py -# utils -# -# Created by Haibao Tang on 11/24/20 -# Copyright © 2021 Haibao Tang. All rights reserved. -# - -""" -Identify the best downloading mechanism for a given URL. -Credits: https://pypi.org/project/ez_setup/ -""" - - -import os -import platform -import subprocess - -from urllib.request import urlopen - - -def download_file_powershell(url, target, cookies=None): - """ - Download the file at url to target using Powershell (which will validate - trust). Raise an exception if the command cannot complete. 
- """ - if cookies: - raise NotImplementedError - target = os.path.abspath(target) - cmd = [ - "powershell", - "-Command", - f"(new-object System.Net.WebClient).DownloadFile({url}, {target})", - ] - subprocess.check_call(cmd) - - -def has_powershell(): - if platform.system() != "Windows": - return False - cmd = ["powershell", "-Command", "echo test"] - devnull = open(os.path.devnull, "wb") - try: - try: - subprocess.check_call(cmd, stdout=devnull, stderr=devnull) - except FileNotFoundError: - return False - finally: - devnull.close() - return True - - -download_file_powershell.viable = has_powershell - - -def download_file_curl(url, target, cookies=None): - cmd = ["curl", url, "--output", target] - # https://github.com/tanghaibao/jcvi/issues/307 - # When downloading Phytozome directory listing, there are multiple redirects - # before we hit the index page. Natually we'd follow the redirects, similar - # to the default behavior of wget - cmd += ["-L"] # follow redirect - if url.startswith("ftp:"): - cmd += ["-P", "-"] - if cookies: - cmd += ["-b", cookies] - subprocess.check_call(cmd) - - -def has_curl(): - cmd = ["curl", "--version"] - devnull = open(os.path.devnull, "wb") - try: - try: - subprocess.check_call(cmd, stdout=devnull, stderr=devnull) - except FileNotFoundError: - return False - finally: - devnull.close() - return True - - -download_file_curl.viable = has_curl - - -def download_file_wget(url, target, cookies=None): - cmd = ["wget", url, "--output-document", target] - cmd += ["--no-check-certificate"] - if url.startswith("ftp:"): - cmd += ["--passive-ftp"] - if cookies: - cmd += ["--load-cookies", cookies] - subprocess.check_call(cmd) - - -def has_wget(): - cmd = ["wget", "--version"] - devnull = open(os.path.devnull, "wb") - try: - try: - subprocess.check_call(cmd, stdout=devnull, stderr=devnull) - except (FileNotFoundError, NotADirectoryError): - return False - except subprocess.CalledProcessError: - return False - finally: - devnull.close() - return 
True - - -download_file_wget.viable = has_wget - - -def download_file_insecure(url, target, cookies=None): - """ - Use Python to download the file, even though it cannot authenticate the - connection. - """ - if cookies: - raise NotImplementedError - src = dst = None - try: - src = urlopen(url) - # Read/write all in one block, so we don't create a corrupt file - # if the download is interrupted. - data = src.read() - dst = open(target, "wb") - dst.write(data) - finally: - if src: - src.close() - if dst: - dst.close() - - -download_file_insecure.viable = lambda: True - -ALL_DOWNLOADERS = [ - ("wget", download_file_wget), - ("curl", download_file_curl), - ("powershell", download_file_powershell), - ("insecure", download_file_insecure), -] - - -def get_best_downloader(downloader=None): - """Choose among a set of 4 popular downloaders, in the following order: - - wget - - curl - - powershell - - insecure (Python) - - Args: - downloader (str, optional): Use a given downloader. One of wget|curl|powershell|insecure. - Defaults to None. - - Returns: - Download function: The downloader function that accepts as parameters url, target - and cookies. - """ - for dl_name, dl in ALL_DOWNLOADERS: - if downloader and dl_name != downloader: - continue - if dl.viable(): - return dl diff --git a/jcvi/utils/grouper.py b/jcvi/utils/grouper.py deleted file mode 100755 index 0d158aa9..00000000 --- a/jcvi/utils/grouper.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Disjoint set data structure -Author: Michael Droettboom -""" - - -class Grouper(object): - """ - This class provides a lightweight way to group arbitrary objects - together into disjoint sets when a full-blown graph data structure - would be overkill. - - Objects can be joined using .join(), tested for connectedness - using .joined(), and all disjoint sets can be retrieved using list(g) - The objects being joined must be hashable. 
- - >>> g = Grouper() - >>> g.join('a', 'b') - >>> g.join('b', 'c') - >>> g.join('d', 'e') - >>> list(g) - [['a', 'b', 'c'], ['d', 'e']] - >>> g.joined('a', 'b') - True - >>> g.joined('a', 'c') - True - >>> 'f' in g - False - >>> g.joined('a', 'd') - False - >>> del g['b'] - >>> list(g) - [['a', 'c'], ['d', 'e']] - """ - - def __init__(self, init=[]): - mapping = self._mapping = {} - for x in init: - mapping[x] = [x] - - def join(self, a, *args): - """ - Join given arguments into the same set. Accepts one or more arguments. - """ - mapping = self._mapping - set_a = mapping.setdefault(a, [a]) - - for arg in args: - set_b = mapping.get(arg) - if set_b is None: - set_a.append(arg) - mapping[arg] = set_a - elif set_b is not set_a: - if len(set_b) > len(set_a): - set_a, set_b = set_b, set_a - set_a.extend(set_b) - for elem in set_b: - mapping[elem] = set_a - - def joined(self, a, b): - """ - Returns True if a and b are members of the same set. - """ - mapping = self._mapping - try: - return mapping[a] is mapping[b] - except KeyError: - return False - - def __iter__(self): - """ - Returns an iterator returning each of the disjoint sets as a list. - """ - seen = set() - for elem, group in self._mapping.items(): - if elem not in seen: - yield group - seen.update(group) - - def __getitem__(self, key): - """ - Returns the set that a certain key belongs. 
- """ - return tuple(self._mapping[key]) - - def __contains__(self, key): - return key in self._mapping - - def __len__(self): - group = set() - for v in self._mapping.values(): - group.update([tuple(v)]) - return len(group) - - def __delitem__(self, key): - group = self._mapping[key] - group.remove(key) - del self._mapping[key] - - @property - def num_members(self): - return sum(len(x) for x in self) - - def keys(self): - return self._mapping.keys() - - -if __name__ == "__main__": - import doctest - - doctest.testmod() diff --git a/jcvi/utils/orderedcollections.py b/jcvi/utils/orderedcollections.py deleted file mode 100644 index 6fcb7386..00000000 --- a/jcvi/utils/orderedcollections.py +++ /dev/null @@ -1,297 +0,0 @@ -# Copyright (c) 2009 Raymond Hettinger -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation files -# (the "Software"), to deal in the Software without restriction, -# including without limitation the rights to use, copy, modify, merge, -# publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, -# subject to the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. 
- -from bisect import bisect_left, bisect_right -from collections import defaultdict, OrderedDict -from urllib.parse import parse_qsl - -try: - from collections.abc import Callable -except ImportError: - from collections import Callable - - -class DefaultOrderedDict(OrderedDict): - def __init__(self, default_factory=None, *a, **kw): - if default_factory is not None and not isinstance(default_factory, Callable): - raise TypeError("first argument must be callable") - OrderedDict.__init__(self, *a, **kw) - self.default_factory = default_factory - - def __getitem__(self, key): - try: - return OrderedDict.__getitem__(self, key) - except KeyError: - return self.__missing__(key) - - def __missing__(self, key): - if self.default_factory is None: - raise KeyError(key) - self[key] = value = self.default_factory() - return value - - def __reduce__(self): - if self.default_factory is None: - args = tuple() - else: - args = (self.default_factory,) - return type(self), args, None, None, self.items() - - def copy(self): - return self.__copy__() - - def __copy__(self): - return type(self)(self.default_factory, self) - - def __deepcopy__(self, memo): - import copy - - return type(self)(self.default_factory, copy.deepcopy(self.items())) - - def __repr__(self): - return OrderedDict.__repr__(self) - - -def parse_qs(qs, separator=";", keep_attr_order=True): - """ - Kind of like urlparse.parse_qs, except returns an ordered dict. - Also avoids replicating that function's bad habit of overriding the - built-in 'dict' type. - - Taken from below with modification: - - """ - od = DefaultOrderedDict(list) if keep_attr_order else defaultdict(list) - # Python versions earlier than Python 3.9.2 allowed using both ; and & - # as query parameter separator. This has been changed in 3.9.2 to allow - # only a single separator key, with & as the default separator. 
- try: - for name, value in parse_qsl(qs, separator=separator): - od[name].append(value) - except TypeError: - for name, value in parse_qsl(qs): - od[name].append(value) - - return od - - -""" -Recipe from . -""" - - -class SortedCollection(object): - """Sequence sorted by a key function. - - SortedCollection() is much easier to work with than using bisect() directly. - It supports key functions like those use in sorted(), min(), and max(). - The result of the key function call is saved so that keys can be searched - efficiently. - - Instead of returning an insertion-point which can be hard to interpret, the - five find-methods return a specific item in the sequence. They can scan for - exact matches, the last item less-than-or-equal to a key, or the first item - greater-than-or-equal to a key. - - Once found, an item's ordinal position can be located with the index() method. - New items can be added with the insert() and insert_right() methods. - Old items can be deleted with the remove() method. - - The usual sequence methods are provided to support indexing, slicing, - length lookup, clearing, copying, forward and reverse iteration, contains - checking, item counts, item removal, and a nice looking repr. - - Finding and indexing are O(log n) operations while iteration and insertion - are O(n). The initial sort is O(n log n). - - The key function is stored in the 'key' attibute for easy introspection or - so that you can assign a new key function (triggering an automatic re-sort). - - In short, the class was designed to handle all of the common use cases for - bisect but with a simpler API and support for key functions. - - >>> from pprint import pprint - >>> from operator import itemgetter - - >>> s = SortedCollection(key=itemgetter(2)) - >>> for record in [ - ... ('roger', 'young', 30), - ... ('angela', 'jones', 28), - ... ('bill', 'smith', 22), - ... ('david', 'thomas', 32)]: - ... 
s.insert(record) - - >>> pprint(list(s)) # show records sorted by age - [('bill', 'smith', 22), - ('angela', 'jones', 28), - ('roger', 'young', 30), - ('david', 'thomas', 32)] - - >>> s.find_le(29) # find oldest person aged 29 or younger - ('angela', 'jones', 28) - >>> s.find_lt(28) # find oldest person under 28 - ('bill', 'smith', 22) - >>> s.find_gt(28) # find youngest person over 28 - ('roger', 'young', 30) - - >>> r = s.find_ge(32) # find youngest person aged 32 or older - >>> s.index(r) # get the index of their record - 3 - >>> s[3] # fetch the record at that index - ('david', 'thomas', 32) - - >>> s.key = itemgetter(0) # now sort by first name - >>> pprint(list(s)) - [('angela', 'jones', 28), - ('bill', 'smith', 22), - ('david', 'thomas', 32), - ('roger', 'young', 30)] - - """ - - def __init__(self, iterable=(), key=None): - self._given_key = key - key = (lambda x: x) if key is None else key - decorated = sorted((key(item), item) for item in iterable) - self._keys = [k for k, item in decorated] - self._items = [item for k, item in decorated] - self._key = key - - def _getkey(self): - return self._key - - def _setkey(self, key): - if key is not self._key: - self.__init__(self._items, key=key) - - def _delkey(self): - self._setkey(None) - - key = property(_getkey, _setkey, _delkey, "key function") - - def clear(self): - self.__init__([], self._key) - - def copy(self): - return self.__class__(self, self._key) - - def __len__(self): - return len(self._items) - - def __getitem__(self, i): - return self._items[i] - - def __iter__(self): - return iter(self._items) - - def __reversed__(self): - return reversed(self._items) - - def __repr__(self): - return "%s(%r, key=%s)" % ( - self.__class__.__name__, - self._items, - getattr(self._given_key, "__name__", repr(self._given_key)), - ) - - def __reduce__(self): - return self.__class__, (self._items, self._given_key) - - def __contains__(self, item): - k = self._key(item) - i = bisect_left(self._keys, k) - j = 
bisect_right(self._keys, k) - return item in self._items[i:j] - - def index(self, item): - """Find the position of an item. Raise ValueError if not found.""" - k = self._key(item) - i = bisect_left(self._keys, k) - j = bisect_right(self._keys, k) - return self._items[i:j].index(item) + i - - def count(self, item): - """Return number of occurrences of item""" - k = self._key(item) - i = bisect_left(self._keys, k) - j = bisect_right(self._keys, k) - return self._items[i:j].count(item) - - def insert(self, item): - """Insert a new item. If equal keys are found, add to the left""" - k = self._key(item) - i = bisect_left(self._keys, k) - self._keys.insert(i, k) - self._items.insert(i, item) - - def insert_right(self, item): - """Insert a new item. If equal keys are found, add to the right""" - k = self._key(item) - i = bisect_right(self._keys, k) - self._keys.insert(i, k) - self._items.insert(i, item) - - def remove(self, item): - """Remove first occurence of item. Raise ValueError if not found""" - i = self.index(item) - del self._keys[i] - del self._items[i] - - def find(self, item): - """Return first item with a key == item. Raise ValueError if not found.""" - k = self._key(item) - i = bisect_left(self._keys, k) - if i != len(self) and self._keys[i] == k: - return self._items[i] - raise ValueError("No item found with key equal to: %r" % (k,)) - - def find_le(self, item): - """Return last item with a key <= item. Raise ValueError if not found.""" - k = self._key(item) - i = bisect_right(self._keys, k) - if i: - return self._items[i - 1] - raise ValueError("No item found with key at or below: %r" % (k,)) - - def find_lt(self, item): - """Return last item with a key < item. Raise ValueError if not found.""" - k = self._key(item) - i = bisect_left(self._keys, k) - if i: - return self._items[i - 1] - raise ValueError("No item found with key below: %r" % (k,)) - - def find_ge(self, item): - """Return first item with a key >= equal to item. 
Raise ValueError if not found""" - k = self._key(item) - i = bisect_left(self._keys, k) - if i != len(self): - return self._items[i] - raise ValueError("No item found with key at or above: %r" % (k,)) - - def find_gt(self, item): - """Return first item with a key > item. Raise ValueError if not found""" - k = self._key(item) - i = bisect_right(self._keys, k) - if i != len(self): - return self._items[i] - raise ValueError("No item found with key above: %r" % (k,)) diff --git a/jcvi/utils/range.py b/jcvi/utils/range.py deleted file mode 100644 index d12043e2..00000000 --- a/jcvi/utils/range.py +++ /dev/null @@ -1,529 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -This script implements algorithm for finding intersecting rectangles, -both on the 2D dotplot and 1D-projection - -`range_chain` implements the exon-chain algorithm -""" -import sys - -from collections import namedtuple, defaultdict -from itertools import groupby - -from more_itertools import pairwise - - -LEFT, RIGHT = 0, 1 -Range = namedtuple("Range", "seqid start end score id") - - -def range_parse(s): - """ - >>> range_parse("chr1:1000-1") - Range(seqid='chr1', start=1, end=1000, score=0, id=0) - """ - chr, se = s.split(":") - start, end = se.split("-") - start, end = int(start), int(end) - if start > end: - start, end = end, start - - return Range(chr, start, end, 0, 0) - - -def range_intersect(a, b, extend=0): - """ - Returns the intersection between two reanges. 
- - >>> range_intersect((30, 45), (55, 65)) - >>> range_intersect((48, 65), (45, 55)) - [48, 55] - """ - a_min, a_max = a - if a_min > a_max: - a_min, a_max = a_max, a_min - b_min, b_max = b - if b_min > b_max: - b_min, b_max = b_max, b_min - - if a_max + extend < b_min or b_max + extend < a_min: - return None - i_min = max(a_min, b_min) - i_max = min(a_max, b_max) - if i_min > i_max + extend: - return None - - return [i_min, i_max] - - -def ranges_intersect(rset): - """ - Recursively calls the range_intersect() - pairwise version. - - >>> ranges_intersect([(48, 65), (45, 55), (50, 56)]) - [50, 55] - """ - if not rset: - return None - - a = rset[0] - for b in rset[1:]: - if not a: - return None - a = range_intersect(a, b) - - return a - - -def range_overlap(a, b, ratio=False): - """ - Returns whether two ranges overlap. Set percentage=True returns overlap - ratio over the shorter range of the two. - - >>> range_overlap(("1", 30, 45), ("1", 41, 55)) - 5 - >>> range_overlap(("1", 21, 45), ("1", 41, 75), ratio=True) - 0.2 - >>> range_overlap(("1", 30, 45), ("1", 15, 55)) - 16 - >>> range_overlap(("1", 30, 45), ("1", 15, 55), ratio=True) - 1.0 - >>> range_overlap(("1", 30, 45), ("1", 57, 68)) - 0 - >>> range_overlap(("1", 30, 45), ("2", 42, 55)) - 0 - >>> range_overlap(("1", 30, 45), ("2", 42, 55), ratio=True) - 0.0 - """ - a_chr, a_min, a_max = a - b_chr, b_min, b_max = b - a_min, a_max = sorted((a_min, a_max)) - b_min, b_max = sorted((b_min, b_max)) - shorter = min((a_max - a_min), (b_max - b_min)) + 1 - # must be on the same chromosome - if a_chr != b_chr: - ov = 0 - else: - ov = min(shorter, (a_max - b_min + 1), (b_max - a_min + 1)) - ov = max(ov, 0) - if ratio: - ov /= float(shorter) - return ov - - -def range_distance(a, b, distmode="ss"): - """ - Returns the distance between two ranges. 
- - distmode is ss, se, es, ee and sets the place on read one and two to - measure the distance (s = start, e = end) - - >>> range_distance(("1", 30, 45, '+'), ("1", 45, 55, '+')) - (26, '++') - >>> range_distance(("1", 30, 45, '-'), ("1", 57, 68, '-')) - (39, '--') - >>> range_distance(("1", 30, 42, '-'), ("1", 45, 55, '+')) - (26, '-+') - >>> range_distance(("1", 30, 42, '+'), ("1", 45, 55, '-'), distmode='ee') - (2, '+-') - """ - assert distmode in ("ss", "ee") - - a_chr, a_min, a_max, a_strand = a - b_chr, b_min, b_max, b_strand = b - # must be on the same chromosome - if a_chr != b_chr: - dist = -1 - # elif range_overlap(a[:3], b[:3]): - # dist = 0 - else: - # If the two ranges do not overlap, check stranded-ness and distance - if a_min > b_min: - a_min, b_min = b_min, a_min - a_max, b_max = b_max, a_max - a_strand, b_strand = b_strand, a_strand - - if distmode == "ss": - dist = b_max - a_min + 1 - elif distmode == "ee": - dist = b_min - a_max - 1 - - orientation = a_strand + b_strand - - return dist, orientation - - -def range_minmax(ranges): - """ - Returns the span of a collection of ranges where start is the smallest of - all starts, and end is the largest of all ends. - - >>> ranges = [(30, 45), (40, 50), (10, 100)] - >>> range_minmax(ranges) - (10, 100) - """ - rmin = min(ranges)[0] - rmax = max(ranges, key=lambda x: x[1])[1] - return rmin, rmax - - -def range_closest(ranges, b, left=True): - """ - Returns the range that's closest to the given position. Notice that the - behavior is to return ONE closest range to the left end (if left is True). - This is a SLOW method. 
- - >>> ranges = [("1", 30, 40), ("1", 33, 35), ("1", 10, 20)] - >>> b = ("1", 22, 25) - >>> range_closest(ranges, b) - ('1', 10, 20) - >>> range_closest(ranges, b, left=False) - ('1', 33, 35) - >>> b = ("1", 2, 5) - >>> range_closest(ranges, b) - """ - from jcvi.utils.orderedcollections import SortedCollection - - key = (lambda x: x) if left else (lambda x: (x[0], x[2], x[1])) - rr = SortedCollection(ranges, key=key) - try: - if left: - s = rr.find_le(b) - assert key(s) <= key(b), (s, b) - else: - s = rr.find_ge(b) - assert key(s) >= key(b), (s, b) - except ValueError: - s = None - - return s - - -def range_interleave(ranges, sizes={}, empty=False): - """ - Returns the ranges in between the given ranges. - - >>> ranges = [("1", 30, 40), ("1", 45, 50), ("1", 10, 30)] - >>> range_interleave(ranges) - [('1', 41, 44)] - >>> ranges = [("1", 30, 40), ("1", 42, 50)] - >>> range_interleave(ranges) - [('1', 41, 41)] - >>> range_interleave(ranges, sizes={"1": 70}) - [('1', 1, 29), ('1', 41, 41), ('1', 51, 70)] - """ - ranges = range_merge(ranges) - interleaved_ranges = [] - - for ch, cranges in groupby(ranges, key=lambda x: x[0]): - cranges = list(cranges) - size = sizes.get(ch, None) - if size: - ch, astart, aend = cranges[0] - if astart > 1: - interleaved_ranges.append((ch, 1, astart - 1)) - elif empty: - interleaved_ranges.append(None) - - for a, b in pairwise(cranges): - ch, astart, aend = a - ch, bstart, bend = b - istart, iend = aend + 1, bstart - 1 - if istart <= iend: - interleaved_ranges.append((ch, istart, iend)) - elif empty: - interleaved_ranges.append(None) - - if size: - ch, astart, aend = cranges[-1] - if aend < size: - interleaved_ranges.append((ch, aend + 1, size)) - elif empty: - interleaved_ranges.append(None) - - return interleaved_ranges - - -def range_merge(ranges, dist=0): - """ - Returns merged range. Similar to range_union, except this returns - new ranges. 
- - >>> ranges = [("1", 30, 45), ("1", 40, 50), ("1", 10, 50)] - >>> range_merge(ranges) - [('1', 10, 50)] - >>> ranges = [("1", 30, 40), ("1", 45, 50)] - >>> range_merge(ranges) - [('1', 30, 40), ('1', 45, 50)] - >>> ranges = [("1", 30, 40), ("1", 45, 50)] - >>> range_merge(ranges, dist=5) - [('1', 30, 50)] - """ - if not ranges: - return [] - - ranges.sort() - - cur_range = list(ranges[0]) - merged_ranges = [] - for r in ranges[1:]: - # open new range if start > cur_end or seqid != cur_seqid - if r[1] - cur_range[2] > dist or r[0] != cur_range[0]: - merged_ranges.append(tuple(cur_range)) - cur_range = list(r) - else: - cur_range[2] = max(cur_range[2], r[2]) - merged_ranges.append(tuple(cur_range)) - - return merged_ranges - - -def range_union(ranges): - """ - Returns total size of ranges, expect range as (chr, left, right) - - >>> ranges = [("1", 30, 45), ("1", 40, 50), ("1", 10, 50)] - >>> range_union(ranges) - 41 - >>> ranges = [("1", 30, 45), ("2", 40, 50)] - >>> range_union(ranges) - 27 - >>> ranges = [("1", 30, 45), ("1", 45, 50)] - >>> range_union(ranges) - 21 - >>> range_union([]) - 0 - """ - if not ranges: - return 0 - - ranges.sort() - - total_len = 0 - cur_chr, cur_left, cur_right = ranges[0] # left-most range - for r in ranges: - # open new range if left > cur_right or chr != cur_chr - if r[1] > cur_right or r[0] != cur_chr: - total_len += cur_right - cur_left + 1 - cur_chr, cur_left, cur_right = r - else: - # update cur_right - cur_right = max(r[2], cur_right) - - # the last one - total_len += cur_right - cur_left + 1 - - return total_len - - -def range_span(ranges): - """ - Returns the total span between the left most range to the right most range. 
- - >>> ranges = [("1", 30, 45), ("1", 40, 50), ("1", 10, 50)] - >>> range_span(ranges) - 41 - >>> ranges = [("1", 30, 45), ("2", 40, 50)] - >>> range_span(ranges) - 27 - >>> ranges = [("1", 30, 45), ("1", 45, 50)] - >>> range_span(ranges) - 21 - >>> range_span([]) - 0 - """ - if not ranges: - return 0 - - ranges.sort() - ans = 0 - for seq, lt in groupby(ranges, key=lambda x: x[0]): - lt = list(lt) - ans += max(max(lt)[1:]) - min(min(lt)[1:]) + 1 - return ans - - -def _make_endpoints(ranges): - assert ranges, "Ranges cannot be empty" - endpoints = [] - - for i, (seqid, start, end, score, id) in enumerate(ranges): - endpoints.append((seqid, start, LEFT, i, score)) - endpoints.append((seqid, end, RIGHT, i, score)) - - return sorted(endpoints) - - -def range_piles(ranges): - """ - Return piles of intervals that overlap. The piles are only interrupted by - regions of zero coverage. - - >>> ranges = [Range("2", 0, 1, 3, 0), Range("2", 1, 4, 3, 1), Range("3", 5, 7, 3, 2)] - >>> list(range_piles(ranges)) - [[0, 1], [2]] - """ - endpoints = _make_endpoints(ranges) - - for seqid, ends in groupby(endpoints, lambda x: x[0]): - active = [] - depth = 0 - for seqid, pos, leftright, i, score in ends: - if leftright == LEFT: - active.append(i) - depth += 1 - else: - depth -= 1 - - if depth == 0 and active: - yield active - active = [] - - -def range_conflict(ranges, depth=1): - """ - Find intervals that are overlapping in 1-dimension. - Return groups of block IDs that are in conflict. 
- - >>> ranges = [Range("2", 0, 1, 3, 0), Range("2", 1, 4, 3, 1), Range("3", 5, 7, 3, 2)] - >>> list(range_conflict(ranges)) - [(0, 1)] - """ - overlap = set() - active = set() - endpoints = _make_endpoints(ranges) - - for seqid, ends in groupby(endpoints, lambda x: x[0]): - active.clear() - for seqid, pos, leftright, i, score in ends: - if leftright == LEFT: - active.add(i) - else: - active.remove(i) - - if len(active) > depth: - overlap.add(tuple(sorted(active))) - - for ov in overlap: - yield ov - - -def range_chain(ranges): - """ - Take list of weighted intervals, find non-overlapping set with max weight. - We proceed with each end point (sorted by their relative positions). - - The input are a list of ranges of the form (start, stop, score), output is - subset of the non-overlapping ranges that give the highest score, score - - >>> ranges = [Range("1", 0, 9, 22, 0), Range("1", 3, 18, 24, 1), Range("1", 10, 28, 20, 2)] - >>> range_chain(ranges) - ([Range(seqid='1', start=0, end=9, score=22, id=0), Range(seqid='1', start=10, end=28, score=20, id=2)], 42) - >>> ranges = [Range("2", 0, 1, 3, 0), Range("2", 1, 4, 3, 1), Range("3", 5, 7, 3, 2)] - >>> range_chain(ranges) - ([Range(seqid='2', start=0, end=1, score=3, id=0), Range(seqid='3', start=5, end=7, score=3, id=2)], 6) - """ - endpoints = _make_endpoints(ranges) - - # stores the left end index for quick retrieval - left_index = {} - # dynamic programming, each entry [score, from_index, which_chain] - scores = [] - - for i, (seqid, pos, leftright, j, score) in enumerate(endpoints): - - cur_score = [0, -1, -1] if i == 0 else scores[-1][:] - - if leftright == LEFT: - left_index[j] = i - - else: # this is right end of j-th interval - # update if chaining j-th interval gives a better score - left_j = left_index[j] - chain_score = scores[left_j][0] + score - if chain_score > cur_score[0]: - cur_score = [chain_score, left_j, j] - - scores.append(cur_score) - - chains = [] - score, last, chain_id = scores[-1] # start 
backtracking - while last != -1: - if chain_id != -1: - chains.append(chain_id) - _, last, chain_id = scores[last] - - chains.reverse() - - selected = [ranges[x] for x in chains] - - return selected, score - - -def ranges_depth(ranges, sizes, verbose=True): - """ - Allow triple (seqid, start, end) rather than just tuple (start, end) - """ - ranges.sort() - for seqid, rrs in groupby(ranges, key=lambda x: x[0]): - rrs = [(a, b) for (s, a, b) in rrs] - size = sizes[seqid] - ds, depthdetails = range_depth(rrs, size, verbose=verbose) - depthdetails = [(seqid, s, e, d) for s, e, d in depthdetails] - yield depthdetails - - -def range_depth(ranges, size, verbose=True): - """ - Overlay ranges on [start, end], and summarize the ploidy of the intervals. - """ - from jcvi.utils.cbook import percentage - - # Make endpoints - endpoints = [] - for a, b in ranges: - endpoints.append((a, LEFT)) - endpoints.append((b, RIGHT)) - endpoints.sort() - vstart, vend = min(endpoints)[0], max(endpoints)[0] - - assert 0 <= vstart < size - assert 0 <= vend < size - - depth = 0 - depthstore = defaultdict(int) - depthstore[depth] += vstart - depthdetails = [(0, vstart, depth)] - - for (a, atag), (b, btag) in pairwise(endpoints): - if atag == LEFT: - depth += 1 - elif atag == RIGHT: - depth -= 1 - depthstore[depth] += b - a - depthdetails.append((a, b, depth)) - - assert btag == RIGHT - depth -= 1 - - assert depth == 0 - depthstore[depth] += size - vend - depthdetails.append((vend, size, depth)) - - assert sum(depthstore.values()) == size - if verbose: - for depth, count in sorted(depthstore.items()): - print( - "Depth {0}: {1}".format(depth, percentage(count, size)), file=sys.stderr - ) - - return depthstore, depthdetails - - -if __name__ == "__main__": - - import doctest - - doctest.testmod() diff --git a/jcvi/utils/table.py b/jcvi/utils/table.py deleted file mode 100644 index 957d958e..00000000 --- a/jcvi/utils/table.py +++ /dev/null @@ -1,145 +0,0 @@ -""" -Routines to summarize and report 
tabular data. -""" - - -def comment_banner(s, width=50): - line = "#" * width - return "\n".join((line, "#", "# " + s.strip(), "#", line)) - - -def banner(header, rows, major="=", minor="-"): - formatted = [header] + rows - rulersize = max(max(len(z) for z in x.splitlines()) for x in formatted) - table_edge = major * rulersize - table_sep = minor * rulersize - rows = "\n".join(rows) - - return "\n".join((table_edge, header, table_sep, rows, table_sep)) - - -def loadtable(header, rows, thousands=True): - """ - Print a tabular output, with horizontal separators - """ - formatted = load_csv(header, rows, sep=" ", thousands=thousands) - header, rows = formatted[0], formatted[1:] - - return banner(header, rows) - - -def tabulate(d, transpose=False, thousands=True, key_fun=None, sep=",", align=True): - """ - d is a dictionary, keyed by tuple(A, B). - Goal is to put A in rows, B in columns, report data in table form. - - >>> d = {(1,'a'):3, (1,'b'):4, (2,'a'):5, (2,'b'):0} - >>> print(tabulate(d)) - =========== - o a b - ----------- - 1 3 4 - 2 5 0 - ----------- - >>> print(tabulate(d, transpose=True)) - =========== - o 1 2 - ----------- - a 3 5 - b 4 0 - ----------- - """ - pairs = d.keys() - rows, cols = zip(*pairs) - if transpose: - rows, cols = cols, rows - - rows = sorted(set(rows)) - cols = sorted(set(cols)) - header = ["o"] + list(cols) - table = [] - for r in rows: - combo = [(r, c) for c in cols] - if transpose: - combo = [(c, r) for (r, c) in combo] - data = [d.get(x, "n/a") for x in combo] - data = ["{0:.1f}".format(x) if isinstance(x, float) else x for x in data] - if key_fun: - data = [key_fun(x) for x in data] - table.append([str(r)] + data) - - if not align: - formatted = load_csv(header, table, sep=sep) - return "\n".join(formatted) - - return loadtable(header, table, thousands=thousands) - - -def load_csv(header, contents, sep=",", thousands=False, align=True): - - from jcvi.formats.base import is_number - from jcvi.utils.cbook import thousands as th - - 
allcontents = [header] + contents if header else contents - cols = len(contents[0]) - for content in allcontents: - assert len(content) == cols - - # Stringify the contents - for i, content in enumerate(allcontents): - if thousands: - content = [int(x) if is_number(x, cast=int) else x for x in content] - content = [ - th(x) if (is_number(x, cast=int) and x >= 1000) else x for x in content - ] - allcontents[i] = [str(x) for x in content] - - colwidths = [max(len(x[i]) for x in allcontents) for i in range(cols)] - sep += " " - formatted_contents = [] - for content in allcontents: - rjusted = ( - [x.rjust(cw) for x, cw in zip(content, colwidths)] if align else content - ) - formatted = sep.join(rjusted) - formatted_contents.append(formatted) - - return formatted_contents - - -def write_csv( - header, - contents, - sep=",", - filename="stdout", - thousands=False, - tee=False, - align=True, - comment=False, -): - """ - Write csv that are aligned with the column headers. - - >>> header = ["x_value", "y_value"] - >>> contents = [(1, 100), (2, 200)] - >>> write_csv(header, contents) - x_value, y_value - 1, 100 - 2, 200 - """ - from jcvi.formats.base import must_open - - formatted = load_csv(header, contents, sep=sep, thousands=thousands, align=align) - if comment: - formatted[0] = "#" + formatted[0][1:] - formatted = "\n".join(formatted) - output = must_open(filename, "w") - print(formatted, file=output) - if tee and filename != "stdout": - print(formatted) - - -if __name__ == "__main__": - import doctest - - doctest.testmod() diff --git a/jcvi/utils/taxonomy.py b/jcvi/utils/taxonomy.py deleted file mode 100644 index dd79fec4..00000000 --- a/jcvi/utils/taxonomy.py +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -r""" -From my blog post: - - -Example: ->>> mylist = [3702, 3649, 3694, 3880] ->>> t = TaxIDTree(mylist) ->>> print t -(((Carica_papaya,Arabidopsis_thaliana)Brassicales,(Medicago_truncatula,Populus_trichocarpa)fabids)rosids); ->>> 
t.print_tree() - - /-Carica_papaya - - /---| - - | \-Arabidopsis_thaliana - ----- /---| - - | /-Medicago_truncatula - - \---| - - \-Populus_trichocarpa -""" -import sys -import time - -from functools import lru_cache - -from urllib.request import urlopen -from urllib.error import HTTPError, URLError - -from ete3 import Tree - -from ClientForm import ParseResponse -from BeautifulSoup import BeautifulSoup - -from ..apps.base import ActionDispatcher, OptionParser, logger - - -URL = "http://itol.embl.de/other_trees.shtml" - - -class TaxIDTree(object): - def __init__(self, list_of_taxids): - # If only one taxid provided, get full tree with nameExp - # else, get default tree - if isinstance(list_of_taxids, int): # single taxon - list_of_taxids = [list_of_taxids] - form_element_id = "nameExp" - else: - form_element_id = "nameCol" - - # the data to send in - form_data = "\n".join(str(x) for x in list_of_taxids) - - success = False - while not success: - try: - response = urlopen(URL) - success = True - except (URLError, HTTPError, RuntimeError) as e: - logger.error(e) - logger.debug("wait 5 seconds to reconnect...") - time.sleep(5) - - forms = ParseResponse(response, backwards_compat=False) - form = forms[0] - - form["ncbiIDs"] = form_data - page = urlopen(form.click()).read() - soup = BeautifulSoup(page) - - self.newick = "" - for element in soup("textarea"): - - if element["id"] == form_element_id: - self.newick = str(element.contents[0]) - - if self.newick == "": - print(soup) - - def __str__(self): - return self.newick - - def print_tree(self): - t = Tree(self.newick, format=8) - print(t) - - -def get_names(list_of_taxids): - """ - >>> mylist = [3702, 3649, 3694, 3880] - >>> get_names(mylist) - ['Arabidopsis thaliana', 'Carica papaya', 'Populus trichocarpa', 'Medicago truncatula'] - """ - from jcvi.apps.fetch import batch_taxonomy - - list_of_taxids = [str(x) for x in list_of_taxids] - return list(batch_taxonomy(list_of_taxids)) - - -def get_taxids(list_of_names): - 
""" - >>> mylist = ['Arabidopsis thaliana', 'Carica papaya'] - >>> get_taxids(mylist) - [1, 2] - """ - from jcvi.apps.fetch import batch_taxids - - return [int(x) for x in batch_taxids(list_of_names)] - - -def MRCA(list_of_taxids): - """ - This gets the most recent common ancester (MRCA) for a list of taxids - - >>> mylist = [3702, 3649, 3694, 3880] - >>> MRCA(mylist) - 'rosids' - """ - - t = TaxIDTree(list_of_taxids) - t = Tree(str(t), format=8) - - ancestor = t.get_common_ancestor(*t.get_leaves()) - - return ancestor.name - - -@lru_cache(maxsize=None) -def isPlantOrigin(taxid): - """ - Given a taxid, this gets the expanded tree which can then be checked to - see if the organism is a plant or not - - >>> isPlantOrigin(29760) - True - """ - - assert isinstance(taxid, int) - - t = TaxIDTree(taxid) - try: - return "Viridiplantae" in str(t) - except AttributeError: - raise ValueError("{0} is not a valid ID".format(taxid)) - - -def main(): - - actions = ( - ("newick", "query a list of IDs to newick"), - ("test", "test taxonomy module"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def test(args): - print("Testing isPlantOrigin():") - print(3702, isPlantOrigin(3702)) # Arabidopsis thaliana - print(10090, isPlantOrigin(10090)) # Mus musculus - - print("\nTest cache by 10K calls:") - for i in range(10000): - isPlantOrigin(3702) - isPlantOrigin(10090) - print("done") - - print("\nTest invalid ID:") - print(10099, isPlantOrigin(10099)) # Wrong ID - - -def newick(args): - """ - %prog newick idslist - - Query a list of IDs to retrieve phylogeny. 
- """ - p = OptionParser(newick.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (idsfile,) = args - mylist = [x.strip() for x in open(idsfile) if x.strip()] - print(get_taxids(mylist)) - - t = TaxIDTree(mylist) - print(t) - - -if __name__ == "__main__": - main() diff --git a/jcvi/utils/validator.py b/jcvi/utils/validator.py deleted file mode 100644 index 6836e891..00000000 --- a/jcvi/utils/validator.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Simple validator to make sure certain values match expectation. -""" - -from typing import Collection, Union, TypeVar - -ComparableType = Union[int, float] -T = TypeVar("T") - - -class ValidationError(Exception): - pass - - -def validate_in_choices(value: T, choices: Collection[T], tag: str = "Value") -> bool: - """ - Validate if certain value is among a collection. - Args: - value: value of interest - choices (Collection): a collection (list, tuple, dict, set etc.) of values - tag (str): the semantic meaning of value to be shown in error - - Returns: - True if validation passes. Raises ValidationError if it fails - """ - if value not in choices: - raise ValidationError(f"{tag} must be one of {choices}, you have: {value}") - return True - - -def validate_in_range( - value: ComparableType, - min_value: ComparableType, - max_value: ComparableType, - tag: str = "Value", -) -> bool: - """ - Validate if certain value is numerically within range. - - Args: - value: value of interest - min_value: minimum expected value - max_value: maximum expected value - tag (str): the semantic meaning of value to be shown in error - - Returns: - True if validation passes. Raises ValidationError if it fails. 
- """ - if not min_value <= value <= max_value: - raise ValidationError( - f"{tag} must be between [{min_value}, {max_value}], you have: {value}" - ) - return True diff --git a/jcvi/utils/webcolors.py b/jcvi/utils/webcolors.py deleted file mode 100755 index e1c33c41..00000000 --- a/jcvi/utils/webcolors.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- -# -# webcolors.py -# utils -# -# Created by Haibao Tang on 01/28/20 -# Copyright © 2021 Haibao Tang. All rights reserved. -# -import logging - -import numpy as np - -from skimage.color import rgb2lab, deltaE_cmc -from webcolors._definitions import _CSS3_NAMES_TO_HEX -from webcolors import hex_to_rgb - - -def color_diff(rgb1, rgb2): - """ - Calculate distance between two RGB colors. See discussion: - - http://stackoverflow.com/questions/8863810/python-find-similar-colors-best-way - - - for basic / fast calculations, you can use dE76 but beware of its problems - - for graphics arts use we recommend dE94 and perhaps dE-CMC 2:1 - - for textiles use dE-CMC - """ - rgb1 = np.array(rgb1, dtype="float64").reshape(1, 1, 3) / 255.0 - rgb2 = np.array(rgb2, dtype="float64").reshape(1, 1, 3) / 255.0 - lab1 = rgb2lab(rgb1) - lab2 = rgb2lab(rgb2) - return deltaE_cmc(lab1, lab2, kL=2, kC=1)[0, 0] - - -def closest_color(requested_color): - """ - Find closest color name for the request RGB tuple. 
- """ - logging.disable(logging.DEBUG) - colors = [] - for name, hex in _CSS3_NAMES_TO_HEX.items(): - diff = color_diff(hex_to_rgb(hex), requested_color) - colors.append((diff, name)) - logging.disable(logging.NOTSET) - _, min_color = min(colors) - - return min_color - - -if __name__ == "__main__": - import doctest - - doctest.testmod() diff --git a/jcvi/variation/__init__.py b/jcvi/variation/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/jcvi/variation/__main__.py b/jcvi/variation/__main__.py deleted file mode 100644 index 175c1d19..00000000 --- a/jcvi/variation/__main__.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Set of scripts relating to variation studies such as imputation, phasing, SNP/CNV analysis, and other supporting routines -""" - -from ..apps.base import dmain - - -if __name__ == "__main__": - dmain(__file__) diff --git a/jcvi/variation/cnv.py b/jcvi/variation/cnv.py deleted file mode 100644 index e2d397be..00000000 --- a/jcvi/variation/cnv.py +++ /dev/null @@ -1,1509 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Helper functions for Copy Number Variations (CNV). 
-""" -import logging -import os.path as op -import sys - -from collections import Counter, defaultdict -from dataclasses import dataclass -from itertools import groupby -from multiprocessing import Pool -from random import choice - -import numpy as np -import numpy.ma as ma -import pandas as pd -import pysam - -from pybedtools import BedTool, cleanup, set_tempdir - -from ..algorithms.formula import get_kmeans -from ..apps.base import ( - ActionDispatcher, - OptionParser, - getfilesize, - logger, - mkdir, - popen, - sh, -) -from ..apps.grid import MakeManager -from ..utils.aws import glob_s3, push_to_s3, sync_from_s3 -from ..utils.cbook import percentage - -autosomes = [f"chr{x}" for x in range(1, 23)] -sexsomes = ["chrX", "chrY"] -allsomes = autosomes + sexsomes -# See: http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/human/ -PAR = [("chrX", 10001, 2781479), ("chrX", 155701383, 156030895)] - - -class CopyNumberSegment(object): - def __init__(self, chr, rr, tag, mean_cn, realbins, is_PAR=False): - self.chr = chr - self.rr = rr - self.start = rr[0] * 1000 - self.end = rr[1] * 1000 - self.span = self.end - self.start - self.tag = tag - self.mean_cn = mean_cn - self.realbins = realbins - self.is_PAR = is_PAR - - def __str__(self): - mb = self.rr / 1000.0 - coords = "{}:{}-{}Mb".format(self.chr, format_float(mb[0]), format_float(mb[1])) - if self.is_PAR: - coords += ":PAR" - msg = "[{}] {} CN={} bins={}".format( - self.tag, coords, self.mean_cn, self.realbins - ) - if self.realbins >= 10000: # Mark segments longer than 10K bins ~ 10Mb - msg += "*" - return msg - - @property - def bedline(self): - return "\t".join( - str(x) - for x in (self.chr, self.start, self.end, self.tag, self.span, self.mean_cn) - ) - - -class CopyNumberHMM(object): - def __init__( - self, workdir, betadir="beta", mu=0.003, sigma=10, step=0.1, threshold=0.2 - ): - self.model = self.initialize(mu=mu, sigma=sigma, step=step) - self.workdir = workdir - self.betadir = betadir - if not 
op.exists(betadir): - sync_from_s3("s3://hli-mv-data-science/htang/ccn/beta", target_dir=betadir) - self.mu = mu - self.sigma = sigma - self.step = step - self.threshold = threshold - - def run(self, samplekey, chrs=allsomes): - if isinstance(chrs, str): - chrs = [chrs] - allevents = [] - for chr in chrs: - X, Z, clen, events = self.run_one(samplekey, chr) - allevents.extend(events) - return allevents - - def run_one(self, samplekey, chr): - cov = np.fromfile( - "{}/{}-cn/{}.{}.cn".format(self.workdir, samplekey, samplekey, chr) - ) - beta = np.fromfile("beta/{}.beta".format(chr)) - std = np.fromfile("beta/{}.std".format(chr)) - # Check if the two arrays have different dimensions - clen, blen = cov.shape[0], beta.shape[0] - tlen = max(clen, blen) - if tlen > clen: - cov = np.array(list(cov) + [np.nan] * (tlen - clen)) - elif tlen > blen: - beta = np.array(list(beta) + [np.nan] * (tlen - blen)) - clen, blen = cov.shape[0], beta.shape[0] - assert clen == blen, "cov ({}) and correction ({}) not same dimension".format( - clen, blen - ) - normalized = cov / beta - fixed = normalized.copy() - fixed[np.where(std > self.threshold)] = np.nan - X = fixed - Z = self.predict(X) - - med_cn = np.median(fixed[np.isfinite(fixed)]) - print(chr, med_cn) - - # Annotate segments - segments = self.annotate_segments(Z) - events = [] - for mean_cn, rr in segments: - ss = fixed[rr[0] : rr[1]] - realbins = np.sum(np.isfinite(ss)) - # Determine whether this is an outlier - segment = self.tag(chr, mean_cn, rr, med_cn, realbins) - if segment: - events.append((mean_cn, rr, segment)) - events.sort(key=lambda x: x[-1].start) - - # Send some debug info to screen - for mean_cn, rr, segment in events: - print(segment) - - return X, Z, clen, events - - def tag(self, chr, mean_cn, rr, med_cn, realbins, base=2): - around_0 = around_value(mean_cn, 0) - around_1 = around_value(mean_cn, 1) - around_2 = around_value(mean_cn, 2) - if realbins <= 1: # Remove singleton bins - return - if chr == "chrX": - 
start, end = rr - is_PAR = end < 5000 or start > 155000 - if med_cn < 1.25: # Male - # PAR ~ 2, rest ~ 1 - if is_PAR: - base = 2 - if around_2: - return - else: - base = 1 - if around_1: - return - else: - # All ~ 2 - if around_2: - return - elif chr == "chrY": - if med_cn < 0.25: # Female - base = 0 - if around_0: - return - else: - base = 1 - if around_1: - return - else: - if around_2: - return - tag = "DUP" if mean_cn > base else "DEL" - segment = CopyNumberSegment(chr, rr, tag, mean_cn, realbins, is_PAR=False) - return segment - - def initialize(self, mu, sigma, step): - from hmmlearn import hmm - - # Initial population probability - n = int(10 / step) - startprob = 1.0 / n * np.ones(n) - transmat = mu * np.ones((n, n)) - np.fill_diagonal(transmat, 1 - (n - 1) * mu) - - # The means of each component - means = np.arange(0, step * n, step) - means.resize((n, 1, 1)) - # The covariance of each component - covars = sigma * np.ones((n, 1, 1)) - - # Build an HMM instance and set parameters - model = hmm.GaussianHMM(n_components=n, covariance_type="full") - - # Instead of fitting it from the data, we directly set the estimated - # parameters, the means and covariance of the components - model.startprob_ = startprob - model.transmat_ = transmat - model.means_ = means - model.covars_ = covars - return model - - def predict(self, X): - # Handle missing values - X = ma.masked_invalid(X) - mask = X.mask - dX = ma.compressed(X).reshape(-1, 1) - dZ = self.model.predict(dX) - Z = np.array([np.nan for _ in range(X.shape[0])]) - Z[~mask] = dZ - Z = ma.masked_invalid(Z) - - return Z * self.step - - def annotate_segments(self, Z): - """Report the copy number and start-end segment""" - # We need a way to go from compressed idices to original indices - P = Z.copy() - P[~np.isfinite(P)] = -1 - _, mapping = np.unique(np.cumsum(P >= 0), return_index=True) - - dZ = Z.compressed() - uniq, idx = np.unique(dZ, return_inverse=True) - segments = [] - for i, mean_cn in enumerate(uniq): - if 
not np.isfinite(mean_cn): - continue - for rr in contiguous_regions(idx == i): - segments.append((mean_cn, mapping[rr])) - - return segments - - def plot( - self, samplekey, chrs=allsomes, color=None, dx=None, ymax=8, ms=2, alpha=0.7 - ): - from brewer2mpl import get_map - import matplotlib.pyplot as plt - - props = dict(boxstyle="round", facecolor="wheat", alpha=0.2) - - if isinstance(chrs, str): - chrs = [chrs] - f, axs = plt.subplots(1, len(chrs), sharey=True) - if not isinstance(axs, np.ndarray): - axs = np.array([axs]) - plt.tight_layout() - if color is None: - color = choice(get_map("Set2", "qualitative", 8).mpl_colors) - - for region, ax in zip(chrs, axs): - chr, start, end = parse_region(region) - X, Z, clen, events = self.run_one(samplekey, chr) - ax.plot(X, ".", label="observations", ms=ms, mfc=color, alpha=alpha) - ax.plot(Z, "k.", label="hidden", ms=6) - - if start is None and end is None: - ax.set_xlim(0, clen) - else: - ax.set_xlim(start / 1000, end / 1000) - - ax.set_ylim(0, ymax) - ax.set_xlabel("1Kb bins") - title = "{} {}".format(samplekey.split("_")[1], chr) - if dx: - title += " ({})".format(dx) - ax.set_title(title) - - # The final calls - yy = 0.9 - abnormal = [x for x in events if x[-1]] - if len(abnormal) > 5: - yinterval = 0.02 - size = 10 - else: - yinterval = 0.05 - size = 12 - for mean_cn, rr, event in events: - if mean_cn > ymax: - continue - ax.text(np.mean(rr), mean_cn + 0.2, mean_cn, ha="center", bbox=props) - if event is None: - continue - ax.text( - 0.5, - yy, - str(event).rsplit(" ", 1)[0], - color="r", - ha="center", - transform=ax.transAxes, - size=size, - ) - yy -= yinterval - - axs[0].set_ylabel("Copy number") - - -def parse_region(region): - if ":" not in region: - return region, None, None - - chr, start_end = region.split(":") - start, end = start_end.split("-") - return chr, int(start), int(end) - - -def contiguous_regions(condition): - """Finds contiguous True regions of the boolean array "condition". 
Returns - a 2D array where the first column is the start index of the region and the - second column is the end index.""" - - # Find the indicies of changes in "condition" - d = np.diff(condition) - (idx,) = d.nonzero() - - # We need to start things after the change in "condition". Therefore, - # we'll shift the index by 1 to the right. - idx += 1 - - if condition[0]: - # If the start of condition is True prepend a 0 - idx = np.r_[0, idx] - - if condition[-1]: - # If the end of condition is True, append the length of the array - idx = np.r_[idx, condition.size] # Edit - - # Reshape the result into two columns - idx.shape = (-1, 2) - return idx - - -def format_float(f): - s = "{:.3f}".format(f) - return s.rstrip("0").rstrip(".") - - -def around_value(s, mu, max_dev=0.25): - return mu - max_dev < s < mu + max_dev - - -def main(): - - actions = ( - ("cib", "convert bam to cib"), - ("coverage", "plot coverage along chromosome"), - ("cn", "correct cib according to GC content"), - ("mergecn", "compile matrix of GC-corrected copy numbers"), - ("hmm", "run cnv segmentation"), - # Gene copy number - ("exonunion", "collapse overlapping exons within the same gene"), - ("gcn", "gene copy number based on Canvas results"), - ("summarycanvas", "count different tags in Canvas vcf"), - # Interact with CCN script - ("batchccn", "run CCN script in batch"), - ("batchcn", "run HMM in batch"), - ("plot", "plot some chromosomes for visual proof"), - # Benchmark, training, etc. - ("sweep", "write a number of commands to sweep parameter space"), - ("compare", "compare cnv output to ground truths"), - # Plots - ("gcdepth", "plot GC content vs depth for genomic bins"), - ("validate", "validate CNV calls by plotting RDR/BAF/CN"), - ("wes_vs_wgs", "plot WES vs WGS CNV calls"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def gcdepth(args): - """ - %prog gcdepth sample_name tag - - Plot GC content vs depth vs genomnic bins. 
Inputs are mosdepth output: - - NA12878_S1.mosdepth.global.dist.txt - - NA12878_S1.mosdepth.region.dist.txt - - NA12878_S1.regions.bed.gz - - NA12878_S1.regions.bed.gz.csi - - NA12878_S1.regions.gc.bed.gz - - A sample mosdepth.sh script might look like: - ``` - #!/bin/bash - LD_LIBRARY_PATH=mosdepth/htslib/ mosdepth/mosdepth $1 \\ - bams/$1.bam -t 4 -c chr1 -n --by 1000 - - bedtools nuc -fi GRCh38/WholeGenomeFasta/genome.fa \\ - -bed $1.regions.bed.gz \\ - | pigz -c > $1.regions.gc.bed.gz - ``` - """ - import hashlib - from jcvi.algorithms.formula import MAD_interval - from jcvi.graphics.base import latex, plt, savefig, set2 - - p = OptionParser(gcdepth.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - sample_name, tag = args - # The tag is used to add to title, also provide a random (hashed) color - coloridx = int(hashlib.sha256(tag).hexdigest(), 16) % len(set2) - color = set2[coloridx] - - # mosdepth outputs a table that we can use to plot relationship - gcbedgz = sample_name + ".regions.gc.bed.gz" - df = pd.read_csv(gcbedgz, delimiter="\t") - mf = df.loc[:, ("4_usercol", "6_pct_gc")] - mf.columns = ["depth", "gc"] - - # We discard any bins that are gaps - mf = mf[(mf["depth"] > 0.001) | (mf["gc"] > 0.001)] - - # Create GC bins - gcbins = defaultdict(list) - for i, row in mf.iterrows(): - gcp = int(round(row["gc"] * 100)) - gcbins[gcp].append(row["depth"]) - gcd = sorted((k * 0.01, MAD_interval(v)) for (k, v) in gcbins.items()) - gcd_x, gcd_y = zip(*gcd) - m, lo, hi = zip(*gcd_y) - - # Plot - plt.plot( - mf["gc"], - mf["depth"], - ".", - color="lightslategray", - ms=2, - mec="lightslategray", - alpha=0.1, - ) - patch = plt.fill_between( - gcd_x, - lo, - hi, - facecolor=color, - alpha=0.25, - zorder=10, - linewidth=0.0, - label="Median +/- MAD band", - ) - plt.plot(gcd_x, m, "-", color=color, lw=2, zorder=20) - - ax = plt.gca() - ax.legend(handles=[patch], loc="best") - ax.set_xlim(0, 1) - ax.set_ylim(0, 100) - 
ax.set_title("{} ({})".format(latex(sample_name), tag)) - ax.set_xlabel("GC content") - ax.set_ylabel("Depth") - savefig(sample_name + ".gcdepth.png") - - -def exonunion(args): - """ - %prog exonunion gencode.v26.annotation.exon.bed - - Collapse overlapping exons within the same gene. File - `gencode.v26.annotation.exon.bed` can be generated by: - - $ zcat gencode.v26.annotation.gtf.gz | awk 'OFS="\t" {if ($3=="exon") - {print $1,$4-1,$5,$10,$12,$14,$16,$7}}' | tr -d '";' - """ - p = OptionParser(exonunion.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (gencodebed,) = args - beds = BedTool(gencodebed) - # fields[3] is gene_id; fields[6] is gene_name - for g, gb in groupby(beds, key=lambda x: x.fields[3]): - gb = BedTool(gb) - sys.stdout.write(str(gb.sort().merge(c="4,5,6,7", o=",".join(["first"] * 4)))) - - -def get_gain_loss_summary(vcffile): - """Extract Canvas:GAIN/LOSS/REF/LOH tags""" - from cyvcf2 import VCF - - counter = Counter() - for v in VCF(vcffile): - tag = v.ID.split(":")[1] - counter[tag] += 1 - - return counter - - -def summarycanvas(args): - """ - %prog summarycanvas output.vcf.gz - - Generate tag counts (GAIN/LOSS/REF/LOH) of segments in Canvas output. 
- """ - p = OptionParser(summarycanvas.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - for vcffile in args: - counter = get_gain_loss_summary(vcffile) - pf = op.basename(vcffile).split(".")[0] - print( - pf - + " " - + " ".join("{}:{}".format(k, v) for k, v in sorted(counter.items())) - ) - - -def parse_segments(vcffile): - """Extract all copy number segments from a CANVAS file - - VCF line looks like: - chr1 788879 Canvas:GAIN:chr1:788880-821005 N 2 q10 - SVTYPE=CNV;END=821005;CNVLEN=32126 RC:BC:CN:MCC 157:4:3:2 - """ - from io import StringIO - from cyvcf2 import VCF - - output = StringIO() - for v in VCF(vcffile): - chrom = v.CHROM - start = v.start - end = v.INFO.get("END") - 1 - (cn,) = v.format("CN")[0] - print("\t".join(str(x) for x in (chrom, start, end, cn)), file=output) - - beds = BedTool(output.getvalue(), from_string=True) - return beds - - -def counter_mean_and_median(counter): - """Calculate the mean and median value of a counter""" - if not counter: - return np.nan, np.nan - - total = sum(v for k, v in counter.items()) - mid = total / 2 - weighted_sum = 0 - items_seen = 0 - median_found = False - for k, v in sorted(counter.items()): - weighted_sum += k * v - items_seen += v - if not median_found and items_seen >= mid: - median = k - median_found = True - mean = weighted_sum * 1.0 / total - return mean, median - - -def counter_format(counter): - """Pretty print a counter so that it appears as: "2:200,3:100,4:20" """ - if not counter: - return "na" - - return ",".join("{}:{}".format(*z) for z in sorted(counter.items())) - - -def gcn(args): - """ - %prog gcn gencode.v26.exonunion.bed data/*.vcf.gz - - Compile gene copy njumber based on CANVAS results. 
- """ - p = OptionParser(gcn.__doc__) - p.set_cpus() - p.set_tmpdir(tmpdir="tmp") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - exonbed = args[0] - canvasvcfs = args[1:] - tsvfile = opts.outfile - tmpdir = opts.tmpdir - - mkdir(tmpdir) - set_tempdir(tmpdir) - - df = vcf_to_df(canvasvcfs, exonbed, opts.cpus) - for suffix in (".avgcn", ".medcn"): - df_to_tsv(df, tsvfile, suffix) - - -def vcf_to_df_worker(arg): - """Convert CANVAS vcf to a dict, single thread""" - canvasvcf, exonbed, i = arg - logger.debug("Working on job {}: {}".format(i, canvasvcf)) - samplekey = op.basename(canvasvcf).split(".")[0].rsplit("_", 1)[0] - d = {"SampleKey": samplekey} - - exons = BedTool(exonbed) - cn = parse_segments(canvasvcf) - overlaps = exons.intersect(cn, wao=True) - gcn_store = {} - for ov in overlaps: - # Example of ov.fields: - # [u'chr1', u'11868', u'12227', u'ENSG00000223972.5', - # u'ENST00000456328.2', u'transcribed_unprocessed_pseudogene', - # u'DDX11L1', u'.', u'-1', u'-1', u'.', u'0'] - gene_name = "|".join((ov.fields[6], ov.fields[3], ov.fields[5])) - if gene_name not in gcn_store: - gcn_store[gene_name] = defaultdict(int) - - cn = ov.fields[-2] - if cn == ".": - continue - cn = int(cn) - if cn > 10: - cn = 10 - amt = int(ov.fields[-1]) - gcn_store[gene_name][cn] += amt - - for k, v in sorted(gcn_store.items()): - v_mean, v_median = counter_mean_and_median(v) - d[k + ".avgcn"] = v_mean - d[k + ".medcn"] = v_median - cleanup() - return d - - -def vcf_to_df(canvasvcfs, exonbed, cpus): - """Compile a number of vcf files into tsv file for easy manipulation""" - df = pd.DataFrame() - p = Pool(processes=cpus) - results = [] - args = [(x, exonbed, i) for (i, x) in enumerate(canvasvcfs)] - r = p.map_async(vcf_to_df_worker, args, callback=results.append) - r.wait() - - for res in results: - df = df.append(res, ignore_index=True) - return df - - -def df_to_tsv(df, tsvfile, suffix): - """Serialize the dataframe as a 
tsv""" - tsvfile += suffix - columns = ["SampleKey"] + sorted(x for x in df.columns if x.endswith(suffix)) - tf = df.reindex_axis(columns, axis="columns") - tf.sort_values("SampleKey") - tf.to_csv(tsvfile, sep="\t", index=False, float_format="%.4g", na_rep="na") - print( - "TSV output written to `{}` (# samples={})".format(tsvfile, tf.shape[0]), - file=sys.stderr, - ) - - -def coverage(args): - """ - %prog coverage *.coverage - - Plot coverage along chromosome. The coverage file can be generated with: - $ samtools depth a.bam > a.coverage - - The plot is a simple line plot using matplotlib. - """ - from jcvi.graphics.base import savefig - - p = OptionParser(coverage.__doc__) - opts, args, iopts = p.set_image_options(args, format="png") - - if len(args) != 1: - sys.exit(not p.print_help()) - - (covfile,) = args - df = pd.read_csv(covfile, sep="\t", names=["Ref", "Position", "Depth"]) - - xlabel, ylabel = "Position", "Depth" - df.plot(xlabel, ylabel, color="g") - - image_name = covfile + "." + iopts.format - savefig(image_name) - - -def plot(args): - """ - %prog plot workdir sample chr1,chr2 - - Plot some chromosomes for visual proof. Separate multiple chromosomes with - comma. Must contain folder workdir/sample-cn/. - """ - from jcvi.graphics.base import savefig - - p = OptionParser(plot.__doc__) - opts, args, iopts = p.set_image_options(args, figsize="8x7", format="png") - - if len(args) != 3: - sys.exit(not p.print_help()) - - workdir, sample_key, chrs = args - chrs = chrs.split(",") - hmm = CopyNumberHMM(workdir=workdir) - hmm.plot(sample_key, chrs=chrs) - - image_name = sample_key + "_cn." + iopts.format - savefig(image_name, dpi=iopts.dpi, iopts=iopts) - - -def sweep(args): - """ - %prog sweep workdir 102340_NA12878 - - Write a number of commands to sweep parameter space. 
- """ - p = OptionParser(sweep.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - workdir, sample_key = args - golden_ratio = (1 + 5**0.5) / 2 - cmd = "python -m jcvi.variation.cnv hmm {} {}".format(workdir, sample_key) - cmd += " --mu {:.5f} --sigma {:.3f} --threshold {:.3f}" - mus = [0.00012 * golden_ratio**x for x in range(10)] - sigmas = [0.0012 * golden_ratio**x for x in range(20)] - thresholds = [0.1 * golden_ratio**x for x in range(10)] - print(mus, file=sys.stderr) - print(sigmas, file=sys.stderr) - print(thresholds, file=sys.stderr) - for mu in mus: - for sigma in sigmas: - for threshold in thresholds: - tcmd = cmd.format(mu, sigma, threshold) - print(tcmd) - - -def compare_worker(arg): - cnvoutput, truths = arg - cmd = "intersectBed -f .5 -F .5" - cmd += " -a {} -b {} | wc -l".format(cnvoutput, truths) - nlines = int(popen(cmd, debug=False).read()) - target_lines = len([x for x in open(cnvoutput)]) - truths_lines = len([x for x in open(truths)]) - precision = nlines * 100.0 / target_lines - recall = nlines * 100.0 / truths_lines - d = "\t".join( - str(x) - for x in ( - cnvoutput, - truths, - nlines, - target_lines, - truths_lines, - precision, - recall, - ) - ) - return d - - -def compare(args): - """ - %prog compare NA12878_array_hg38.bed *.seg - - Compare cnv output to known ground truths. 
- """ - p = OptionParser(compare.__doc__) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - truths = args[0] - cnvoutputs = args[1:] - cpus = min(len(cnvoutputs), opts.cpus) - p = Pool(processes=cpus) - results = [] - files = [(x, truths) for x in cnvoutputs] - r = p.map_async(compare_worker, files, callback=results.append) - r.wait() - - for res in results: - print("\n".join(res)) - - -def bam_to_cib(arg): - bamfile, seq, samplekey = arg - bam = pysam.AlignmentFile(bamfile, "rb") - name, length = seq["SN"], seq["LN"] - logger.debug("Computing depth for {} (length={})".format(name, length)) - pileup = bam.pileup(name) - a = np.ones(length, dtype=np.int8) * -128 - for x in pileup: - a[x.reference_pos] = min(x.nsegments, 255) - 128 - - cibfile = op.join(samplekey, "{}.{}.cib".format(samplekey, name)) - a.tofile(cibfile) - logger.debug("Depth written to `{}`".format(cibfile)) - - -def cib(args): - """ - %prog cib bamfile samplekey - - Convert BAM to CIB (a binary storage of int8 per base). - """ - p = OptionParser(cib.__doc__) - p.add_argument("--prefix", help="Report seqids with this prefix only") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bamfile, samplekey = args - mkdir(samplekey) - bam = pysam.AlignmentFile(bamfile, "rb") - refs = [x for x in bam.header["SQ"]] - prefix = opts.prefix - if prefix: - refs = [x for x in refs if x["SN"].startswith(prefix)] - - task_args = [] - for r in refs: - task_args.append((bamfile, r, samplekey)) - cpus = min(opts.cpus, len(task_args)) - logger.debug("Use {} cpus".format(cpus)) - - p = Pool(processes=cpus) - for _ in p.imap(bam_to_cib, task_args): - continue - - -def batchcn(args): - """ - %prog batchcn workdir samples.csv - - Run CNV segmentation caller in batch mode. Scans a workdir. 
- """ - p = OptionParser(batchcn.__doc__) - p.add_argument( - "--upload", - default="s3://hli-mv-data-science/htang/ccn", - help="Upload cn and seg results to s3", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - workdir, samples = args - upload = opts.upload - store = upload + "/{}/*.seg".format(workdir) - computed = [op.basename(x).split(".")[0] for x in glob_s3(store)] - computed = set(computed) - - # Generate a bunch of cn commands - fp = open(samples) - nskipped = ntotal = 0 - cmd = "python -m jcvi.variation.cnv cn --hmm --cleanup {}".format(workdir) - for row in fp: - samplekey, path = row.strip().split(",") - ntotal += 1 - if samplekey in computed: - nskipped += 1 - continue - print(" ".join((cmd, samplekey, path))) - - logger.debug("Skipped: {}".format(percentage(nskipped, ntotal))) - - -def hmm(args): - """ - %prog hmm workdir sample_key - - Run CNV segmentation caller. The workdir must contain a subfolder called - `sample_key-cn` that contains CN for each chromosome. A `beta` directory - that contains scaler for each bin must also be present in the current - directory. 
- """ - p = OptionParser(hmm.__doc__) - p.add_argument("--mu", default=0.003, type=float, help="Transition probability") - p.add_argument( - "--sigma", - default=0.1, - type=float, - help="Standard deviation of Gaussian emission distribution", - ) - p.add_argument( - "--threshold", - default=1, - type=float, - help="Standard deviation must be < this in the baseline population", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - workdir, sample_key = args - model = CopyNumberHMM( - workdir=workdir, mu=opts.mu, sigma=opts.sigma, threshold=opts.threshold - ) - events = model.run(sample_key) - params = ".mu-{}.sigma-{}.threshold-{}".format(opts.mu, opts.sigma, opts.threshold) - hmmfile = op.join(workdir, sample_key + params + ".seg") - fw = open(hmmfile, "w") - nevents = 0 - for mean_cn, rr, event in events: - if event is None: - continue - print(" ".join((event.bedline, sample_key)), file=fw) - nevents += 1 - fw.close() - logger.debug( - "A total of {} aberrant events written to `{}`".format(nevents, hmmfile) - ) - return hmmfile - - -def batchccn(args): - """ - %prog batchccn test.csv - - Run CCN script in batch. Write makefile. 
- """ - p = OptionParser(batchccn.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (csvfile,) = args - mm = MakeManager() - pf = op.basename(csvfile).split(".")[0] - mkdir(pf) - - header = next(open(csvfile)) - header = None if header.strip().endswith(".bam") else "infer" - logger.debug("Header={}".format(header)) - df = pd.read_csv(csvfile, header=header) - cmd = "perl /mnt/software/ccn_gcn_hg38_script/ccn_gcn_hg38.pl" - cmd += " -n {} -b {}" - cmd += " -o {} -r hg38".format(pf) - for i, (sample_key, bam) in df.iterrows(): - cmdi = cmd.format(sample_key, bam) - outfile = "{}/{}/{}.ccn".format(pf, sample_key, sample_key) - mm.add(csvfile, outfile, cmdi) - mm.write() - - -def mergecn(args): - """ - %prog mergecn FACE.csv - - Compile matrix of GC-corrected copy numbers. Place a bunch of folders in - csv file. Each folder will be scanned, one chromosomes after another. - """ - p = OptionParser(mergecn.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (csvfile,) = args - samples = [x.replace("-cn", "").strip().strip("/") for x in open(csvfile)] - betadir = "beta" - mkdir(betadir) - for seqid in allsomes: - names = [ - op.join(s + "-cn", "{}.{}.cn".format(op.basename(s), seqid)) - for s in samples - ] - arrays = [np.fromfile(name, dtype=np.float) for name in names] - shapes = [x.shape[0] for x in arrays] - med_shape = np.median(shapes) - arrays = [x for x in arrays if x.shape[0] == med_shape] - ploidy = 2 if seqid not in ("chrY", "chrM") else 1 - if seqid in sexsomes: - chr_med = [np.median([x for x in a if x > 0]) for a in arrays] - chr_med = np.array(chr_med) - idx = get_kmeans(chr_med, k=2) - zero_med = np.median(chr_med[idx == 0]) - one_med = np.median(chr_med[idx == 1]) - logger.debug("K-means with {} c0:{} c1:{}".format(seqid, zero_med, one_med)) - higher_idx = 1 if one_med > zero_med else 0 - # Use the higher mean coverage componen - arrays = 
np.array(arrays)[idx == higher_idx] - arrays = [[x] for x in arrays] - ar = np.concatenate(arrays) - print(seqid, ar.shape) - rows, columns = ar.shape - beta = [] - std = [] - for j in range(columns): - a = ar[:, j] - beta.append(np.median(a)) - std.append(np.std(a) / np.mean(a)) - beta = np.array(beta) / ploidy - betafile = op.join(betadir, "{}.beta".format(seqid)) - beta.tofile(betafile) - stdfile = op.join(betadir, "{}.std".format(seqid)) - std = np.array(std) - std.tofile(stdfile) - logger.debug("Written to `{}`".format(betafile)) - ar.tofile("{}.bin".format(seqid)) - - -def is_matching_gz(origfile, gzfile): - if not op.exists(origfile): - return False - if not op.exists(gzfile): - return False - return getfilesize(origfile) == getfilesize(gzfile) - - -def load_cib(cibfile, n=1000): - cibgzfile = cibfile + ".gz" - # When we try unzip if cib not found, or cib does not match cibgz - if not op.exists(cibfile) or not is_matching_gz(cibfile, cibgzfile): - if op.exists(cibgzfile): - cibfile = cibgzfile - if cibfile.endswith(".gz"): - sh("pigz -d -k -f {}".format(cibfile)) - cibfile = cibfile.replace(".gz", "") - if not op.exists(cibfile): - return - - cib = np.fromfile(cibfile, dtype=np.int8) + 128 - rm = pd.rolling_mean(cib, n, min_periods=n / 2) - a = rm[n - 1 :: n].copy() - del cib - del rm - return a - - -def build_gc_array(fastafile="/mnt/ref/hg38.upper.fa", gcdir="gc", n=1000): - from pyfasta import Fasta - - f = Fasta(fastafile) - mkdir(gcdir) - for seqid in allsomes: - if seqid not in f: - logger.debug("Seq {} not found. 
Continue anyway.".format(seqid)) - continue - c = np.array(f[seqid]) - gc = (c == "G") | (c == "C") # If base is GC - rr = ~(c == "N") # If base is real - mgc = pd.rolling_sum(gc, n, min_periods=n / 2)[n - 1 :: n] - mrr = pd.rolling_sum(rr, n, min_periods=n / 2)[n - 1 :: n] - gc_pct = np.rint(mgc * 100 / mrr) - gc_pct = np.asarray(gc_pct, dtype=np.uint8) - arfile = op.join(gcdir, "{}.{}.gc".format(seqid, n)) - gc_pct.tofile(arfile) - print(seqid, gc_pct, arfile, file=sys.stderr) - - -def cn(args): - """ - %prog cn workdir 102340_NA12878 \ - s3://hli-bix-us-west-2/kubernetes/wf-root-test/102340_NA12878/lpierce-ccn_gcn-v2/ - - Download CCN output folder and convert cib to copy number per 1Kb. - """ - p = OptionParser(cn.__doc__) - p.add_argument( - "--binsize", default=1000, type=int, help="Window size along chromosome" - ) - p.add_argument( - "--cleanup", - default=False, - action="store_true", - help="Clean up downloaded s3 folder", - ) - p.add_argument( - "--hmm", - default=False, - action="store_true", - help="Run HMM caller after computing CN", - ) - p.add_argument( - "--upload", - default="s3://hli-mv-data-science/htang/ccn", - help="Upload cn and seg results to s3", - ) - p.add_argument( - "--rebuildgc", help="Rebuild GC directory rather than pulling from S3" - ) - opts, args = p.parse_args(args) - - if len(args) == 2: - workdir, sample_key = args - s3dir = None - elif len(args) == 3: - workdir, sample_key, s3dir = args - else: - sys.exit(not p.print_help()) - - n = opts.binsize - rebuildgc = opts.rebuildgc - mkdir(workdir) - sampledir = op.join(workdir, sample_key) - if s3dir: - sync_from_s3(s3dir, target_dir=sampledir) - - assert op.exists(sampledir), "Directory {} doesn't exist!".format(sampledir) - - cndir = op.join(workdir, sample_key + "-cn") - if op.exists(cndir): - logger.debug("Directory {} exists. 
Skipped.".format(cndir)) - return - - gcdir = "gc" - if rebuildgc: - build_gc_array(fastafile=rebuildgc, n=n, gcdir=gcdir) - if not op.exists(gcdir): - sync_from_s3("s3://hli-mv-data-science/htang/ccn/gc", target_dir=gcdir) - - # Build GC correction table - gc_bin = defaultdict(list) - gc_med = {} - coverage = [] - - for seqid in allsomes: - gcfile = op.join(gcdir, "{}.{}.gc".format(seqid, n)) - if not op.exists(gcfile): - logger.error("File {} not found. Continue anyway.".format(gcfile)) - continue - gc = np.fromfile(gcfile, dtype=np.uint8) - cibfile = op.join(sampledir, "{}.{}.cib".format(sample_key, seqid)) - cib = load_cib(cibfile) - print(seqid, gc.shape[0], cib.shape[0], file=sys.stderr) - if seqid in autosomes: - for gci, k in zip(gc, cib): - gc_bin[gci].append(k) - coverage.append((seqid, gc, cib)) - - for gci, k in gc_bin.items(): - nonzero_k = [x for x in k if x] - gc_med[gci] = med = np.median(nonzero_k) / 2 - print(gci, len(nonzero_k), med, file=sys.stderr) - - mkdir(cndir) - apply_fun = np.vectorize(gc_med.get) - # Apply the GC correction over coverage - for seqid, gc, cib in coverage: - nitems = cib.shape[0] - beta = apply_fun(gc[:nitems]) - beta_cn = cib / beta - cnfile = op.join(cndir, "{}.{}.cn".format(sample_key, seqid)) - beta_cn.tofile(cnfile) - - # Run HMM caller if asked - segfile = hmm([workdir, sample_key]) if opts.hmm else None - - upload = opts.upload - if upload: - push_to_s3(upload, cndir) - if segfile: - push_to_s3(upload, segfile) - - if opts.cleanup: - from jcvi.apps.base import cleanup - - cleanup(cndir, sampledir) - - -@dataclass -class CNV: - chr: str - start: int - end: int - type: str - name: str - is_pass: bool - cn: int - - -def validate(args): - """ - %prog validate sample.bcc sample.cnv.vcf.gz - - Plot RDR/BAF/CN for validation of CNV calls in `sample.vcf.gz`. 
- """ - p = OptionParser(validate.__doc__) - p.add_argument( - "--no-rdr-logy", - default=False, - action="store_true", - help="Do not make y-axis of RDR log-scale", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - import holoviews as hv - import hvplot.pandas - - hv.extension("bokeh") - - ( - bccfile, - vcffile, - ) = args - rdr_logy = not opts.no_rdr_logy - df = pd.read_csv(bccfile, sep="\t") - - sample = op.basename(bccfile).split(".", 1)[0] - sizes, xlim = get_hg19_chr_sizes_and_xlim() - b = np.cumsum(sizes["size"]) - a = pd.Series(b[:-1]) - a.index += 1 - sizes["cumsize"] = pd.concat([pd.Series([0]), a]) - jf = pd.merge(df, sizes, how="left", left_on="#chr", right_on="chr") - jf["pos"] = jf["start"] + jf["cumsize"] - model, rfx = get_model_and_dataframe(vcffile, sizes) - - rdr_ylim = (0.5, 4) if rdr_logy else (0, 8) - rdr = jf.hvplot.scatter( - x="pos", - y="rdr", - logy=rdr_logy, - xlim=xlim, - ylim=rdr_ylim, - s=1, - width=1440, - height=240, - c="chr", - title=f"{sample}, Tumor RD/Normal RD (RDR)", - legend=False, - ) - baf = jf.hvplot.scatter( - x="pos", - y="baf", - xlim=xlim, - ylim=(0, 0.5), - s=1, - width=1440, - height=240, - c="chr", - title=f"{sample}, Germline Variant B-Allele Fraction (BAF)", - legend=False, - ) - vaf = jf.hvplot.scatter( - x="pos", - y="tumor_vaf", - xlim=xlim, - ylim=(0, 1), - s=1, - width=1440, - height=180, - c="chr", - title=f"{sample}, Somatic Variant Allele Fraction (VAF)", - legend=False, - ) - comp = get_segments(rfx) - for _, row in sizes.iterrows(): - chr = row["chr"] - cb = row["cumsize"] - vline = hv.VLine(cb).opts(color="lightgray", line_width=1) - ctext1 = hv.Text( - cb, 0.5, chr.replace("chr", ""), halign="left", valign="bottom" - ) - ctext2 = hv.Text(cb, 0, chr.replace("chr", ""), halign="left", valign="bottom") - rdr = rdr * vline * ctext1 - baf = baf * vline * ctext2 - comp = comp * vline - vaf = vaf * vline - model_kv = " ".join(f"{k}={v}" for k, v in 
model.items()) - comp.opts( - width=1440, - height=240, - xlim=xlim, - ylim=(0, 10), - title=f"{sample}, CNV calls Copy Number (CN) - Red: GAIN, Blue: LOSS, Black: REF, Magenta: CNLOH, Cyan: GAINLOH\n{model_kv}", - ) - cc = (rdr + baf + comp + vaf).cols(1) - htmlfile = f"{sample}.html" - hv.save(cc, htmlfile) - logger.info("Report written to `%s`", htmlfile) - - -def get_segments(rfx: pd.DataFrame): - """ - Return a holoviews object for segments. - """ - import holoviews as hv - - rfx_gain = rfx[(rfx["type"] == "GAIN") & rfx["is_pass"]] - rfx_loss = rfx[(rfx["type"] == "LOSS") & rfx["is_pass"]] - rfx_ref = rfx[(rfx["type"] == "REF") & rfx["is_pass"]] - rfx_cnloh = rfx[(rfx["type"] == "CNLOH") & rfx["is_pass"]] - rfx_gainloh = rfx[(rfx["type"] == "GAINLOH") & rfx["is_pass"]] - rfx_nonpass = rfx[~rfx["is_pass"]] - seg_gain = hv.Segments( - rfx_gain, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] - ) - seg_loss = hv.Segments( - rfx_loss, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] - ) - seg_ref = hv.Segments( - rfx_ref, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] - ) - seg_cnloh = hv.Segments( - rfx_cnloh, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] - ) - seg_gainloh = hv.Segments( - rfx_gainloh, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] - ) - seg_nonpass = hv.Segments( - rfx_nonpass, [hv.Dimension("pos"), hv.Dimension("cn"), "pos_end", "cn"] - ) - seg_gain.opts(color="r", line_width=5, tools=["hover"]) - seg_loss.opts(color="b", line_width=5, tools=["hover"]) - seg_ref.opts(color="k", line_width=5, tools=["hover"]) - seg_cnloh.opts(color="m", line_width=5, tools=["hover"]) - seg_gainloh.opts(color="c", line_width=5, tools=["hover"]) - seg_nonpass.opts(color="lightgray", line_width=5, tools=["hover"]) - comp = seg_gain * seg_ref * seg_loss * seg_cnloh * seg_gainloh * seg_nonpass - return comp - - -def get_model_and_dataframe( - vcffile: str, sizes: pd.DataFrame -) -> tuple[dict, pd.DataFrame]: - 
""" - Get the model and dataframe from the VCF file. - """ - model = get_purity_and_model(vcffile) - records = get_CNV_records(vcffile) - rf = pd.DataFrame(x.__dict__ for x in records) - rfx = pd.merge(rf, sizes, how="left", left_on="chr", right_on="chr") - rfx["pos"] = rfx["start"] + rfx["cumsize"] - rfx["pos_end"] = rfx["end"] + rfx["cumsize"] - return model, rfx - - -def get_hg19_chr_sizes_and_xlim() -> tuple[pd.DataFrame, tuple[int, int]]: - """ - Get chromosome sizes for hg19 - """ - from io import StringIO - - # hg19 - s = """ - chr size - chr1 249250621 - chr2 243199373 - chr3 198022430 - chr4 191154276 - chr5 180915260 - chr6 171115067 - chr7 159138663 - chr8 146364022 - chr9 141213431 - chr10 135534747 - chr11 135006516 - chr12 133851895 - chr13 115169878 - chr14 107349540 - chr15 102531392 - chr16 90354753 - chr17 81195210 - chr18 78077248 - chr19 59128983 - chr20 63025520 - chr21 48129895 - chr22 51304566 - chrX 155270560 - chrY 59373566""" - sizes = pd.read_csv(StringIO(s), delim_whitespace=True) - return sizes, (0, 2881033286) - - -def get_CNV_records(vcffile: str) -> list[CNV]: - """ - Get CNV records from a VCF file. - """ - from cyvcf2 import VCF - - vcf_reader = VCF(vcffile) - records = [] - for record in vcf_reader: - name = record.ID - dragen, type, chr, start_end = name.split(":") - start, end = [int(x) for x in start_end.split("-")] - is_pass = "PASS" in record.FILTERS - (cn,) = record.format("CN")[0] - record = CNV(chr, start, end, type, name, is_pass, cn) - records.append(record) - logger.info("A total of %d records imported", len(records)) - return records - - -def get_purity_and_model(vcffile: str) -> dict[str, str]: - """ - Get purity and model from VCF header. 
- """ - model = { - "ModelSource": None, - "EstimatedTumorPurity": None, - # "DiploidCoverage": None, - "OverallPloidy": None, - } - import gzip - - for row in gzip.open(vcffile): - row = row.decode("utf-8") - if not row.startswith("##"): - continue - a, b = row[2:].split("=", 1) - if a in model: - model[a] = b - return model - - -def wes_vs_wgs(args): - """ - %prog wes_vs_wgs sample.bcc sample.wes.cnv.vcf.gz sample.wgs.cnv.vcf.gz - - Compare WES and WGS CNVs. - """ - p = OptionParser(wes_vs_wgs.__doc__) - p.add_argument( - "--no-rdr-logy", - default=False, - action="store_true", - help="Do not make y-axis of RDR log-scale", - ) - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - import holoviews as hv - import hvplot.pandas - - hv.extension("bokeh") - - bccfile, wesfile, wgsfile = args - df = pd.read_csv(bccfile, sep="\t") - rdr_logy = not opts.no_rdr_logy - - sample = op.basename(bccfile).split(".", 1)[0] - sizes, xlim = get_hg19_chr_sizes_and_xlim() - b = np.cumsum(sizes["size"]) - a = pd.Series(b[:-1]) - a.index += 1 - sizes["cumsize"] = pd.concat([pd.Series([0]), a]) - jf = pd.merge(df, sizes, how="left", left_on="#chr", right_on="chr") - jf["pos"] = jf["start"] + jf["cumsize"] - - wes_model, wes_rfx = get_model_and_dataframe(wesfile, sizes) - wgs_model, wgs_rfx = get_model_and_dataframe(wgsfile, sizes) - - rdr_ylim = (0.5, 4) if rdr_logy else (0, 8) - rdr = jf.hvplot.scatter( - x="pos", - y="rdr", - logy=rdr_logy, - xlim=xlim, - ylim=rdr_ylim, - s=1, - width=1440, - height=240, - c="chr", - title=f"{sample}, Tumor RD/Normal RD (RDR)", - legend=False, - ylabel="Read depth ratio", - ) - wes_model = " ".join(f"{k}={v}" for k, v in wes_model.items()) - wes_comp = get_segments(wes_rfx) - wgs_model = " ".join(f"{k}={v}" for k, v in wgs_model.items()) - wgs_comp = get_segments(wgs_rfx) - for _, row in sizes.iterrows(): - chr = row["chr"] - cb = row["cumsize"] - vline = hv.VLine(cb).opts(color="lightgray", line_width=1) - 
ctext1 = hv.Text( - cb, 0.5, chr.replace("chr", ""), halign="left", valign="bottom" - ) - rdr = rdr * vline * ctext1 - wes_comp = wes_comp * vline - wgs_comp = wgs_comp * vline - cc = (rdr + wes_comp + wgs_comp).cols(1) - for label, c, model_kv in zip( - ("WES", "WGS"), (wes_comp, wgs_comp), (wes_model, wgs_model) - ): - c.opts( - width=1440, - height=240, - xlim=xlim, - ylim=(0, 10), - title=f"{label} {sample}, CNV calls Copy Number (CN) - Red: GAIN, Blue: LOSS, Black: REF, Magenta: CNLOH, Cyan: GAINLOH, Gray: NON-PASS\n{model_kv}", - ) - htmlfile = f"{sample}.html" - hv.save(cc, htmlfile) - logger.info("Report written to `%s`", htmlfile) - - -if __name__ == "__main__": - main() diff --git a/jcvi/variation/deconvolute.py b/jcvi/variation/deconvolute.py deleted file mode 100644 index c525c8a9..00000000 --- a/jcvi/variation/deconvolute.py +++ /dev/null @@ -1,258 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Deconvolute fastq files according to barcodes. -""" -import os.path as op -import sys - -from collections import namedtuple -from itertools import product, groupby, islice -from multiprocessing import Pool - -from Bio.Data.IUPACData import ambiguous_dna_values -from Bio.SeqIO.QualityIO import FastqGeneralIterator - -from ..apps.base import ActionDispatcher, OptionParser, flatten, glob, logger, mkdir -from ..formats.base import FileMerger, must_open -from ..formats.fastq import FastqPairedIterator - - -def main(): - - actions = ( - ("split", "split fastqfile into subsets"), - ("merge", "consolidate split contents"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -BarcodeLine = namedtuple("BarcodeLine", ["id", "seq"]) - - -def unpack_ambiguous(s): - """ - List sequences with ambiguous characters in all possibilities. 
- """ - sd = [ambiguous_dna_values[x] for x in s] - return ["".join(x) for x in list(product(*sd))] - - -def is_barcode_sample(seq, barcode, excludebarcode, trim): - if seq[:trim] != barcode.seq: - return False - hasexclude = any(seq.startswith(x.seq) for x in excludebarcode) - if hasexclude: - return False - return True - - -def split_barcode_paired(t): - - barcode, excludebarcode, outdir, inputfile = t - trim = len(barcode.seq) - outfastq = op.join(outdir, "{0}.{1}.fastq".format(barcode.id, barcode.seq)) - - r1, r2 = inputfile - p1fp, p2fp = FastqPairedIterator(r1, r2) - fw = open(outfastq, "w") - while True: - a = list(islice(p1fp, 4)) - if not a: - break - - b = list(islice(p2fp, 4)) - title, seq, plus, qual = a - title, seq, qual = title.strip(), seq.strip(), qual.strip() - if not is_barcode_sample(seq, barcode, excludebarcode, trim): - continue - - print("{0}\n{1}\n+\n{2}".format(title, seq[trim:], qual[trim:]), file=fw) - fw.writelines(b) - - fw.close() - - -def append_barcode_paired(t): - - barcode, excludebarcode, outdir, inputfile = t - bs = barcode.seq - trim = len(bs) - fake_qual = len(bs) * "#" - outfastq = op.join(outdir, "{0}.{1}.fastq".format(barcode.id, barcode.seq)) - - r1, r2 = inputfile - p1fp, p2fp = FastqPairedIterator(r1, r2) - fw = open(outfastq, "w") - while True: - a = list(islice(p1fp, 4)) - if not a: - break - - title, seq, plus, qual = a - seq = seq.strip() - if not is_barcode_sample(seq, barcode, excludebarcode, trim): - continue - - fw.writelines(a) - - title, seq, plus, qual = list(islice(p2fp, 4)) - title, seq, qual = title.strip(), seq.strip(), qual.strip() - # append barcode - seq = bs + seq - qual = fake_qual + qual - print("{0}\n{1}\n+\n{2}".format(title, seq, qual), file=fw) - - fw.close() - - -def split_barcode(t): - - barcode, excludebarcode, outdir, inputfile = t - trim = len(barcode.seq) - outfastq = op.join(outdir, "{0}.{1}.fastq".format(barcode.id, barcode.seq)) - - fp = must_open(inputfile) - fw = open(outfastq, "w") - 
for title, seq, qual in FastqGeneralIterator(fp): - if not is_barcode_sample(seq, barcode, excludebarcode, trim): - continue - print("@{0}\n{1}\n+\n{2}".format(title, seq[trim:], qual[trim:]), file=fw) - - fw.close() - - -def split(args): - """ - %prog split barcodefile fastqfile1 .. - - Deconvolute fastq files into subsets of fastq reads, based on the barcodes - in the barcodefile, which is a two-column file like: - ID01 AGTCCAG - - Input fastqfiles can be several files. Output files are ID01.fastq, - ID02.fastq, one file per line in barcodefile. - - When --paired is set, the number of input fastqfiles must be two. Output - file (the deconvoluted reads) will be in interleaved format. - """ - p = OptionParser(split.__doc__) - p.set_outdir(outdir="deconv") - p.add_argument( - "--nocheckprefix", - default=False, - action="store_true", - help="Don't check shared prefix", - ) - p.add_argument( - "--paired", - default=False, - action="store_true", - help="Paired-end data", - ) - p.add_argument( - "--append", - default=False, - action="store_true", - help="Append barcode to 2nd read", - ) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - barcodefile = args[0] - fastqfile = args[1:] - paired = opts.paired - append = opts.append - if append: - assert paired, "--append only works with --paired" - - nfiles = len(fastqfile) - - barcodes = [] - fp = open(barcodefile) - for row in fp: - id, seq = row.split() - for s in unpack_ambiguous(seq): - barcodes.append(BarcodeLine._make((id, s))) - - nbc = len(barcodes) - logger.debug("Imported {0} barcodes (ambiguous codes expanded).".format(nbc)) - checkprefix = not opts.nocheckprefix - - if checkprefix: - # Sanity check of shared prefix - excludebarcodes = [] - for bc in barcodes: - exclude = [] - for s in barcodes: - if bc.id == s.id: - continue - - assert bc.seq != s.seq - if s.seq.startswith(bc.seq) and len(s.seq) > len(bc.seq): - logger.error("{0} shares same prefix as 
{1}.".format(s, bc)) - exclude.append(s) - excludebarcodes.append(exclude) - else: - excludebarcodes = nbc * [[]] - - outdir = opts.outdir - mkdir(outdir) - - cpus = opts.cpus - logger.debug("Create a pool of {0} workers.".format(cpus)) - pool = Pool(cpus) - - if paired: - assert nfiles == 2, "You asked for --paired, but sent in {0} files".format( - nfiles - ) - split_fun = append_barcode_paired if append else split_barcode_paired - mode = "paired" - else: - split_fun = split_barcode - mode = "single" - - logger.debug("Mode: {0}".format(mode)) - - pool.map( - split_fun, zip(barcodes, excludebarcodes, nbc * [outdir], nbc * [fastqfile]) - ) - - -def merge(args): - """ - %prog merge folder1 ... - - Consolidate split contents in the folders. The folders can be generated by - the split() process and several samples may be in separate fastq files. This - program merges them. - """ - p = OptionParser(merge.__doc__) - p.set_outdir(outdir="outdir") - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - folders = args - outdir = opts.outdir - mkdir(outdir) - - files = flatten(glob("{0}/*.*.fastq".format(x)) for x in folders) - files = list(files) - key = lambda x: op.basename(x).split(".")[0] - files.sort(key=key) - for id, fns in groupby(files, key=key): - fns = list(fns) - outfile = op.join(outdir, "{0}.fastq".format(id)) - FileMerger(fns, outfile=outfile).merge(checkexists=True) - - -if __name__ == "__main__": - main() diff --git a/jcvi/variation/delly.py b/jcvi/variation/delly.py deleted file mode 100644 index a8f1b5e0..00000000 --- a/jcvi/variation/delly.py +++ /dev/null @@ -1,343 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Convert delly output to BED format. 
-""" - -import os.path as op -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger, need_update, sh -from ..formats.base import BaseFile, read_until, must_open -from ..formats.sam import coverage -from ..utils.aws import ls_s3, push_to_s3 -from ..utils.cbook import percentage - - -class DelLine(object): - def __init__(self, line): - args = line.strip().split("\t") - self.seqid = args[0] - self.start = int(args[1]) + 1 - self.end = int(args[2]) - self.size = int(args[3]) - assert self.size == self.end - self.start + 1 - self.supporting_pairs = int(args[4]) - self.avg_mapping_quality = float(args[5]) - self.accn = args[6] - - @property - def bedline(self): - return "\t".join( - str(x) - for x in ( - self.seqid, - self.start - 1, - self.end, - self.accn, - self.supporting_pairs, - "+", - ) - ) - - -class Delly(BaseFile): - def __init__(self, filename): - super().__init__(filename) - - def __iter__(self): - fp = must_open(self.filename) - while True: - read_until(fp, "-----") - nextline = fp.readline() - nextline = fp.readline() - if not nextline.strip(): - break - d = DelLine(nextline) - yield d - - def write_bed(self, bedfile="stdout"): - fw = must_open(bedfile, "w") - for d in self: - print(d.bedline, file=fw) - logger.debug("File written to `%s`.", bedfile) - - -def main(): - - actions = ( - ("bed", "Convert del.txt to del.bed"), - ("mito", "Find mito deletions in BAM"), - ("mitosomatic", "Find mito mosaic somatic mutations in piledriver results"), - ("mitocompile", "Compile mito deletions from multiple VCF files"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def mitosomatic(args): - """ - %prog mitosomatic t.piledriver - - Find mito mosaic somatic mutations in piledriver results. 
- """ - import pandas as pd - - p = OptionParser(mitosomatic.__doc__) - p.add_argument("--minaf", default=0.005, type=float, help="Minimum allele fraction") - p.add_argument("--maxaf", default=0.1, type=float, help="Maximum allele fraction") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (df,) = args - af_file = df.rsplit(".", 1)[0] + ".af" - fw = open(af_file, "w") - df = pd.read_csv(df, sep="\t") - for i, row in df.iterrows(): - na = row["num_A"] - nt = row["num_T"] - nc = row["num_C"] - ng = row["num_G"] - nd = row["num_D"] - ni = row["num_I"] - depth = row["depth"] - # major, minor = sorted([na, nt, nc, ng], reverse=True)[:2] - # af = minor * 1. / (major + minor) - af = (nd + ni) * 1.0 / depth - if not (opts.minaf <= af <= opts.maxaf): - continue - print("{}\t{}\t{:.6f}".format(row["chrom"], row["start"], af), file=fw) - fw.close() - - logger.debug("Allele freq written to `{}`".format(af_file)) - - -def bed(args): - """ - %prog bed del.txt - - Convert `del.txt` to BED format. DELLY manual here: - - - Deletion: - chr, start, end, size, #supporting_pairs, avg._mapping_quality, deletion_id - chr1, 10180, 10509, 329, 75, 15.8667, Deletion_Sample_00000000 - """ - p = OptionParser(bed.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (delt,) = args - dt = Delly(delt) - dt.write_bed("del.bed") - - -def mitocompile(args): - """ - %prog mitcompile *.vcf.gz - - Extract information about deletions in vcf file. 
- """ - from urllib.parse import parse_qsl - from jcvi.formats.vcf import VcfLine - - p = OptionParser(mitocompile.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - vcfs = args - print("\t".join("vcf samplekey depth seqid pos alt svlen pe sr".split())) - for i, vcf in enumerate(vcfs): - if (i + 1) % 100 == 0: - logger.debug("Process `{}` [{}]".format(vcf, percentage(i + 1, len(vcfs)))) - depthfile = vcf.replace(".sv.vcf.gz", ".depth") - fp = must_open(depthfile) - _, depth = next(fp).split() - depth = int(float(depth)) - samplekey = op.basename(vcf).split("_")[0] - - fp = must_open(vcf) - for row in fp: - if row[0] == "#": - continue - v = VcfLine(row) - info = dict(parse_qsl(v.info)) - print( - "\t".join( - str(x) - for x in ( - vcf, - samplekey, - depth, - v.seqid, - v.pos, - v.alt, - info.get("SVLEN"), - info["PE"], - info["SR"], - ) - ) - ) - - -def mito(args): - """ - %prog mito chrM.fa input.bam - - Identify mitochondrial deletions. - """ - p = OptionParser(mito.__doc__) - p.set_aws_opts(store="hli-mv-data-science/htang/mito-deletions") - p.add_argument( - "--realignonly", default=False, action="store_true", help="Realign only" - ) - p.add_argument( - "--svonly", - default=False, - action="store_true", - help="Run Realign => SV calls only", - ) - p.add_argument( - "--support", default=1, type=int, help="Minimum number of supporting reads" - ) - p.set_home("speedseq", default="/mnt/software/speedseq/bin") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - chrMfa, bamfile = args - store = opts.output_path - cleanup = not opts.nocleanup - - if not op.exists(chrMfa): - logger.debug("File `{}` missing. 
Exiting.".format(chrMfa)) - return - - chrMfai = chrMfa + ".fai" - if not op.exists(chrMfai): - cmd = "samtools index {}".format(chrMfa) - sh(cmd) - - if not bamfile.endswith(".bam"): - bamfiles = [x.strip() for x in open(bamfile)] - else: - bamfiles = [bamfile] - - if store: - computed = ls_s3(store) - computed = [ - op.basename(x).split(".")[0] for x in computed if x.endswith(".depth") - ] - remaining_samples = [ - x for x in bamfiles if op.basename(x).split(".")[0] not in computed - ] - - logger.debug( - "Already computed on `{}`: {}".format( - store, len(bamfiles) - len(remaining_samples) - ) - ) - bamfiles = remaining_samples - - logger.debug("Total samples: {}".format(len(bamfiles))) - - for bamfile in bamfiles: - run_mito( - chrMfa, - bamfile, - opts, - realignonly=opts.realignonly, - svonly=opts.svonly, - store=store, - cleanup=cleanup, - ) - - -def run_mito( - chrMfa, bamfile, opts, realignonly=False, svonly=False, store=None, cleanup=False -): - from jcvi.formats.sam import get_minibam - - region = "chrM" - minibam = op.basename(bamfile).replace(".bam", ".{}.bam".format(region)) - if not op.exists(minibam): - get_minibam(bamfile, region) - else: - logger.debug("{} found. 
Skipped.".format(minibam)) - - speedseq_bin = op.join(opts.speedseq_home, "speedseq") - - realign = minibam.rsplit(".", 1)[0] + ".realign" - realignbam = realign + ".bam" - margs = " -v -t {} -o {}".format(opts.cpus, realign) - if need_update(minibam, realign + ".bam", warn=True): - cmd = speedseq_bin + " realign" - cmd += margs - cmd += " {} {}".format(chrMfa, minibam) - sh(cmd) - - if realignonly: - return - - depthfile = realign + ".depth" - if need_update(realignbam, depthfile): - coverage( - [ - chrMfa, - realignbam, - "--nosort", - "--format=coverage", - "--outfile={}".format(depthfile), - ] - ) - - if store: - push_to_s3(store, depthfile) - - vcffile = realign + ".sv.vcf.gz" - if need_update(realignbam, vcffile, warn=True): - cmd = speedseq_bin + " sv" - cmd += margs - cmd += " -R {}".format(chrMfa) - cmd += " -m {}".format(opts.support) - cmd += " -B {} -D {} -S {}".format( - realignbam, realign + ".discordants.bam", realign + ".splitters.bam" - ) - sh(cmd) - - if store: - push_to_s3(store, vcffile) - - if svonly: - if cleanup: - do_cleanup(minibam, realignbam) - return - - piledriver = realign + ".piledriver" - if need_update(realignbam, piledriver): - cmd = "bamtools piledriver -fasta {}".format(chrMfa) - cmd += " -in {}".format(realignbam) - sh(cmd, outfile=piledriver) - - if store: - push_to_s3(store, piledriver) - - if cleanup: - do_cleanup(minibam, realignbam) - - -def do_cleanup(minibam, realignbam): - sh("rm -f {}* {}*".format(minibam, realignbam)) - - -if __name__ == "__main__": - main() diff --git a/jcvi/variation/impute.py b/jcvi/variation/impute.py deleted file mode 100644 index 3807c206..00000000 --- a/jcvi/variation/impute.py +++ /dev/null @@ -1,384 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Impute unknown variations given an input vcf file. 
-""" -import os.path as op -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger -from ..apps.grid import MakeManager -from ..formats.base import must_open -from ..formats.vcf import VcfLine, CM -from ..utils.cbook import percentage - - -def main(): - - actions = ( - ("beagle", "use BEAGLE4.1 to impute vcf"), - ("impute", "use IMPUTE2 to impute vcf"), - ("minimac", "use MINIMAC3 to impute vcf"), - ("passthrough", "pass through Y and MT vcf"), - ("validate", "validate imputation against withheld variants"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def passthrough(args): - """ - %prog passthrough chrY.vcf chrY.new.vcf - - Pass through Y and MT vcf. - """ - p = OptionParser(passthrough.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - vcffile, newvcffile = args - fp = open(vcffile) - fw = open(newvcffile, "w") - gg = ["0/0", "0/1", "1/1"] - for row in fp: - if row[0] == "#": - print(row.strip(), file=fw) - continue - - v = VcfLine(row) - v.filter = "PASS" - v.format = "GT:GP" - probs = [0] * 3 - probs[gg.index(v.genotype)] = 1 - v.genotype = v.genotype.replace("/", "|") + ":{0}".format( - ",".join("{0:.3f}".format(x) for x in probs) - ) - print(v, file=fw) - fw.close() - - -def validate(args): - """ - %prog validate imputed.vcf withheld.vcf - - Validate imputation against withheld variants. 
- """ - p = OptionParser(validate.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - imputed, withheld = args - register = {} - fp = open(withheld) - for row in fp: - if row[0] == "#": - continue - v = VcfLine(row) - register[(v.seqid, v.pos)] = v.genotype - - logger.debug("Imported %d records from `%s`", len(register), withheld) - - fp = must_open(imputed) - hit = concordant = 0 - seen = set() - for row in fp: - if row[0] == "#": - continue - v = VcfLine(row) - chr, pos, genotype = v.seqid, v.pos, v.genotype - if (chr, pos) in seen: - continue - seen.add((chr, pos)) - if (chr, pos) not in register: - continue - truth = register[(chr, pos)] - imputed = genotype.split(":")[0] - if "|" in imputed: - imputed = "/".join(sorted(genotype.split(":")[0].split("|"))) - # probs = [float(x) for x in genotype.split(":")[-1].split(",")] - # imputed = max(zip(probs, ["0/0", "0/1", "1/1"]))[-1] - hit += 1 - if truth == imputed: - concordant += 1 - else: - print(row.strip(), "truth={0}".format(truth), file=sys.stderr) - - logger.debug("Total concordant: %s", percentage(concordant, hit)) - - -def minimac(args): - """ - %prog batchminimac input.txt - - Use MINIMAC3 to impute vcf on all chromosomes. 
- """ - p = OptionParser(minimac.__doc__) - p.set_home("shapeit") - p.set_home("minimac") - p.set_outfile() - p.set_chr() - p.set_ref() - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (txtfile,) = args - ref = opts.ref - mm = MakeManager() - pf = txtfile.split(".")[0] - allrawvcf = [] - alloutvcf = [] - chrs = opts.chr.split(",") - for x in chrs: - px = CM[x] - chrvcf = pf + ".{0}.vcf".format(px) - if txtfile.endswith(".vcf"): - cmd = "vcftools --vcf {0} --chr {1}".format(txtfile, x) - cmd += " --out {0}.{1} --recode".format(pf, px) - cmd += " && mv {0}.{1}.recode.vcf {2}".format(pf, px, chrvcf) - else: # 23andme - cmd = "python -m jcvi.formats.vcf from23andme {0} {1}".format(txtfile, x) - cmd += " --ref {0}".format(ref) - mm.add(txtfile, chrvcf, cmd) - - chrvcf_hg38 = pf + ".{0}.23andme.hg38.vcf".format(px) - minimac_liftover(mm, chrvcf, chrvcf_hg38, opts) - allrawvcf.append(chrvcf_hg38) - - minimacvcf = "{0}.{1}.minimac.dose.vcf".format(pf, px) - if x == "X": - minimac_X(mm, x, chrvcf, opts) - elif x in ["Y", "MT"]: - cmd = "python -m jcvi.variation.impute passthrough" - cmd += " {0} {1}".format(chrvcf, minimacvcf) - mm.add(chrvcf, minimacvcf, cmd) - else: - minimac_autosome(mm, x, chrvcf, opts) - - # keep the best line for multi-allelic markers - uniqvcf = "{0}.{1}.minimac.uniq.vcf".format(pf, px) - cmd = "python -m jcvi.formats.vcf uniq {0} > {1}".format(minimacvcf, uniqvcf) - mm.add(minimacvcf, uniqvcf, cmd) - - minimacvcf_hg38 = "{0}.{1}.minimac.hg38.vcf".format(pf, px) - minimac_liftover(mm, uniqvcf, minimacvcf_hg38, opts) - alloutvcf.append(minimacvcf_hg38) - - if len(allrawvcf) > 1: - rawhg38vcfgz = pf + ".all.23andme.hg38.vcf.gz" - cmd = "vcf-concat {0} | bgzip > {1}".format(" ".join(allrawvcf), rawhg38vcfgz) - mm.add(allrawvcf, rawhg38vcfgz, cmd) - - if len(alloutvcf) > 1: - outhg38vcfgz = pf + ".all.minimac.hg38.vcf.gz" - cmd = "vcf-concat {0} | bgzip > {1}".format(" ".join(alloutvcf), 
outhg38vcfgz) - mm.add(alloutvcf, outhg38vcfgz, cmd) - - mm.write() - - -def minimac_liftover(mm, chrvcf, chrvcf_hg38, opts): - cmd = "python -m jcvi.formats.vcf liftover {0} {1}/hg19ToHg38.over.chain.gz {2}".format( - chrvcf, opts.ref, chrvcf_hg38 - ) - mm.add(chrvcf, chrvcf_hg38, cmd) - - -def minimac_X(mm, chr, vcffile, opts): - """See details here: - http://genome.sph.umich.edu/wiki/Minimac3_Cookbook_:_Chromosome_X_Imputation - """ - pf = vcffile.rsplit(".", 1)[0] - ranges = [(1, 2699519), (2699520, 154931043), (154931044, 155270560)] - tags = ["PAR1", "NONPAR", "PAR2"] - Xvcf = [] - phasedfiles = [] - for tag, (start, end) in zip(tags, ranges): - recodefile = pf + "_{0}.recode.vcf".format(tag) - cmd = "vcftools --vcf {0} --out {1}_{2}".format(vcffile, pf, tag) - cmd += " --chr X --from-bp {0} --to-bp {1} --recode".format(start, end) - mm.add(vcffile, recodefile, cmd) - - phasedfile = shapeit_phasing(mm, chr + "_{0}".format(tag), recodefile, opts) - phasedfiles.append(phasedfile) - - pars = [x for x in phasedfiles if "_PAR" in x] - parfile = pf + "_PAR.recode.phased.vcf" - nonparfile = pf + "_NONPAR.recode.phased.vcf" - cmd = "vcf-concat {0} > {1}".format(" ".join(pars), parfile) - mm.add(pars, parfile, cmd) - - for phasedfile in (parfile, nonparfile): - outvcf = minimac_autosome(mm, chr, phasedfile, opts, phasing=False) - Xvcf.append(outvcf) - - minimacvcf = pf + ".minimac.dose.vcf" - cmd = "vcf-concat {0} | vcf-sort -c > {1}".format(" ".join(Xvcf), minimacvcf) - mm.add(Xvcf, minimacvcf, cmd) - - -def minimac_autosome(mm, chr, vcffile, opts, phasing=True): - pf = vcffile.rsplit(".", 1)[0] - kg = op.join(opts.ref, "1000GP_Phase3") - if phasing: - shapeit_phasing(mm, chr, vcffile, opts) - phasedfile = pf + ".phased.vcf" - else: - phasedfile = vcffile - - chrtag = chr - if chr == "X": - chrtag = "X.Non.Pseudo.Auto" if "NONPAR" in vcffile else "X.Pseudo.Auto" - - opf = pf + ".minimac" - minimac_cmd = op.join(opts.minimac_home, "Minimac3") - - cmd = minimac_cmd + " 
--chr {0}".format(chr) - cmd += ( - " --refHaps {0}/{1}.1000g.Phase3.v5.With.Parameter.Estimates.m3vcf.gz".format( - kg, chrtag - ) - ) - cmd += " --haps {0} --prefix {1}".format(phasedfile, opf) - cmd += " --format GT,GP --nobgzip" - outvcf = opf + ".dose.vcf" - mm.add(phasedfile, outvcf, cmd) - - return outvcf - - -def beagle(args): - """ - %prog beagle input.vcf 1 - - Use BEAGLE4.1 to impute vcf on chromosome 1. - """ - p = OptionParser(beagle.__doc__) - p.set_home("beagle") - p.set_ref() - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - vcffile, chr = args - pf = vcffile.rsplit(".", 1)[0] - outpf = pf + ".beagle" - outfile = outpf + ".vcf.gz" - - mm = MakeManager() - beagle_cmd = opts.beagle_home - kg = op.join(opts.ref, "1000GP_Phase3") - cmd = beagle_cmd + " gt={0}".format(vcffile) - cmd += " ref={0}/chr{1}.1kg.phase3.v5a.bref".format(kg, chr) - cmd += " map={0}/plink.chr{1}.GRCh37.map".format(kg, chr) - cmd += " out={0}".format(outpf) - cmd += " nthreads=16 gprobs=true" - mm.add(vcffile, outfile, cmd) - - mm.write() - - -def shapeit_phasing(mm, chr, vcffile, opts, vcf=True): - kg = op.join(opts.ref, "1000GP_Phase3") - shapeit_cmd = op.join(opts.shapeit_home, "shapeit") - - rpf = "{0}/1000GP_Phase3_chr{1}".format(kg, chr) - pf = vcffile.rsplit(".", 1)[0] - mapfile = "{0}/genetic_map_chr{1}_combined_b37.txt".format(kg, chr) - mapfile = mapfile.replace("NONPAR", "nonPAR") - - hapsfile = pf + ".haps" - cmd = shapeit_cmd + " --input-vcf {0}".format(vcffile) - cmd += " --input-map {0}".format(mapfile) - cmd += " --effective-size 11418" - cmd += " --output-max {0}.haps {0}.sample".format(pf) - cmd += " --input-ref {0}.hap.gz {0}.legend.gz".format(rpf) - cmd += " {0}/1000GP_Phase3.sample --output-log {1}.log".format(kg, pf) - if chr == "X": - cmd += " --chrX" - mm.add(vcffile, hapsfile, cmd) - - if not vcf: - return - - phasedfile = pf + ".phased.vcf" - cmd = shapeit_cmd + " -convert --input-haps 
{0}".format(pf) - cmd += " --output-vcf {0}".format(phasedfile) - mm.add(hapsfile, phasedfile, cmd) - - return phasedfile - - -def impute(args): - """ - %prog impute input.vcf hs37d5.fa 1 - - Use IMPUTE2 to impute vcf on chromosome 1. - """ - from pyfaidx import Fasta - - p = OptionParser(impute.__doc__) - p.set_home("shapeit") - p.set_home("impute") - p.set_ref() - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 3: - sys.exit(not p.print_help()) - - vcffile, fastafile, chr = args - mm = MakeManager() - pf = vcffile.rsplit(".", 1)[0] - hapsfile = pf + ".haps" - kg = op.join(opts.ref, "1000GP_Phase3") - shapeit_phasing(mm, chr, vcffile, opts) - - fasta = Fasta(fastafile) - size = len(fasta[chr]) - binsize = 5000000 - bins = size / binsize # 5Mb bins - if size % binsize: - bins += 1 - impute_cmd = op.join(opts.impute_home, "impute2") - chunks = [] - for x in range(bins + 1): - chunk_start = x * binsize + 1 - chunk_end = min(chunk_start + binsize - 1, size) - outfile = pf + ".chunk{0:02d}.impute2".format(x) - mapfile = "{0}/genetic_map_chr{1}_combined_b37.txt".format(kg, chr) - rpf = "{0}/1000GP_Phase3_chr{1}".format(kg, chr) - cmd = impute_cmd + " -m {0}".format(mapfile) - cmd += " -known_haps_g {0}".format(hapsfile) - cmd += " -h {0}.hap.gz -l {0}.legend.gz".format(rpf) - cmd += " -Ne 20000 -int {0} {1}".format(chunk_start, chunk_end) - cmd += " -o {0} -allow_large_regions -seed 367946".format(outfile) - cmd += " && touch {0}".format(outfile) - mm.add(hapsfile, outfile, cmd) - chunks.append(outfile) - - # Combine all the files - imputefile = pf + ".impute2" - cmd = "cat {0} > {1}".format(" ".join(chunks), imputefile) - mm.add(chunks, imputefile, cmd) - - # Convert to vcf - vcffile = pf + ".impute2.vcf" - cmd = "python -m jcvi.formats.vcf fromimpute2 {0} {1} {2} > {3}".format( - imputefile, fastafile, chr, vcffile - ) - mm.add(imputefile, vcffile, cmd) - mm.write() - - -if __name__ == "__main__": - main() diff --git a/jcvi/variation/phase.py 
b/jcvi/variation/phase.py deleted file mode 100644 index d1a6f70c..00000000 --- a/jcvi/variation/phase.py +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Read-based phasing. -""" -import sys - -try: - import vcf -except ImportError: - pass -import pysam - -from ..apps.base import ActionDispatcher, OptionParser, logger - - -class CPRA: - def __init__(self, vcf_record): - r = vcf_record - self.chr = r.CHROM - self.pos = r.POS - self.ref = r.REF - self.alt = r.ALT - self.alleles = [self.ref] + self.alt - - @property - def is_valid(self): - """Only retain SNPs or single indels, and are bi-allelic""" - return len(self.ref) == 1 and len(self.alt) == 1 and len(self.alt[0]) == 1 - - def __str__(self): - return "_".join(str(x) for x in (self.chr, self.pos, self.ref, self.alt[0])) - - __repr__ = __str__ - - -def main(): - - actions = ( - ("prepare", "convert vcf and bam to variant list"), - ("counts", "collect allele counts from RO/AO fields"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def counts(args): - """ - %prog counts vcffile - - Collect allele counts from RO and AO fields. - """ - p = OptionParser(counts.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (vcffile,) = args - vcf_reader = vcf.Reader(open(vcffile)) - for r in vcf_reader: - v = CPRA(r) - if not v.is_valid: - continue - for sample in r.samples: - ro = sample["RO"] - ao = sample["AO"] - print("\t".join(str(x) for x in (v, ro, ao))) - - -def prepare(args): - """ - %prog prepare vcffile bamfile - - Convert vcf and bam to variant list. 
Inputs are: - - vcffile: contains the positions of variants - - bamfile: contains the reads that hold the variants - - Outputs: - - reads_to_phase: phasing for each read - - variants_to_phase: in format of phased vcf - """ - p = OptionParser(prepare.__doc__) - p.add_argument("--accuracy", default=0.85, help="Sequencing per-base accuracy") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - vcffile, bamfile = args - right = "{:.2f}".format(opts.accuracy) - wrong = "{:.2f}".format(1 - opts.accuracy) - vcf_reader = vcf.Reader(open(vcffile)) - variants = [] - for r in vcf_reader: - v = CPRA(r) - if not v.is_valid: - continue - variants.append(v) - - logger.debug( - "A total of %d bi-allelic SNVs imported from `%s`", len(variants), vcffile - ) - - bamfile = pysam.AlignmentFile(bamfile, "rb") - for v in variants: - pos = v.pos - 1 - for column in bamfile.pileup(v.chr, pos, pos + 1, truncate=True): - for read in column.pileups: - query_position = read.query_position - if query_position is None: - continue - read_name = read.alignment.query_name - query_base = read.alignment.query_sequence[query_position] - a, b = v.alleles - if query_base == a: - other_base = b - elif query_base == b: - other_base = a - else: - continue - print( - " ".join( - str(x) - for x in (v, read_name, query_base, right, other_base, wrong) - ) - ) - - -if __name__ == "__main__": - main() diff --git a/jcvi/variation/snp.py b/jcvi/variation/snp.py deleted file mode 100644 index f289a45d..00000000 --- a/jcvi/variation/snp.py +++ /dev/null @@ -1,369 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Analyze SNPs in re-sequencing panels. 
-""" -import sys - -from ..apps.base import ActionDispatcher, OptionParser, logger, need_update, sh -from ..apps.grid import MakeManager -from ..formats.base import is_number, write_file -from ..formats.fasta import Fasta - - -def main(): - - actions = ( - ("frommaf", "convert to four-column tabular format from MAF"), - ("freq", "call snp frequencies and keep AO and RO"), - ("rmdup", "remove PCR duplicates from BAM files"), - ("freebayes", "call snps using freebayes"), - ("mpileup", "call snps using samtools-mpileup"), - ("gatk", "call snps using GATK"), - ("somatic", "generate series of SPEEDSESQ-somatic commands"), - ("mappability", "generate 50mer mappability for reference genome"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def mappability(args): - """ - %prog mappability reference.fasta - - Generate 50mer mappability for reference genome. Commands are based on gem - mapper. See instructions: - - """ - p = OptionParser(mappability.__doc__) - p.add_argument("--mer", default=50, type=int, help="User mer size") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (ref,) = args - K = opts.mer - pf = ref.rsplit(".", 1)[0] - mm = MakeManager() - - gem = pf + ".gem" - cmd = "gem-indexer -i {} -o {}".format(ref, pf) - mm.add(ref, gem, cmd) - - mer = pf + ".{}mer".format(K) - mapb = mer + ".mappability" - cmd = "gem-mappability -I {} -l {} -o {} -T {}".format(gem, K, mer, opts.cpus) - mm.add(gem, mapb, cmd) - - wig = mer + ".wig" - cmd = "gem-2-wig -I {} -i {} -o {}".format(gem, mapb, mer) - mm.add(mapb, wig, cmd) - - bw = mer + ".bw" - cmd = "wigToBigWig {} {}.sizes {}".format(wig, mer, bw) - mm.add(wig, bw, cmd) - - bg = mer + ".bedGraph" - cmd = "bigWigToBedGraph {} {}".format(bw, bg) - mm.add(bw, bg, cmd) - - merged = mer + ".filtered-1.merge.bed" - cmd = "python -m jcvi.formats.bed filterbedgraph {} 1".format(bg) - mm.add(bg, merged, cmd) - - mm.write() - - -def gatk(args): - """ - %prog 
gatk bamfile reference.fasta - - Call SNPs based on GATK best practices. - """ - p = OptionParser(gatk.__doc__) - p.add_argument( - "--indelrealign", - default=False, - action="store_true", - help="Perform indel realignment", - ) - p.set_home("gatk") - p.set_home("picard") - p.set_phred() - p.set_cpus(cpus=24) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - bamfile, ref = args - pf = bamfile.rsplit(".", 1)[0] - mm = MakeManager() - picard = "java -Xmx32g -jar {0}/picard.jar".format(opts.picard_home) - tk = "java -Xmx32g -jar {0}/GenomeAnalysisTK.jar".format(opts.gatk_home) - tk += " -R {0}".format(ref) - - # Step 0 - build reference - dictfile = ref.rsplit(".", 1)[0] + ".dict" - cmd1 = picard + " CreateSequenceDictionary" - cmd1 += " R={0} O={1}".format(ref, dictfile) - cmd2 = "samtools faidx {0}".format(ref) - mm.add(ref, dictfile, (cmd1, cmd2)) - - # Step 1 - sort bam - sortedbamfile = pf + ".sorted.bam" - cmd = picard + " SortSam" - cmd += " INPUT={0} OUTPUT={1}".format(bamfile, sortedbamfile) - cmd += " SORT_ORDER=coordinate CREATE_INDEX=true" - mm.add(bamfile, sortedbamfile, cmd) - - # Step 2 - mark duplicates - dedupbamfile = pf + ".dedup.bam" - cmd = picard + " MarkDuplicates" - cmd += " INPUT={0} OUTPUT={1}".format(sortedbamfile, dedupbamfile) - cmd += " METRICS_FILE=dedup.log CREATE_INDEX=true" - mm.add(sortedbamfile, dedupbamfile, cmd) - - if opts.indelrealign: - # Step 3 - create indel realignment targets - intervals = pf + ".intervals" - cmd = tk + " -T RealignerTargetCreator" - cmd += " -I {0} -o {1}".format(dedupbamfile, intervals) - mm.add(dedupbamfile, intervals, cmd) - - # Step 4 - indel realignment - realignedbamfile = pf + ".realigned.bam" - cmd = tk + " -T IndelRealigner" - cmd += " -targetIntervals {0}".format(intervals) - cmd += " -I {0} -o {1}".format(dedupbamfile, realignedbamfile) - mm.add((dictfile, intervals), realignedbamfile, cmd) - else: - realignedbamfile = dedupbamfile - - # Step 5 - SNP 
calling - vcf = pf + ".vcf" - cmd = tk + " -T HaplotypeCaller" - cmd += " -I {0}".format(realignedbamfile) - cmd += " --genotyping_mode DISCOVERY" - cmd += " -stand_emit_conf 10 -stand_call_conf 30" - cmd += " -nct {0}".format(opts.cpus) - cmd += " -o {0}".format(vcf) - if opts.phred == "64": - cmd += " --fix_misencoded_quality_scores" - mm.add(realignedbamfile, vcf, cmd) - - # Step 6 - SNP filtering - filtered_vcf = pf + ".filtered.vcf" - cmd = tk + " -T VariantFiltration" - cmd += " -V {0}".format(vcf) - cmd += ' --filterExpression "DP < 10 || DP > 300 || QD < 2.0 || FS > 60.0 || MQ < 40.0"' - cmd += ' --filterName "LOWQUAL"' - cmd += ' --genotypeFilterExpression "isHomVar == 1"' - cmd += ' --genotypeFilterName "HOMOVAR"' - cmd += ' --genotypeFilterExpression "isHet == 1"' - cmd += ' --genotypeFilterName "HET"' - cmd += " -o {0}".format(filtered_vcf) - mm.add(vcf, filtered_vcf, cmd) - - mm.write() - - -def somatic(args): - """ - %prog somatic ref.fasta *.bam > somatic.sh - - Useful to identify somatic mutations in each sample compared to all other - samples. Script using SPEEDSEQ-somatic will be written to stdout. - """ - p = OptionParser(somatic.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 3: - sys.exit(not p.print_help()) - - ref, bams = args[0], args[1:] - tcmd = "~/export/speedseq/bin/speedseq somatic" - tcmd += " -t 32 -F .2 -C 3 -q 30" - cmds = [] - for b in bams: - pf = b.split(".")[0] - cmd = tcmd - cmd += " -o {0}".format(pf) - others = ",".join(sorted(set(bams) - {b})) - cmd += " {0} {1} {2}".format(ref, others, b) - cmds.append(cmd) - - write_file("somatic.sh", "\n".join(cmds)) - - -def rmdup(args): - """ - %prog rmdup *.bam > rmdup.cmds - - Remove PCR duplicates from BAM files, generate a list of commands. 
- """ - p = OptionParser(rmdup.__doc__) - p.add_argument( - "-S", default=False, action="store_true", help="Treat PE reads as SE in rmdup" - ) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - bams = args - cmd = "samtools rmdup" - if opts.S: - cmd += " -S" - for b in bams: - if "rmdup" in b: - continue - rb = b.rsplit(".", 1)[0] + ".rmdup.bam" - if not need_update(b, rb): - continue - print(" ".join((cmd, b, rb))) - - -def mpileup(args): - """ - %prog mpileup prefix ref.fa *.bam - - Call SNPs using samtools mpileup. - """ - p = OptionParser(mpileup.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - prefix, ref = args[0:2] - bams = args[2:] - cmd = "samtools mpileup -P ILLUMINA -E -ugD -r {0}" - cmd += " -f {0} {1}".format(ref, " ".join(bams)) - fmd = "bcftools view -cvg -" - seqids = list(Fasta(ref).iterkeys_ordered()) - for s in seqids: - outfile = prefix + ".{0}.vcf".format(s) - print(cmd.format(s), "|", fmd, ">", outfile) - - -def freebayes(args): - """ - %prog freebayes prefix ref.fa *.bam - - Call SNPs using freebayes. - """ - p = OptionParser(freebayes.__doc__) - p.add_argument("--mindepth", default=3, type=int, help="Minimum depth") - p.add_argument("--minqual", default=20, type=int, help="Minimum quality") - opts, args = p.parse_args(args) - - if len(args) < 2: - sys.exit(not p.print_help()) - - prefix, ref = args[0:2] - bams = args[2:] - cmd = "bamaddrg -R {0}" - cmd += " " + " ".join("-b {0}".format(x) for x in bams) - fmd = "freebayes --stdin -C {0} -f {1}".format(opts.mindepth, ref) - seqids = list(Fasta(ref).iterkeys_ordered()) - for s in seqids: - outfile = prefix + ".{0}.vcf".format(s) - print(cmd.format(s), "|", fmd + " -r {0} -v {1}".format(s, outfile)) - - -def freq(args): - """ - %prog freq fastafile bamfile - - Call SNP frequencies and generate GFF file. 
- """ - p = OptionParser(freq.__doc__) - p.add_argument("--mindepth", default=3, type=int, help="Minimum depth") - p.add_argument("--minqual", default=20, type=int, help="Minimum quality") - p.set_outfile() - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - fastafile, bamfile = args - cmd = "freebayes -f {0} --pooled-continuous {1}".format(fastafile, bamfile) - cmd += " -F 0 -C {0}".format(opts.mindepth) - cmd += ' | vcffilter -f "QUAL > {0}"'.format(opts.minqual) - cmd += " | vcfkeepinfo - AO RO TYPE" - sh(cmd, outfile=opts.outfile) - - -def frommaf(args): - """ - %prog frommaf maffile - - Convert to four-column tabular format from MAF. - """ - p = OptionParser(frommaf.__doc__) - p.add_argument("--validate", help="Validate coordinates against FASTA") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (maf,) = args - snpfile = maf.rsplit(".", 1)[0] + ".vcf" - fp = open(maf) - fw = open(snpfile, "w") - total = 0 - id = "." 
- qual = 20 - filter = "PASS" - info = "DP=20" - print("##fileformat=VCFv4.0", file=fw) - print("#CHROM POS ID REF ALT QUAL FILTER INFO".replace(" ", "\t"), file=fw) - for row in fp: - atoms = row.split() - c, pos, ref, alt = atoms[:4] - if is_number(c, int): - c = int(c) - else: - continue - c = "chr{0:02d}".format(c) - pos = int(pos) - print( - "\t".join(str(x) for x in (c, pos, id, ref, alt, qual, filter, info)), - file=fw, - ) - total += 1 - fw.close() - - validate = opts.validate - if not validate: - return - - from jcvi.utils.cbook import percentage - - f = Fasta(validate) - fp = open(snpfile) - nsnps = 0 - for row in fp: - if row[0] == "#": - continue - - c, pos, id, ref, alt, qual, filter, info = row.split("\t") - pos = int(pos) - feat = dict(chr=c, start=pos, stop=pos) - s = f.sequence(feat) - s = str(s) - assert s == ref, "Validation error: {0} is {1} (expect: {2})".format( - feat, s, ref - ) - nsnps += 1 - if nsnps % 50000 == 0: - logger.debug("SNPs parsed: %s", percentage(nsnps, total)) - logger.debug("A total of %d SNPs validated and written to `%s`.", nsnps, snpfile) - - -if __name__ == "__main__": - main() diff --git a/jcvi/variation/str.py b/jcvi/variation/str.py deleted file mode 100644 index 750df228..00000000 --- a/jcvi/variation/str.py +++ /dev/null @@ -1,1568 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -""" -Identify repeat numbers in STR repeats. 
-""" -import re -import os -import os.path as op -import json -import sys - -from math import log, ceil -from collections import Counter, defaultdict -from multiprocessing import Pool - -import numpy as np -import pandas as pd -import pyfasta - -try: - import vcf -except ImportError: - pass - -from ..apps.base import ( - ActionDispatcher, - OptionParser, - datafile, - logger, - mkdir, - need_update, - sh, -) -from ..apps.grid import MakeManager -from ..formats.base import LineFile, must_open -from ..formats.base import timestamp -from ..formats.bed import natsorted -from ..utils.aws import check_exists_s3, ls_s3, pull_from_s3, push_to_s3 -from ..utils.cbook import percentage, uniqify - - -REF = "hg38" -REPO = datafile("TREDs.meta.csv") - -READLEN = 150 -MINSCORE = 36 -YSEARCH_HAPLOTYPE = """ -DYS393 DYS390 DYS19/DYS394 DYS19b DYS391 DYS385a DYS385b DYS426 DYS388 DYS439 -DYS389I DYS392 DYS389B DYS458 DYS459a/b DYS459a/b DYS455 DYS454 DYS447 DYS437 -DYS448 DYS449 DYS464a/b/c/d DYS464a/b/c/d DYS464a/b/c/d DYS464a/b/c/d DYS464e DYS464f DYS464g DYS460 -GATA-H4 YCAIIa YCAIIb DYS456 DYS607 DYS576 DYS570 CDYa CDYb DYS442 -DYS438 DYS531 DYS578 DYS395S1a/b DYS395S1a/b DYS590 DYS537 DYS641 DYS472 DYS406S1 -DYS511 DYS425 DYS413a DYS413b DYS557 DYS594 DYS436 DYS490 DYS534 DYS450 -DYS444 DYS481 DYS520 DYS446 DYS617 DYS568 DYS487 DYS572 DYS640 DYS492 -DYS565 DYS461 DYS462 GATA-A10 DYS635 GAAT1B07 DYS441 DYS445 DYS452 DYS463 -DYS434 DYS435 DYS485 DYS494 DYS495 DYS505 DYS522 DYS533 DYS549 DYS556 -DYS575 DYS589 DYS636 DYS638 DYS643 DYS714 DYS716 DYS717 DYS726 DXYS156-Y -""".split() -YSEARCH_LL = """ -L1 L2 L3 L4 L5 L6 L7 L8 L9 L10 -L11 L12 L13 L14 L15 L16 L17 L18 L19 L20 -L21 L22 L23 L24 L25 L26 L27 L28 L29 L30 -L31 L32 L33 L34 L35 L36 L37 L38 L39 L40 -L41 L54 L55 L56 L57 L58 L59 L60 L61 L62 -L63 L42 L64 L65 L66 L67 L68 L69 L70 L71 -L49 L72 L73 L51 L74 L75 L76 L77 L78 L79 -L80 L43 L44 L45 L46 L47 L48 L50 L52 L53 -L81 L82 L83 L84 L85 L86 L87 L88 L89 L90 -L91 L92 L93 L94 L95 L96 L97 
L98 L99 L100 -""".split() -YHRD_YFILER = """ -DYS456 DYS389I DYS390 DYS389B DYS458 DYS19/DYS394 DYS385 -DYS393 DYS391 DYS439 DYS635 DYS392 GATA-H4 DYS437 DYS438 DYS448 -""".split() -YHRD_YFILERPLUS = """ -DYS576 DYS389I DYS635 DYS389B DYS627 DYS460 DYS458 DYS19/DYS394 GATA-H4 DYS448 DYS391 -DYS456 DYS390 DYS438 DYS392 DYS518 DYS570 DYS437 DYS385a DYS449 -DYS393 DYS439 DYS481 DYF387S1 DYS533 -""".split() -USYSTR_ALL = """ -DYF387S1 DYS19/DYS394 DYS385 DYS389I -DYS389B DYS390 DYS391 DYS392 -DYS393 DYS437 DYS438 DYS439 -DYS448 DYS449 DYS456 DYS458 -DYS460 DYS481 DYS518 DYS533 -DYS549 DYS570 DYS576 DYS627 -DYS635 DYS643 GATA-H4 -""".split() - - -class TREDsRepo(dict): - def __init__(self, ref=REF): - super().__init__() - self.ref = ref - df = pd.read_csv(REPO, index_col=0) - self.names = [] - for name, row in df.iterrows(): - self[name] = TRED(name, row, ref=ref) - self.names.append(name) - self.df = df - - def to_json(self): - s = self.df.to_json(orient="index") - s = s.decode("windows-1252").encode("utf8") - s = json.dumps(json.loads(s), sort_keys=True, indent=2) - return s - - def set_ploidy(self, haploid): - if not haploid: - return - for k, v in self.items(): - if v.chr in haploid: - v.ploidy = 1 - - def get_info(self, tredName): - tr = self.get(tredName) - info = "END={};MOTIF={};NS=1;REF={};CR={};IH={};RL={};VT=STR".format( - tr.repeat_end, - tr.repeat, - tr.ref_copy, - tr.cutoff_risk, - tr.inheritance, - tr.ref_copy * len(tr.repeat), - ) - return tr.chr, tr.repeat_start, tr.ref_copy, tr.repeat, info - - -class TRED(object): - def __init__(self, name, row, ref=REF): - - self.row = row - self.name = name - self.repeat = row["repeat"] - self.motif = row["motif"] - repeat_location_field = "repeat_location" - if ref != REF: - repeat_location_field += "." 
+ ref.split("_")[0] - repeat_location = row[repeat_location_field] - if "_nochr" in ref: # Some reference version do not have chr - repeat_location = repeat_location.replace("chr", "") - self.chr, repeat_location = repeat_location.split(":") - repeat_start, repeat_end = repeat_location.split("-") - self.repeat_start = int(repeat_start) - self.repeat_end = int(repeat_end) - self.ref_copy = (self.repeat_end - self.repeat_start + 1) / len(self.repeat) - self.prefix = row["prefix"] - self.suffix = row["suffix"] - self.cutoff_prerisk = row["cutoff_prerisk"] - self.cutoff_risk = row["cutoff_risk"] - self.inheritance = row["inheritance"] - self.is_xlinked = self.inheritance[0] == "X" - self.is_recessive = self.inheritance[-1] == "R" - self.is_expansion = row["mutation_nature"] == "increase" - self.ploidy = 2 - - def __repr__(self): - return "{} inheritance={} id={}_{}_{}".format( - self.name, self.inheritance, self.chr, self.repeat_start, self.repeat - ) - - def __str__(self): - return ";".join( - str(x) - for x in ( - self.name, - self.repeat, - self.chr, - self.repeat_start, - self.repeat_end, - self.prefix, - self.suffix, - ) - ) - - -class STRLine(object): - def __init__(self, line): - args = line.split() - self.seqid = args[0] - self.start = int(args[1]) - self.end = int(args[2]) - self.period = int(args[3]) - self.copynum = args[4] - self.consensusSize = int(args[5]) - self.pctmatch = int(args[6]) - self.pctindel = int(args[7]) - self.score = args[8] - self.A = args[9] - self.C = args[10] - self.G = args[11] - self.T = args[12] - self.entropy = float(args[13]) - self.motif = args[14] - assert self.period == len(self.motif) - self.name = args[15] if len(args) > 15 else None - - def __str__(self): - fields = [ - self.seqid, - self.start, - self.end, - self.period, - self.copynum, - self.consensusSize, - self.pctmatch, - self.pctindel, - self.score, - self.A, - self.C, - self.G, - self.T, - "{0:.2f}".format(self.entropy), - self.motif, - ] - if self.name is not None: - 
fields += [self.name] - return "\t".join(str(x) for x in fields) - - @property - def longname(self): - return "_".join(str(x) for x in (self.seqid, self.start, self.motif)) - - def is_valid(self, maxperiod=6, maxlength=READLEN, minscore=MINSCORE): - return ( - 1 <= self.period <= maxperiod - and (self.end - self.start + 1) <= maxlength - and self.score >= minscore - ) - - def calc_entropy(self): - total = self.A + self.C + self.G + self.T - if total == 0: # Perhaps they are all Ns - might crash in lobstrindex() - return 0 - fractions = [x * 1.0 / total for x in [self.A, self.C, self.G, self.T]] - entropy = sum([-1.0 * x * log(x, 2) for x in fractions if x != 0]) - return entropy - - def iter_exact_str(self, genome): - pat = re.compile("(({0}){{2,}})".format(self.motif)) - start = self.start - s = genome[self.seqid][self.start - 1 : self.end].upper() - for m in re.finditer(pat, s): - self.start = start + m.start() - length = m.end() - m.start() - subseq = m.group(0) - assert length % self.period == 0 - assert subseq.startswith(self.motif) - - self.end = self.start - 1 + length - self.copynum = length / self.period - self.pctmatch = 100 - self.pctindel = 0 - self.score = 2 * length - self.fix_counts(subseq) - yield self - - def fix_counts(self, subseq): - length = int(ceil(self.period * float(self.copynum))) - # Sanity check for length, otherwise lobSTR misses units - self.end = max(self.end, self.start + length - 1) - self.A = subseq.count("A") - self.C = subseq.count("C") - self.G = subseq.count("G") - self.T = subseq.count("T") - self.entropy = self.calc_entropy() - - -class STRFile(LineFile): - def __init__(self, lobstr_home, db="hg38"): - filename = op.join(lobstr_home, "{0}/index.info".format(db)) - super().__init__(filename) - fp = open(filename) - for row in fp: - self.append(STRLine(row)) - - @property - def ids(self): - return [s.longname for s in self] - - @property - def register(self): - return dict(((s.seqid, s.start), s.name) for s in self) - - -class 
LobSTRvcf(dict): - def __init__(self, columnidsfile="STR.ids"): - super().__init__() - self.samplekey = None - self.evidence = {} # name: (supporting reads, stutter reads) - if columnidsfile: - fp = open(columnidsfile) - self.columns = [x.strip() for x in fp] - logger.debug( - "A total of {} markers imported from `{}`".format( - len(self.columns), columnidsfile - ) - ) - - def parse(self, filename, filtered=True, cleanup=False): - self.samplekey = op.basename(filename).split(".")[0] - logger.debug("Parse `{}` (filtered={})".format(filename, filtered)) - fp = must_open(filename) - reader = vcf.Reader(fp) - for record in reader: - if filtered and record.FILTER: - continue - info = record.INFO - ref = float(info["REF"]) - rpa = info.get("RPA", ref) - motif = info["MOTIF"] - name = "_".join(str(x) for x in (record.CHROM, record.POS, motif)) - for sample in record.samples: - gt = sample["GT"] - if filtered and sample["FT"] != "PASS": - continue - if gt == "0/0": - alleles = (ref, ref) - elif gt in ("0/1", "1/0"): - alleles = (ref, rpa[0]) - elif gt == "1/1": - alleles = (rpa[0], rpa[0]) - elif gt == "1/2": - alleles = rpa - try: - self[name] = ",".join(str(int(x)) for x in sorted(alleles)) - except: - self[name] = "-,-" - - # Collect supporting read evidence - motif_length = len(motif) - adjusted_alleles = [(x - ref) * motif_length for x in alleles] - support = stutters = 0 - allreads = sample["ALLREADS"] - for r in allreads.split(";"): - k, v = r.split("|") - k, v = int(k), int(v) - min_dist = min([abs(k - x) for x in adjusted_alleles]) - if motif_length * 0.5 < min_dist < motif_length * 1.5: - stutters += v - support += v - self.evidence[name] = "{},{}".format(stutters, support) - - if cleanup: - sh("rm -f {}".format(op.basename(filename))) - - @property - def csvline(self): - return ",".join([self.get(c, "-1,-1") for c in self.columns]) - - @property - def evline(self): - return ",".join([self.evidence.get(c, "-1,-1") for c in self.columns]) - - -def main(): - - 
actions = ( - # Compile population data - pipeline: compilevcf->mergecsv->meta->data->mask - ("bin", "convert tsv to binary format"), - ("filtervcf", "filter lobSTR VCF"), - ("compilevcf", "compile vcf results into master spreadsheet"), - ("mergecsv", "combine csv into binary array"), - ("meta", "compute allele frequencies and write to meta"), - ("data", "filter data based on the meta calls"), - ("mask", "compute P-values based on meta calls and data"), - ("treds", "compile allele_frequency for TRED results"), - # lobSTR related - ("lobstrindex", "make lobSTR index"), - ("batchlobstr", "run batch lobSTR"), - ("lobstr", "run lobSTR on a big BAM"), - ("locus", "extract selected locus and run lobSTR"), - ("stutter", "extract info from lobSTR vcf file"), - # Specific markers - ("liftover", "liftOver CODIS/Y-STR markers"), - ("trf", "run TRF on FASTA files"), - ("ystr", "print out Y-STR info given VCF"), - ) - p = ActionDispatcher(actions) - p.dispatch(globals()) - - -def treds(args): - """ - %prog treds hli.tred.tsv - - Compile allele_frequency for TREDs results. Write data.tsv, meta.tsv and - mask.tsv in one go. 
- """ - from jcvi.apps.base import datafile - - p = OptionParser(treds.__doc__) - p.add_argument( - "--csv", default=False, action="store_true", help="Also write `meta.csv`" - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (tredresults,) = args - df = pd.read_csv(tredresults, sep="\t") - - tredsfile = datafile("TREDs.meta.csv") - tf = pd.read_csv(tredsfile) - - tds = list(tf["abbreviation"]) - ids = list(tf["id"]) - tags = ["SampleKey"] - final_columns = ["SampleKey"] - afs = [] - for td, id in zip(tds, ids): - tag1 = "{}.1".format(td) - tag2 = "{}.2".format(td) - if tag2 not in df: - afs.append("{}") - continue - tags.append(tag2) - final_columns.append(id) - a = np.array(list(df[tag1]) + list(df[tag2])) - counts = alleles_to_counts(a) - af = counts_to_af(counts) - afs.append(af) - - tf["allele_frequency"] = afs - - metafile = "TREDs_{}_SEARCH.meta.tsv".format(timestamp()) - tf.to_csv(metafile, sep="\t", index=False) - logger.debug("File `{}` written.".format(metafile)) - if opts.csv: - metacsvfile = metafile.rsplit(".", 1)[0] + ".csv" - tf.to_csv(metacsvfile, index=False) - logger.debug("File `{}` written.".format(metacsvfile)) - - pp = df[tags] - pp.columns = final_columns - datafile = "TREDs_{}_SEARCH.data.tsv".format(timestamp()) - pp.to_csv(datafile, sep="\t", index=False) - logger.debug("File `{}` written.".format(datafile)) - - mask([datafile, metafile]) - - -def stutter(args): - """ - %prog stutter a.vcf.gz - - Extract info from lobSTR vcf file. 
Generates a file that has the following - fields: - - CHR, POS, MOTIF, RL, ALLREADS, Q - """ - p = OptionParser(stutter.__doc__) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (vcf,) = args - pf = op.basename(vcf).split(".")[0] - execid, sampleid = pf.split("_") - - C = "vcftools --remove-filtered-all --min-meanDP 10" - C += " --gzvcf {} --out {}".format(vcf, pf) - C += " --indv {}".format(sampleid) - - info = pf + ".INFO" - if need_update(vcf, info): - cmd = C + " --get-INFO MOTIF --get-INFO RL" - sh(cmd) - - allreads = pf + ".ALLREADS.FORMAT" - if need_update(vcf, allreads): - cmd = C + " --extract-FORMAT-info ALLREADS" - sh(cmd) - - q = pf + ".Q.FORMAT" - if need_update(vcf, q): - cmd = C + " --extract-FORMAT-info Q" - sh(cmd) - - outfile = pf + ".STUTTER" - if need_update((info, allreads, q), outfile): - cmd = "cut -f1,2,5,6 {}".format(info) - cmd += r" | sed -e 's/\t/_/g'" - cmd += " | paste - {} {}".format(allreads, q) - cmd += " | cut -f1,4,7" - sh(cmd, outfile=outfile) - - -def write_filtered(vcffile, lhome, store=None): - if vcffile.startswith("s3://"): - vcffile = pull_from_s3(vcffile) - - filteredvcf = op.basename(vcffile).replace(".vcf", ".filtered.vcf") - cmd = "python {}/scripts/lobSTR_filter_vcf.py".format(lhome) - cmd += " --vcf {}".format(vcffile) - cmd += " --loc-cov 5 --loc-log-score 0.8" - # cmd += " --loc-call-rate 0.8 --loc-max-ref-length 80" - # cmd += " --call-cov 5 --call-log-score 0.8 --call-dist-end 20" - sh(cmd, outfile=filteredvcf) - - if store: - push_to_s3(store, filteredvcf) - - return filteredvcf - - -def run_filter(arg): - vcffile, lhome, store = arg - filteredvcf = vcffile.replace(".vcf", ".filtered.vcf") - try: - if vcffile.startswith("s3://"): - if not check_exists_s3(filteredvcf, warn=True): - write_filtered(vcffile, lhome, store=store) - logger.debug("{} written and uploaded.".format(filteredvcf)) - else: - if need_update(vcffile, filteredvcf): - write_filtered(vcffile, lhome, 
store=None) - except Exception as e: - logger.debug("Thread failed! Error: {}".format(e)) - - -def filtervcf(args): - """ - %prog filtervcf NA12878.hg38.vcf.gz - - Filter lobSTR VCF using script shipped in lobSTR. Input file can be a list - of vcf files. - """ - p = OptionParser(filtervcf.__doc__) - p.set_home("lobstr", default="/mnt/software/lobSTR") - p.set_aws_opts(store="hli-mv-data-science/htang/str") - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (samples,) = args - lhome = opts.lobstr_home - store = opts.output_path - - if samples.endswith((".vcf", ".vcf.gz")): - vcffiles = [samples] - else: - vcffiles = [x.strip() for x in must_open(samples)] - - vcffiles = [x for x in vcffiles if ".filtered." not in x] - - run_args = [(x, lhome, x.startswith("s3://") and store) for x in vcffiles] - cpus = min(opts.cpus, len(run_args)) - p = Pool(processes=cpus) - for _ in p.map_async(run_filter, run_args).get(): - continue - - -def write_meta(af_file, gene_map, blacklist, filename="meta.tsv"): - fp = open(af_file) - fw = open(filename, "w") - header = "id title gene_name variant_type motif allele_frequency".replace(" ", "\t") - print(header, file=fw) - variant_type = "short tandem repeats" - title = "Short tandem repeats ({})n" - for row in fp: - locus, af, remove = row.split() - if remove == "MISSING": - continue - if locus in blacklist: - continue - - seqid, pos, motif = locus.split("_") - gene_name = gene_map.get((seqid, pos), "") - print( - "\t".join((locus, title.format(motif), gene_name, variant_type, motif, af)), - file=fw, - ) - fw.close() - logger.debug("Write meta file to `{}`".format(filename)) - - -def read_treds(tredsfile=datafile("TREDs.meta.csv")): - if tredsfile.endswith(".csv"): - df = pd.read_csv(tredsfile) - treds = set(df["id"]) - else: - df = pd.read_csv(tredsfile, sep="\t") - treds = set(df["abbreviation"]) - - logger.debug("Loaded {} treds from `{}`".format(len(treds), tredsfile)) - return 
treds, df - - -def meta(args): - """ - %prog meta data.bin samples STR.ids STR-exons.wo.bed - - Compute allele frequencies and prune sites based on missingness. - - Filter subset of loci that satisfy: - 1. no redundancy (unique chr:pos) - 2. variable (n_alleles > 1) - 3. low level of missing data (>= 50% autosomal + X, > 25% for Y) - - Write meta file with the following infor: - 1. id - 2. title - 3. gene_name - 4. variant_type - 5. motif - 6. allele_frequency - - `STR-exons.wo.bed` can be generated like this: - $ tail -n 694105 /mnt/software/lobSTR/hg38/index.tab | cut -f1-3 > all-STR.bed - $ intersectBed -a all-STR.bed -b all-exons.bed -wo > STR-exons.wo.bed - """ - p = OptionParser(meta.__doc__) - p.add_argument( - "--cutoff", - default=0.5, - type=float, - help="Percent observed required (chrY half cutoff)", - ) - p.set_cpus() - opts, args = p.parse_args(args) - - if len(args) != 4: - sys.exit(not p.print_help()) - - binfile, sampleids, strids, wobed = args - cutoff = opts.cutoff - - af_file = "allele_freq" - if need_update(binfile, af_file): - df, m, samples, loci = read_binfile(binfile, sampleids, strids) - nalleles = len(samples) - fw = must_open(af_file, "w") - for i, locus in enumerate(loci): - a = m[:, i] - counts = alleles_to_counts(a) - af = counts_to_af(counts) - seqid = locus.split("_")[0] - remove = counts_filter(counts, nalleles, seqid, cutoff=cutoff) - print("\t".join((locus, af, remove)), file=fw) - fw.close() - - logger.debug("Load gene intersections from `{}`".format(wobed)) - fp = open(wobed) - gene_map = defaultdict(set) - for row in fp: - chr1, start1, end1, chr2, start2, end2, name, ov = row.split() - gene_map[(chr1, start1)] |= set(name.split(",")) - for k, v in gene_map.items(): - non_enst = sorted(x for x in v if not x.startswith("ENST")) - # enst = sorted(x.rsplit(".", 1)[0] for x in v if x.startswith("ENST")) - gene_map[k] = ",".join(non_enst) - - TREDS, df = read_treds() - - metafile = "STRs_{}_SEARCH.meta.tsv".format(timestamp()) - 
write_meta(af_file, gene_map, TREDS, filename=metafile) - logger.debug("File `{}` written.".format(metafile)) - - -def alleles_to_counts(a): - # xa = a / 1000 - xb = a % 1000 - counts = Counter() - # counts.update(xa) - counts.update(xb) - del counts[-1] - del counts[999] - return counts - - -def counts_to_af(counts): - return "{" + ",".join("{}:{}".format(k, v) for k, v in sorted(counts.items())) + "}" - - -def af_to_counts(af): - countst = [x for x in af.strip("{}").split(",") if x] - countsd = {} - for x in countst: - a, b = x.split(":") - countsd[int(a)] = int(b) - return countsd - - -def bin(args): - """ - %prog bin data.tsv - - Conver tsv to binary format. - """ - p = OptionParser(bin.__doc__) - p.add_argument("--dtype", choices=("float32", "int32"), help="dtype of the matrix") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (tsvfile,) = args - dtype = opts.dtype - if dtype is None: # Guess - dtype = np.int32 if "data" in tsvfile else np.float32 - else: - dtype = np.int32 if dtype == "int32" else np.float32 - - print("dtype: {}".format(dtype), file=sys.stderr) - fp = open(tsvfile) - next(fp) - arrays = [] - for i, row in enumerate(fp): - a = np.fromstring(row, sep="\t", dtype=dtype) - a = a[1:] - arrays.append(a) - print(i, a, file=sys.stderr) - - print("Merging", file=sys.stderr) - b = np.concatenate(arrays) - print("Binary shape: {}".format(b.shape), file=sys.stderr) - binfile = tsvfile.rsplit(".", 1)[0] + ".bin" - b.tofile(binfile) - - -def counts_to_percentile(counts): - percentile = {} - s = 0 - for k, v in sorted(counts.items(), reverse=True): - s += v - percentile[k] = s - for k, v in percentile.items(): - v = "{:.6f}".format(v * 1.0 / s) - percentile[k] = v - return percentile - - -def convert_to_percentile(arg): - i, a, percentile = arg - pp = np.array([percentile.get(x, "1.000000") for x in a], dtype="S8") - if i % 1000 == 0: - print(i, file=sys.stderr) - print(a, file=sys.stderr) - print(pp, 
file=sys.stderr) - return i, pp - - -def write_csv(csvfile, m, index, columns, sep="\t", index_label="SampleKey"): - fw = open(csvfile, "w") - print(sep.join([index_label] + columns), file=fw) - for i, a in enumerate(m): - print(index[i] + sep + sep.join(str(x) for x in a), file=fw) - fw.close() - - -def read_meta(metafile): - df = pd.read_csv(metafile, sep="\t") - final_columns = [] - percentiles = {} - for i, row in df.iterrows(): - id = row["id"] - final_columns.append(id) - counts = row["allele_frequency"] - countsd = af_to_counts(counts) - percentile = counts_to_percentile(countsd) - percentiles[id] = percentile - return final_columns, percentiles - - -def write_mask(cpus, samples, final_columns, run_args, filename="mask.tsv"): - p = Pool(processes=cpus) - res = [] - r = p.map_async(convert_to_percentile, run_args, callback=res.append) - r.wait() - res.sort() - - if len(res) == 1: # sometimes res end up with one more nest - (res,) = res - - # Write mask (P-value) matrix - ii, pvalues = zip(*res) - m = np.vstack(pvalues).T - write_csv(filename, m, samples, final_columns) - - -def data(args): - """ - %prog data data.bin samples.ids STR.ids meta.tsv - - Make data.tsv based on meta.tsv. 
- """ - p = OptionParser(data.__doc__) - p.add_argument( - "--notsv", default=False, action="store_true", help="Do not write data.tsv" - ) - opts, args = p.parse_args(args) - - if len(args) != 4: - sys.exit(not p.print_help()) - - databin, sampleids, strids, metafile = args - final_columns, percentiles = read_meta(metafile) - df, m, samples, loci = read_binfile(databin, sampleids, strids) - - # Clean the data - m %= 1000 # Get the larger of the two alleles - m[m == 999] = -1 # Missing data - - final = set(final_columns) - remove = [] - for i, locus in enumerate(loci): - if locus not in final: - remove.append(locus) - continue - - pf = "STRs_{}_SEARCH".format(timestamp()) - filteredstrids = "{}.STR.ids".format(pf) - fw = open(filteredstrids, "w") - print("\n".join(final_columns), file=fw) - fw.close() - logger.debug( - "Dropped {} columns; Retained {} columns (`{}`)".format( - len(remove), len(final_columns), filteredstrids - ) - ) - - # Remove low-quality columns! - df.drop(remove, inplace=True, axis=1) - df.columns = final_columns - - filtered_bin = "{}.data.bin".format(pf) - if need_update(databin, filtered_bin): - m = df.as_matrix() - m.tofile(filtered_bin) - logger.debug("Filtered binary matrix written to `{}`".format(filtered_bin)) - - # Write data output - filtered_tsv = "{}.data.tsv".format(pf) - if not opts.notsv and need_update(databin, filtered_tsv): - df.to_csv(filtered_tsv, sep="\t", index_label="SampleKey") - - -def mask(args): - """ - %prog mask data.bin samples.ids STR.ids meta.tsv - - OR - - %prog mask data.tsv meta.tsv - - Compute P-values based on meta and data. The `data.bin` should be the matrix - containing filtered loci and the output mask.tsv will have the same - dimension. 
- """ - p = OptionParser(mask.__doc__) - opts, args = p.parse_args(args) - - if len(args) not in (2, 4): - sys.exit(not p.print_help()) - - if len(args) == 4: - databin, sampleids, strids, metafile = args - df, m, samples, loci = read_binfile(databin, sampleids, strids) - mode = "STRs" - elif len(args) == 2: - databin, metafile = args - df = pd.read_csv(databin, sep="\t", index_col=0) - m = df.as_matrix() - samples = df.index - loci = list(df.columns) - mode = "TREDs" - - pf = "{}_{}_SEARCH".format(mode, timestamp()) - final_columns, percentiles = read_meta(metafile) - - maskfile = pf + ".mask.tsv" - run_args = [] - for i, locus in enumerate(loci): - a = m[:, i] - percentile = percentiles[locus] - run_args.append((i, a, percentile)) - - if mode == "TREDs" or need_update(databin, maskfile): - cpus = min(8, len(run_args)) - write_mask(cpus, samples, final_columns, run_args, filename=maskfile) - logger.debug("File `{}` written.".format(maskfile)) - - -def counts_filter(countsd, nalleles, seqid, cutoff=0.5): - cutoff *= 100 - # Check for missingness - observed = sum(countsd.values()) - observed_pct = observed * 100 / nalleles - if observed_pct < cutoff: - if not (seqid == "chrY" and observed_pct >= cutoff / 2): - return "MISSING" - - # Check for variability - if len(countsd) < 2: - return "INVARIANT" - - return "PASS" - - -def read_binfile(binfile, sampleids, strids, dtype=np.int32): - m = np.fromfile(binfile, dtype=dtype) - samples = [x.strip() for x in open(sampleids)] - loci = [x.strip() for x in open(strids)] - nsamples, nloci = len(samples), len(loci) - print("{} x {} entries imported".format(nsamples, nloci), file=sys.stderr) - - m.resize(nsamples, nloci) - df = pd.DataFrame(m, index=samples, columns=loci) - return df, m, samples, loci - - -def mergecsv(args): - """ - %prog mergecsv *.csv - - Combine CSV into binary array. 
- """ - p = OptionParser(mergecsv.__doc__) - opts, args = p.parse_args(args) - - if len(args) < 1: - sys.exit(not p.print_help()) - - csvfiles = args - arrays = [] - samplekeys = [] - for csvfile in csvfiles: - samplekey = op.basename(csvfile).split(".")[0] - a = np.fromfile(csvfile, sep=",", dtype=np.int32) - x1 = a[::2] - x2 = a[1::2] - a = x1 * 1000 + x2 - a[a < 0] = -1 - arrays.append(a) - samplekeys.append(samplekey) - print(samplekey, a, file=sys.stderr) - print("Merging", file=sys.stderr) - b = np.concatenate(arrays) - b.tofile("data.bin") - - fw = open("samples", "w") - print("\n".join(samplekeys), file=fw) - fw.close() - - -def write_csv_ev(filename, filtered, cleanup, store=None): - lv = LobSTRvcf() - lv.parse(filename, filtered=filtered, cleanup=cleanup) - csvfile = op.basename(filename) + ".csv" - evfile = op.basename(filename) + ".ev" - - fw = open(csvfile, "w") - print(lv.csvline, file=fw) - fw.close() - - fw = open(evfile, "w") - print(lv.evline, file=fw) - fw.close() - - # Save to s3 - if store: - push_to_s3(store, csvfile) - push_to_s3(store, evfile) - - -def run_compile(arg): - filename, filtered, cleanup, store = arg - csvfile = filename + ".csv" - try: - if filename.startswith("s3://"): - if not check_exists_s3(csvfile, warn=True): - write_csv_ev(filename, filtered, cleanup, store=store) - logger.debug("{} written and uploaded.".format(csvfile)) - else: - if need_update(filename, csvfile): - write_csv_ev(filename, filtered, cleanup, store=None) - except Exception as e: - logger.debug("Thread failed! Error: {}".format(e)) - - -def compilevcf(args): - """ - %prog compilevcf samples.csv - - Compile vcf results into master spreadsheet. 
- """ - p = OptionParser(compilevcf.__doc__) - p.add_argument("--db", default="hg38", help="Use these lobSTR db") - p.add_argument( - "--nofilter", - default=False, - action="store_true", - help="Do not filter the variants", - ) - p.set_home("lobstr") - p.set_cpus() - p.set_aws_opts(store="hli-mv-data-science/htang/str-data") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (samples,) = args - workdir = opts.workdir - store = opts.output_path - cleanup = not opts.nocleanup - filtered = not opts.nofilter - dbs = opts.db.split(",") - cwd = os.getcwd() - mkdir(workdir) - os.chdir(workdir) - samples = op.join(cwd, samples) - - stridsfile = "STR.ids" - if samples.endswith((".vcf", ".vcf.gz")): - vcffiles = [samples] - else: - vcffiles = [x.strip() for x in must_open(samples)] - if not op.exists(stridsfile): - ids = [] - for db in dbs: - ids.extend(STRFile(opts.lobstr_home, db=db).ids) - uids = uniqify(ids) - logger.debug("Combined: {} Unique: {}".format(len(ids), len(uids))) - - fw = open(stridsfile, "w") - print("\n".join(uids), file=fw) - fw.close() - - run_args = [(x, filtered, cleanup, store) for x in vcffiles] - cpus = min(opts.cpus, len(run_args)) - p = Pool(processes=cpus) - for _ in p.map_async(run_compile, run_args).get(): - continue - - -def build_ysearch_link(r, ban=["DYS520", "DYS413a", "DYS413b"]): - template = "http://www.ysearch.org/search_search.asp?fail=2&uid=&freeentry=true&" - markers = [] - for i, marker in zip(YSEARCH_LL, YSEARCH_HAPLOTYPE): - z = r.get(marker, "null") - if "a/b" in marker or marker in ban: - z = "null" - m = "{0}={1}".format(i, z) - markers.append(m) - print(template + "&".join(markers)) - - -def build_yhrd_link(r, panel, ban=["DYS385"]): - L = [] - for marker in panel: - z = r.get(marker, "--") - if marker in ban: - z = "--" - L.append(z) - print(" ".join(str(x) for x in L)) - - -def ystr(args): - """ - %prog ystr chrY.vcf - - Print out Y-STR info given VCF. 
Marker name extracted from tabfile. - """ - from jcvi.utils.table import write_csv - - p = OptionParser(ystr.__doc__) - p.set_home("lobstr") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (vcffile,) = args - si = STRFile(opts.lobstr_home, db="hg38-named") - register = si.register - - header = "Marker|Reads|Ref|Genotype|Motif".split("|") - contents = [] - fp = must_open(vcffile) - reader = vcf.Reader(fp) - simple_register = {} - for record in reader: - name = register[(record.CHROM, record.POS)] - info = record.INFO - ref = int(float(info["REF"])) - rpa = info.get("RPA", ref) - if isinstance(rpa, list): - rpa = "|".join(str(int(float(x))) for x in rpa) - ru = info["RU"] - simple_register[name] = rpa - for sample in record.samples: - contents.append((name, sample["ALLREADS"], ref, rpa, ru)) - - # Multi-part markers - a, b, c = "DYS389I", "DYS389B.1", "DYS389B" - if a in simple_register and b in simple_register: - simple_register[c] = int(simple_register[a]) + int(simple_register[b]) - - # Multi-copy markers - mm = ["DYS385", "DYS413", "YCAII"] - for m in mm: - ma, mb = m + "a", m + "b" - if ma not in simple_register or mb not in simple_register: - simple_register[ma] = simple_register[mb] = None - del simple_register[ma] - del simple_register[mb] - continue - if simple_register[ma] > simple_register[mb]: - simple_register[ma], simple_register[mb] = ( - simple_register[mb], - simple_register[ma], - ) - - write_csv(header, contents, sep=" ") - print("[YSEARCH]") - build_ysearch_link(simple_register) - print("[YFILER]") - build_yhrd_link(simple_register, panel=YHRD_YFILER) - print("[YFILERPLUS]") - build_yhrd_link(simple_register, panel=YHRD_YFILERPLUS) - print("[YSTR-ALL]") - build_yhrd_link(simple_register, panel=USYSTR_ALL) - - -def get_motif(s, motif_length): - sl = len(s) - kmers = set() - # Get all kmers - for i in range(sl - motif_length): - ss = s[i : i + motif_length] - kmers.add(ss) - - kmer_counts = [] - for kmer 
in kmers: - kmer_counts.append((s.count(kmer), -s.index(kmer), kmer)) - - return sorted(kmer_counts, reverse=True)[0][-1] - - -def liftover(args): - """ - %prog liftover lobstr_v3.0.2_hg38_ref.bed hg38.upper.fa - - LiftOver CODIS/Y-STR markers. - """ - p = OptionParser(liftover.__doc__) - p.add_argument( - "--checkvalid", - default=False, - action="store_true", - help="Check minscore, period and length", - ) - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - refbed, fastafile = args - genome = pyfasta.Fasta(fastafile) - edits = [] - fp = open(refbed) - for i, row in enumerate(fp): - s = STRLine(row) - seq = genome[s.seqid][s.start - 1 : s.end].upper() - s.motif = get_motif(seq, len(s.motif)) - s.fix_counts(seq) - if opts.checkvalid and not s.is_valid(): - continue - edits.append(s) - if i % 10000 == 0: - print(i, "lines read", file=sys.stderr) - - edits = natsorted(edits, key=lambda x: (x.seqid, x.start)) - for e in edits: - print(str(e)) - - -def trf(args): - """ - %prog trf outdir - - Run TRF on FASTA files. 
- """ - from jcvi.apps.base import iglob - - cparams = "1 1 2 80 5 200 2000" - - p = OptionParser(trf.__doc__) - p.add_argument("--mismatch", default=31, type=int, help="Mismatch and gap penalty") - p.add_argument( - "--minscore", default=MINSCORE, type=int, help="Minimum score to report" - ) - p.add_argument("--period", default=6, type=int, help="Maximum period to report") - p.add_argument( - "--lobstr", - default=False, - action="store_true", - help="Generate output for lobSTR", - ) - p.add_argument( - "--telomeres", - default=False, - action="store_true", - help="Run telomere search: minscore=140 period=7", - ) - p.add_argument( - "--centromeres", - default=False, - action="store_true", - help="Run centromere search: {}".format(cparams), - ) - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (outdir,) = args - minlength = opts.minscore / 2 - mm = MakeManager() - if opts.telomeres: - opts.minscore, opts.period = 140, 7 - - params = "2 {0} {0} 80 10 {1} {2}".format( - opts.mismatch, opts.minscore, opts.period - ).split() - if opts.centromeres: - params = cparams.split() - - bedfiles = [] - for fastafile in natsorted(iglob(outdir, "*.fa,*.fasta")): - pf = op.basename(fastafile).rsplit(".", 1)[0] - # Commands starting with trf ignores errors - cmd1 = "-trf {0} {1} -d -h".format(fastafile, " ".join(params)) - datfile = op.basename(fastafile) + "." 
+ ".".join(params) + ".dat" - bedfile = "{0}.trf.bed".format(pf) - cmd2 = "cat {} | grep -v ^Parameters".format(datfile) - if opts.lobstr: - cmd2 += " | awk '($8 >= {} && $8 <= {})'".format( - minlength, READLEN - minlength - ) - else: - cmd2 += " | awk '($8 >= 0)'" - cmd2 += " | sed 's/ /\\t/g'" - cmd2 += " | awk '{{print \"{0}\\t\" $0}}' > {1}".format(pf, bedfile) - mm.add(fastafile, datfile, cmd1) - mm.add(datfile, bedfile, cmd2) - bedfiles.append(bedfile) - - bedfile = "trf.bed" - cmd = "cat {0} > {1}".format(" ".join(natsorted(bedfiles)), bedfile) - mm.add(bedfiles, bedfile, cmd) - - mm.write() - - -def batchlobstr(args): - """ - %prog batchlobstr samples.csv - - Run lobSTR sequentially on list of samples. Each line contains: - sample-name,s3-location - """ - p = OptionParser(batchlobstr.__doc__) - p.add_argument("--sep", default=",", help="Separator for building commandline") - p.set_home("lobstr", default="s3://hli-mv-data-science/htang/str-build/lobSTR/") - p.set_aws_opts(store="hli-mv-data-science/htang/str-data") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (samplesfile,) = args - store = opts.output_path - computed = ls_s3(store) - fp = open(samplesfile) - skipped = total = 0 - for row in fp: - total += 1 - sample, s3file = row.strip().split(",")[:2] - exec_id, sample_id = sample.split("_") - bamfile = s3file.replace(".gz", "").replace(".vcf", ".bam") - - gzfile = sample + ".{0}.vcf.gz".format("hg38") - if gzfile in computed: - skipped += 1 - continue - - print( - opts.sep.join( - "python -m jcvi.variation.str lobstr".split() - + [ - "hg38", - "--input_bam_path", - bamfile, - "--output_path", - store, - "--sample_id", - sample_id, - "--workflow_execution_id", - exec_id, - "--lobstr_home", - opts.lobstr_home, - "--workdir", - opts.workdir, - ] - ) - ) - fp.close() - logger.debug("Total skipped: {0}".format(percentage(skipped, total))) - - -def lobstr(args): - """ - %prog lobstr lobstr_index1 lobstr_index2 ... 
- - Run lobSTR on a big BAM file. There can be multiple lobSTR indices. In - addition, bamfile can be S3 location and --lobstr_home can be S3 location - (e.g. s3://hli-mv-data-science/htang/str-build/lobSTR/) - """ - p = OptionParser(lobstr.__doc__) - p.add_argument( - "--haploid", default="chrY,chrM", help="Use haploid model for these chromosomes" - ) - p.add_argument("--chr", help="Run only this chromosome") - p.add_argument( - "--simulation", default=False, action="store_true", help="Simulation mode" - ) - p.set_home("lobstr", default="s3://hli-mv-data-science/htang/str-build/lobSTR/") - p.set_cpus() - p.set_aws_opts(store="hli-mv-data-science/htang/str-data") - opts, args = p.parse_args(args) - bamfile = opts.input_bam_path - - if len(args) < 1 or bamfile is None: - sys.exit(not p.print_help()) - - lbindices = args - if opts.simulation: # Simulation mode - cmd, vcf_file = allelotype_on_chr( - bamfile, "chr4", "/mnt/software/lobSTR/", "TREDs", haploid=opts.haploid - ) - stats_file = vcf_file.rsplit(".", 1)[0] + ".allelotype.stats" - results_dir = "lobstr_results" - mkdir(results_dir) - sh(cmd) - sh("mv {} {}/ && rm {}".format(vcf_file, results_dir, stats_file)) - return - - s3mode = bamfile.startswith("s3") - store = opts.output_path - cleanup = not opts.nocleanup - workdir = opts.workdir - mkdir(workdir) - os.chdir(workdir) - - lhome = opts.lobstr_home - if lhome.startswith("s3://"): - lhome = pull_from_s3(lhome, overwrite=False) - - exec_id, sample_id = opts.workflow_execution_id, opts.sample_id - prefix = [x for x in (exec_id, sample_id) if x] - if prefix: - pf = "_".join(prefix) - else: - pf = bamfile.split("/")[-1].split(".")[0] - - if s3mode: - gzfile = pf + ".{0}.vcf.gz".format(lbindices[-1]) - remotegzfile = "{0}/{1}".format(store, gzfile) - if check_exists_s3(remotegzfile): - logger.debug( - "Object `{0}` exists. 
Computation skipped.".format(remotegzfile) - ) - return - localbamfile = pf + ".bam" - localbaifile = localbamfile + ".bai" - if op.exists(localbamfile): - logger.debug("BAM file already downloaded.") - else: - pull_from_s3(bamfile, localbamfile) - if op.exists(localbaifile): - logger.debug("BAM index file already downloaded.") - else: - remotebaifile = bamfile + ".bai" - if check_exists_s3(remotebaifile): - pull_from_s3(remotebaifile, localbaifile) - else: - remotebaifile = bamfile.rsplit(".")[0] + ".bai" - if check_exists_s3(remotebaifile): - pull_from_s3(remotebaifile, localbaifile) - else: - logger.debug("BAM index cannot be found in S3!") - sh("samtools index {0}".format(localbamfile)) - bamfile = localbamfile - - chrs = [opts.chr] if opts.chr else (range(1, 23) + ["X", "Y"]) - for lbidx in lbindices: - makefile = "makefile.{0}".format(lbidx) - mm = MakeManager(filename=makefile) - vcffiles = [] - for chr in chrs: - cmd, vcffile = allelotype_on_chr( - bamfile, chr, lhome, lbidx, haploid=opts.haploid - ) - mm.add(bamfile, vcffile, cmd) - filteredvcffile = vcffile.replace(".vcf", ".filtered.vcf") - cmd = "python -m jcvi.variation.str filtervcf {}".format(vcffile) - cmd += " --lobstr_home {}".format(lhome) - mm.add(vcffile, filteredvcffile, cmd) - vcffiles.append(filteredvcffile) - - gzfile = bamfile.split(".")[0] + ".{0}.vcf.gz".format(lbidx) - cmd = "vcf-concat {0} | vcf-sort".format(" ".join(vcffiles)) - cmd += " | bgzip -c > {0}".format(gzfile) - mm.add(vcffiles, gzfile, cmd) - - mm.run(cpus=opts.cpus) - - if s3mode: - push_to_s3(store, gzfile) - - if cleanup: - mm.clean() - sh("rm -f {} {} *.bai *.stats".format(bamfile, mm.makefile)) - - -def allelotype_on_chr(bamfile, chr, lhome, lbidx, haploid="chrY,chrM"): - if "chr" not in chr.lower(): - chr = "chr" + chr - outfile = "{0}.{1}".format(bamfile.split(".")[0], chr) - cmd = "allelotype --command classify --bam {}".format(bamfile) - cmd += " --noise_model {0}/models/illumina_v3.pcrfree".format(lhome) - cmd += 
" --strinfo {0}/{1}/index.tab".format(lhome, lbidx) - cmd += " --index-prefix {0}/{1}/lobSTR_".format(lhome, lbidx) - cmd += " --chrom {0} --out {1}.{2}".format(chr, outfile, lbidx) - cmd += " --max-diff-ref {0}".format(READLEN) - cmd += " --realign" - cmd += " --haploid {}".format(haploid) - return cmd, ".".join((outfile, lbidx, "vcf")) - - -def locus(args): - """ - %prog locus bamfile - - Extract selected locus from a list of TREDs for validation, and run lobSTR. - """ - from jcvi.formats.sam import get_minibam - - # See `Format-lobSTR-database.ipynb` for a list of TREDs for validation - INCLUDE = ["HD", "SBMA", "SCA1", "SCA2", "SCA8", "SCA17", "DM1", "DM2", "FXTAS"] - db_choices = ("hg38", "hg19") - - p = OptionParser(locus.__doc__) - p.add_argument("--tred", choices=INCLUDE, help="TRED name") - p.add_argument("--ref", choices=db_choices, default="hg38", help="Reference genome") - p.set_home("lobstr") - opts, args = p.parse_args(args) - - if len(args) != 1: - sys.exit(not p.print_help()) - - (bamfile,) = args - ref = opts.ref - lhome = opts.lobstr_home - tred = opts.tred - - tredsfile = datafile("TREDs.meta.csv") - tf = pd.read_csv(tredsfile, index_col=0) - row = tf.ix[tred] - tag = "repeat_location" - ldb = "TREDs" - if ref == "hg19": - tag += "." 
+ ref - ldb += "-" + ref - seqid, start_end = row[tag].split(":") - - PAD = 1000 - start, end = start_end.split("-") - start, end = int(start) - PAD, int(end) + PAD - region = "{}:{}-{}".format(seqid, start, end) - - minibamfile = get_minibam(bamfile, region) - c = seqid.replace("chr", "") - cmd, vcf = allelotype_on_chr(minibamfile, c, lhome, ldb) - sh(cmd) - - parser = LobSTRvcf(columnidsfile=None) - parser.parse(vcf, filtered=False) - items = parser.items() - if not items: - print("No entry found!", file=sys.stderr) - return - - k, v = parser.items()[0] - print("{} => {}".format(tred, v.replace(",", "/")), file=sys.stderr) - - -def lobstrindex(args): - """ - %prog lobstrindex hg38.trf.bed hg38.upper.fa - - Make lobSTR index. Make sure the FASTA contain only upper case (so use - fasta.format --upper to convert from UCSC fasta). The bed file is generated - by str(). - """ - p = OptionParser(lobstrindex.__doc__) - p.add_argument( - "--notreds", - default=False, - action="store_true", - help="Remove TREDs from the bed file", - ) - p.set_home("lobstr") - opts, args = p.parse_args(args) - - if len(args) != 2: - sys.exit(not p.print_help()) - - trfbed, fastafile = args - pf = fastafile.split(".")[0] - lhome = opts.lobstr_home - mkdir(pf) - - if opts.notreds: - newbedfile = trfbed + ".new" - newbed = open(newbedfile, "w") - fp = open(trfbed) - retained = total = 0 - seen = set() - for row in fp: - r = STRLine(row) - total += 1 - name = r.longname - if name in seen: - continue - seen.add(name) - print(r, file=newbed) - retained += 1 - newbed.close() - logger.debug("Retained: {0}".format(percentage(retained, total))) - else: - newbedfile = trfbed - - mm = MakeManager() - cmd = "python {0}/scripts/lobstr_index.py".format(lhome) - cmd += " --str {0} --ref {1} --out {2}".format(newbedfile, fastafile, pf) - mm.add((newbedfile, fastafile), op.join(pf, "lobSTR_ref.fasta.rsa"), cmd) - - tabfile = "{0}/index.tab".format(pf) - cmd = "python {0}/scripts/GetSTRInfo.py".format(lhome) 
- cmd += " {0} {1} > {2}".format(newbedfile, fastafile, tabfile) - mm.add((newbedfile, fastafile), tabfile, cmd) - - infofile = "{0}/index.info".format(pf) - cmd = "cp {0} {1}".format(newbedfile, infofile) - mm.add(trfbed, infofile, cmd) - mm.write() - - -if __name__ == "__main__": - main() From a4c347f2489ae60053fd431544f9fb8169fc4aec Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 12:43:01 +1100 Subject: [PATCH 31/43] optparse not used --- src/jcvi/apps/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/jcvi/apps/base.py b/src/jcvi/apps/base.py index ba9c761f..80b9e658 100644 --- a/src/jcvi/apps/base.py +++ b/src/jcvi/apps/base.py @@ -29,6 +29,9 @@ from typing import Any, Collection, List, Optional, Tuple, Union from urllib.parse import urlencode +#from optparse import OptionParser as OptionP, OptionGroup, SUPPRESS_HELP + + from natsort import natsorted from rich.console import Console from rich.logging import RichHandler From 067553ff873e188bbc1e5a8c9256670bb638fea6 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 12:46:17 +1100 Subject: [PATCH 32/43] use logger instead of logging --- src/jcvi/graphics/base.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/jcvi/graphics/base.py b/src/jcvi/graphics/base.py index 6b40aa45..62113be5 100644 --- a/src/jcvi/graphics/base.py +++ b/src/jcvi/graphics/base.py @@ -322,16 +322,39 @@ def update_figname(figname: str, format: str) -> str: return figname + "." + format -def savefig(figname, dpi=150, iopts=None, cleanup=True): +def update_figname(figname: str, format: str) -> str: + """Update the name of a figure to include the format. 
+ + Args: + figname (str): Path to the figure + format (str): Figure format, must be one of GRAPHIC_FORMATS + + Returns: + str: New file path + """ + _, ext = op.splitext(figname) + if ext.strip(".") in GRAPHIC_FORMATS: # User suffix has precedence + return figname + # When the user has not supplied a format in the filename, use the requested format + assert format in GRAPHIC_FORMATS, "Invalid format" + return figname + "." + format + + +def savefig(figname, dpi=150, iopts=None, cleanup=True, transparent=False): try: format = figname.rsplit(".", 1)[-1].lower() except: format = "pdf" try: - logger.debug("Matplotlib backend is: %s", mpl.get_backend()) - plt.savefig(figname, dpi=dpi, format=format) + logger.debug(f"Matplotlib backend is: {mpl.get_backend()}") + logger.debug(f"Attempting save as: {figname}") + plt.savefig(figname, dpi=dpi, format=format, transparent=transparent) except Exception as e: - logger.error("savefig failed with message:\n%s", e) + message = "savefig failed with message:" + message += "\n{0}".format(str(e)) + logger.error(message) + logger.debug(f"Matplotlib backend is: {mpl.get_backend()}") + logger.debug(f"Attempted save as: {format}") logger.info("Try running again with --notex option to disable latex.") if op.exists(figname): if op.getsize(figname) < 1000: From e92fa50b1d11318a4463b05011e71eea30e97e20 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 13:03:53 +1100 Subject: [PATCH 33/43] fix conda env to py 3.12 --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 5ecec614..f47d16c7 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge - bioconda dependencies: - - python >=3.12 + - python 3.12 - bedtools - imagemagick - libmagic # System-level magic library From c17ed8e3913e121de316d315f4fc7d93e0313af1 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 13:04:31 +1100 Subject: [PATCH 34/43] init pytest 
action --- .github/workflows/pytest.yml | 40 ++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 .github/workflows/pytest.yml diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml new file mode 100644 index 00000000..ce1514fd --- /dev/null +++ b/.github/workflows/pytest.yml @@ -0,0 +1,40 @@ +name: Python Tests + +on: [pull_request] + +jobs: + test: + runs-on: ${{ matrix.os }} + + strategy: + max-parallel: 4 + matrix: + os: [ubuntu-latest, macos-latest] + python-version: ["3.8", "3.12"] + + steps: + # Checkout the latest commit associated with the PR + - uses: actions/checkout@v4 + + - name: Debug matrix value + run: echo "Python version is ${{ matrix.python-version }}" + + # Set up Miniconda + - name: Set up Miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true # Optional: update Conda to the latest version + python-version: ${{ matrix.python-version }} + + # Install any additional dependencies not included in the pyproject.toml file + - name: Install additional dependencies + run: | + conda install -y -c conda-forge -c bioconda bedtools imagemagick libmagic wand + pip install '.[tests]' # Install all dependencies, including test-specific ones + shell: bash -l {0} + + # Run pytest on the specified directory + - name: Run tests + run: | + pytest -p no:warnings tests + shell: bash -l {0} From b2596424b934ada948878511560c5ffa8c03260b Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 13:07:53 +1100 Subject: [PATCH 35/43] black fmt --- src/jcvi/apps/base.py | 2 +- src/jcvi/apps/biomart.py | 2 +- src/jcvi/apps/phylo.py | 12 ++++++------ src/jcvi/compara/synteny.py | 2 +- src/jcvi/formats/blast.py | 1 + src/jcvi/graphics/glyph.py | 14 +++++++------- src/jcvi/graphics/ribbon.py | 12 +++++++----- src/jcvi/projects/allmaps.py | 2 +- 8 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/jcvi/apps/base.py b/src/jcvi/apps/base.py index 80b9e658..2dcbe53b 100644 
--- a/src/jcvi/apps/base.py +++ b/src/jcvi/apps/base.py @@ -29,7 +29,7 @@ from typing import Any, Collection, List, Optional, Tuple, Union from urllib.parse import urlencode -#from optparse import OptionParser as OptionP, OptionGroup, SUPPRESS_HELP +# from optparse import OptionParser as OptionP, OptionGroup, SUPPRESS_HELP from natsort import natsorted diff --git a/src/jcvi/apps/biomart.py b/src/jcvi/apps/biomart.py index d5f9dcac..39ff2cb0 100644 --- a/src/jcvi/apps/biomart.py +++ b/src/jcvi/apps/biomart.py @@ -133,7 +133,7 @@ def __init__( port="80", name="ensembl", virtual_schema="default", - **attrib + **attrib, ): self.__dict__ = attrib.copy() diff --git a/src/jcvi/apps/phylo.py b/src/jcvi/apps/phylo.py index e93485e7..15fd2564 100644 --- a/src/jcvi/apps/phylo.py +++ b/src/jcvi/apps/phylo.py @@ -106,7 +106,7 @@ def __init__( outtreefile, command=FPHYLIP_BIN("ffitch"), intreefile=None, - **kwargs + **kwargs, ): self.datafile = datafile @@ -144,7 +144,7 @@ def __init__( a_ext, command=TREEFIX_BIN("treefix"), r=False, - **kwargs + **kwargs, ): self.input = input @@ -175,7 +175,7 @@ def run_treefix( a_ext=".fasta", o_ext=".dnd", n_ext=".treefix.dnd", - **kwargs + **kwargs, ): """ get the ML tree closest to the species tree @@ -187,7 +187,7 @@ def run_treefix( a_ext=a_ext, o=o_ext, n=n_ext, - **kwargs + **kwargs, ) outtreefile = input.rsplit(o_ext, 1)[0] + n_ext print("TreeFix:", cl, file=sys.stderr) @@ -435,7 +435,7 @@ def build_ml_raxml(alignment, outfile, work_dir=".", **kwargs): num_replicates=100, name="aln", working_dir=raxml_work, - **kwargs + **kwargs, ) logger.debug("Building ML tree using RAxML: %s" % raxml_cl) @@ -1048,7 +1048,7 @@ def _draw_trees( reroot=False, supportcolor="r", SH=SH, - **kwargs + **kwargs, ) root.set_xlim(0, 1) diff --git a/src/jcvi/compara/synteny.py b/src/jcvi/compara/synteny.py index 190b5b36..2e51711c 100755 --- a/src/jcvi/compara/synteny.py +++ b/src/jcvi/compara/synteny.py @@ -41,7 +41,7 @@ def __init__(self, filename, 
defaultcolor="#fb8072", header=False): data = [] highlight = [] for row in fp: - if row[0] == '#': + if row[0] == "#": continue hl = "*" in row # r* highlights the block in red color diff --git a/src/jcvi/formats/blast.py b/src/jcvi/formats/blast.py index 54b2ede0..ab632eec 100644 --- a/src/jcvi/formats/blast.py +++ b/src/jcvi/formats/blast.py @@ -26,6 +26,7 @@ except ImportError as e: logger.error(f"Failed to import cblast: {e}") from .pyblast import BlastLine + logger.warning("Fall back to Python implementation of BlastLine") diff --git a/src/jcvi/graphics/glyph.py b/src/jcvi/graphics/glyph.py index d6de01a1..43a21b95 100644 --- a/src/jcvi/graphics/glyph.py +++ b/src/jcvi/graphics/glyph.py @@ -76,7 +76,7 @@ def __init__(self, ax, x1, x2, t, lw=0, fill=False, fc="lavender", **kwargs): t, ha="center", bbox=dict(boxstyle="round", fill=fill, fc=fc, lw=lw), - **kwargs + **kwargs, ) @@ -167,7 +167,7 @@ def __init__( size=12, zorder=4, fontweight="bold", - **kwargs + **kwargs, ): width, height = get_asymmetry(ax, radius) circle = Ellipse((x, y), width, height, fc=fc, ec=fc, zorder=zorder, **kwargs) @@ -182,7 +182,7 @@ def __init__( size=size, zorder=zorder + 1, fontweight=fontweight, - **kwargs + **kwargs, ) @@ -273,7 +273,7 @@ def __init__( ec="gainsboro", lw=0, style="box", - **kwargs + **kwargs, ): """Draw a region that represent an interval feature, e.g. 
gene or repeat @@ -302,7 +302,7 @@ def __init__( fc=fc, ec=ec, lw=lw, - **kwargs + **kwargs, ) else: patch = Rectangle(p1, width, height, fc=fc, ec=ec, lw=lw, **kwargs) @@ -320,7 +320,7 @@ def __init__( fc="w", lw=0, alpha=0.1, - **kwargs + **kwargs, ) ) @@ -357,7 +357,7 @@ def __init__( tip=0.0025, color="k", shadow=False, - **kwargs + **kwargs, ): super().__init__(ax) # Figure out the polygon vertices first diff --git a/src/jcvi/graphics/ribbon.py b/src/jcvi/graphics/ribbon.py index b21a78e8..42330cb2 100644 --- a/src/jcvi/graphics/ribbon.py +++ b/src/jcvi/graphics/ribbon.py @@ -21,11 +21,12 @@ import numpy as np import sys -#from jcvi.formats.base import DictFile +# from jcvi.formats.base import DictFile from jcvi.apps.base import OptionParser, logger from jcvi.compara.synteny import BlockFile from jcvi.formats.bed import Bed -from jcvi.graphics.base import (AbstractLayout, +from jcvi.graphics.base import ( + AbstractLayout, markup, mpl, Path, @@ -33,7 +34,7 @@ plt, savefig, ) -from jcvi.graphics.glyph import Glyph, RoundLabel #, GeneGlyph +from jcvi.graphics.glyph import Glyph, RoundLabel # , GeneGlyph from jcvi.utils.cbook import human_size @@ -223,6 +224,7 @@ def __init__( def cv(t): return xstart + abs(t - startbp) / scale + hidden = layout.hidden # Plot Chromosome Bar @@ -368,7 +370,7 @@ def cv(t): loc_label, color="lightslategrey", size=10, - **kwargs + **kwargs, ) else: ax.text(lx, ly, chr_label, color=layout.color, **kwargs) @@ -817,7 +819,7 @@ def main(): # Check for data files if len(args) != 3: - logger.error('Requires 3 data file args.') + logger.error("Requires 3 data file args.") sys.exit(not p.print_help()) # Unpack data file paths diff --git a/src/jcvi/projects/allmaps.py b/src/jcvi/projects/allmaps.py index edf714c3..a67d8067 100644 --- a/src/jcvi/projects/allmaps.py +++ b/src/jcvi/projects/allmaps.py @@ -256,7 +256,7 @@ def estimategaps(args): sum(markers) / 2, ypos + pad, "Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)", - **fontprop 
+ **fontprop, ) ypos = y - pad From 0fe133634578cb0574f2fd10f620e76617eb552b Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 13:32:59 +1100 Subject: [PATCH 36/43] use miniforge --- .github/workflows/pytest.yml | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index ce1514fd..57a9af90 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -16,25 +16,26 @@ jobs: # Checkout the latest commit associated with the PR - uses: actions/checkout@v4 - - name: Debug matrix value - run: echo "Python version is ${{ matrix.python-version }}" - - # Set up Miniconda - - name: Set up Miniconda - uses: conda-incubator/setup-miniconda@v2 + # Set up Miniforge + - name: Set up Miniforge Python ${{ matrix.python-version }} + uses: conda-incubator/setup-miniconda@v3 with: - auto-update-conda: true # Optional: update Conda to the latest version + miniforge-version: "latest" python-version: ${{ matrix.python-version }} + auto-activate-base: true + channels: conda-forge,bioconda # Install any additional dependencies not included in the pyproject.toml file - name: Install additional dependencies + shell: bash -l {0} run: | conda install -y -c conda-forge -c bioconda bedtools imagemagick libmagic wand pip install '.[tests]' # Install all dependencies, including test-specific ones - shell: bash -l {0} + # Run pytest on the specified directory - name: Run tests + shell: bash -l {0} run: | - pytest -p no:warnings tests - shell: bash -l {0} + pytest -p no:warnings tests/ + From c224ef46b491d43459d50ca64f6e860149001c50 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 13:45:29 +1100 Subject: [PATCH 37/43] switch run order --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 57a9af90..6e835903 100644 --- 
a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -9,7 +9,7 @@ jobs: strategy: max-parallel: 4 matrix: - os: [ubuntu-latest, macos-latest] + os: [macos-latest, ubuntu-latest] python-version: ["3.8", "3.12"] steps: From b7c6810c8103e37b7297ac6e617223cd0c163f4c Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 13:52:42 +1100 Subject: [PATCH 38/43] handle testing in build action --- .github/workflows/build.yml | 5 ++--- .github/workflows/pytest.yml | 41 ------------------------------------ 2 files changed, 2 insertions(+), 44 deletions(-) delete mode 100644 .github/workflows/pytest.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2cab74ba..65af5d3e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -9,7 +9,7 @@ jobs: max-parallel: 4 matrix: os: [ubuntu-latest, macos-latest] - python-version: ["3.8", "3.10"] + python-version: ["3.8", "3.10", "3.12"] steps: - uses: actions/checkout@v4 @@ -31,9 +31,8 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -e . 
+ pip install -e '.[tests]' - name: Test with pytest run: | - pip install PyYAML pytest pytest-cov pytest-benchmark mock pytest --cov=jcvi tests diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml deleted file mode 100644 index 6e835903..00000000 --- a/.github/workflows/pytest.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: Python Tests - -on: [pull_request] - -jobs: - test: - runs-on: ${{ matrix.os }} - - strategy: - max-parallel: 4 - matrix: - os: [macos-latest, ubuntu-latest] - python-version: ["3.8", "3.12"] - - steps: - # Checkout the latest commit associated with the PR - - uses: actions/checkout@v4 - - # Set up Miniforge - - name: Set up Miniforge Python ${{ matrix.python-version }} - uses: conda-incubator/setup-miniconda@v3 - with: - miniforge-version: "latest" - python-version: ${{ matrix.python-version }} - auto-activate-base: true - channels: conda-forge,bioconda - - # Install any additional dependencies not included in the pyproject.toml file - - name: Install additional dependencies - shell: bash -l {0} - run: | - conda install -y -c conda-forge -c bioconda bedtools imagemagick libmagic wand - pip install '.[tests]' # Install all dependencies, including test-specific ones - - - # Run pytest on the specified directory - - name: Run tests - shell: bash -l {0} - run: | - pytest -p no:warnings tests/ - From 7b3b890ca190e33db14a6c4cfb1946d56fa4ffb3 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 14:04:55 +1100 Subject: [PATCH 39/43] black fmt --- build.py | 3 ++- setup.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/build.py b/build.py index 1bcaac52..ea014d38 100644 --- a/build.py +++ b/build.py @@ -1,8 +1,9 @@ import subprocess from hatchling.builders.hooks.plugin.interface import BuildHookInterface + class CustomBuildHook(BuildHookInterface): def initialize(self, version, build_data): # Run setup.py build_ext before main build subprocess.check_call(["python", "setup.py", "build_ext", 
"--inplace"]) - return super().initialize(version, build_data) \ No newline at end of file + return super().initialize(version, build_data) diff --git a/setup.py b/setup.py index ceb375e8..5fe88135 100644 --- a/setup.py +++ b/setup.py @@ -14,9 +14,9 @@ extra_compile_args=["-O3"], ), Extension( - "jcvi.formats.cblast", - ["src/jcvi/formats/cblast.pyx"], - extra_compile_args=["-O3"] + "jcvi.formats.cblast", + ["src/jcvi/formats/cblast.pyx"], + extra_compile_args=["-O3"], ), ] From a8927ec2819d34d8411f8a44b9df7da1f41366ca Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 14:33:42 +1100 Subject: [PATCH 40/43] Add cython to deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index f79b6d4a..2f9862f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "boto3", "brewer2mpl", "CrossMap", + "cython", "deap", "ete3", "ftpretty", From 429ee45ccdcb1aa1c954b829b26cb9b87d036b1d Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 15:11:05 +1100 Subject: [PATCH 41/43] typo --- src/jcvi/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jcvi/__init__.py b/src/jcvi/__init__.py index dbd33d8d..e03ec50e 100644 --- a/src/jcvi/__init__.py +++ b/src/jcvi/__init__.py @@ -15,7 +15,7 @@ from ._version import __version__ # noqa except ImportError as exc: # pragma: no cover raise ImportError( - "Failed to find (autogenerated) version.py. " + "Failed to find (autogenerated) _version.py. " "This might be because you are installing from GitHub's tarballs, " "use the PyPI ones." 
) from exc From bba6f133ae778e3521431b8a3871e0498f78047d Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 15:11:33 +1100 Subject: [PATCH 42/43] Add cli entrypoint to check version --- pyproject.toml | 5 ++++- src/jcvi/cli.py | 12 ++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 src/jcvi/cli.py diff --git a/pyproject.toml b/pyproject.toml index 2f9862f9..a87b262b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,12 +79,15 @@ tests = [ [project.urls] homepage = "http://github.com/tanghaibao/jcvi" +# Command-line script entry point +[project.scripts] +jcvi = "jcvi.cli:main" + [tool.hatch.metadata] allow-direct-references = true [tool.hatch.build] packages = ["src/jcvi"] -#source = "src" [tool.hatch.version] source = "vcs" diff --git a/src/jcvi/cli.py b/src/jcvi/cli.py new file mode 100644 index 00000000..b55e091a --- /dev/null +++ b/src/jcvi/cli.py @@ -0,0 +1,12 @@ +# cli.py +import argparse +from . import __version__ + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--version', action='version', + version=f'%(prog)s {__version__}') + args = parser.parse_args() + +if __name__ == '__main__': + main() \ No newline at end of file From b72b2c5ecea787ba719f49af6a7c7e0b9b610e23 Mon Sep 17 00:00:00 2001 From: Adam Taranto Date: Sat, 4 Jan 2025 15:11:47 +1100 Subject: [PATCH 43/43] update readme --- README.md | 56 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index e2d200da..e6767f86 100644 --- a/README.md +++ b/README.md @@ -104,23 +104,25 @@ full-fledged applications. ## Dependencies -Following are a list of third-party python packages that are used by -some routines in the library. These dependencies are _not_ mandatory -since they are only used by a few modules. +JCVI requires Python3 between v3.8 and v3.12. 
-- [Biopython](http://www.biopython.org) -- [numpy](http://numpy.scipy.org) -- [matplotlib](http://matplotlib.org/) +A few modules may ask for locations of external programs, +if the executable cannot be found in your `PATH`. -There are other Python modules here and there in various scripts. The -best way is to install them via `pip install` when you see -`ImportError`. +The external programs that are often used are: + +- [Kent tools](http://hgdownload.cse.ucsc.edu/admin/jksrc.zip) +- [BEDTOOLS](http://code.google.com/p/bedtools/) +- [EMBOSS](http://emboss.sourceforge.net/) ## Installation -**Installing jcvi in a Conda environment:** +**Installing JCVI in a Conda environment:** + +You can create a Conda environment with Python 3.12 and basic dependencies for JCVI using the YAML files in this repo. + +If you are new to Conda, we recommend the [Miniforge](https://conda-forge.org/download/) distribution. -You can create a Python 3.12 environment with basic dependencies for JCVI using the YAML files in this repo. ```bash conda env create -f environment.yml @@ -130,7 +132,7 @@ conda activate jcvi Note: If you are using a Mac with an ARM64 (Apple Silicon) processor, some dependencies are not currently available from Bioconda for this architecture. -You can instead create a virtual OSX64 env like this: +You can instead create a virtual OSX64 (intel) env like this: ```bash conda env create -f env_osx64.yml @@ -140,37 +142,39 @@ conda activate jcvi-osx64 After activating the Conda environment install JCVI using one of the following options. + **Installation options:** -pip install the latest development version directly from this repo. +1) Use pip to install the latest development version directly from this repo. -```console +```bash pip install git+git://github.com/tanghaibao/jcvi.git ``` -Install latest release from PyPi. +2) Install latest release from PyPi. 
-```console +```bash pip install jcvi ``` -Alternatively, if you want to install manually: +3) Alternatively, if you want to install in development mode. -```console -cd ~/code # or any directory of your choice +```bash git clone git://github.com/tanghaibao/jcvi.git && cd jcvi pip install -e '.[tests]' ``` -In addition, a few module might ask for locations of external programs, -if the extended cannot be found in your `PATH`. The external programs -that are often used are: +**Test Installation:** -- [Kent tools](http://hgdownload.cse.ucsc.edu/admin/jksrc.zip) -- [BEDTOOLS](http://code.google.com/p/bedtools/) -- [EMBOSS](http://emboss.sourceforge.net/) +If installed successfully, you can check the version with: + +```bash +jcvi --version +``` + +Use `python -m` to call any of the modules installed with JCVI. -Most of the scripts in this package contains multiple actions. To use +Most of the modules in this package contains multiple actions. To use the `fasta` example: ```console