Skip to content

Commit

Permalink
Merge pull request #115 from pachterlab/devel
Browse files Browse the repository at this point in the history
Merge devel into master
  • Loading branch information
Lioscro authored Apr 12, 2021
2 parents b8996b2 + 7f4ae6c commit 91e1833
Show file tree
Hide file tree
Showing 16 changed files with 365 additions and 907,697 deletions.
122 changes: 94 additions & 28 deletions kb_python/config.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import os
import platform
import shutil
from collections import namedtuple

PACKAGE_PATH = os.path.dirname(__file__)
PACKAGE_PATH = os.path.abspath(os.path.dirname(__file__))
PLATFORM = platform.system().lower()
BINS_DIR = 'bins'

Expand All @@ -11,6 +12,43 @@
VALIDATE = True
CHUNK_SIZE = 1024 * 1024 * 4 # Download files in chunks of 4 MiB (4 * 1024 * 1024 bytes)


def get_provided_kallisto_path():
    """Locate the kallisto binary that ships with this installation.

    The binary is expected at
    ``PACKAGE_PATH/BINS_DIR/PLATFORM/kallisto/<filename>``, where the
    filename is ``kallisto.exe`` on Windows and ``kallisto`` elsewhere.

    :return: path to the bundled binary, or `None` if no file exists there
    :rtype: str or None
    """
    if PLATFORM == 'windows':
        executable = 'kallisto.exe'
    else:
        executable = 'kallisto'

    candidate = os.path.join(
        PACKAGE_PATH, BINS_DIR, PLATFORM, 'kallisto', executable
    )
    return candidate if os.path.isfile(candidate) else None


def get_provided_bustools_path():
    """Locate the bustools binary that ships with this installation.

    The binary is expected at
    ``PACKAGE_PATH/BINS_DIR/PLATFORM/bustools/<filename>``, where the
    filename is ``bustools.exe`` on Windows and ``bustools`` elsewhere.

    :return: path to the bundled binary, or `None` if no file exists there
    :rtype: str or None
    """
    name = 'bustools.exe' if PLATFORM == 'windows' else 'bustools'
    location = os.path.join(PACKAGE_PATH, BINS_DIR, PLATFORM, 'bustools', name)

    # Only report the path when the file is actually present.
    if os.path.isfile(location):
        return location
    return None


# Binary paths. These hold the full paths to the kallisto and bustools
# binaries that should be called throughout the execution of the program, so
# they usually need to be set only once. They default to the binaries provided
# with the installation and are `None` when no provided binary is found.
# `set_kallisto_binary_path` / `set_bustools_binary_path` reassign them.
KALLISTO_PATH = get_provided_kallisto_path()
BUSTOOLS_PATH = get_provided_bustools_path()

# Technology to file position mapping
Technology = namedtuple(
'Technology', [
Expand Down Expand Up @@ -174,40 +212,68 @@ class UnsupportedOSException(Exception):
pass


def get_kallisto_binary_path():
"""Get the path to the platform-dependent Kallisto binary included with
the installation.
class NotExecutableException(Exception):
pass

:return: path to the binary
:rtype: str

def get_kallisto_binary_path():
"""Dummy function that simply returns the current value of KALLISTO_PATH.
"""
bin_filename = 'kallisto.exe' if PLATFORM == 'windows' else 'kallisto'
path = os.path.join(
PACKAGE_PATH, BINS_DIR, PLATFORM, 'kallisto', bin_filename
)
if not os.path.exists(path):
raise UnsupportedOSException(
'This operating system ({}) is not supported.'.format(PLATFORM)
)
return path
return KALLISTO_PATH


def get_bustools_binary_path():
"""Get the path to the platform-dependent Bustools binary included with
the installation.
"""Dummy function that simply returns the current value of BUSTOOLS_PATH.
"""
return BUSTOOLS_PATH

:return: path to the binary
:rtype: str

def set_kallisto_binary_path(path):
"""Helper function to set the KALLISTO_PATH variable. Automatically finds the
full path to the executable and sets that as KALLISTO_PATH.

:param path: command name or path; resolved with `shutil.which` first and,
failing that, accepted if it is an existing file
:raises Exception: if `path` is neither found by `shutil.which` nor an
existing file
:raises NotExecutableException: if the resolved path is not executable
"""
# NOTE(review): the nine lines below look like removed lines from the diff
# rendering (the old body of get_bustools_binary_path), not part of this
# function -- confirm against the actual file.
bin_filename = 'bustools.exe' if PLATFORM == 'windows' else 'bustools'
path = os.path.join(
PACKAGE_PATH, BINS_DIR, PLATFORM, 'bustools', bin_filename
)
if not os.path.exists(path):
raise UnsupportedOSException(
'This operating system ({}) is not supported.'.format(PLATFORM)
)
return path
global KALLISTO_PATH

# NOTE(review): `path` reaches `shutil.which` without a None check; confirm
# callers never pass None (e.g. when no provided binary was found).
shutil_path = shutil.which(path)
actual_path = None

# First, let `shutil.which` resolve it (e.g. an executable in the user's
# PATH); otherwise fall back to treating `path` as a plain file path.
if shutil_path:
actual_path = os.path.abspath(shutil_path)
elif os.path.isfile(path):
actual_path = os.path.abspath(path)
else:
raise Exception(f'Unable to resolve path {path}')

# Check that it is executable
if not os.access(actual_path, os.X_OK):
raise NotExecutableException(f'{actual_path} is not executable')

KALLISTO_PATH = actual_path


def set_bustools_binary_path(path):
    """Set BUSTOOLS_PATH to the absolute path of the given bustools binary.

    `path` is first resolved with `shutil.which` (so a bare command name on
    the user's PATH works); if that finds nothing, `path` is accepted as-is
    when it points to an existing file.

    :param path: command name or path to the bustools binary
    :type path: str

    :raises Exception: if `path` is empty/`None`, or is neither found by
                       `shutil.which` nor an existing file
    :raises NotExecutableException: if the resolved file is not executable
    """
    global BUSTOOLS_PATH

    # `shutil.which(None)` raises a TypeError. `None` is what callers hold
    # when no provided binary was found, so report it like any other
    # unresolvable path instead of leaking the TypeError.
    if not path:
        raise Exception(f'Unable to resolve path {path}')

    # First, let `shutil.which` resolve it (e.g. an executable in the user's
    # PATH); otherwise fall back to treating `path` as a plain file path.
    located = shutil.which(path)
    if located:
        actual_path = os.path.abspath(located)
    elif os.path.isfile(path):
        actual_path = os.path.abspath(path)
    else:
        raise Exception(f'Unable to resolve path {path}')

    # `shutil.which` only returns executables, so this check matters for the
    # plain-file fallback above.
    if not os.access(actual_path, os.X_OK):
        raise NotExecutableException(f'{actual_path} is not executable')

    BUSTOOLS_PATH = actual_path


def set_dry():
Expand Down
6 changes: 4 additions & 2 deletions kb_python/gtf.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class GTF:
(?P<group>.*) # groups
''', re.VERBOSE
)
GROUP_PARSER = re.compile(r'(?P<key>\S+?)\s*"(?P<value>.+?)"')
GROUP_PARSER = re.compile(r'(?P<key>\S+?)\s*"(?P<value>.+?)";?')

def __init__(self, gtf_path):
self.gtf_path = gtf_path
Expand All @@ -46,7 +46,9 @@ def parse_entry(line):
groupdict['start'] = int(groupdict['start'])
groupdict['end'] = int(groupdict['end'])
groupdict['group'] = dict(
GTF.GROUP_PARSER.findall(groupdict.get('group', ''))
GTF.GROUP_PARSER.findall(
groupdict.get('group', '').replace(' ', '')
)
)
if not groupdict['group']:
logger.warning(
Expand Down
89 changes: 59 additions & 30 deletions kb_python/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
PACKAGE_PATH,
REFERENCES_MAPPING,
set_dry,
set_bustools_binary_path,
set_kallisto_binary_path,
TECHNOLOGIES,
TEMP_DIR,
)
Expand Down Expand Up @@ -136,30 +138,39 @@ def parse_ref(parser, args, temp_dir='tmp'):
overwrite=args.overwrite,
temp_dir=temp_dir
)
elif args.workflow == 'kite':
ref_kite(
args.feature,
args.f1,
args.i,
args.g,
n=args.n,
k=args.k,
no_mismatches=args.no_mismatches,
overwrite=args.overwrite,
temp_dir=temp_dir
)
else:
ref(
args.fasta,
args.gtf,
args.f1,
args.i,
args.g,
n=args.n,
k=args.k,
overwrite=args.overwrite,
temp_dir=temp_dir
)
# Report extraneous options
velocity_only = ['f2', 'c1', 'c2', 'flank']
for arg in velocity_only:
if getattr(args, arg):
parser.error(
f'Option `{arg}` is not supported for workflow `{args.workflow}`'
)

if args.workflow == 'kite':
ref_kite(
args.feature,
args.f1,
args.i,
args.g,
n=args.n,
k=args.k,
no_mismatches=args.no_mismatches,
overwrite=args.overwrite,
temp_dir=temp_dir
)
else:
ref(
args.fasta,
args.gtf,
args.f1,
args.i,
args.g,
n=args.n,
k=args.k,
overwrite=args.overwrite,
temp_dir=temp_dir
)


def parse_count(parser, args, temp_dir='tmp'):
Expand Down Expand Up @@ -273,7 +284,7 @@ def parse_count(parser, args, temp_dir='tmp'):
fastq_1, fastq_2 = fastqs[i], (
fastqs[i + 1] if i + 1 < len(fastqs) else ''
)
cells[cell_id] = (fastq_2, fastq_2)
cells[cell_id] = (fastq_1, fastq_2)
logger.info('Found the following FASTQs:')
fastq_pairs = []
cell_ids = []
Expand Down Expand Up @@ -717,6 +728,10 @@ def setup_count_args(parser, parent):
def main():
"""Command-line entrypoint.
"""
# Get prepackaged kallisto and bustools paths.
kallisto_path = get_kallisto_binary_path()
bustools_path = get_bustools_binary_path()

# Main parser
parser = argparse.ArgumentParser(
description='kb_python {}'.format(__version__)
Expand Down Expand Up @@ -748,6 +763,18 @@ def main():
parent.add_argument(
'--verbose', help='Print debugging information', action='store_true'
)
parent.add_argument(
'--kallisto',
help=f'Path to kallisto binary to use (default: {kallisto_path})',
type=str,
default=kallisto_path
)
parent.add_argument(
'--bustools',
help=f'Path to bustools binary to use (default: {bustools_path})',
type=str,
default=bustools_path
)

# Command parsers
setup_info_args(subparsers, argparse.ArgumentParser(add_help=False))
Expand Down Expand Up @@ -819,12 +846,14 @@ def main():
))

logger.debug('Printing verbose output')
logger.debug(
'kallisto binary located at {}'.format(get_kallisto_binary_path())
)
logger.debug(
'bustools binary located at {}'.format(get_bustools_binary_path())
)

# Set binary paths
set_kallisto_binary_path(args.kallisto)
set_bustools_binary_path(args.bustools)

logger.debug(f'kallisto binary located at {get_kallisto_binary_path()}')
logger.debug(f'bustools binary located at {get_bustools_binary_path()}')

temp_dir = args.tmp or os.path.join(
args.o, TEMP_DIR
) if 'o' in args else TEMP_DIR
Expand Down
31 changes: 26 additions & 5 deletions kb_python/stats.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import datetime as dt
import json
import os
import sys

from . import __version__
from .config import is_dry
from .config import (
get_bustools_binary_path,
get_kallisto_binary_path,
is_dry,
)
from .dry import dummy_function
from .dry import dryable

Expand All @@ -13,8 +18,9 @@ class Stats:
"""

def __init__(self):
self.kallisto_version = None
self.bustools_version = None
self.workdir = None
self.kallisto = None
self.bustools = None
self.start_time = None
self.call = None
self.commands = []
Expand All @@ -26,12 +32,24 @@ def __init__(self):
def start(self):
"""Start collecting statistics.
Sets start time, the command line call,
and the commands array to an empty list.
Sets start time, the command line call, and the commands array to an empty list.
Additionally, sets the kallisto and bustools paths and versions.
"""
self.start_time = dt.datetime.now()
self.call = ' '.join(sys.argv)
self.commands = []
self.workdir = os.getcwd()

# Import here to prevent circular imports
from .utils import get_bustools_version, get_kallisto_version
self.kallisto = {
'path': get_kallisto_binary_path(),
'version': '.'.join(str(i) for i in get_kallisto_version())
}
self.bustools = {
'path': get_bustools_binary_path(),
'version': '.'.join(str(i) for i in get_bustools_version())
}

def command(self, command, runtime=None):
"""Report a shell command was run.
Expand Down Expand Up @@ -71,7 +89,10 @@ def to_dict(self):
by the report-rendering functions.
"""
return {
'workdir': self.workdir,
'version': self.version,
'kallisto': self.kallisto,
'bustools': self.bustools,
'start_time': self.start_time.isoformat(),
'end_time': self.end_time.isoformat(),
'elapsed': self.elapsed,
Expand Down
Loading

0 comments on commit 91e1833

Please sign in to comment.