Skip to content

Commit

Permalink
Merge pull request #120 from pachterlab/devel
Browse files: browse the repository at this point in the history
merge devel into master
  • Loading branch information
Lioscro authored Jun 2, 2021
2 parents 67f3edc + 8c2baea commit 3cea79d
Show file tree
Hide file tree
Showing 27 changed files with 773 additions and 2,158 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
repos:
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.29.0
rev: v0.30.0
hooks:
- id: yapf
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
- repo: https://github.com/pycqa/flake8
rev: 3.9.0
hooks:
- id: flake8
2 changes: 1 addition & 1 deletion Makefile
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ check:
yapf -r --diff kb_python && echo OK

build:
python setup.py sdist bdist_wheel
python setup.py sdist

docs:
sphinx-build -a docs docs/_build
Expand Down
129 changes: 19 additions & 110 deletions kb_python/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
import shutil
from collections import namedtuple

import ngs_tools as ngs

PACKAGE_PATH = os.path.abspath(os.path.dirname(__file__))
PLATFORM = platform.system().lower()
BINS_DIR = 'bins'

TEMP_DIR = 'tmp'
DRY = False
VALIDATE = True
CHUNK_SIZE = 1024 * 1024 * 4 # Download files in chunks of 4 MiB


def get_provided_kallisto_path():
Expand Down Expand Up @@ -50,128 +51,36 @@ def get_provided_bustools_path():
BUSTOOLS_PATH = get_provided_bustools_path()

# Technology to file position mapping
Technology = namedtuple(
'Technology', [
'name', 'description', 'nfiles', 'reads_file', 'umi_positions',
'barcode_positions', 'whitelist_archive', 'map_archive'
]
)
WHITELIST_DIR = 'whitelists'
MAP_DIR = 'maps'
Technology = namedtuple('Technology', ['name', 'description', 'chemistry'])
TECHNOLOGIES = [
Technology('10XV1', '10x version 1', ngs.chemistry.get_chemistry('10xv1')),
Technology('10XV2', '10x version 2', ngs.chemistry.get_chemistry('10xv2')),
Technology('10XV3', '10x version 3', ngs.chemistry.get_chemistry('10xv3')),
Technology('CELSEQ', 'CEL-Seq', ngs.chemistry.get_chemistry('celseq')),
Technology(
'10XV1',
'10x version 1',
3,
2,
[(1, 0, 10)],
[(0, 0, 14)],
'10xv1_whitelist.txt.gz',
None,
),
Technology(
'10XV2',
'10x version 2',
2,
1,
[(0, 16, 26)],
[(0, 0, 16)],
'10xv2_whitelist.txt.gz',
None,
),
Technology(
'10XV3',
'10x version 3',
2,
1,
[(0, 16, 28)],
[(0, 0, 16)],
'10xv3_whitelist.txt.gz',
'10xv3_feature_barcode_map.txt.gz',
),
Technology(
'CELSEQ',
'CEL-Seq',
2,
1,
[(0, 8, 12)],
[(0, 0, 8)],
None,
None,
'CELSEQ2', 'CEL-SEQ version 2', ngs.chemistry.get_chemistry('celseq2')
),
Technology('DROPSEQ', 'DropSeq', ngs.chemistry.get_chemistry('dropseq')),
Technology(
'CELSEQ2',
'CEL-SEQ version 2',
2,
1,
[(0, 0, 6)],
[(0, 6, 12)],
None,
None,
'INDROPSV1', 'inDrops version 1',
ngs.chemistry.get_chemistry('indropsv1')
),
Technology(
'DROPSEQ',
'DropSeq',
2,
1,
[(0, 12, 20)],
[(0, 0, 12)],
None,
None,
'INDROPSV2', 'inDrops version 2',
ngs.chemistry.get_chemistry('indropsv2')
),
Technology(
'INDROPSV1',
'inDrops version 1',
2,
1,
[(0, 42, 48)],
[(0, 0, 11), (0, 30, 38)],
None,
None,
'INDROPSV3', 'inDrops version 3',
ngs.chemistry.get_chemistry('indropsv3')
),
Technology('SCRUBSEQ', 'SCRB-Seq', ngs.chemistry.get_chemistry('scrbseq')),
Technology(
'INDROPSV2',
'inDrops version 2',
2,
0,
[(1, 42, 48)],
[(1, 0, 11), (1, 30, 38)],
None,
None,
'SURECELL', 'SureCell for ddSEQ',
ngs.chemistry.get_chemistry('surecell')
),
Technology(
'INDROPSV3',
'inDrops version 3',
3,
2,
[(1, 8, 14)],
[(0, 0, 8), (1, 0, 8)],
'inDropsv3_whitelist.txt.gz',
None,
'SMARTSEQ', 'Smart-seq2', ngs.chemistry.get_chemistry('smartseq2')
),
Technology(
'SCRUBSEQ',
'SCRB-Seq',
2,
1,
[(0, 6, 16)],
[(0, 0, 6)],
None,
None,
),
Technology(
'SURECELL',
'SureCell for ddSEQ',
2,
1,
[(0, 51, 59)],
[(0, 0, 6), (0, 21, 27), (0, 42, 48)],
None,
None,
),
Technology(
'SMARTSEQ', 'Smart-seq2', 2, '0, 1 (paired)', [], [], None, None
)
]
TECHNOLOGIES_MAPPING = {t.name: t for t in TECHNOLOGIES}

Expand Down
7 changes: 4 additions & 3 deletions kb_python/count.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import json
import logging
import os
import re
from urllib.parse import urlparse
Expand Down Expand Up @@ -45,6 +44,7 @@
UNFILTERED_COUNTS_DIR,
WHITELIST_FILENAME,
)
from .logging import logger
from .report import render_report
from .utils import (
copy_map,
Expand All @@ -66,8 +66,6 @@
from .stats import STATS
from .validate import validate_files

logger = logging.getLogger(__name__)

INSPECT_PARSER = re.compile(r'^.*?(?P<count>[0-9]+)')


Expand Down Expand Up @@ -1014,6 +1012,7 @@ def convert_transcripts_to_genes(txnames_path, t2g_path, genes_path):
return genes_path


@logger.namespaced('count')
def count(
index_paths,
t2g_path,
Expand Down Expand Up @@ -1302,6 +1301,7 @@ def count(
return results


@logger.namespaced('count_smartseq')
def count_smartseq(
index_paths,
t2g_path,
Expand Down Expand Up @@ -1397,6 +1397,7 @@ def count_smartseq(
return results


@logger.namespaced('count_lamanno')
def count_velocity(
index_paths,
t2g_path,
Expand Down
13 changes: 4 additions & 9 deletions kb_python/dry/utils.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@
import tempfile

from ..config import (
MAP_DIR,
PACKAGE_PATH,
PLATFORM,
TECHNOLOGIES_MAPPING,
WHITELIST_DIR,
UnsupportedOSException,
)

Expand Down Expand Up @@ -66,12 +63,10 @@ def copy_whitelist(technology, out_dir):
"""Dry version of `utils.copy_whitelist`.
"""
technology = TECHNOLOGIES_MAPPING[technology.upper()]
archive_path = os.path.join(
PACKAGE_PATH, WHITELIST_DIR, technology.whitelist_archive
)
archive_path = technology.chemistry.whitelist_path
whitelist_path = os.path.join(
out_dir,
os.path.splitext(technology.whitelist_archive)[0]
os.path.splitext(os.path.basename(archive_path))[0]
)
print('gzip -dc {} > {}'.format(archive_path, whitelist_path))
return whitelist_path
Expand All @@ -81,10 +76,10 @@ def copy_map(technology, out_dir):
"""Dry version of `utils.copy_map`.
"""
technology = TECHNOLOGIES_MAPPING[technology.upper()]
archive_path = os.path.join(PACKAGE_PATH, MAP_DIR, technology.map_archive)
archive_path = technology.chemistry.feature_map_path
map_path = os.path.join(
out_dir,
os.path.splitext(technology.map_archive)[0]
os.path.splitext(os.path.basename(archive_path))[0]
)
print('gzip -dc {} > {}'.format(archive_path, map_path))
return map_path
Expand Down
Loading

0 comments on commit 3cea79d

Please sign in to comment.