diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
old mode 100644
new mode 100755
index b5214e7..c49316c
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,9 +1,9 @@
 repos:
   - repo: https://github.com/pre-commit/mirrors-yapf
-    rev: v0.29.0
+    rev: v0.30.0
     hooks:
       - id: yapf
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v2.3.0
+  - repo: https://github.com/pycqa/flake8
+    rev: 3.9.0
     hooks:
       - id: flake8
diff --git a/Makefile b/Makefile
old mode 100644
new mode 100755
index f5e801c..dc7cb6b
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ check:
 	yapf -r --diff kb_python && echo OK
 
 build:
-	python setup.py sdist bdist_wheel
+	python setup.py sdist
 
 docs:
 	sphinx-build -a docs docs/_build
diff --git a/kb_python/config.py b/kb_python/config.py
index 535f3b5..d858657 100755
--- a/kb_python/config.py
+++ b/kb_python/config.py
@@ -3,6 +3,8 @@
 import shutil
 from collections import namedtuple
 
+import ngs_tools as ngs
+
 PACKAGE_PATH = os.path.abspath(os.path.dirname(__file__))
 PLATFORM = platform.system().lower()
 BINS_DIR = 'bins'
@@ -10,7 +12,6 @@
 TEMP_DIR = 'tmp'
 DRY = False
 VALIDATE = True
-CHUNK_SIZE = 1024 * 1024 * 4  # Download files in chunks of 4 Mb
 
 
 def get_provided_kallisto_path():
@@ -50,128 +51,36 @@ def get_provided_bustools_path():
 BUSTOOLS_PATH = get_provided_bustools_path()
 
 # Technology to file position mapping
-Technology = namedtuple(
-    'Technology', [
-        'name', 'description', 'nfiles', 'reads_file', 'umi_positions',
-        'barcode_positions', 'whitelist_archive', 'map_archive'
-    ]
-)
-WHITELIST_DIR = 'whitelists'
-MAP_DIR = 'maps'
+Technology = namedtuple('Technology', ['name', 'description', 'chemistry'])
 TECHNOLOGIES = [
+    Technology('10XV1', '10x version 1', ngs.chemistry.get_chemistry('10xv1')),
+    Technology('10XV2', '10x version 2', ngs.chemistry.get_chemistry('10xv2')),
+    Technology('10XV3', '10x version 3', ngs.chemistry.get_chemistry('10xv3')),
+    Technology('CELSEQ', 'CEL-Seq', ngs.chemistry.get_chemistry('celseq')),
     Technology(
-        '10XV1',
-        '10x version 1',
-        3,
-        2,
-        [(1, 0, 10)],
-        [(0, 0, 14)],
-        '10xv1_whitelist.txt.gz',
-        None,
-    ),
-    Technology(
-        '10XV2',
-        '10x version 2',
-        2,
-        1,
-        [(0, 16, 26)],
-        [(0, 0, 16)],
-        '10xv2_whitelist.txt.gz',
-        None,
-    ),
-    Technology(
-        '10XV3',
-        '10x version 3',
-        2,
-        1,
-        [(0, 16, 28)],
-        [(0, 0, 16)],
-        '10xv3_whitelist.txt.gz',
-        '10xv3_feature_barcode_map.txt.gz',
-    ),
-    Technology(
-        'CELSEQ',
-        'CEL-Seq',
-        2,
-        1,
-        [(0, 8, 12)],
-        [(0, 0, 8)],
-        None,
-        None,
+        'CELSEQ2', 'CEL-SEQ version 2', ngs.chemistry.get_chemistry('celseq2')
     ),
+    Technology('DROPSEQ', 'DropSeq', ngs.chemistry.get_chemistry('dropseq')),
     Technology(
-        'CELSEQ2',
-        'CEL-SEQ version 2',
-        2,
-        1,
-        [(0, 0, 6)],
-        [(0, 6, 12)],
-        None,
-        None,
+        'INDROPSV1', 'inDrops version 1',
+        ngs.chemistry.get_chemistry('indropsv1')
     ),
     Technology(
-        'DROPSEQ',
-        'DropSeq',
-        2,
-        1,
-        [(0, 12, 20)],
-        [(0, 0, 12)],
-        None,
-        None,
+        'INDROPSV2', 'inDrops version 2',
+        ngs.chemistry.get_chemistry('indropsv2')
     ),
     Technology(
-        'INDROPSV1',
-        'inDrops version 1',
-        2,
-        1,
-        [(0, 42, 48)],
-        [(0, 0, 11), (0, 30, 38)],
-        None,
-        None,
+        'INDROPSV3', 'inDrops version 3',
+        ngs.chemistry.get_chemistry('indropsv3')
     ),
+    Technology('SCRUBSEQ', 'SCRB-Seq', ngs.chemistry.get_chemistry('scrbseq')),
     Technology(
-        'INDROPSV2',
-        'inDrops version 2',
-        2,
-        0,
-        [(1, 42, 48)],
-        [(1, 0, 11), (1, 30, 38)],
-        None,
-        None,
+        'SURECELL', 'SureCell for ddSEQ',
+        ngs.chemistry.get_chemistry('surecell')
     ),
     Technology(
-        'INDROPSV3',
-        'inDrops version 3',
-        3,
-        2,
-        [(1, 8, 14)],
-        [(0, 0, 8), (1, 0, 8)],
-        'inDropsv3_whitelist.txt.gz',
-        None,
+        'SMARTSEQ', 'Smart-seq2', ngs.chemistry.get_chemistry('smartseq2')
     ),
-    Technology(
-        'SCRUBSEQ',
-        'SCRB-Seq',
-        2,
-        1,
-        [(0, 6, 16)],
-        [(0, 0, 6)],
-        None,
-        None,
-    ),
-    Technology(
-        'SURECELL',
-        'SureCell for ddSEQ',
-        2,
-        1,
-        [(0, 51, 59)],
-        [(0, 0, 6), (0, 21, 27), (0, 42, 48)],
-        None,
-        None,
-    ),
-    Technology(
-        'SMARTSEQ', 'Smart-seq2', 2, '0, 1 (paired)', [], [], None, None
-    )
 ]
 TECHNOLOGIES_MAPPING = {t.name: t for t in TECHNOLOGIES}
 
diff --git a/kb_python/count.py b/kb_python/count.py
index 7c517d5..a3839fb 100755
--- a/kb_python/count.py
+++ b/kb_python/count.py
@@ -1,5 +1,4 @@
 import json
-import logging
 import os
 import re
 from urllib.parse import urlparse
@@ -45,6 +44,7 @@
     UNFILTERED_COUNTS_DIR,
     WHITELIST_FILENAME,
 )
+from .logging import logger
 from .report import render_report
 from .utils import (
     copy_map,
@@ -66,8 +66,6 @@
 from .stats import STATS
 from .validate import validate_files
 
-logger = logging.getLogger(__name__)
-
 INSPECT_PARSER = re.compile(r'^.*?(?P<count>[0-9]+)')
 
 
@@ -1014,6 +1012,7 @@ def convert_transcripts_to_genes(txnames_path, t2g_path, genes_path):
     return genes_path
 
 
+@logger.namespaced('count')
 def count(
     index_paths,
     t2g_path,
@@ -1302,6 +1301,7 @@ def count(
     return results
 
 
+@logger.namespaced('count_smartseq')
 def count_smartseq(
     index_paths,
     t2g_path,
@@ -1397,6 +1397,7 @@ def count_smartseq(
     return results
 
 
+@logger.namespaced('count_lamanno')
 def count_velocity(
     index_paths,
     t2g_path,
diff --git a/kb_python/dry/utils.py b/kb_python/dry/utils.py
old mode 100644
new mode 100755
index 6b6f620..44d5087
--- a/kb_python/dry/utils.py
+++ b/kb_python/dry/utils.py
@@ -2,11 +2,8 @@
 import tempfile
 
 from ..config import (
-    MAP_DIR,
-    PACKAGE_PATH,
     PLATFORM,
     TECHNOLOGIES_MAPPING,
-    WHITELIST_DIR,
     UnsupportedOSException,
 )
 
@@ -66,12 +63,10 @@ def copy_whitelist(technology, out_dir):
     """Dry version of `utils.copy_whitelist`.
     """
     technology = TECHNOLOGIES_MAPPING[technology.upper()]
-    archive_path = os.path.join(
-        PACKAGE_PATH, WHITELIST_DIR, technology.whitelist_archive
-    )
+    archive_path = technology.chemistry.whitelist_path
     whitelist_path = os.path.join(
         out_dir,
-        os.path.splitext(technology.whitelist_archive)[0]
+        os.path.splitext(os.path.basename(archive_path))[0]
     )
     print('gzip -dc {} > {}'.format(archive_path, whitelist_path))
     return whitelist_path
@@ -81,10 +76,10 @@ def copy_map(technology, out_dir):
     """Dry version of `utils.copy_map`.
     """
     technology = TECHNOLOGIES_MAPPING[technology.upper()]
-    archive_path = os.path.join(PACKAGE_PATH, MAP_DIR, technology.map_archive)
+    archive_path = technology.chemistry.feature_map_path
     map_path = os.path.join(
         out_dir,
-        os.path.splitext(technology.map_archive)[0]
+        os.path.splitext(os.path.basename(archive_path))[0]
     )
     print('gzip -dc {} > {}'.format(archive_path, map_path))
     return map_path
diff --git a/kb_python/fasta.py b/kb_python/fasta.py
deleted file mode 100755
index 39caf5d..0000000
--- a/kb_python/fasta.py
+++ /dev/null
@@ -1,669 +0,0 @@
-import itertools
-import logging
-import re
-
-import pandas as pd
-
-from .gtf import GTF
-from .utils import open_as_text
-
-logger = logging.getLogger(__name__)
-
-
-class FASTA:
-    """Utility class to easily read and manipulate FASTA files.
-
-    :param fasta_path: path to FASTA file
-    :type fasta_path: str
-    """
-    PARSER = re.compile(r'^>(?P<sequence_id>\S+)(?P<group>.*)')
-    GROUP_PARSER = re.compile(r'(?P<key>\S+?):(?P<value>\S+)')
-    COMPLEMENT = {
-        'A': 'T',
-        'C': 'G',
-        'G': 'C',
-        'T': 'A',
-        'Y': 'R',
-        'R': 'Y',
-        'W': 'W',
-        'S': 'S',
-        'K': 'M',
-        'M': 'K',
-        'D': 'H',
-        'V': 'B',
-        'H': 'D',
-        'B': 'V',
-        'N': 'N',
-    }
-    SEQUENCE_PARSER = re.compile(r'[^atcgATCG]')
-
-    def __init__(self, fasta_path):
-        self.fasta_path = fasta_path
-
-    @staticmethod
-    def make_header(seq_id, attributes):
-        """Create a correctly-formatted FASTA header with the given sequence ID
-        and attributes.
-
-        :param seq_id: sequence ID
-        :type seq_id: str
-        :param attributes: list of key-value pairs corresponding to attributes
-                           of this sequence
-        :type attributes: list
-
-        :return: FASTA header
-        :rtype: str
-        """
-        return '>{} {}'.format(
-            seq_id, ' '.join('{}:{}'.format(k, v) for k, v in attributes)
-        )
-
-    @staticmethod
-    def parse_header(line):
-        """Parse information from a FASTA header.
-
-        :param line: FASTA header line
-        :type line: str
-
-        :return: parsed information
-        :rtype: dict
-        """
-        match = FASTA.PARSER.match(line)
-        if match:
-            groupdict = match.groupdict()
-            groupdict['group'] = dict(
-                FASTA.GROUP_PARSER.findall(groupdict.get('group', ''))
-            )
-            return groupdict
-        return None
-
-    @staticmethod
-    def reverse_complement(sequence):
-        """Get the reverse complement of the given DNA sequence.
-
-        :param sequence: DNA sequence
-        :type sequence: str
-
-        :return: reverse complement
-        :rtype: str
-        """
-        return ''.join(FASTA.COMPLEMENT[b] for b in reversed(sequence.upper()))
-
-    def entries(self, parse=True):
-        """Generator that yields one FASTA entry (sequence ID + sequence) at a time.
-
-        :param parse: whether or not to parse the header into a dictionary,
-                      defaults to `True`
-        :type parse: bool, optional
-
-        :return: a generator that yields a tuple of the FASTA entry
-        :rtype: generator
-        """
-        with open_as_text(self.fasta_path, 'r') as f:
-            info = None
-            sequence = ''
-            for line in f:
-                if line.startswith('>'):
-                    if info:
-                        yield info, sequence
-                        sequence = ''
-
-                    info = FASTA.parse_header(line) if parse else line.strip()
-                else:
-                    sequence += line.strip()
-
-            if info:
-                yield info, sequence
-
-    def sort(self, out_path):
-        """Sort the FASTA file by sequence ID.
-
-        :param out_path: path to generate the sorted FASTA
-        :type out_path: str
-
-        :return: path to sorted FASTA file, set of chromosomes in FASTA file
-        :rtype: tuple
-        """
-        chromosomes = set()
-        to_sort = []
-        with open_as_text(self.fasta_path, 'r') as f:
-            position = 0
-            line = f.readline()
-
-            while line:
-                if line.startswith('>'):
-                    header = FASTA.parse_header(line)
-                    logger.debug(
-                        'Found FASTA entry {}'.format(header['sequence_id'])
-                    )
-                    to_sort.append([header['sequence_id'], position, None])
-                    chromosomes.add(header['sequence_id'])
-
-                position = f.tell()
-                line = f.readline()
-
-        logger.debug('Sorting {} FASTA entries'.format(len(to_sort)))
-        for i in range(len(to_sort) - 1):
-            to_sort[i][2] = to_sort[i + 1][1]
-        to_sort.sort()
-
-        logger.debug('Writing sorted FASTA {}'.format(out_path))
-        with open_as_text(self.fasta_path,
-                          'r') as fasta, open_as_text(out_path, 'w') as f:
-            for tup in to_sort:
-                logger.debug(f'Writing FASTA entry {tup[0]}')
-                start_position = tup[1]
-                end_position = tup[2]
-                fasta.seek(start_position)
-                if end_position is not None:
-                    while fasta.tell() < end_position:
-                        f.write(fasta.readline())
-                else:
-                    for line in fasta:
-                        f.write(line)
-
-        return out_path, chromosomes
-
-
-def generate_kite_fasta(feature_path, out_path, no_mismatches=False):
-    """Generate a FASTA file for feature barcoding with the KITE workflow.
-
-    This FASTA contains all sequences that are 1 hamming distance from the
-    provided barcodes. The file of barcodes must be a 2-column TSV containing
-    the barcode sequences in the first column and their corresponding feature
-    name in the second column. If hamming distance 1 variants collide for any
-    pair of barcodes, the hamming distance 1 variants for those barcodes are
-    not generated.
-
-    :param feature_path: path to TSV containing barcodes and feature names
-    :type feature_path: str
-    :param out_path: path to FASTA to generate
-    :type out_path: str
-    :param no_mismatches: whether to generate hamming distance 1 variants,
-                          defaults to `False`
-    :type no_mismatches: bool, optional
-
-    :raises Exception: if there are barcodes of different lengths
-    :raises Exception: if there are duplicate barcodes
-
-    :return: (path to generated FASTA, set of barcode lengths)
-    :rtype: tuple
-    """
-
-    def generate_mismatches(name, sequence):
-        """Helper function to generate 1 hamming distance mismatches.
-
-        :param name: name of the sequence
-        :type name: str
-        :param sequence: the sequence
-        :type sequence: str
-
-        :return: a generator that yields a tuple of (unique name, mismatched sequence)
-        :rtype: generator
-        """
-        sequence = sequence.upper()
-        for i in range(len(sequence)):
-            base = sequence[i]
-            before = sequence[:i]
-            after = sequence[i + 1:]
-
-            for j, different in enumerate([b for b in ['A', 'C', 'G', 'T']
-                                           if b != base]):
-                yield f'{name}-{i}.{j+1}', f'{before}{different}{after}'
-
-    df_features = pd.read_csv(
-        feature_path, sep='\t', header=None, names=['sequence', 'name']
-    )
-
-    lengths = set()
-    features = {}
-    variants = {}
-    # Generate all feature barcode variations before saving to check for collisions.
-    for i, row in df_features.iterrows():
-        # Check that the first column contains the sequence
-        # and the second column the feature name.
-        if FASTA.SEQUENCE_PARSER.search(row.sequence.upper()):
-            raise Exception((
-                'Encountered non-ATCG basepairs in barcode sequence {row.sequence}. '
-                'Does the first column contain the sequences and the second column the feature names?'
-            ))
-
-        lengths.add(len(row.sequence))
-        features[row['name']] = row.sequence
-        variants[row['name']] = {
-            name: seq
-            for name, seq in generate_mismatches(row['name'], row.sequence)
-            if not no_mismatches
-        }
-
-    # Check duplicate barcodes.
-    duplicates = set([
-        bc for bc in features.values() if list(features.values()).count(bc) > 1
-    ])
-    if len(duplicates) > 0:
-        raise Exception(
-            'Duplicate feature barcodes: {}'.format(' '.join(duplicates))
-        )
-    if len(lengths) > 1:
-        logger.warning(
-            'Detected barcodes of different lengths: {}'.format(
-                ','.join(str(l) for l in lengths)  # noqa
-            )
-        )
-    for f1, f2 in itertools.combinations(variants.keys(), 2):
-        v1 = variants[f1]
-        v2 = variants[f2]
-        if len(set.intersection(set(v1.values()), set(v2.values()))) > 0:
-            logger.warning((
-                f'Detected features with barcodes that are not >2 hamming distance apart: {f1}, {f2}. '
-                f'Hamming distance 1 variants for these barcodes will not be generated.'
-            ))
-            variants[f1] = {}
-            variants[f2] = {}
-
-    # Write FASTA
-    with open_as_text(out_path, 'w') as f:
-        for feature, barcode in features.items():
-            attributes = [('feature_id', feature)]
-            f.write(f'{FASTA.make_header(feature, attributes)}\n')
-            f.write(f'{barcode}\n')
-
-            for name, variant in variants[feature].items():
-                f.write(f'{FASTA.make_header(name, attributes)}\n')
-                f.write(f'{variant}\n')
-
-    return out_path, min(lengths)
-
-
-def generate_cdna_fasta(fasta_path, gtf_path, out_path, chromosomes=None):
-    """Generate a cDNA FASTA using the genome and GTF.
-
-    This function assumes the order in which the chromosomes appear in the
-    genome FASTA is identical to the order in which they appear in the GTF.
-    Additionally, the GTF must be sorted by start position.
-
-    :param fasta_path: path to genomic FASTA file
-    :type fasta_path: str
-    :param gtf_path: path to GTF file
-    :type gtf_path: str
-    :param out_path: path to cDNA FASTA to generate
-    :type out_path: str
-    :param chromosomes: set of chromosomes to generate sequences for. If not
-                        provided, sequences for all chromosomes are generated
-                        by default, defaults to `None`
-    :type chromosomes: set, optional
-
-    :return: path to generated cDNA FASTA
-    :rtype: str
-    """
-    fasta = FASTA(fasta_path)
-    gtf = GTF(gtf_path)
-    gtf_entries = gtf.entries()
-
-    count = 0
-    with open_as_text(out_path, 'w') as f:
-        previous_gtf_entry = None
-        for info, sequence in fasta.entries():
-            sequence_id = info['sequence_id']
-            if chromosomes and sequence_id not in chromosomes:
-                continue
-
-            logger.debug(
-                'Generating cDNA from chromosome {}'.format(sequence_id)
-            )
-
-            transcript_sequences = {}
-            transcript_infos = {}
-            while True:
-                try:
-                    gtf_entry = previous_gtf_entry or next(gtf_entries)
-                except StopIteration:
-                    break
-                previous_gtf_entry = None
-                chromosome = gtf_entry['seqname']
-                if chromosomes and chromosome not in chromosomes:
-                    continue
-
-                if sequence_id != chromosome:
-                    previous_gtf_entry = gtf_entry
-                    break
-
-                start = gtf_entry['start']
-                end = gtf_entry['end']
-                strand = gtf_entry['strand']
-                if gtf_entry['feature'] == 'exon':
-                    transcript_id = gtf_entry['group']['transcript_id']
-                    transcript_version = gtf_entry['group'].get(
-                        'transcript_version', None
-                    )
-
-                    transcript = '{}.{}'.format(
-                        transcript_id, transcript_version
-                    ) if transcript_version else transcript_id
-                    if transcript not in transcript_sequences:
-                        transcript_sequences[transcript] = ''
-                    transcript_sequences[transcript] += sequence[start - 1:end]
-                elif gtf_entry['feature'] == 'transcript':
-                    transcript_id = gtf_entry['group']['transcript_id']
-                    transcript_version = gtf_entry['group'].get(
-                        'transcript_version', None
-                    )
-                    transcript = '{}.{}'.format(
-                        transcript_id, transcript_version
-                    ) if transcript_version else transcript_id
-
-                    gene_id = gtf_entry['group']['gene_id']
-                    gene_version = gtf_entry['group'].get('gene_version', None)
-                    gene = '{}.{}'.format(
-                        gene_id, gene_version
-                    ) if gene_version else gene_id
-                    gene_name = gtf_entry['group'].get('gene_name', '')
-                    transcript_name = gtf_entry['group'].get(
-                        'transcript_name', ''
-                    )
-
-                    if transcript not in transcript_infos:
-                        attributes = [
-                            ('gene_id', gene),
-                            ('gene_name', gene_name),
-                            ('transcript_name', transcript_name),
-                            ('chr', chromosome),
-                            ('start', start),
-                            ('end', end),
-                            ('strand', strand),
-                        ]
-                        transcript_infos[transcript] = attributes
-
-            logger.debug(
-                f'Writing {len(transcript_sequences)} cDNA transcripts for chromosome {chromosome}'
-            )
-            # Validation.
-            info_unique = set(transcript_infos.keys()
-                              ) - set(transcript_sequences.keys())
-            sequences_unique = set(transcript_sequences.keys()
-                                   ) - set(transcript_infos.keys())
-            if info_unique:
-                raise Exception(
-                    'The following transcripts have "transcript" feature(s) but no '
-                    'corresponding "exon" feature: {}'.format(
-                        ', '.join(info_unique)
-                    )
-                )
-            if sequences_unique:
-                raise Exception(
-                    'The following transcripts have a "exon" feature but no '
-                    'corresponding "transcript" feature(s): {}'.format(
-                        ', '.join(sequences_unique)
-                    )
-                )
-
-            for transcript in sorted(transcript_sequences.keys()):
-                exon = transcript_sequences[transcript]
-                attributes = transcript_infos[transcript]
-                f.write(
-                    '{}\n'.format(FASTA.make_header(transcript, attributes))
-                )
-                f.write(
-                    '{}\n'.format(
-                        exon if dict(attributes)['strand'] ==
-                        '+' else FASTA.reverse_complement(exon)
-                    )
-                )
-                count += 1
-    logger.info(f'Wrote {count} cDNA transcripts')
-
-    return out_path
-
-
-def generate_intron_fasta(
-    fasta_path, gtf_path, out_path, chromosomes=None, flank=30
-):
-    """Generate an intron FASTA using the genome and GTF.
-
-    This function assumes the order in which the chromosomes appear in the
-    genome FASTA is identical to the order in which they appear in the GTF.
-    Additionally, the GTF must be sorted by start position.
-    The intron for a specific transcript is the collection of the following:
-    1. transcript - exons
-    2. 5' UTR
-    3. 3' UTR
-    Additionally, append 30-bp (k - 1 where k = 31) flanks to each intron,
-    combining sections that overlap into a single FASTA entry.
-
-    :param fasta_path: path to genomic FASTA file
-    :type fasta_path: str
-    :param gtf_path: path to GTF file
-    :type gtf_path: str
-    :param out_path: path to intron FASTA to generate
-    :type out_path: str
-    :param chromosomes: set of chromosomes to generate sequences for. If not
-                        provided, sequences for all chromosomes are generated
-                        by default, defaults to `None`
-    :type chromosomes: set, optional
-    :param flank: the size of intron flanks, in bases, defaults to `30`
-    :type flank: int, optional
-
-    :return: path to generated intron FASTA
-    :rtype: str
-    """
-    fasta = FASTA(fasta_path)
-    gtf = GTF(gtf_path)
-    gtf_entries = gtf.entries()
-
-    count = 0
-    with open_as_text(out_path, 'w') as f:
-        previous_gtf_entry = None
-        for info, sequence in fasta.entries():
-            sequence_id = info['sequence_id']
-            if chromosomes and sequence_id not in chromosomes:
-                continue
-            logger.debug(
-                'Generating introns from chromosome {}'.format(sequence_id)
-            )
-
-            transcript_exons = {}
-            transcript_infos = {}
-            while True:
-                try:
-                    gtf_entry = previous_gtf_entry or next(gtf_entries)
-                except StopIteration:
-                    break
-                previous_gtf_entry = None
-                chromosome = gtf_entry['seqname']
-                if chromosomes and chromosome not in chromosomes:
-                    continue
-
-                if sequence_id != chromosome:
-                    previous_gtf_entry = gtf_entry
-                    break
-
-                start = gtf_entry['start']
-                end = gtf_entry['end']
-                strand = gtf_entry['strand']
-                if gtf_entry['feature'] == 'exon':
-                    transcript_id = gtf_entry['group']['transcript_id']
-                    transcript_version = gtf_entry['group'].get(
-                        'transcript_version', None
-                    )
-                    transcript = '{}.{}'.format(
-                        transcript_id, transcript_version
-                    ) if transcript_version else transcript_id
-
-                    transcript_exons.setdefault(transcript,
-                                                []).append((start, end))
-                elif gtf_entry['feature'] == 'transcript':
-                    transcript_id = gtf_entry['group']['transcript_id']
-                    transcript_version = gtf_entry['group'].get(
-                        'transcript_version', None
-                    )
-                    transcript = '{}.{}'.format(
-                        transcript_id, transcript_version
-                    ) if transcript_version else transcript_id
-
-                    gene_id = gtf_entry['group']['gene_id']
-                    gene_version = gtf_entry['group'].get('gene_version', None)
-                    gene = '{}.{}'.format(
-                        gene_id, gene_version
-                    ) if gene_version else gene_id
-                    gene_name = gtf_entry['group'].get('gene_name', '')
-                    transcript_name = gtf_entry['group'].get(
-                        'transcript_name', ''
-                    )
-
-                    if transcript not in transcript_infos:
-                        attributes = [
-                            ('gene_id', gene),
-                            ('gene_name', gene_name),
-                            ('transcript_name', transcript_name),
-                            ('chr', chromosome),
-                            ('start', start),
-                            ('end', end),
-                            ('strand', strand),
-                        ]
-                        transcript_infos[transcript] = attributes
-
-            logger.debug((
-                f'Found {len(transcript_exons)} transcripts with exons for chromosome {chromosome}.'
-                'Generating introns from these transcripts'
-            ))
-            # Validation.
-            info_unique = set(transcript_infos.keys()
-                              ) - set(transcript_exons.keys())
-            sequences_unique = set(transcript_exons.keys()
-                                   ) - set(transcript_infos.keys())
-            if info_unique:
-                raise Exception(
-                    'The following transcripts have "exon" feature(s) but no '
-                    'corresponding "transcript" feature: {}'.format(
-                        ', '.join(info_unique)
-                    )
-                )
-            if sequences_unique:
-                raise Exception(
-                    'The following transcripts have a "transcript" feature but no '
-                    'corresponding "exon" feature(s): {}'.format(
-                        ', '.join(sequences_unique)
-                    )
-                )
-            for transcript in sorted(transcript_exons.keys()):
-                attributes = transcript_infos[transcript]
-
-                # Find transcript interval - all exon intervals
-                attributes_dict = dict(attributes)
-                transcript_interval = (
-                    attributes_dict['start'], attributes_dict['end']
-                )
-                introns = []
-                exons = list(sorted(transcript_exons[transcript]))
-                if exons:
-                    if exons[0][0] > transcript_interval[0]:
-                        introns.append(
-                            (transcript_interval[0], exons[0][0] - 1)
-                        )
-
-                    for i in range(len(exons) - 1):
-                        start = exons[i][1]
-                        end = exons[i + 1][0]
-                        introns.append((start + 1, end - 1))
-
-                    if exons[-1][1] < transcript_interval[1]:
-                        introns.append(
-                            (exons[-1][1] + 1, transcript_interval[1])
-                        )
-                else:
-                    introns.append(transcript_interval)
-
-                index = 1
-                flank_start = None
-                flank_end = None
-                for start, end in introns:
-                    if flank_start is None:
-                        flank_start = max(start - flank, transcript_interval[0])
-                    if flank_end is None or start - flank <= flank_end:
-                        flank_end = min(end + flank, transcript_interval[1])
-                    else:
-                        intron = sequence[flank_start - 1:flank_end]
-                        f.write(
-                            '{}\n'.format(
-                                FASTA.make_header(
-                                    '{}-I.{}'.format(transcript, index),
-                                    attributes
-                                )
-                            )
-                        )
-                        f.write(
-                            '{}\n'.format(
-                                intron if dict(attributes)['strand'] ==
-                                '+' else FASTA.reverse_complement(intron)
-                            )
-                        )
-                        count += 1
-                        index += 1
-                        flank_start = max(start - flank, transcript_interval[0])
-                        flank_end = min(end + flank, transcript_interval[1])
-                if flank_start is not None and flank_end is not None:
-                    intron = sequence[flank_start - 1:flank_end]
-                    f.write(
-                        '{}\n'.format(
-                            FASTA.make_header(
-                                '{}-I.{}'.format(transcript, index), attributes
-                            )
-                        )
-                    )
-                    f.write(
-                        '{}\n'.format(
-                            intron if dict(attributes)['strand'] ==
-                            '+' else FASTA.reverse_complement(intron)
-                        )
-                    )
-                    count += 1
-                    index += 1
-    logger.info(f'Wrote {count} intron sequences')
-
-    return out_path
-
-
-def generate_spliced_fasta(fasta_path, gtf_path, out_path):
-    """Generate a spliced FASTA using the genome and GTF.
-
-    This function assumes the order in which the chromosomes appear in the
-    genome FASTA is identical to the order in which they appear in the GTF.
-    Additionally, the GTF must be sorted by start position.
-    The spliced FASTA contains entries of length 2 * (k - 1) for k = 31,
-    centered around exon-exon splice junctions (any overlapping regions are
-    collapsed).
-
-    :param fasta_path: path to genomic FASTA file
-    :type fasta_path: str
-    :param gtf_path: path to GTF file
-    :type gtf_path: str
-    :param out_path: path to spliced FASTA to generate
-    :type out_path: str
-
-    :return: path to generated spliced FASTA
-    :rtype: str
-    """
-    pass
-
-
-def generate_unspliced_fasta(fasta_path, gtf_path, out_path):
-    """Generate a unspliced FASTA using the genome and GTF.
-
-    This function assumes the order in which the chromosomes appear in the
-    genome FASTA is identical to the order in which they appear in the GTF.
-    Additionally, the GTF must be sorted by start position.
-    The spliced FASTA contains entries of length 2 * (k - 1) for k = 31,
-    centered around exon-intron splice junctions + full introns
-    (any overlapping regions are collapsed).
-
-    :param fasta_path: path to genomic FASTA file
-    :type fasta_path: str
-    :param gtf_path: path to GTF file
-    :type gtf_path: str
-    :param out_path: path to unspliced FASTA to generate
-    :type out_path: str
-
-    :return: path to generated unspliced FASTA
-    :rtype: str
-    """
-    pass
diff --git a/kb_python/gtf.py b/kb_python/gtf.py
deleted file mode 100755
index afc57d6..0000000
--- a/kb_python/gtf.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import logging
-import re
-
-from .utils import open_as_text
-
-logger = logging.getLogger(__name__)
-
-
-class GTF:
-    """Utility class to easily read and parse GTF files.
-
-    :param gtf_path: path to GTF file
-    :type gtf_path: str
-    """
-    PARSER = re.compile(
-        r'''
-        ^(?P<seqname>.+?)\s+    # chromosome
-        .*?\t                   # source
-        (?P<feature>.+?)\s+     # feature: transcript, exon, etc.
-        (?P<start>[0-9]+?)\s+   # start position (1-indexed)
-        (?P<end>[0-9]+?)\s+     # end position (1-indexed, inclusive)
-        .*?\s+                  # score
-        (?P<strand>\+|-|\.)\s+  # +, -, . indicating strand
-        .*?\s+                  # frame
-        (?P<group>.*)           # groups
-    ''', re.VERBOSE
-    )
-    GROUP_PARSER = re.compile(r'(?P<key>\S+?)\s*"(?P<value>.+?)";?')
-
-    def __init__(self, gtf_path):
-        self.gtf_path = gtf_path
-
-    @staticmethod
-    def parse_entry(line):
-        """Parse a single GTF entry.
-
-        :param line: a line in the GTF file
-        :type line: str
-
-        :return: parsed GTF information
-        :rtype: dict
-        """
-        match = GTF.PARSER.match(line)
-        if match:
-            groupdict = match.groupdict()
-            groupdict['start'] = int(groupdict['start'])
-            groupdict['end'] = int(groupdict['end'])
-            groupdict['group'] = dict(
-                GTF.GROUP_PARSER.findall(
-                    groupdict.get('group', '').replace(' ', '')
-                )
-            )
-            if not groupdict['group']:
-                logger.warning(
-                    f'Failed to parse GTF attributes of entry: {line}'
-                )
-
-            return groupdict
-        logger.warning(f'Failed to parse GTF entry: {line}')
-        return None
-
-    def entries(self):
-        """Generator that yields one GTF entry at a time.
-
-        :return: a generator that yields a dict of the GTF entry
-        :rtype: generator
-        """
-        with open_as_text(self.gtf_path, 'r') as f:
-            for line in f:
-                if line.startswith('#') or line.isspace():
-                    continue
-
-                yield GTF.parse_entry(line)
-
-    def sort(self, out_path):
-        """Sort the GTF file by chromosome, start position, line number.
-
-        :param out_path: path to generate the sorted GTF
-        :type out_path: str
-
-        :return: path to sorted GTF file, set of chromosomes in GTF file
-        :rtype: tuple
-        """
-        chromosomes = set()
-        to_sort = []
-        with open_as_text(self.gtf_path, 'r') as f:
-            position = 0
-            line = f.readline()
-            while line:
-                if not line.startswith('#') and not line.isspace():
-                    entry = GTF.parse_entry(line)
-                    if entry['feature'] in ('transcript', 'exon'):
-                        to_sort.append(
-                            (entry['seqname'], entry['start'], position)
-                        )
-                        chromosomes.add(entry['seqname'])
-                position = f.tell()
-                line = f.readline()
-        logger.debug('Sorting {} GTF entries'.format(len(to_sort)))
-        to_sort.sort()
-
-        logger.debug('Writing sorted GTF {}'.format(out_path))
-        with open_as_text(self.gtf_path, 'r') as gtf, open_as_text(out_path,
-                                                                   'w') as f:
-            for tup in to_sort:
-                position = tup[2]
-                gtf.seek(position)
-                f.write(gtf.readline())
-
-        return out_path, chromosomes
diff --git a/kb_python/info.txt b/kb_python/info.txt
index 2a71f76..9c8ed60 100755
--- a/kb_python/info.txt
+++ b/kb_python/info.txt
@@ -1,6 +1,4 @@
-kb is a python package for rapidly pre-processing single-cell RNA-seq data. It is a wrapper for the methods described in
-
-Melsted, Booeshaghi et al., Modular and efficient pre-processing of single-cell RNA-seq, bioRxiv, 2019
+kb is a python package for rapidly pre-processing single-cell RNA-seq data. It is a wrapper for the methods described in [2].
 
 The goal of the wrapper is to simplify downloading and running of the kallisto [1] and bustools [2] programs. It was inspired by Sten Linnarsson’s loompy fromfq command (http://linnarssonlab.org/loompy/kallisto/index.html)
 
diff --git a/kb_python/logging.py b/kb_python/logging.py
new file mode 100755
index 0000000..7acd61d
--- /dev/null
+++ b/kb_python/logging.py
@@ -0,0 +1,4 @@
+import ngs_tools as ngs
+
+logger = ngs.logging.Logger(__name__)
+ngs.logging.set_logger(logger)
diff --git a/kb_python/main.py b/kb_python/main.py
index 7340457..9a140f4 100755
--- a/kb_python/main.py
+++ b/kb_python/main.py
@@ -22,6 +22,7 @@
 )
 from .constants import INFO_FILENAME
 from .count import count, count_smartseq, count_velocity
+from .logging import logger
 from .ref import download_reference, ref, ref_kite, ref_lamanno
 from .utils import (
     get_bustools_version,
@@ -31,8 +32,6 @@
     remove_directory,
 )
 
-logger = logging.getLogger(__name__)
-
 
 def display_info():
     """Displays kb, kallisto and bustools version + citation information, along
@@ -61,20 +60,26 @@ def display_technologies():
     """Displays a list of supported technologies along with whether kb provides
     a whitelist for that technology and the FASTQ argument order for kb count.
     """
-    headers = [
-        'name', 'whitelist provided', 'barcode (file #, start, stop)',
-        'umi (file #, start, stop)', 'read file #'
-    ]
+    headers = ['name', 'whitelist', 'barcode', 'umi', 'cDNA']
     rows = [headers]
 
     print('List of supported single-cell technologies\n')
+    print('Positions syntax: `input file index, start position, end position`')
+    print('When start & end positions are None, refers to the entire file')
+    print(
+        'Custom technologies may be defined by providing a kallisto-supported '
+        'technology string\n(see https://pachterlab.github.io/kallisto/manual)\n'
+    )
     for t in TECHNOLOGIES:
+        chem = t.chemistry
         row = [
             t.name,
-            'yes' if t.whitelist_archive else '',
-            ' '.join(str(tup) for tup in t.barcode_positions),
-            ' '.join(str(tup) for tup in t.umi_positions),
-            str(t.reads_file),
+            'yes' if chem.has_whitelist else '',
+            ' '.join(str(_def) for _def in chem.cell_barcode_parser)
+            if chem.has_cell_barcode else '',
+            ' '.join(str(_def)
+                     for _def in chem.umi_parser) if chem.has_umi else '',
+            ' '.join(str(_def) for _def in chem.cdna_parser),
         ]
         rows.append(row)
 
@@ -179,7 +184,6 @@ def parse_count(parser, args, temp_dir='tmp'):
     :param args: Command-line arguments dictionary, as parsed by argparse
     :type args: dict
     """
-    logger = logging.getLogger(__name__)
     if args.report:
         logger.warning((
             'Using `--report` may cause `kb` to exceed maximum memory specified '
@@ -725,6 +729,7 @@ def setup_count_args(parser, parent):
     return parser_count
 
 
+@logger.namespaced('main')
 def main():
     """Command-line entrypoint.
     """
@@ -803,10 +808,7 @@ def main():
 
     args = parser.parse_args()
 
-    logging.basicConfig(
-        format='[%(asctime)s] %(levelname)7s %(message)s',
-        level=logging.DEBUG if args.verbose else logging.INFO,
-    )
+    logger.setLevel(logging.DEBUG if args.verbose else logging.INFO)
 
     # Validation
     if 'no_validate' in args and args.no_validate:
@@ -827,17 +829,6 @@ def main():
             logging.disable(level=logging.CRITICAL)
             set_dry()
 
-    # Turn off logging from other packages.
-    try:
-        anndata_logger = logging.getLogger('anndata')
-        h5py_logger = logging.getLogger('h5py')
-        anndata_logger.setLevel(logging.CRITICAL + 10)
-        anndata_logger.propagate = False
-        h5py_logger.setLevel(logging.CRITICAL + 10)
-        h5py_logger.propagate = False
-    except Exception:
-        pass
-
     if any(arg in sys.argv for arg in {'--lamanno', '--nucleus'}):
         logger.warning((
             'The `--lamanno` and `-`-n`ucleus` flags are deprecated. '
@@ -854,10 +845,20 @@ def main():
     logger.debug(f'kallisto binary located at {get_kallisto_binary_path()}')
     logger.debug(f'bustools binary located at {get_bustools_binary_path()}')
 
-    temp_dir = args.tmp or os.path.join(
-        args.o, TEMP_DIR
-    ) if 'o' in args else TEMP_DIR
-    logger.debug(f'Creating {temp_dir} directory')
+    temp_dir = args.tmp or (
+        os.path.join(args.o, TEMP_DIR) if 'o' in args else TEMP_DIR
+    )
+    # Check if temp_dir exists and exit if it does.
+    # This is so that kb doesn't accidently use an existing directory and
+    # delete it afterwards.
+    if os.path.exists(temp_dir):
+        parser.error(
+            f'Temporary directory `{temp_dir}` exists! Is another instance running? '
+            'Either remove the existing directory or use `--tmp` to specify a '
+            'different temporary directory.'
+        )
+
+    logger.debug(f'Creating `{temp_dir}` directory')
     make_directory(temp_dir)
     try:
         logger.debug(args)
@@ -871,5 +872,5 @@ def main():
     finally:
         # Always clean temp dir
         if not args.keep_tmp:
-            logger.debug(f'Removing {temp_dir} directory')
+            logger.debug(f'Removing `{temp_dir}` directory')
             remove_directory(temp_dir)
diff --git a/kb_python/maps/10xv3_feature_barcode_map.txt.gz b/kb_python/maps/10xv3_feature_barcode_map.txt.gz
deleted file mode 100644
index 593bc55..0000000
Binary files a/kb_python/maps/10xv3_feature_barcode_map.txt.gz and /dev/null differ
diff --git a/kb_python/ref.py b/kb_python/ref.py
old mode 100644
new mode 100755
index 989822a..5f3a4da
--- a/kb_python/ref.py
+++ b/kb_python/ref.py
@@ -1,16 +1,13 @@
 import glob
-import logging
+import itertools
 import os
 import tarfile
 
+import ngs_tools as ngs
+import pandas as pd
+
 from .config import get_kallisto_binary_path
-from .fasta import (
-    FASTA,
-    generate_cdna_fasta,
-    generate_intron_fasta,
-    generate_kite_fasta,
-)
-from .gtf import GTF
+from .logging import logger
 from .utils import (
     concatenate_files,
     decompress_gzip,
@@ -20,66 +17,117 @@
     run_executable,
 )
 
-logger = logging.getLogger(__name__)
-
 
-def sort_gtf(gtf_path, out_path):
-    """Sorts a GTF file based on its chromosome, start position, line number.
+def generate_kite_fasta(feature_path, out_path, no_mismatches=False):
+    """Generate a FASTA file for feature barcoding with the KITE workflow.
 
-    :param gtf_path: path to GTF file
-    :type gtf_path: str
+    This FASTA contains all sequences that are 1 hamming distance from the
+    provided barcodes. The file of barcodes must be a 2-column TSV containing
+    the barcode sequences in the first column and their corresponding feature
+    name in the second column. If hamming distance 1 variants collide for any
+    pair of barcodes, the hamming distance 1 variants for those barcodes are
+    not generated.
 
-    :return: path to sorted GTF file, set of chromosomes in GTF file
-    :rtype: tuple
-    """
-    logger.info(f'Sorting {gtf_path} to {out_path}')
-    gtf = GTF(gtf_path)
-    return gtf.sort(out_path)
-
-
-def sort_fasta(fasta_path, out_path):
-    """Sorts a FASTA file based on its header.
+    :param feature_path: path to TSV containing barcodes and feature names
+    :type feature_path: str
+    :param out_path: path to FASTA to generate
+    :type out_path: str
+    :param no_mismatches: whether to generate hamming distance 1 variants,
+                          defaults to `False`
+    :type no_mismatches: bool, optional
 
-    :param fasta_path: path to FASTA file
-    :type fasta_path: str
+    :raises Exception: if there are barcodes of different lengths
+    :raises Exception: if there are duplicate barcodes
 
-    :return: path to sorted FASTA file, set of chromosomes in FASTA file
+    :return: (path to generated FASTA, set of barcode lengths)
     :rtype: tuple
     """
-    logger.info(f'Sorting {fasta_path} to {out_path}')
-    fasta = FASTA(fasta_path)
-    return fasta.sort(out_path)
-
 
-def check_chromosomes(fasta_chromosomes, gtf_chromosomes):
-    """Compares the two chromosome sets and outputs warnings if there are
-    unique chromosomes in either set.
-
-    :param fasta_chromosomes: set of chromosomes found in FASTA
-    :type fasta_chromosomes: set
-    :param gtf_chromosomes: set of chromosomes found in GTF
-    :type gtf_chromosomes: set
+    def generate_mismatches(name, sequence):
+        """Helper function to generate 1 hamming distance mismatches.
+
+        :param name: name of the sequence
+        :type name: str
+        :param sequence: the sequence
+        :type sequence: str
+
+        :return: a generator that yields a tuple of (unique name, mismatched sequence)
+        :rtype: generator
+        """
+        sequence = sequence.upper()
+        for i in range(len(sequence)):
+            base = sequence[i]
+            before = sequence[:i]
+            after = sequence[i + 1:]
+
+            for j, different in enumerate([b for b in ['A', 'C', 'G', 'T']
+                                           if b != base]):
+                yield f'{name}-{i}.{j+1}', f'{before}{different}{after}'
+
+    df_features = pd.read_csv(
+        feature_path, sep='\t', header=None, names=['sequence', 'name']
+    )
 
-    :return: intersection of the two sets
-    :rtype: set
-    """
-    fasta_unique = fasta_chromosomes - gtf_chromosomes
-    gtf_unique = gtf_chromosomes - fasta_chromosomes
-    if fasta_unique:
-        logger.warning((
-            'The following chromosomes were found in the FASTA but does not have '
-            'any "transcript" features in the GTF: {}. '
-            'No sequences will be generated for these chromosomes.'
-        ).format(', '.join(fasta_unique)))
-    if gtf_unique:
-        logger.warning((
-            'The following chromosomes were found to have "transcript" features '
-            'in the GTF but does not exist in the FASTA. '
-            'No sequences will be generated for these chromosomes.'
-        ))
-    chromosomes = set.intersection(fasta_chromosomes, gtf_chromosomes)
-
-    return chromosomes
+    lengths = set()
+    features = {}
+    variants = {}
+    # Generate all feature barcode variations before saving to check for collisions.
+    for i, row in df_features.iterrows():
+        # Check that the first column contains the sequence
+        # and the second column the feature name.
+        if ngs.sequence.SEQUENCE_PARSER.search(row.sequence.upper()):
+            raise Exception((
+                'Encountered non-ATCG basepairs in barcode sequence {row.sequence}. '
+                'Does the first column contain the sequences and the second column the feature names?'
+            ))
+
+        lengths.add(len(row.sequence))
+        features[row['name']] = row.sequence
+        variants[row['name']] = {
+            name: seq
+            for name, seq in generate_mismatches(row['name'], row.sequence)
+            if not no_mismatches
+        }
+
+    # Check duplicate barcodes.
+    duplicates = set([
+        bc for bc in features.values() if list(features.values()).count(bc) > 1
+    ])
+    if len(duplicates) > 0:
+        raise Exception(
+            'Duplicate feature barcodes: {}'.format(' '.join(duplicates))
+        )
+    if len(lengths) > 1:
+        logger.warning(
+            'Detected barcodes of different lengths: {}'.format(
+                ','.join(str(l) for l in lengths)  # noqa
+            )
+        )
+    for f1, f2 in itertools.combinations(variants.keys(), 2):
+        v1 = variants[f1]
+        v2 = variants[f2]
+        if len(set.intersection(set(v1.values()), set(v2.values()))) > 0:
+            logger.warning((
+                f'Detected features with barcodes that are not >2 hamming distance apart: {f1}, {f2}. '
+                f'Hamming distance 1 variants for these barcodes will not be generated.'
+            ))
+            variants[f1] = {}
+            variants[f2] = {}
+
+    # Write FASTA
+    with ngs.fasta.Fasta(out_path, 'w') as f:
+        for feature, barcode in features.items():
+            attributes = {'feature_id': feature}
+            header = ngs.fasta.FastaEntry.make_header(feature, attributes)
+            entry = ngs.fasta.FastaEntry(header, barcode)
+            f.write(entry)
+
+            for name, variant in variants[feature].items():
+                header = ngs.fasta.FastaEntry.make_header(name, attributes)
+                entry = ngs.fasta.FastaEntry(header, variant)
+                f.write(entry)
+
+    return out_path, min(lengths)
 
 
 def create_t2g_from_fasta(fasta_path, t2g_path):
@@ -93,81 +141,34 @@ def create_t2g_from_fasta(fasta_path, t2g_path):
     :return: dictionary containing path to generated t2g mapping
     :rtype: dict
     """
-    logger.info('Creating transcript-to-gene mapping at {}'.format(t2g_path))
-    with open_as_text(t2g_path, 'w') as f:
-        fasta = FASTA(fasta_path)
-        for info, _ in fasta.entries():
-            if 'feature_id' in info['group']:
-                row = [
-                    info['sequence_id'],
-                    info['group']['feature_id'],
-                    info['group']['feature_id'],
-                ]
+    logger.info(f'Creating transcript-to-gene mapping at {t2g_path}')
+    with ngs.fasta.Fasta(fasta_path, 'r') as f_in, open_as_text(t2g_path,
+                                                                'w') as f_out:
+        for entry in f_in:
+            attributes = entry.attributes
+
+            if 'feature_id' in attributes:
+                feature_id = attributes['feature_id']
+                row = [entry.name, feature_id, feature_id]
             else:
+                gene_id = attributes['gene_id']
+                gene_name = attributes.get('gene_name', '')
+                transcript_name = attributes.get('transcript_name', '')
+                chromosome = attributes['chr']
+                start = attributes['start']
+                end = attributes['end']
+                strand = attributes['strand']
                 row = [
-                    info['sequence_id'], info['group']['gene_id'],
-                    info['group'].get('gene_name', ''),
-                    info['group'].get('transcript_name',
-                                      ''), info['group'].get('chr', ''),
-                    info['group'].get('start',
-                                      ''), info['group'].get('end', ''),
-                    info['group'].get('strand', '')
-                ]
-            f.write('\t'.join(str(item) for item in row) + '\n')
-    return {'t2g': t2g_path}
-
-
-def create_t2g_from_gtf(gtf_path, t2g_path, intron=False):
-    """Creates a transcript-to-gene mapping from a GTF file.
-
-    GTF entries that have `transcript` as its feature are parsed for
-    the `transcript_id`, `gene_id` and `gene_name`.
-
-    :param gtf_path: path to GTF file
-    :type gtf_path: str
-    :param t2g_path: path to output transcript-to-gene mapping
-    :type t2g_path: str
-    :param intron: whether or not to include intron transcript ids (with the
-                   `-I` prefix), defaults to `False`
-    :type intron: bool, optional
-
-    :return: dictionary containing path to generated t2g mapping
-    :rtype: dict
-    """
-    logger.info('Creating transcript-to-gene mapping at {}'.format(t2g_path))
-    gtf = GTF(gtf_path)
-    with open_as_text(t2g_path, 'w') as f:
-        for entry in gtf.entries():
-            if entry['feature'] == 'transcript':
-                transcript_id = entry['group']['transcript_id']
-                transcript_version = entry['group'].get(
-                    'transcript_version', None
-                )
-                transcript = '{}.{}'.format(
-                    transcript_id, transcript_version
-                ) if transcript_version else transcript_id
-                gene_id = entry['group']['gene_id']
-                gene_version = entry['group'].get('gene_version', None)
-                gene = '{}.{}'.format(
-                    gene_id, gene_version
-                ) if gene_version else gene_id
-
-                row = [
-                    transcript,
-                    gene,
-                    entry['group'].get('gene_name', ''),
-                    entry['group'].get('transcript_name', ''),
-                    entry.get('seqname', ''),
-                    entry.get('start', ''),
-                    entry.get('end', ''),
-                    entry.get('strand', ''),
+                    entry.name,
+                    gene_id,
+                    gene_name,
+                    transcript_name,
+                    chromosome,
+                    start,
+                    end,
+                    strand,
                 ]
-                f.write('\t'.join(str(item) for item in row) + '\n')
-
-                if intron:
-                    intron_row = row.copy()
-                    intron_row[0] = intron_row[0] + '-I'
-                    f.write('\t'.join(str(item) for item in intron_row) + '\n')
+            f_out.write('\t'.join(str(item) for item in row) + '\n')
 
     return {'t2g': t2g_path}
 
@@ -183,11 +184,10 @@ def create_t2c(fasta_path, t2c_path):
     :return: dictionary containing path to generated t2c list
     :rtype: dict
     """
-    fasta = FASTA(fasta_path)
-    with open_as_text(t2c_path, 'w') as f:
-        for info, _ in fasta.entries():
-            sequence_id = info['sequence_id']
-            f.write('{}\n'.format(sequence_id))
+    with ngs.fasta.Fasta(fasta_path, 'r') as f_in, open_as_text(t2c_path,
+                                                                'w') as f_out:
+        for entry in f_in:
+            f_out.write(f'{entry.name}\n')
     return {'t2c': t2c_path}
 
 
@@ -236,27 +236,27 @@ def split_and_index(fasta_path, index_prefix, n=2, k=31, temp_dir='tmp'):
     logger.info(f'Splitting {fasta_path} into {n} parts')
     size = int(os.path.getsize(fasta_path) / n) + 4
 
-    fasta = FASTA(fasta_path)
-    fasta_entries = fasta.entries(parse=False)
-    finished = False
-    for i in range(n):
-        fasta_part_path = get_temporary_filename(temp_dir)
-        index_part_path = f'{index_prefix}.{i}'
-        fastas.append(fasta_part_path)
-        indices.append(index_part_path)
-
-        with open(fasta_part_path, 'w') as f:
-            logger.debug(f'Writing {fasta_part_path}')
-            while f.tell() < size:
-                try:
-                    info, seq = next(fasta_entries)
-                except StopIteration:
-                    finished = True
-                    break
-                f.write(f'{info}\n{seq}\n')
-
-        if finished:
-            break
+    with ngs.fasta.Fasta(fasta_path, 'r') as f_in:
+        fasta_iter = iter(f_in)
+        finished = False
+        for i in range(n):
+            fasta_part_path = get_temporary_filename(temp_dir)
+            index_part_path = f'{index_prefix}.{i}'
+            fastas.append(fasta_part_path)
+            indices.append(index_part_path)
+
+            with ngs.fasta.Fasta(fasta_part_path, 'w') as f_out:
+                logger.debug(f'Writing {fasta_part_path}')
+                while f_out.tell() < size:
+                    try:
+                        entry = next(fasta_iter)
+                    except StopIteration:
+                        finished = True
+                        break
+                    f_out.write(entry)
+
+            if finished:
+                break
 
     built = []
     for fasta_part_path, index_part_path in zip(fastas, indices):
@@ -266,6 +266,7 @@ def split_and_index(fasta_path, index_prefix, n=2, k=31, temp_dir='tmp'):
     return {'indices': built}
 
 
+@logger.namespaced('download')
 def download_reference(reference, files, temp_dir='tmp', overwrite=False):
     """Downloads a provided reference file from a static url.
 
@@ -288,7 +289,7 @@ def download_reference(reference, files, temp_dir='tmp', overwrite=False):
     :rtype: dict
     """
     results = {}
-    if not any(os.path.exists(file) for file in files) or overwrite:
+    if not ngs.utils.all_exists(*list(files.values())) or overwrite:
         # Make sure all the required file paths are there.
         diff = set(reference.files.keys()) - set(files.keys())
         if diff:
@@ -299,14 +300,14 @@ def download_reference(reference, files, temp_dir='tmp', overwrite=False):
 
         url = reference.url
         path = os.path.join(temp_dir, os.path.basename(url))
-        logging.info(
+        logger.info(
             'Downloading files for {} from {} to {}'.format(
                 reference.name, url, path
             )
         )
         local_path = download_file(url, path)
 
-        logging.info('Extracting files from {}'.format(local_path))
+        logger.info('Extracting files from {}'.format(local_path))
         with tarfile.open(local_path, 'r:gz') as f:
             f.extractall(temp_dir)
 
@@ -344,6 +345,7 @@ def decompress_file(path, temp_dir='tmp'):
         return path
 
 
+@logger.namespaced('ref')
 def ref(
     fasta_paths,
     gtf_paths,
@@ -383,60 +385,47 @@ def ref(
         gtf_paths = [gtf_paths]
 
     results = {}
-    t2gs = []
     cdnas = []
-    for fasta_path, gtf_path in zip(fasta_paths, gtf_paths):
-        logger.info(f'Preparing {fasta_path}, {gtf_path}')
-        gtf_path = decompress_file(gtf_path, temp_dir=temp_dir)
-        t2g_result = create_t2g_from_gtf(
-            gtf_path, get_temporary_filename(temp_dir)
-        )
-        t2gs.append(t2g_result['t2g'])
-
-        if not glob.glob(f'{index_path}*') or overwrite:
-            fasta_path = decompress_file(fasta_path, temp_dir=temp_dir)
-            sorted_fasta_path, fasta_chromosomes = sort_fasta(
-                fasta_path, get_temporary_filename(temp_dir)
-            )
-            sorted_gtf_path, gtf_chromosomes = sort_gtf(
-                gtf_path, get_temporary_filename(temp_dir)
+    if (not ngs.utils.all_exists(cdna_path, t2g_path)) or overwrite:
+        for fasta_path, gtf_path in zip(fasta_paths, gtf_paths):
+            logger.info(f'Preparing {fasta_path}, {gtf_path}')
+            # Parse GTF for gene and transcripts
+            gene_infos, transcript_infos = ngs.gtf.genes_and_transcripts_from_gtf(
+                gtf_path, use_version=True
             )
 
+            # Split
             cdna_temp_path = get_temporary_filename(temp_dir)
             logger.info(
                 f'Splitting genome {fasta_path} into cDNA at {cdna_temp_path}'
             )
-            chromosomes = check_chromosomes(fasta_chromosomes, gtf_chromosomes)
-            cdna_fasta_path = generate_cdna_fasta(
-                sorted_fasta_path,
-                sorted_gtf_path,
-                cdna_temp_path,
-                chromosomes=chromosomes
+            cdna_temp_path = ngs.fasta.split_genomic_fasta_to_cdna(
+                fasta_path, cdna_temp_path, gene_infos, transcript_infos
             )
-            cdnas.append(cdna_fasta_path)
-
-    # Concatenate t2gs
-    logger.info(
-        f'Concatenating {len(t2gs)} transcript-to-gene mappings to {t2g_path}'
-    )
-    t2g_path = concatenate_files(*t2gs, out_path=t2g_path, temp_dir=temp_dir)
-    results.update({'t2g': t2g_path})
+            cdnas.append(cdna_temp_path)
 
-    # Concatenate cdnas and index
-    if not glob.glob(f'{index_path}*') or overwrite:
         logger.info(f'Concatenating {len(cdnas)} cDNAs to {cdna_path}')
-        cdna_fasta_path = concatenate_files(
+        cdna_path = concatenate_files(
             *cdnas, out_path=cdna_path, temp_dir=temp_dir
         )
+        results.update({'cdna_fasta': cdna_path})
+    else:
+        logger.info(
+            f'Skipping cDNA FASTA generation because {cdna_path} already exists. Use --overwrite flag to overwrite'
+        )
+
+    if not glob.glob(f'{index_path}*') or overwrite:
+        t2g_result = create_t2g_from_fasta(cdna_path, t2g_path)
+        results.update(t2g_result)
 
         if k and k != 31:
             logger.warning(
                 f'Using provided k-mer length {k} instead of optimal length 31'
             )
         index_result = split_and_index(
-            cdna_fasta_path, index_path, n=n, k=k or 31, temp_dir=temp_dir
+            cdna_path, index_path, n=n, k=k or 31, temp_dir=temp_dir
         ) if n > 1 else kallisto_index(
-            cdna_fasta_path, index_path, k=k or 31
+            cdna_path, index_path, k=k or 31
         )
         results.update(index_result)
     else:
@@ -448,6 +437,7 @@ def ref(
     return results
 
 
+@logger.namespaced('ref_kite')
 def ref_kite(
     feature_path,
     fasta_path,
@@ -514,6 +504,7 @@ def ref_kite(
     return results
 
 
+@logger.namespaced('ref_lamanno')
 def ref_lamanno(
     fasta_paths,
     gtf_paths,
@@ -571,53 +562,53 @@ def ref_lamanno(
     introns = []
     cdna_t2cs = []
     intron_t2cs = []
-    if not glob.glob(f'{index_path}*') or overwrite:
+    if (not ngs.utils.all_exists(cdna_path, intron_path, t2g_path,
+                                 cdna_t2c_path, intron_t2c_path)) or overwrite:
         for fasta_path, gtf_path in zip(fasta_paths, gtf_paths):
             logger.info(f'Preparing {fasta_path}, {gtf_path}')
-
-            fasta_path = decompress_file(fasta_path, temp_dir=temp_dir)
-            sorted_fasta_path, fasta_chromosomes = sort_fasta(
-                fasta_path, get_temporary_filename(temp_dir)
-            )
-            gtf_path = decompress_file(gtf_path, temp_dir=temp_dir)
-            sorted_gtf_path, gtf_chromosomes = sort_gtf(
-                gtf_path, get_temporary_filename(temp_dir)
+            # Parse GTF for gene and transcripts
+            gene_infos, transcript_infos = ngs.gtf.genes_and_transcripts_from_gtf(
+                gtf_path, use_version=True
             )
+
+            # Split cDNA
             cdna_temp_path = get_temporary_filename(temp_dir)
             logger.info(
                 f'Splitting genome {fasta_path} into cDNA at {cdna_temp_path}'
             )
-            chromosomes = check_chromosomes(fasta_chromosomes, gtf_chromosomes)
-            cdna_fasta_path = generate_cdna_fasta(
-                sorted_fasta_path,
-                sorted_gtf_path,
-                cdna_temp_path,
-                chromosomes=chromosomes
+            cdna_temp_path = ngs.fasta.split_genomic_fasta_to_cdna(
+                fasta_path, cdna_temp_path, gene_infos, transcript_infos
             )
-            cdnas.append(cdna_fasta_path)
+            cdnas.append(cdna_temp_path)
+
+            # cDNA t2c
             cdna_t2c_temp_path = get_temporary_filename(temp_dir)
             logger.info(
                 f'Creating cDNA transcripts-to-capture at {cdna_t2c_temp_path}'
             )
-            cdna_t2c_result = create_t2c(cdna_fasta_path, cdna_t2c_temp_path)
+            cdna_t2c_result = create_t2c(cdna_temp_path, cdna_t2c_temp_path)
             cdna_t2cs.append(cdna_t2c_result['t2c'])
+
+            # Split intron
             intron_temp_path = get_temporary_filename(temp_dir)
             logger.info(f'Splitting genome into introns at {intron_temp_path}')
-            intron_fasta_path = generate_intron_fasta(
-                sorted_fasta_path,
-                sorted_gtf_path,
+            intron_temp_path = ngs.fasta.split_genomic_fasta_to_intron(
+                fasta_path,
                 intron_temp_path,
-                chromosomes=chromosomes,
+                gene_infos,
+                transcript_infos,
                 flank=flank if flank is not None else k -
                 1 if k is not None else 30
             )
-            introns.append(intron_fasta_path)
+            introns.append(intron_temp_path)
+
+            # intron t2c
             intron_t2c_temp_path = get_temporary_filename(temp_dir)
             logger.info(
                 f'Creating intron transcripts-to-capture at {intron_t2c_temp_path}'
             )
             intron_t2c_result = create_t2c(
-                intron_fasta_path, intron_t2c_temp_path
+                intron_temp_path, intron_t2c_temp_path
             )
             intron_t2cs.append(intron_t2c_result['t2c'])
 
@@ -651,6 +642,12 @@ def ref_lamanno(
             'intron_t2c': intron_t2c_path
         })
 
+    else:
+        logger.info(
+            'Skipping cDNA and intron FASTA generation because files already exist. Use --overwrite flag to overwrite'
+        )
+
+    if not glob.glob(f'{index_path}*') or overwrite:
         # Concatenate cDNA and intron fastas to generate T2G and build index
         combined_path = get_temporary_filename(temp_dir)
         logger.info(f'Concatenating cDNA and intron FASTAs to {combined_path}')
@@ -659,6 +656,7 @@ def ref_lamanno(
         )
         t2g_result = create_t2g_from_fasta(combined_path, t2g_path)
         results.update(t2g_result)
+
         if k and k != 31:
             logger.warning(
                 f'Using provided k-mer length {k} instead of optimal length 31'
diff --git a/kb_python/report.py b/kb_python/report.py
old mode 100644
new mode 100755
index 26edfc0..3b89550
--- a/kb_python/report.py
+++ b/kb_python/report.py
@@ -1,4 +1,3 @@
-import logging
 import os
 
 import nbformat
@@ -13,8 +12,6 @@
 from .dry import dryable
 from .utils import get_temporary_filename
 
-logger = logging.getLogger(__name__)
-
 REPORT_DIR = os.path.join(PACKAGE_PATH, 'report')
 BASIC_TEMPLATE_PATH = os.path.join(REPORT_DIR, 'report_basic.ipynb')
 MATRIX_TEMPLATE_PATH = os.path.join(REPORT_DIR, 'report_matrix.ipynb')
diff --git a/kb_python/stats.py b/kb_python/stats.py
index 8c4de57..231ae37 100755
--- a/kb_python/stats.py
+++ b/kb_python/stats.py
@@ -40,16 +40,17 @@ def start(self):
         self.commands = []
         self.workdir = os.getcwd()
 
-        # Import here to prevent circular imports
-        from .utils import get_bustools_version, get_kallisto_version
-        self.kallisto = {
-            'path': get_kallisto_binary_path(),
-            'version': '.'.join(str(i) for i in get_kallisto_version())
-        }
-        self.bustools = {
-            'path': get_bustools_binary_path(),
-            'version': '.'.join(str(i) for i in get_bustools_version())
-        }
+        if not is_dry():
+            # Import here to prevent circular imports
+            from .utils import get_bustools_version, get_kallisto_version
+            self.kallisto = {
+                'path': get_kallisto_binary_path(),
+                'version': '.'.join(str(i) for i in get_kallisto_version())
+            }
+            self.bustools = {
+                'path': get_bustools_binary_path(),
+                'version': '.'.join(str(i) for i in get_bustools_version())
+            }
 
     def command(self, command, runtime=None):
         """Report a shell command was run.
diff --git a/kb_python/utils.py b/kb_python/utils.py
index 5915c8a..7ceb862 100755
--- a/kb_python/utils.py
+++ b/kb_python/utils.py
@@ -1,42 +1,47 @@
 import concurrent.futures
-import gzip
 import logging
 import os
 import re
-import requests
 import shutil
 import subprocess as sp
-import tempfile
 import threading
 import time
 from urllib.request import urlretrieve
 
 import anndata
+import ngs_tools as ngs
 import pandas as pd
 import scipy.io
 from scipy import sparse
 from tqdm import tqdm
 
 from .config import (
-    CHUNK_SIZE,
     get_bustools_binary_path,
     get_kallisto_binary_path,
-    MAP_DIR,
-    PACKAGE_PATH,
     PLATFORM,
     TECHNOLOGIES_MAPPING,
-    WHITELIST_DIR,
     UnsupportedOSException,
 )
 from .dry import dryable
 from .dry import utils as dry_utils
+from .logging import logger
 from .stats import STATS
 
-logger = logging.getLogger(__name__)
-
 TECHNOLOGY_PARSER = re.compile(r'^(?P<name>\S+)')
 VERSION_PARSER = re.compile(r'^\S*? ([0-9]+).([0-9]+).([0-9]+)')
 
+# These functions have been moved as of 0.26.1 to the ngs_tools library but are
+# imported from this file in other places. For now, let's keep these here.
+# TODO: remove these
+open_as_text = ngs.utils.open_as_text
+decompress_gzip = ngs.utils.decompress_gzip
+compress_gzip = ngs.utils.compress_gzip
+concatenate_files = ngs.utils.concatenate_files_as_text
+download_file = ngs.utils.download_file
+get_temporary_filename = dryable(dry_utils.get_temporary_filename)(
+    ngs.utils.mkstemp
+)
+
 
 class NotImplementedException(Exception):
     pass
@@ -82,53 +87,6 @@ def update_filename(filename, code):
     return f'{name}.{code}{extension}'
 
 
-def open_as_text(path, mode):
-    """Open a textfile or gzip file in text mode.
-
-    :param path: path to textfile or gzip
-    :type path: str
-    :param mode: mode to open the file, either `w` for write or `r` for read
-    :type mode: str
-
-    :return: file object
-    :rtype: file object
-    """
-    return gzip.open(path, mode +
-                     't') if path.endswith('.gz') else open(path, mode)
-
-
-def decompress_gzip(gzip_path, out_path):
-    """Decompress a gzip file to provided file path.
-
-    :param gzip_path: path to gzip file
-    :type gzip_path: str
-    :param out_path: path to decompressed file
-    :type out_path: str
-
-    :return: path to decompressed file
-    :rtype: str
-    """
-    with gzip.open(gzip_path, 'rb') as f, open(out_path, 'wb') as out:
-        shutil.copyfileobj(f, out)
-    return out_path
-
-
-def compress_gzip(file_path, out_path):
-    """Compress a file into gzip.
-
-    :param file_path: path to file
-    :type file_path: str
-    :param out_dir: path to compressed file
-    :type out_dir: str
-
-    :return: path to compressed file
-    :rtype: str
-    """
-    with open(file_path, 'rb') as f, gzip.open(out_path, 'wb') as out:
-        shutil.copyfileobj(f, out)
-    return out_path
-
-
 @dryable(dry_utils.make_directory)
 def make_directory(path):
     """Quietly make the specified directory (and any subdirectories).
@@ -322,7 +280,7 @@ def whitelist_provided(technology):
     """
     upper = technology.upper()
     return upper in TECHNOLOGIES_MAPPING and TECHNOLOGIES_MAPPING[
-        upper].whitelist_archive
+        upper].chemistry.has_whitelist
 
 
 @dryable(dry_utils.move_file)
@@ -355,12 +313,10 @@ def copy_whitelist(technology, out_dir):
     :rtype: str
     """
     technology = TECHNOLOGIES_MAPPING[technology.upper()]
-    archive_path = os.path.join(
-        PACKAGE_PATH, WHITELIST_DIR, technology.whitelist_archive
-    )
+    archive_path = technology.chemistry.whitelist_path
     whitelist_path = os.path.join(
         out_dir,
-        os.path.splitext(technology.whitelist_archive)[0]
+        os.path.splitext(os.path.basename(archive_path))[0]
     )
     with open_as_text(archive_path, 'r') as f, open(whitelist_path, 'w') as out:
         out.write(f.read())
@@ -380,70 +336,16 @@ def copy_map(technology, out_dir):
     :rtype: str
     """
     technology = TECHNOLOGIES_MAPPING[technology.upper()]
-    archive_path = os.path.join(PACKAGE_PATH, MAP_DIR, technology.map_archive)
+    archive_path = technology.chemistry.whitelist_path
     map_path = os.path.join(
         out_dir,
-        os.path.splitext(technology.map_archive)[0]
+        os.path.splitext(os.path.basename(archive_path))[0]
     )
     with open_as_text(archive_path, 'r') as f, open(map_path, 'w') as out:
         out.write(f.read())
     return map_path
 
 
-def concatenate_files(*paths, out_path, temp_dir='tmp'):
-    """Concatenates an arbitrary number of files into one TEXT file.
-
-    Only supports text and gzip files.
-
-    :param paths: an arbitrary number of paths to files
-    :type paths: str
-    :param out_path: path to place concatenated file
-    :type out_path: str
-    :param temp_dir: temporary directory, defaults to `tmp`
-    :type temp_dir: str, optional
-
-    :return: path to concatenated file
-    :rtype: str
-    """
-    with open(out_path, 'w') as out:
-        for path in paths:
-            with open_as_text(path, 'r') as f:
-                for line in f:
-                    if not line.isspace():
-                        out.write(line.strip() + '\n')
-
-    return out_path
-
-
-def download_file(url, path):
-    """Download a remote file to the provided path while displaying a progress bar.
-
-    :param url: remote url
-    :type url: str
-    :param path: local path to download the file to
-    :type path: str
-
-    :return: path to downloaded file
-    :rtype: str
-    """
-    logger.addHandler(TqdmLoggingHandler())
-    r = requests.get(url, stream=True)
-    with open(path, 'wb') as f:
-        t = tqdm(
-            unit='B',
-            total=int(r.headers['Content-Length']),
-            unit_divisor=1024,
-            unit_scale=True,
-            ascii=True,
-        )
-        for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
-            if chunk:
-                t.update(len(chunk))
-                f.write(chunk)
-        t.close()
-    return path
-
-
 @dryable(dry_utils.stream_file)
 def stream_file(url, path):
     """Creates a FIFO file to use for piping remote files into processes.
@@ -475,24 +377,6 @@ def stream_file(url, path):
     return path
 
 
-@dryable(dry_utils.get_temporary_filename)
-def get_temporary_filename(temp_dir=None):
-    """Create a temporary file in the provided temprorary directory.
-
-    The caller is responsible for deleting the file.
-
-    :param temp_dir: path to temporary directory, defaults to `None`
-    :type temp_dir: str, optional
-
-    :return: temporary filename
-    :rtype: str
-    """
-    fp, path = tempfile.mkstemp(dir=temp_dir)
-    os.close(fp)
-
-    return path
-
-
 def read_t2g(t2g_path):
     """Given a transcript-to-gene mapping path, read it into a dictionary.
     The first column is always assumed to tbe the transcript IDs.
diff --git a/kb_python/validate.py b/kb_python/validate.py
old mode 100644
new mode 100755
index 9416469..12efaa5
--- a/kb_python/validate.py
+++ b/kb_python/validate.py
@@ -1,15 +1,13 @@
 import functools
-import logging
 import os
 import re
 
 import scipy.io
 
 from .config import get_bustools_binary_path, is_dry, is_validate
+from .logging import logger
 from .utils import run_executable
 
-logger = logging.getLogger(__name__)
-
 BUSTOOLS_INSPECT_PARSER = re.compile(r'^.*?(?P<count>[0-9]+)')
 
 
diff --git a/kb_python/whitelists/10xv1_whitelist.txt.gz b/kb_python/whitelists/10xv1_whitelist.txt.gz
deleted file mode 100644
index 7e3a7c5..0000000
Binary files a/kb_python/whitelists/10xv1_whitelist.txt.gz and /dev/null differ
diff --git a/kb_python/whitelists/10xv2_whitelist.txt.gz b/kb_python/whitelists/10xv2_whitelist.txt.gz
deleted file mode 100644
index fbc9759..0000000
Binary files a/kb_python/whitelists/10xv2_whitelist.txt.gz and /dev/null differ
diff --git a/kb_python/whitelists/10xv3_whitelist.txt.gz b/kb_python/whitelists/10xv3_whitelist.txt.gz
deleted file mode 100644
index 840c3e6..0000000
Binary files a/kb_python/whitelists/10xv3_whitelist.txt.gz and /dev/null differ
diff --git a/kb_python/whitelists/inDropsv3_whitelist.txt.gz b/kb_python/whitelists/inDropsv3_whitelist.txt.gz
deleted file mode 100644
index 1f66162..0000000
Binary files a/kb_python/whitelists/inDropsv3_whitelist.txt.gz and /dev/null differ
diff --git a/requirements.txt b/requirements.txt
old mode 100644
new mode 100755
index f85a650..212f3fc
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,9 +4,10 @@ Jinja2>2.10.1
 loompy>=3.0.6
 nbconvert>=5.6.0
 nbformat>=4.4.0
+ngs-tools>=1.3.0
 numpy>=1.17.2
+pandas>=1.0.0
 plotly>=4.5.0
-requests>=2.19.0
 scanpy>=1.4.4.post1
 scikit-learn>=0.21.3
 tqdm>=4.39.0
diff --git a/tests/__init__.py b/tests/__init__.py
old mode 100644
new mode 100755
index e69de29..5c41a56
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1,2 @@
+from kb_python.logging import logger
+logger.setLevel(9999)
diff --git a/tests/dry/test_utils.py b/tests/dry/test_utils.py
old mode 100644
new mode 100755
index a675173..3f2c199
--- a/tests/dry/test_utils.py
+++ b/tests/dry/test_utils.py
@@ -69,7 +69,7 @@ def test_move_file_windows(self):
     def test_copy_whitelist(self):
         with mock.patch('kb_python.dry.utils.print') as p:
             self.assertEquals(
-                'path/10xv2_whitelist.txt',
+                'path/10x_version2_whitelist.txt',
                 utils.copy_whitelist('10xv2', 'path')
             )
             p.assert_called()
@@ -77,7 +77,7 @@ def test_copy_whitelist(self):
     def test_copy_map(self):
         with mock.patch('kb_python.dry.utils.print') as p:
             self.assertEqual(
-                'path/10xv3_feature_barcode_map.txt',
+                'path/10x_version3_feature_map.txt',
                 utils.copy_map('10xv3', 'path')
             )
             p.assert_called()
diff --git a/tests/test_fasta.py b/tests/test_fasta.py
deleted file mode 100755
index 7fe7a12..0000000
--- a/tests/test_fasta.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import os
-import uuid
-from unittest import mock, TestCase
-
-import kb_python.fasta as fasta
-from tests.mixins import TestMixin
-
-
-class TestFASTA(TestMixin, TestCase):
-
-    def test_make_header(self):
-        seq_id = 'TRANSCRIPT_ID'
-        attributes = [
-            ('gene_id', 'GENE_ID'),
-            ('gene_name', 'GENE_NAME'),
-        ]
-        self.assertEqual(
-            '>TRANSCRIPT_ID gene_id:GENE_ID gene_name:GENE_NAME',
-            fasta.FASTA.make_header(seq_id, attributes)
-        )
-
-    def test_parse_header(self):
-        header = '>transcript_id TEST:testing'
-        self.assertEqual({
-            'sequence_id': 'transcript_id',
-            'group': {
-                'TEST': 'testing'
-            }
-        }, fasta.FASTA.parse_header(header))
-
-    def test_reverse_complement(self):
-        sequence = 'ATCG'
-        self.assertEqual('CGAT', fasta.FASTA.reverse_complement(sequence))
-
-    def test_entries(self):
-        fa = fasta.FASTA(self.unsorted_fasta_path)
-        self.assertEqual(2, len(list(fa.entries())))
-
-    def test_sort(self):
-        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-        fa = fasta.FASTA(self.unsorted_fasta_path)
-        self.assertEqual((out_path, {'1', '2'}), fa.sort(out_path))
-
-        with open(out_path, 'r') as f, open(self.sorted_fasta_path,
-                                            'r') as sorted:
-            self.assertEqual(f.read(), sorted.read())
-
-    def test_generate_kite_fasta(self):
-        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-        self.assertEqual(
-            (out_path, 15),
-            fasta.generate_kite_fasta(self.kite_feature_path, out_path)
-        )
-        with open(out_path, 'r') as f, open(self.kite_fasta_path, 'r') as fa:
-            self.assertEqual(fa.read(), f.read())
-
-    def test_generate_kite_fasta_no_mismatches(self):
-        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-        self.assertEqual((out_path, 15),
-                         fasta.generate_kite_fasta(
-                             self.kite_feature_path,
-                             out_path,
-                             no_mismatches=True
-                         ))
-        with open(out_path, 'r') as f, open(self.kite_no_mismatches_fasta_path,
-                                            'r') as fa:
-            self.assertEqual(fa.read(), f.read())
-
-    def test_generate_kite_fasta_different_length(self):
-        with mock.patch('kb_python.fasta.logger.warning') as warning:
-            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-            self.assertEqual((out_path, 14),
-                             fasta.generate_kite_fasta(
-                                 self.kite_different_feature_path, out_path
-                             ))
-            warning.assert_called_once()
-            with open(out_path, 'r') as f, open(self.kite_different_fasta_path,
-                                                'r') as fa:
-                self.assertEqual(fa.read(), f.read())
-
-    def test_generate_kite_fasta_duplicate(self):
-        with self.assertRaises(Exception):
-            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-            fasta.generate_kite_fasta(
-                self.kite_duplicate_feature_path, out_path
-            )
-
-    def test_generate_kite_fasta_wrong_order(self):
-        with self.assertRaises(Exception):
-            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-            fasta.generate_kite_fasta(self.kite_order_feature_path, out_path)
-
-    def test_generate_kite_fasta_collision(self):
-        with mock.patch('kb_python.fasta.logger.warning') as warning:
-            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-            self.assertEqual((out_path, 15),
-                             fasta.generate_kite_fasta(
-                                 self.kite_collision_feature_path, out_path
-                             ))
-            warning.assert_called_once()
-            with open(out_path, 'r') as f, open(self.kite_collision_fasta_path,
-                                                'r') as fa:
-                self.assertEqual(fa.read(), f.read())
-
-    def test_generate_cdna_fasta(self):
-        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-        self.assertEqual(
-            out_path,
-            fasta.generate_cdna_fasta(
-                self.sorted_fasta_path, self.sorted_gtf_path, out_path
-            )
-        )
-        with open(out_path, 'r') as f, open(self.split_cdna_fasta_path,
-                                            'r') as split:
-            self.assertEqual(f.read(), split.read())
-
-    def test_generate_cdna_fasta_with_space(self):
-        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-        self.assertEqual(
-            out_path,
-            fasta.generate_cdna_fasta(
-                self.sorted_fasta_path, self.sorted_gtf_with_space_path,
-                out_path
-            )
-        )
-        with open(out_path,
-                  'r') as f, open(self.split_cdna_fasta_with_space_path,
-                                  'r') as split:
-            self.assertEqual(f.read(), split.read())
-
-    def test_generate_cdna_fasta_no_exon(self):
-        with self.assertRaises(Exception):
-            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-            fasta.generate_cdna_fasta(
-                self.sorted_fasta_path, self.gtf_no_exon, out_path
-            )
-
-    def test_generate_cdna_fasta_no_transcript(self):
-        with self.assertRaises(Exception):
-            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-            fasta.generate_cdna_fasta(
-                self.sorted_fasta_path, self.gtf_no_transcript, out_path
-            )
-
-    def test_generate_cdna_fasta_with_chromosomes(self):
-        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-        self.assertEqual(
-            out_path,
-            fasta.generate_cdna_fasta(
-                self.sorted_fasta_path,
-                self.sorted_gtf_path,
-                out_path,
-                chromosomes={'2'}
-            )
-        )
-        with open(out_path, 'r') as f, open(self.partial_cdna_fasta_path,
-                                            'r') as split:
-            self.assertEqual(f.read(), split.read())
-
-    def test_generate_intron_fasta(self):
-        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-        self.assertEqual(
-            out_path,
-            fasta.generate_intron_fasta(
-                self.sorted_fasta_path, self.sorted_gtf_path, out_path, flank=1
-            )
-        )
-        with open(out_path, 'r') as f, open(self.split_intron_fasta_path,
-                                            'r') as split:
-            self.assertEqual(f.read(), split.read())
-
-    def test_generate_intron_fasta_with_chromosomes(self):
-        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-        self.assertEqual(
-            out_path,
-            fasta.generate_intron_fasta(
-                self.sorted_fasta_path,
-                self.sorted_gtf_path,
-                out_path,
-                chromosomes={'1'},
-                flank=1
-            )
-        )
-        with open(out_path, 'r') as f, open(self.partial_intron_fasta_path,
-                                            'r') as split:
-            self.assertEqual(f.read(), split.read())
-
-    def test_generate_intron_fasta_no_exon(self):
-        with self.assertRaises(Exception):
-            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-            fasta.generate_intron_fasta(
-                self.sorted_fasta_path, self.gtf_no_exon, out_path
-            )
-
-    def test_generate_intron_fasta_no_transcript(self):
-        with self.assertRaises(Exception):
-            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-            fasta.generate_intron_fasta(
-                self.sorted_fasta_path, self.gtf_no_transcript, out_path
-            )
-
-    def test_generate_spliced_fasta(self):
-        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-        fasta.generate_spliced_fasta(
-            self.sorted_fasta_path, self.sorted_gtf_path, out_path
-        )
-
-    def test_generate_unspliced_fasta(self):
-        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
-        fasta.generate_unspliced_fasta(
-            self.sorted_fasta_path, self.sorted_gtf_path, out_path
-        )
-
-    def test_complement(self):
-        # check for each code and its complement,
-        # if the complement's complement is the code.
-        for key, value in fasta.FASTA.COMPLEMENT.items():
-            assert key == fasta.FASTA.COMPLEMENT[value]
diff --git a/tests/test_gtf.py b/tests/test_gtf.py
deleted file mode 100755
index ebf6246..0000000
--- a/tests/test_gtf.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import os
-import uuid
-from unittest import TestCase
-
-from kb_python.gtf import GTF
-from tests.mixins import TestMixin
-
-
-class TestGTF(TestMixin, TestCase):
-
-    def test_parser(self):
-        line = '2\thavana\tgene\t2\t3\t.\t+\t.\tgene_id "ENSMUSG00000102693"; gene_version "1"; gene_name "4933401J01Rik"; gene_source "havana"; gene_biotype "TEC";'  # noqa
-        match = GTF.PARSER.match(line)
-        self.assertIsNotNone(match)
-        self.assertEqual(
-            {
-                'seqname':
-                    '2',
-                'feature':
-                    'gene',
-                'start':
-                    '2',
-                'end':
-                    '3',
-                'strand':
-                    '+',
-                'group':
-                    'gene_id "ENSMUSG00000102693"; gene_version "1"; gene_name "4933401J01Rik"; gene_source "havana"; gene_biotype "TEC";'  # noqa
-            },
-            match.groupdict()
-        )
-
-    def test_group_parser(self):
-        group = 'gene_id "ENSMUSG00000102693";'
-        match = GTF.GROUP_PARSER.match(group)
-        self.assertIsNotNone(match)
-        self.assertEqual({
-            'key': 'gene_id',
-            'value': 'ENSMUSG00000102693'
-        }, match.groupdict())
-
-    def test_parse_entry(self):
-        line = '2\thavana\tgene\t2\t3\t.\t+\t.\tgene_id "ENSMUSG00000102693"; gene_version "1"; gene_name "4933401J01Rik"; gene_source "havana"; gene_biotype "TEC";'  # noqa
-        self.assertEqual({
-            'seqname': '2',
-            'feature': 'gene',
-            'start': 2,
-            'end': 3,
-            'strand': '+',
-            'group': {
-                'gene_id': 'ENSMUSG00000102693',
-                'gene_version': '1',
-                'gene_name': '4933401J01Rik',
-                'gene_source': 'havana',
-                'gene_biotype': 'TEC'
-            }
-        }, GTF.parse_entry(line))
-
-    def test_parse_entry_with_space(self):
-        line = '2\thavana\tgene\t2\t3\t.\t+\t.\tgene_id "ENSMUSG00000102693 [A]"; gene_version "1"; gene_name "4933401J01Rik"; gene_source "havana"; gene_biotype "TEC";'  # noqa
-        self.assertEqual({
-            'seqname': '2',
-            'feature': 'gene',
-            'start': 2,
-            'end': 3,
-            'strand': '+',
-            'group': {
-                'gene_id': 'ENSMUSG00000102693[A]',
-                'gene_version': '1',
-                'gene_name': '4933401J01Rik',
-                'gene_source': 'havana',
-                'gene_biotype': 'TEC'
-            }
-        }, GTF.parse_entry(line))
-
-    def test_entries(self):
-        gtf = GTF(self.unsorted_gtf_path)
-        self.assertEqual(8, len(list(gtf.entries())))
-
-    def test_sort(self):
-        out_path = os.path.join(self.temp_dir, '{}.gtf'.format(uuid.uuid4()))
-        gtf = GTF(self.unsorted_gtf_path)
-        self.assertEqual((out_path, {'1', '2'}), gtf.sort(out_path))
-
-        with open(out_path, 'r') as f, open(self.sorted_gtf_path,
-                                            'r') as sorted:
-            self.assertEqual(f.read(), sorted.read())
diff --git a/tests/test_ref.py b/tests/test_ref.py
index eb70350..cf23932 100755
--- a/tests/test_ref.py
+++ b/tests/test_ref.py
@@ -11,63 +11,60 @@
 
 class TestRef(TestMixin, TestCase):
 
-    def test_sort_gtf(self):
-        with mock.patch('kb_python.ref.GTF') as GTF:
-            out_path = 'test'
-            self.assertEqual(
-                GTF().sort.return_value,
-                ref.sort_gtf(self.unsorted_gtf_path, out_path)
-            )
-            GTF().sort.assert_called_once_with(out_path)
-
-    def test_sort_fasta(self):
-        with mock.patch('kb_python.ref.FASTA') as FASTA:
-            out_path = 'test'
-            self.assertEqual(
-                FASTA().sort.return_value,
-                ref.sort_fasta(self.unsorted_fasta_path, out_path)
-            )
-            FASTA().sort.assert_called_once_with(out_path)
+    def test_generate_kite_fasta(self):
+        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
+        self.assertEqual(
+            (out_path, 15),
+            ref.generate_kite_fasta(self.kite_feature_path, out_path)
+        )
+        with open(out_path, 'r') as f, open(self.kite_fasta_path, 'r') as fa:
+            self.assertEqual(fa.read(), f.read())
 
-    def test_check_chromosomes(self):
-        with mock.patch('kb_python.ref.logger.warning') as warning:
-            fasta_chromosomes = gtf_chromosomes = chromosomes = {'1', '2'}
-            self.assertEqual(
-                chromosomes,
-                ref.check_chromosomes(fasta_chromosomes, gtf_chromosomes)
-            )
-            warning.assert_not_called()
+    def test_generate_kite_fasta_no_mismatches(self):
+        out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
+        self.assertEqual((out_path, 15),
+                         ref.generate_kite_fasta(
+                             self.kite_feature_path,
+                             out_path,
+                             no_mismatches=True
+                         ))
+        with open(out_path, 'r') as f, open(self.kite_no_mismatches_fasta_path,
+                                            'r') as fa:
+            self.assertEqual(fa.read(), f.read())
 
-    def test_check_chromosomes_fasta_unique(self):
+    def test_generate_kite_fasta_different_length(self):
         with mock.patch('kb_python.ref.logger.warning') as warning:
-            fasta_chromosomes = {'1', '2'}
-            gtf_chromosomes = chromosomes = {'2'}
-            self.assertEqual(
-                chromosomes,
-                ref.check_chromosomes(fasta_chromosomes, gtf_chromosomes)
-            )
+            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
+            self.assertEqual((out_path, 14),
+                             ref.generate_kite_fasta(
+                                 self.kite_different_feature_path, out_path
+                             ))
             warning.assert_called_once()
+            with open(out_path, 'r') as f, open(self.kite_different_fasta_path,
+                                                'r') as fa:
+                self.assertEqual(fa.read(), f.read())
 
-    def test_check_chromosomes_gtf_unique(self):
-        with mock.patch('kb_python.ref.logger.warning') as warning:
-            fasta_chromosomes = chromosomes = {'2'}
-            gtf_chromosomes = {'1', '2'}
-            self.assertEqual(
-                chromosomes,
-                ref.check_chromosomes(fasta_chromosomes, gtf_chromosomes)
-            )
-            warning.assert_called_once()
+    def test_generate_kite_fasta_duplicate(self):
+        with self.assertRaises(Exception):
+            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
+            ref.generate_kite_fasta(self.kite_duplicate_feature_path, out_path)
 
-    def test_check_chromosomes_both_unique(self):
+    def test_generate_kite_fasta_wrong_order(self):
+        with self.assertRaises(Exception):
+            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
+            ref.generate_kite_fasta(self.kite_order_feature_path, out_path)
+
+    def test_generate_kite_fasta_collision(self):
         with mock.patch('kb_python.ref.logger.warning') as warning:
-            fasta_chromosomes = {'1'}
-            gtf_chromosomes = {'2'}
-            chromosomes = set()
-            self.assertEqual(
-                chromosomes,
-                ref.check_chromosomes(fasta_chromosomes, gtf_chromosomes)
-            )
-            self.assertEqual(2, warning.call_count)
+            out_path = os.path.join(self.temp_dir, '{}.fa'.format(uuid.uuid4()))
+            self.assertEqual((out_path, 15),
+                             ref.generate_kite_fasta(
+                                 self.kite_collision_feature_path, out_path
+                             ))
+            warning.assert_called_once()
+            with open(out_path, 'r') as f, open(self.kite_collision_fasta_path,
+                                                'r') as fa:
+                self.assertEqual(fa.read(), f.read())
 
     def test_kallisto_index(self):
         index_path = os.path.join(self.temp_dir, '{}.idx'.format(uuid.uuid4()))
@@ -124,30 +121,6 @@ def test_create_t2g_from_fasta_kite(self):
                                                  'r') as t2g:
             self.assertEqual(f.read(), t2g.read())
 
-    def test_create_t2g_from_gtf(self):
-        t2g_path = os.path.join(self.temp_dir, '{}.txt'.format(uuid.uuid4()))
-        result = ref.create_t2g_from_gtf(self.unsorted_gtf_path, t2g_path)
-        with open(result['t2g'], 'r') as f, open(self.gtf_t2g_path, 'r') as t2g:
-            self.assertEqual(f.read(), t2g.read())
-
-    def test_create_t2g_from_gtf_with_space(self):
-        t2g_path = os.path.join(self.temp_dir, '{}.txt'.format(uuid.uuid4()))
-        result = ref.create_t2g_from_gtf(
-            self.unsorted_gtf_with_space_path, t2g_path
-        )
-        with open(result['t2g'], 'r') as f, open(self.gtf_t2g_with_space_path,
-                                                 'r') as t2g:
-            self.assertEqual(f.read(), t2g.read())
-
-    def test_create_t2g_from_gtf_with_intron(self):
-        t2g_path = os.path.join(self.temp_dir, '{}.txt'.format(uuid.uuid4()))
-        result = ref.create_t2g_from_gtf(
-            self.unsorted_gtf_path, t2g_path, intron=True
-        )
-        with open(result['t2g'], 'r') as f, open(self.gtf_t2g_intron_path,
-                                                 'r') as t2g:
-            self.assertEqual(f.read(), t2g.read())
-
     def test_create_t2c(self):
         t2c_path = os.path.join(self.temp_dir, '{}.txt'.format(uuid.uuid4()))
         result = ref.create_t2c(self.unsorted_fasta_path, t2c_path)
@@ -277,36 +250,32 @@ def test_decompress_file_gzip(self):
 
     def test_ref(self):
         with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
-            mock.patch('kb_python.ref.decompress_file') as decompress_file,\
-            mock.patch('kb_python.ref.create_t2g_from_gtf') as create_t2g_from_gtf,\
-            mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\
-            mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\
-            mock.patch('kb_python.ref.check_chromosomes') as check_chromosomes,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=False),\
             mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
-            mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\
             mock.patch('kb_python.ref.split_and_index') as split_and_index,\
             mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
-            mock.patch('kb_python.ref.glob.glob') as glob:
-            glob.return_value = []
+            mock.patch('kb_python.ref.glob.glob', return_value=[]):
             temp_dir = self.temp_dir
             index_path = mock.MagicMock()
             t2g_path = mock.MagicMock()
-            sorted_fasta_path = mock.MagicMock()
-            sorted_gtf_path = mock.MagicMock()
             cdna_fasta_path = mock.MagicMock()
-            chromosomes = {'1', '2'}
-            get_temporary_filename.side_effect = ['t2g', 'fasta', 'gtf', 'cdna']
-            decompress_file.side_effect = [self.gtf_path, self.fasta_path]
-            sort_fasta.return_value = sorted_fasta_path, chromosomes
-            sort_gtf.return_value = sorted_gtf_path, chromosomes
-            check_chromosomes.return_value = chromosomes
-            generate_cdna_fasta.return_value = 'cdna'
-            concatenate_files.side_effect = [t2g_path, cdna_fasta_path]
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
+            get_temporary_filename.return_value = 'cdna'
+            split_genomic_fasta_to_cdna.return_value = cdna_fasta_path
+            concatenate_files.return_value = cdna_fasta_path
             kallisto_index.return_value = {'index': index_path}
-            create_t2g_from_gtf.return_value = {'t2g': 't2g'}
+            create_t2g_from_fasta.return_value = {'t2g': t2g_path}
             self.assertEqual({
                 't2g': t2g_path,
                 'index': index_path,
+                'cdna_fasta': cdna_fasta_path,
             },
                              ref.ref(
                                  self.fasta_path,
@@ -316,26 +285,21 @@ def test_ref(self):
                                  t2g_path,
                                  temp_dir=temp_dir
                              ))
-            self.assertEqual(2, decompress_file.call_count)
-            decompress_file.assert_has_calls([
-                call(self.gtf_path, temp_dir=temp_dir),
-                call(self.fasta_path, temp_dir=temp_dir)
-            ])
-            create_t2g_from_gtf.assert_called_once_with(self.gtf_path, 't2g')
-            sort_fasta.assert_called_once_with(self.fasta_path, 'fasta')
-            sort_gtf.assert_called_once_with(self.gtf_path, 'gtf')
-            check_chromosomes.assert_called_once_with(chromosomes, chromosomes)
-            generate_cdna_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
+            genes_and_transcripts_from_gtf.assert_called_once_with(
+                self.gtf_path, use_version=True
+            )
+            create_t2g_from_fasta.assert_called_once_with(
+                cdna_fasta_path, t2g_path
+            )
+            split_genomic_fasta_to_cdna.assert_called_once_with(
+                self.fasta_path,
                 'cdna',
-                chromosomes=chromosomes
+                gene_infos,
+                transcript_infos,
+            )
+            concatenate_files.assert_called_once_with(
+                cdna_fasta_path, out_path=cdna_fasta_path, temp_dir=temp_dir
             )
-            self.assertEqual(2, concatenate_files.call_count)
-            concatenate_files.assert_has_calls([
-                call('t2g', out_path=t2g_path, temp_dir=temp_dir),
-                call('cdna', out_path=cdna_fasta_path, temp_dir=temp_dir)
-            ])
             kallisto_index.assert_called_once_with(
                 cdna_fasta_path, index_path, k=31
             )
@@ -343,36 +307,32 @@ def test_ref(self):
 
     def test_ref_split(self):
         with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
-            mock.patch('kb_python.ref.decompress_file') as decompress_file,\
-            mock.patch('kb_python.ref.create_t2g_from_gtf') as create_t2g_from_gtf,\
-            mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\
-            mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\
-            mock.patch('kb_python.ref.check_chromosomes') as check_chromosomes,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=False),\
             mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
-            mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\
             mock.patch('kb_python.ref.split_and_index') as split_and_index,\
             mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
-            mock.patch('kb_python.ref.glob.glob') as glob:
-            glob.return_value = []
+            mock.patch('kb_python.ref.glob.glob', return_value=[]):
             temp_dir = self.temp_dir
             index_path = mock.MagicMock()
             t2g_path = mock.MagicMock()
-            sorted_fasta_path = mock.MagicMock()
-            sorted_gtf_path = mock.MagicMock()
             cdna_fasta_path = mock.MagicMock()
-            chromosomes = {'1', '2'}
-            get_temporary_filename.side_effect = ['t2g', 'fasta', 'gtf', 'cdna']
-            decompress_file.side_effect = [self.gtf_path, self.fasta_path]
-            sort_fasta.return_value = sorted_fasta_path, chromosomes
-            sort_gtf.return_value = sorted_gtf_path, chromosomes
-            check_chromosomes.return_value = chromosomes
-            generate_cdna_fasta.return_value = 'cdna'
-            concatenate_files.side_effect = [t2g_path, cdna_fasta_path]
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
+            get_temporary_filename.return_value = 'cdna'
+            split_genomic_fasta_to_cdna.return_value = cdna_fasta_path
+            concatenate_files.return_value = cdna_fasta_path
             split_and_index.return_value = {'indices': ['index.0', 'index.1']}
-            create_t2g_from_gtf.return_value = {'t2g': 't2g'}
+            create_t2g_from_fasta.return_value = {'t2g': t2g_path}
             self.assertEqual({
                 't2g': t2g_path,
                 'indices': ['index.0', 'index.1'],
+                'cdna_fasta': cdna_fasta_path,
             },
                              ref.ref(
                                  self.fasta_path,
@@ -383,63 +343,55 @@ def test_ref_split(self):
                                  n=2,
                                  temp_dir=temp_dir
                              ))
-            self.assertEqual(2, decompress_file.call_count)
-            decompress_file.assert_has_calls([
-                call(self.gtf_path, temp_dir=temp_dir),
-                call(self.fasta_path, temp_dir=temp_dir)
-            ])
-            create_t2g_from_gtf.assert_called_once_with(self.gtf_path, 't2g')
-            sort_fasta.assert_called_once_with(self.fasta_path, 'fasta')
-            sort_gtf.assert_called_once_with(self.gtf_path, 'gtf')
-            check_chromosomes.assert_called_once_with(chromosomes, chromosomes)
-            generate_cdna_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
+            genes_and_transcripts_from_gtf.assert_called_once_with(
+                self.gtf_path, use_version=True
+            )
+            create_t2g_from_fasta.assert_called_once_with(
+                cdna_fasta_path, t2g_path
+            )
+            split_genomic_fasta_to_cdna.assert_called_once_with(
+                self.fasta_path,
                 'cdna',
-                chromosomes=chromosomes
+                gene_infos,
+                transcript_infos,
             )
-            self.assertEqual(2, concatenate_files.call_count)
-            concatenate_files.assert_has_calls([
-                call('t2g', out_path=t2g_path, temp_dir=temp_dir),
-                call('cdna', out_path=cdna_fasta_path, temp_dir=temp_dir)
-            ])
+            concatenate_files.assert_called_once_with(
+                cdna_fasta_path, out_path=cdna_fasta_path, temp_dir=temp_dir
+            )
+            kallisto_index.assert_not_called()
             split_and_index.assert_called_once_with(
                 cdna_fasta_path, index_path, k=31, n=2, temp_dir=temp_dir
             )
-            kallisto_index.assert_not_called()
 
     def test_ref_override_k(self):
         with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
-            mock.patch('kb_python.ref.decompress_file') as decompress_file,\
-            mock.patch('kb_python.ref.create_t2g_from_gtf') as create_t2g_from_gtf,\
-            mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\
-            mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\
-            mock.patch('kb_python.ref.check_chromosomes') as check_chromosomes,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=False),\
             mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
-            mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\
+            mock.patch('kb_python.ref.split_and_index') as split_and_index,\
             mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
-            mock.patch('kb_python.ref.glob.glob') as glob:
-            glob.return_value = []
-            k = 999
+            mock.patch('kb_python.ref.glob.glob', return_value=[]):
             temp_dir = self.temp_dir
+            k = 999
             index_path = mock.MagicMock()
             t2g_path = mock.MagicMock()
-            sorted_fasta_path = mock.MagicMock()
-            sorted_gtf_path = mock.MagicMock()
             cdna_fasta_path = mock.MagicMock()
-            chromosomes = {'1', '2'}
-            get_temporary_filename.side_effect = ['t2g', 'fasta', 'gtf', 'cdna']
-            decompress_file.side_effect = [self.gtf_path, self.fasta_path]
-            sort_fasta.return_value = sorted_fasta_path, chromosomes
-            sort_gtf.return_value = sorted_gtf_path, chromosomes
-            check_chromosomes.return_value = chromosomes
-            generate_cdna_fasta.return_value = 'cdna'
-            concatenate_files.side_effect = [t2g_path, cdna_fasta_path]
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
+            get_temporary_filename.return_value = 'cdna'
+            split_genomic_fasta_to_cdna.return_value = cdna_fasta_path
+            concatenate_files.return_value = cdna_fasta_path
             kallisto_index.return_value = {'index': index_path}
-            create_t2g_from_gtf.return_value = {'t2g': 't2g'}
+            create_t2g_from_fasta.return_value = {'t2g': t2g_path}
             self.assertEqual({
                 't2g': t2g_path,
                 'index': index_path,
+                'cdna_fasta': cdna_fasta_path,
             },
                              ref.ref(
                                  self.fasta_path,
@@ -450,52 +402,54 @@ def test_ref_override_k(self):
                                  k=k,
                                  temp_dir=temp_dir
                              ))
-            self.assertEqual(2, decompress_file.call_count)
-            decompress_file.assert_has_calls([
-                call(self.gtf_path, temp_dir=temp_dir),
-                call(self.fasta_path, temp_dir=temp_dir)
-            ])
-            create_t2g_from_gtf.assert_called_once_with(self.gtf_path, 't2g')
-            sort_fasta.assert_called_once_with(self.fasta_path, 'fasta')
-            sort_gtf.assert_called_once_with(self.gtf_path, 'gtf')
-            check_chromosomes.assert_called_once_with(chromosomes, chromosomes)
-            generate_cdna_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
+            genes_and_transcripts_from_gtf.assert_called_once_with(
+                self.gtf_path, use_version=True
+            )
+            create_t2g_from_fasta.assert_called_once_with(
+                cdna_fasta_path, t2g_path
+            )
+            split_genomic_fasta_to_cdna.assert_called_once_with(
+                self.fasta_path,
                 'cdna',
-                chromosomes=chromosomes
+                gene_infos,
+                transcript_infos,
+            )
+            concatenate_files.assert_called_once_with(
+                cdna_fasta_path, out_path=cdna_fasta_path, temp_dir=temp_dir
             )
-            self.assertEqual(2, concatenate_files.call_count)
-            concatenate_files.assert_has_calls([
-                call('t2g', out_path=t2g_path, temp_dir=temp_dir),
-                call('cdna', out_path=cdna_fasta_path, temp_dir=temp_dir)
-            ])
             kallisto_index.assert_called_once_with(
                 cdna_fasta_path, index_path, k=k
             )
+            split_and_index.assert_not_called()
 
     def test_ref_exists(self):
         with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
-            mock.patch('kb_python.ref.decompress_file') as decompress_file,\
-            mock.patch('kb_python.ref.create_t2g_from_gtf') as create_t2g_from_gtf,\
-            mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\
-            mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\
-            mock.patch('kb_python.ref.check_chromosomes') as check_chromosomes,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=True),\
             mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
-            mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\
+            mock.patch('kb_python.ref.split_and_index') as split_and_index,\
             mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
-            mock.patch('kb_python.ref.glob.glob') as glob:
-            glob.return_value = ['index']
-            cdna_fasta_path = mock.MagicMock()
+            mock.patch('kb_python.ref.glob.glob', return_value=[]):
+            temp_dir = self.temp_dir
             index_path = mock.MagicMock()
             t2g_path = mock.MagicMock()
-            temp_dir = mock.MagicMock()
-            get_temporary_filename.side_effect = ['t2g', 'fasta', 'gtf', 'cdna']
-            decompress_file.return_value = self.gtf_path
+            cdna_fasta_path = mock.MagicMock()
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
+            get_temporary_filename.return_value = 'cdna'
+            split_genomic_fasta_to_cdna.return_value = cdna_fasta_path
+            concatenate_files.return_value = cdna_fasta_path
             kallisto_index.return_value = {'index': index_path}
-            create_t2g_from_gtf.return_value = {'t2g': 't2g'}
-            concatenate_files.return_value = t2g_path
-            self.assertEqual({'t2g': t2g_path},
+            create_t2g_from_fasta.return_value = {'t2g': t2g_path}
+            self.assertEqual({
+                'index': index_path,
+                't2g': t2g_path
+            },
                              ref.ref(
                                  self.fasta_path,
                                  self.gtf_path,
@@ -504,50 +458,85 @@ def test_ref_exists(self):
                                  t2g_path,
                                  temp_dir=temp_dir
                              ))
-            decompress_file.assert_called_once_with(
-                self.gtf_path, temp_dir=temp_dir
+            genes_and_transcripts_from_gtf.assert_not_called()
+            split_genomic_fasta_to_cdna.assert_not_called()
+            concatenate_files.assert_not_called()
+            create_t2g_from_fasta.assert_called_once_with(
+                cdna_fasta_path, t2g_path
             )
-            create_t2g_from_gtf.assert_called_once_with(self.gtf_path, 't2g')
-            concatenate_files.assert_called_once_with(
-                't2g', out_path=t2g_path, temp_dir=temp_dir
+            kallisto_index.assert_called_once_with(
+                cdna_fasta_path, index_path, k=31
             )
-            sort_fasta.assert_not_called()
-            sort_gtf.assert_not_called()
-            check_chromosomes.assert_not_called()
-            generate_cdna_fasta.assert_not_called()
+            split_and_index.assert_not_called()
+
+    def test_ref_exists2(self):
+        with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=True),\
+            mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
+            mock.patch('kb_python.ref.split_and_index') as split_and_index,\
+            mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
+            mock.patch('kb_python.ref.glob.glob', return_value=['a']):
+            temp_dir = self.temp_dir
+            index_path = mock.MagicMock()
+            t2g_path = mock.MagicMock()
+            cdna_fasta_path = mock.MagicMock()
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
+            get_temporary_filename.return_value = 'cdna'
+            split_genomic_fasta_to_cdna.return_value = cdna_fasta_path
+            concatenate_files.return_value = cdna_fasta_path
+            kallisto_index.return_value = {'index': index_path}
+            create_t2g_from_fasta.return_value = {'t2g': t2g_path}
+            self.assertEqual({},
+                             ref.ref(
+                                 self.fasta_path,
+                                 self.gtf_path,
+                                 cdna_fasta_path,
+                                 index_path,
+                                 t2g_path,
+                                 temp_dir=temp_dir
+                             ))
+            genes_and_transcripts_from_gtf.assert_not_called()
+            create_t2g_from_fasta.assert_not_called()
+            split_genomic_fasta_to_cdna.assert_not_called()
+            concatenate_files.assert_not_called()
             kallisto_index.assert_not_called()
+            split_and_index.assert_not_called()
 
     def test_ref_overwrite(self):
         with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
-            mock.patch('kb_python.ref.decompress_file') as decompress_file,\
-            mock.patch('kb_python.ref.create_t2g_from_gtf') as create_t2g_from_gtf,\
-            mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\
-            mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\
-            mock.patch('kb_python.ref.check_chromosomes') as check_chromosomes,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=True),\
             mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
-            mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\
+            mock.patch('kb_python.ref.split_and_index') as split_and_index,\
             mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
-            mock.patch('kb_python.ref.glob.glob') as glob:
-            glob.return_value = ['index']
+            mock.patch('kb_python.ref.glob.glob', return_value=['a']):
             temp_dir = self.temp_dir
             index_path = mock.MagicMock()
             t2g_path = mock.MagicMock()
-            sorted_fasta_path = mock.MagicMock()
-            sorted_gtf_path = mock.MagicMock()
             cdna_fasta_path = mock.MagicMock()
-            chromosomes = {'1', '2'}
-            get_temporary_filename.side_effect = ['t2g', 'fasta', 'gtf', 'cdna']
-            decompress_file.side_effect = [self.gtf_path, self.fasta_path]
-            sort_fasta.return_value = sorted_fasta_path, chromosomes
-            sort_gtf.return_value = sorted_gtf_path, chromosomes
-            check_chromosomes.return_value = chromosomes
-            generate_cdna_fasta.return_value = 'cdna'
-            concatenate_files.side_effect = [t2g_path, cdna_fasta_path]
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
+            get_temporary_filename.return_value = 'cdna'
+            split_genomic_fasta_to_cdna.return_value = cdna_fasta_path
+            concatenate_files.return_value = cdna_fasta_path
             kallisto_index.return_value = {'index': index_path}
-            create_t2g_from_gtf.return_value = {'t2g': 't2g'}
+            create_t2g_from_fasta.return_value = {'t2g': t2g_path}
             self.assertEqual({
                 't2g': t2g_path,
                 'index': index_path,
+                'cdna_fasta': cdna_fasta_path,
             },
                              ref.ref(
                                  self.fasta_path,
@@ -558,29 +547,25 @@ def test_ref_overwrite(self):
                                  temp_dir=temp_dir,
                                  overwrite=True
                              ))
-            self.assertEqual(2, decompress_file.call_count)
-            decompress_file.assert_has_calls([
-                call(self.gtf_path, temp_dir=temp_dir),
-                call(self.fasta_path, temp_dir=temp_dir)
-            ])
-            create_t2g_from_gtf.assert_called_once_with(self.gtf_path, 't2g')
-            sort_fasta.assert_called_once_with(self.fasta_path, 'fasta')
-            sort_gtf.assert_called_once_with(self.gtf_path, 'gtf')
-            check_chromosomes.assert_called_once_with(chromosomes, chromosomes)
-            generate_cdna_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
+            genes_and_transcripts_from_gtf.assert_called_once_with(
+                self.gtf_path, use_version=True
+            )
+            create_t2g_from_fasta.assert_called_once_with(
+                cdna_fasta_path, t2g_path
+            )
+            split_genomic_fasta_to_cdna.assert_called_once_with(
+                self.fasta_path,
                 'cdna',
-                chromosomes=chromosomes
+                gene_infos,
+                transcript_infos,
+            )
+            concatenate_files.assert_called_once_with(
+                cdna_fasta_path, out_path=cdna_fasta_path, temp_dir=temp_dir
             )
-            self.assertEqual(2, concatenate_files.call_count)
-            concatenate_files.assert_has_calls([
-                call('t2g', out_path=t2g_path, temp_dir=temp_dir),
-                call('cdna', out_path=cdna_fasta_path, temp_dir=temp_dir)
-            ])
             kallisto_index.assert_called_once_with(
                 cdna_fasta_path, index_path, k=31
             )
+            split_and_index.assert_not_called()
 
     def test_ref_kite_odd(self):
         with mock.patch('kb_python.ref.decompress_file') as decompress_file,\
@@ -820,40 +805,34 @@ def test_ref_kite_overwrite(self):
 
     def test_ref_lamanno(self):
         with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
-            mock.patch('kb_python.ref.decompress_file') as decompress_file,\
             mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
-            mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\
-            mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\
-            mock.patch('kb_python.ref.check_chromosomes') as check_chromosomes,\
-            mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\
-            mock.patch('kb_python.ref.generate_intron_fasta') as generate_intron_fasta,\
             mock.patch('kb_python.ref.create_t2c') as create_t2c,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_intron') as split_genomic_fasta_to_intron,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=False),\
             mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
             mock.patch('kb_python.ref.split_and_index') as split_and_index,\
             mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
-            mock.patch('kb_python.ref.glob.glob') as glob:
-            glob.return_value = []
+            mock.patch('kb_python.ref.glob.glob', return_value=[]):
             temp_dir = self.temp_dir
             index_path = mock.MagicMock()
             t2g_path = mock.MagicMock()
-            sorted_fasta_path = mock.MagicMock()
-            sorted_gtf_path = mock.MagicMock()
             cdna_fasta_path = mock.MagicMock()
             intron_fasta_path = mock.MagicMock()
             cdna_t2c_path = mock.MagicMock()
             intron_t2c_path = mock.MagicMock()
             combined_path = mock.MagicMock()
-            chromosomes = {'1', '2'}
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
             get_temporary_filename.side_effect = [
-                'fasta', 'gtf', 'cdna', 'cdna_t2c', 'intron', 'intron_t2c',
-                'combined'
+                'cdna', 'cdna_t2c', 'intron', 'intron_t2c', 'combined'
             ]
-            decompress_file.side_effect = [self.fasta_path, self.gtf_path]
-            sort_fasta.return_value = sorted_fasta_path, chromosomes
-            sort_gtf.return_value = sorted_gtf_path, chromosomes
-            check_chromosomes.return_value = chromosomes
-            generate_cdna_fasta.return_value = 'cdna'
-            generate_intron_fasta.return_value = 'intron'
+            split_genomic_fasta_to_cdna.return_value = 'cdna'
+            split_genomic_fasta_to_intron.return_value = 'intron'
             kallisto_index.return_value = {'index': index_path}
             create_t2g_from_fasta.return_value = {'t2g': t2g_path}
             create_t2c.side_effect = [{
@@ -884,28 +863,20 @@ def test_ref_lamanno(self):
                                  intron_t2c_path,
                                  temp_dir=temp_dir
                              ))
-            self.assertEqual(2, decompress_file.call_count)
-            decompress_file.assert_has_calls([
-                call(self.fasta_path, temp_dir=temp_dir),
-                call(self.gtf_path, temp_dir=temp_dir),
-            ])
+            genes_and_transcripts_from_gtf.assert_called_once_with(
+                self.gtf_path, use_version=True
+            )
             create_t2g_from_fasta.assert_called_once_with(
                 combined_path, t2g_path
             )
-            sort_fasta.assert_called_once_with(self.fasta_path, 'fasta')
-            sort_gtf.assert_called_once_with(self.gtf_path, 'gtf')
-            check_chromosomes.assert_called_once_with(chromosomes, chromosomes)
-            generate_cdna_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
-                'cdna',
-                chromosomes=chromosomes
+            split_genomic_fasta_to_cdna.assert_called_once_with(
+                self.fasta_path, 'cdna', gene_infos, transcript_infos
             )
-            generate_intron_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
+            split_genomic_fasta_to_intron.assert_called_once_with(
+                self.fasta_path,
                 'intron',
-                chromosomes=chromosomes,
+                gene_infos,
+                transcript_infos,
                 flank=30
             )
             self.assertEqual(2, create_t2c.call_count)
@@ -933,39 +904,34 @@ def test_ref_lamanno(self):
 
     def test_ref_lamanno_split_2(self):
         with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
-            mock.patch('kb_python.ref.decompress_file') as decompress_file,\
             mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
-            mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\
-            mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\
-            mock.patch('kb_python.ref.check_chromosomes') as check_chromosomes,\
-            mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\
-            mock.patch('kb_python.ref.generate_intron_fasta') as generate_intron_fasta,\
             mock.patch('kb_python.ref.create_t2c') as create_t2c,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_intron') as split_genomic_fasta_to_intron,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=False),\
             mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
+            mock.patch('kb_python.ref.split_and_index') as split_and_index,\
             mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
-            mock.patch('kb_python.ref.glob.glob') as glob:
-            glob.return_value = []
+            mock.patch('kb_python.ref.glob.glob', return_value=[]):
             temp_dir = self.temp_dir
             index_path = 'index'
             t2g_path = mock.MagicMock()
-            sorted_fasta_path = mock.MagicMock()
-            sorted_gtf_path = mock.MagicMock()
             cdna_fasta_path = mock.MagicMock()
             intron_fasta_path = mock.MagicMock()
             cdna_t2c_path = mock.MagicMock()
             intron_t2c_path = mock.MagicMock()
             combined_path = mock.MagicMock()
-            chromosomes = {'1', '2'}
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
             get_temporary_filename.side_effect = [
-                'fasta', 'gtf', 'cdna', 'cdna_t2c', 'intron', 'intron_t2c',
-                'combined'
+                'cdna', 'cdna_t2c', 'intron', 'intron_t2c', 'combined'
             ]
-            decompress_file.side_effect = [self.fasta_path, self.gtf_path]
-            sort_fasta.return_value = sorted_fasta_path, chromosomes
-            sort_gtf.return_value = sorted_gtf_path, chromosomes
-            check_chromosomes.return_value = chromosomes
-            generate_cdna_fasta.return_value = 'cdna'
-            generate_intron_fasta.return_value = 'intron'
+            split_genomic_fasta_to_cdna.return_value = 'cdna'
+            split_genomic_fasta_to_intron.return_value = 'intron'
             kallisto_index.side_effect = [{
                 'index': 'index_cdna'
             }, {
@@ -998,31 +964,23 @@ def test_ref_lamanno_split_2(self):
                                  t2g_path,
                                  cdna_t2c_path,
                                  intron_t2c_path,
-                                 n=2,
-                                 temp_dir=temp_dir
+                                 temp_dir=temp_dir,
+                                 n=2
                              ))
-            self.assertEqual(2, decompress_file.call_count)
-            decompress_file.assert_has_calls([
-                call(self.fasta_path, temp_dir=temp_dir),
-                call(self.gtf_path, temp_dir=temp_dir),
-            ])
+            genes_and_transcripts_from_gtf.assert_called_once_with(
+                self.gtf_path, use_version=True
+            )
             create_t2g_from_fasta.assert_called_once_with(
                 combined_path, t2g_path
             )
-            sort_fasta.assert_called_once_with(self.fasta_path, 'fasta')
-            sort_gtf.assert_called_once_with(self.gtf_path, 'gtf')
-            check_chromosomes.assert_called_once_with(chromosomes, chromosomes)
-            generate_cdna_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
-                'cdna',
-                chromosomes=chromosomes
+            split_genomic_fasta_to_cdna.assert_called_once_with(
+                self.fasta_path, 'cdna', gene_infos, transcript_infos
             )
-            generate_intron_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
+            split_genomic_fasta_to_intron.assert_called_once_with(
+                self.fasta_path,
                 'intron',
-                chromosomes=chromosomes,
+                gene_infos,
+                transcript_infos,
                 flank=30
             )
             self.assertEqual(2, create_t2c.call_count)
@@ -1046,45 +1004,40 @@ def test_ref_lamanno_split_2(self):
             self.assertEqual(2, kallisto_index.call_count)
             kallisto_index.assert_has_calls([
                 call(cdna_fasta_path, 'index_cdna', k=31),
-                call(intron_fasta_path, 'index_intron', k=31),
+                call(intron_fasta_path, 'index_intron', k=31)
             ])
+            split_and_index.assert_not_called()
 
     def test_ref_lamanno_split_3(self):
         with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
-            mock.patch('kb_python.ref.decompress_file') as decompress_file,\
             mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
-            mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\
-            mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\
-            mock.patch('kb_python.ref.check_chromosomes') as check_chromosomes,\
-            mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\
-            mock.patch('kb_python.ref.generate_intron_fasta') as generate_intron_fasta,\
             mock.patch('kb_python.ref.create_t2c') as create_t2c,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_intron') as split_genomic_fasta_to_intron,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=False),\
             mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
-            mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
             mock.patch('kb_python.ref.split_and_index') as split_and_index,\
-            mock.patch('kb_python.ref.glob.glob') as glob:
-            glob.return_value = []
+            mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
+            mock.patch('kb_python.ref.glob.glob', return_value=[]):
             temp_dir = self.temp_dir
             index_path = 'index'
             t2g_path = mock.MagicMock()
-            sorted_fasta_path = mock.MagicMock()
-            sorted_gtf_path = mock.MagicMock()
             cdna_fasta_path = mock.MagicMock()
             intron_fasta_path = mock.MagicMock()
             cdna_t2c_path = mock.MagicMock()
             intron_t2c_path = mock.MagicMock()
             combined_path = mock.MagicMock()
-            chromosomes = {'1', '2'}
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
             get_temporary_filename.side_effect = [
-                'fasta', 'gtf', 'cdna', 'cdna_t2c', 'intron', 'intron_t2c',
-                'combined'
+                'cdna', 'cdna_t2c', 'intron', 'intron_t2c', 'combined'
             ]
-            decompress_file.side_effect = [self.fasta_path, self.gtf_path]
-            sort_fasta.return_value = sorted_fasta_path, chromosomes
-            sort_gtf.return_value = sorted_gtf_path, chromosomes
-            check_chromosomes.return_value = chromosomes
-            generate_cdna_fasta.return_value = 'cdna'
-            generate_intron_fasta.return_value = 'intron'
+            split_genomic_fasta_to_cdna.return_value = 'cdna'
+            split_genomic_fasta_to_intron.return_value = 'intron'
             kallisto_index.return_value = {'index': 'index_cdna'}
             split_and_index.return_value = {
                 'indices': ['index_intron.0', 'index_intron.1']
@@ -1116,31 +1069,23 @@ def test_ref_lamanno_split_3(self):
                                  t2g_path,
                                  cdna_t2c_path,
                                  intron_t2c_path,
-                                 n=3,
-                                 temp_dir=temp_dir
+                                 temp_dir=temp_dir,
+                                 n=3
                              ))
-            self.assertEqual(2, decompress_file.call_count)
-            decompress_file.assert_has_calls([
-                call(self.fasta_path, temp_dir=temp_dir),
-                call(self.gtf_path, temp_dir=temp_dir),
-            ])
+            genes_and_transcripts_from_gtf.assert_called_once_with(
+                self.gtf_path, use_version=True
+            )
             create_t2g_from_fasta.assert_called_once_with(
                 combined_path, t2g_path
             )
-            sort_fasta.assert_called_once_with(self.fasta_path, 'fasta')
-            sort_gtf.assert_called_once_with(self.gtf_path, 'gtf')
-            check_chromosomes.assert_called_once_with(chromosomes, chromosomes)
-            generate_cdna_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
-                'cdna',
-                chromosomes=chromosomes
+            split_genomic_fasta_to_cdna.assert_called_once_with(
+                self.fasta_path, 'cdna', gene_infos, transcript_infos
             )
-            generate_intron_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
+            split_genomic_fasta_to_intron.assert_called_once_with(
+                self.fasta_path,
                 'intron',
-                chromosomes=chromosomes,
+                gene_infos,
+                transcript_infos,
                 flank=30
             )
             self.assertEqual(2, create_t2c.call_count)
@@ -1170,40 +1115,35 @@ def test_ref_lamanno_split_3(self):
 
     def test_ref_lamanno_override_k(self):
         with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
-            mock.patch('kb_python.ref.decompress_file') as decompress_file,\
             mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
-            mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\
-            mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\
-            mock.patch('kb_python.ref.check_chromosomes') as check_chromosomes,\
-            mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\
-            mock.patch('kb_python.ref.generate_intron_fasta') as generate_intron_fasta,\
             mock.patch('kb_python.ref.create_t2c') as create_t2c,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_intron') as split_genomic_fasta_to_intron,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=False),\
             mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
+            mock.patch('kb_python.ref.split_and_index') as split_and_index,\
             mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
-            mock.patch('kb_python.ref.glob.glob') as glob:
-            glob.return_value = []
-            k = 999
+            mock.patch('kb_python.ref.glob.glob', return_value=[]):
             temp_dir = self.temp_dir
+            k = 999
             index_path = mock.MagicMock()
             t2g_path = mock.MagicMock()
-            sorted_fasta_path = mock.MagicMock()
-            sorted_gtf_path = mock.MagicMock()
             cdna_fasta_path = mock.MagicMock()
             intron_fasta_path = mock.MagicMock()
             cdna_t2c_path = mock.MagicMock()
             intron_t2c_path = mock.MagicMock()
             combined_path = mock.MagicMock()
-            chromosomes = {'1', '2'}
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
             get_temporary_filename.side_effect = [
-                'fasta', 'gtf', 'cdna', 'cdna_t2c', 'intron', 'intron_t2c',
-                'combined'
+                'cdna', 'cdna_t2c', 'intron', 'intron_t2c', 'combined'
             ]
-            decompress_file.side_effect = [self.fasta_path, self.gtf_path]
-            sort_fasta.return_value = sorted_fasta_path, chromosomes
-            sort_gtf.return_value = sorted_gtf_path, chromosomes
-            check_chromosomes.return_value = chromosomes
-            generate_cdna_fasta.return_value = 'cdna'
-            generate_intron_fasta.return_value = 'intron'
+            split_genomic_fasta_to_cdna.return_value = 'cdna'
+            split_genomic_fasta_to_intron.return_value = 'intron'
             kallisto_index.return_value = {'index': index_path}
             create_t2g_from_fasta.return_value = {'t2g': t2g_path}
             create_t2c.side_effect = [{
@@ -1235,28 +1175,20 @@ def test_ref_lamanno_override_k(self):
                                  k=k,
                                  temp_dir=temp_dir
                              ))
-            self.assertEqual(2, decompress_file.call_count)
-            decompress_file.assert_has_calls([
-                call(self.fasta_path, temp_dir=temp_dir),
-                call(self.gtf_path, temp_dir=temp_dir),
-            ])
+            genes_and_transcripts_from_gtf.assert_called_once_with(
+                self.gtf_path, use_version=True
+            )
             create_t2g_from_fasta.assert_called_once_with(
                 combined_path, t2g_path
             )
-            sort_fasta.assert_called_once_with(self.fasta_path, 'fasta')
-            sort_gtf.assert_called_once_with(self.gtf_path, 'gtf')
-            check_chromosomes.assert_called_once_with(chromosomes, chromosomes)
-            generate_cdna_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
-                'cdna',
-                chromosomes=chromosomes
+            split_genomic_fasta_to_cdna.assert_called_once_with(
+                self.fasta_path, 'cdna', gene_infos, transcript_infos
             )
-            generate_intron_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
+            split_genomic_fasta_to_intron.assert_called_once_with(
+                self.fasta_path,
                 'intron',
-                chromosomes=chromosomes,
+                gene_infos,
+                transcript_infos,
                 flank=k - 1
             )
             self.assertEqual(2, create_t2c.call_count)
@@ -1280,21 +1212,89 @@ def test_ref_lamanno_override_k(self):
             kallisto_index.assert_called_once_with(
                 combined_path, index_path, k=k
             )
+            split_and_index.assert_not_called()
 
     def test_ref_lamanno_exists(self):
-        with mock.patch('kb_python.ref.get_temporary_filename'),\
-            mock.patch('kb_python.ref.decompress_file') as decompress_file,\
+        with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
             mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
-            mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\
-            mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\
-            mock.patch('kb_python.ref.check_chromosomes') as check_chromosomes,\
-            mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\
-            mock.patch('kb_python.ref.generate_intron_fasta') as generate_intron_fasta,\
             mock.patch('kb_python.ref.create_t2c') as create_t2c,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_intron') as split_genomic_fasta_to_intron,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=True),\
             mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
+            mock.patch('kb_python.ref.split_and_index') as split_and_index,\
             mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
-            mock.patch('kb_python.ref.glob.glob') as glob:
-            glob.return_value = ['index']
+            mock.patch('kb_python.ref.glob.glob', return_value=[]):
+            temp_dir = self.temp_dir
+            index_path = mock.MagicMock()
+            t2g_path = mock.MagicMock()
+            cdna_fasta_path = mock.MagicMock()
+            intron_fasta_path = mock.MagicMock()
+            cdna_t2c_path = mock.MagicMock()
+            intron_t2c_path = mock.MagicMock()
+            combined_path = mock.MagicMock()
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
+            get_temporary_filename.return_value = 'combined'
+            split_genomic_fasta_to_cdna.return_value = 'cdna'
+            split_genomic_fasta_to_intron.return_value = 'intron'
+            kallisto_index.return_value = {'index': index_path}
+            create_t2g_from_fasta.return_value = {'t2g': t2g_path}
+            create_t2c.side_effect = [{
+                't2c': 'cdna_t2c'
+            }, {
+                't2c': 'intron_t2c'
+            }]
+            concatenate_files.return_value = combined_path
+            self.assertEqual({
+                't2g': t2g_path,
+                'index': index_path,
+            },
+                             ref.ref_lamanno(
+                                 self.fasta_path,
+                                 self.gtf_path,
+                                 cdna_fasta_path,
+                                 intron_fasta_path,
+                                 index_path,
+                                 t2g_path,
+                                 cdna_t2c_path,
+                                 intron_t2c_path,
+                                 temp_dir=temp_dir
+                             ))
+            genes_and_transcripts_from_gtf.assert_not_called()
+            create_t2g_from_fasta.assert_called_once_with(
+                combined_path, t2g_path
+            )
+            split_genomic_fasta_to_cdna.assert_not_called()
+            split_genomic_fasta_to_intron.assert_not_called()
+            create_t2c.assert_not_called()
+            concatenate_files.assert_called_once_with(
+                cdna_fasta_path,
+                intron_fasta_path,
+                out_path='combined',
+                temp_dir=temp_dir
+            )
+            kallisto_index.assert_called_once_with(
+                combined_path, index_path, k=31
+            )
+            split_and_index.assert_not_called()
+
+    def test_ref_lamanno_exists2(self):
+        with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
+            mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
+            mock.patch('kb_python.ref.create_t2c') as create_t2c,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_intron') as split_genomic_fasta_to_intron,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=True),\
+            mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
+            mock.patch('kb_python.ref.split_and_index') as split_and_index,\
+            mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
+            mock.patch('kb_python.ref.glob.glob', return_value=['a']):
             temp_dir = self.temp_dir
             index_path = mock.MagicMock()
             t2g_path = mock.MagicMock()
@@ -1302,8 +1302,23 @@ def test_ref_lamanno_exists(self):
             intron_fasta_path = mock.MagicMock()
             cdna_t2c_path = mock.MagicMock()
             intron_t2c_path = mock.MagicMock()
+            combined_path = mock.MagicMock()
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
+            get_temporary_filename.return_value = 'combined'
+            split_genomic_fasta_to_cdna.return_value = 'cdna'
+            split_genomic_fasta_to_intron.return_value = 'intron'
             kallisto_index.return_value = {'index': index_path}
             create_t2g_from_fasta.return_value = {'t2g': t2g_path}
+            create_t2c.side_effect = [{
+                't2c': 'cdna_t2c'
+            }, {
+                't2c': 'intron_t2c'
+            }]
+            concatenate_files.return_value = combined_path
             self.assertEqual({},
                              ref.ref_lamanno(
                                  self.fasta_path,
@@ -1316,52 +1331,45 @@ def test_ref_lamanno_exists(self):
                                  intron_t2c_path,
                                  temp_dir=temp_dir
                              ))
-            decompress_file.assert_not_called()
+            genes_and_transcripts_from_gtf.assert_not_called()
             create_t2g_from_fasta.assert_not_called()
-            sort_fasta.assert_not_called()
-            sort_gtf.assert_not_called()
-            check_chromosomes.assert_not_called()
-            generate_cdna_fasta.assert_not_called()
-            generate_intron_fasta.assert_not_called()
+            split_genomic_fasta_to_cdna.assert_not_called()
+            split_genomic_fasta_to_intron.assert_not_called()
             create_t2c.assert_not_called()
             concatenate_files.assert_not_called()
             kallisto_index.assert_not_called()
+            split_and_index.assert_not_called()
 
     def test_ref_lamanno_overwrite(self):
         with mock.patch('kb_python.ref.get_temporary_filename') as get_temporary_filename,\
-            mock.patch('kb_python.ref.decompress_file') as decompress_file,\
             mock.patch('kb_python.ref.create_t2g_from_fasta') as create_t2g_from_fasta,\
-            mock.patch('kb_python.ref.sort_fasta') as sort_fasta,\
-            mock.patch('kb_python.ref.sort_gtf') as sort_gtf,\
-            mock.patch('kb_python.ref.check_chromosomes') as check_chromosomes,\
-            mock.patch('kb_python.ref.generate_cdna_fasta') as generate_cdna_fasta,\
-            mock.patch('kb_python.ref.generate_intron_fasta') as generate_intron_fasta,\
             mock.patch('kb_python.ref.create_t2c') as create_t2c,\
+            mock.patch('kb_python.ref.ngs.gtf.genes_and_transcripts_from_gtf') as genes_and_transcripts_from_gtf,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_cdna') as split_genomic_fasta_to_cdna,\
+            mock.patch('kb_python.ref.ngs.fasta.split_genomic_fasta_to_intron') as split_genomic_fasta_to_intron,\
+            mock.patch('kb_python.ref.ngs.utils.all_exists', return_value=True),\
             mock.patch('kb_python.ref.concatenate_files') as concatenate_files,\
+            mock.patch('kb_python.ref.split_and_index') as split_and_index,\
             mock.patch('kb_python.ref.kallisto_index') as kallisto_index,\
-            mock.patch('kb_python.ref.glob.glob') as glob:
-            glob.return_value = ['index']
+            mock.patch('kb_python.ref.glob.glob', return_value=['a']):
             temp_dir = self.temp_dir
             index_path = mock.MagicMock()
             t2g_path = mock.MagicMock()
-            sorted_fasta_path = mock.MagicMock()
-            sorted_gtf_path = mock.MagicMock()
             cdna_fasta_path = mock.MagicMock()
             intron_fasta_path = mock.MagicMock()
             cdna_t2c_path = mock.MagicMock()
             intron_t2c_path = mock.MagicMock()
             combined_path = mock.MagicMock()
-            chromosomes = {'1', '2'}
+            gene_infos = mock.MagicMock()
+            transcript_infos = mock.MagicMock()
+            genes_and_transcripts_from_gtf.return_value = (
+                gene_infos, transcript_infos
+            )
             get_temporary_filename.side_effect = [
-                'fasta', 'gtf', 'cdna', 'cdna_t2c', 'intron', 'intron_t2c',
-                'combined'
+                'cdna', 'cdna_t2c', 'intron', 'intron_t2c', 'combined'
             ]
-            decompress_file.side_effect = [self.fasta_path, self.gtf_path]
-            sort_fasta.return_value = sorted_fasta_path, chromosomes
-            sort_gtf.return_value = sorted_gtf_path, chromosomes
-            check_chromosomes.return_value = chromosomes
-            generate_cdna_fasta.return_value = 'cdna'
-            generate_intron_fasta.return_value = 'intron'
+            split_genomic_fasta_to_cdna.return_value = 'cdna'
+            split_genomic_fasta_to_intron.return_value = 'intron'
             kallisto_index.return_value = {'index': index_path}
             create_t2g_from_fasta.return_value = {'t2g': t2g_path}
             create_t2c.side_effect = [{
@@ -1393,28 +1401,20 @@ def test_ref_lamanno_overwrite(self):
                                  temp_dir=temp_dir,
                                  overwrite=True
                              ))
-            self.assertEqual(2, decompress_file.call_count)
-            decompress_file.assert_has_calls([
-                call(self.fasta_path, temp_dir=temp_dir),
-                call(self.gtf_path, temp_dir=temp_dir),
-            ])
+            genes_and_transcripts_from_gtf.assert_called_once_with(
+                self.gtf_path, use_version=True
+            )
             create_t2g_from_fasta.assert_called_once_with(
                 combined_path, t2g_path
             )
-            sort_fasta.assert_called_once_with(self.fasta_path, 'fasta')
-            sort_gtf.assert_called_once_with(self.gtf_path, 'gtf')
-            check_chromosomes.assert_called_once_with(chromosomes, chromosomes)
-            generate_cdna_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
-                'cdna',
-                chromosomes=chromosomes
+            split_genomic_fasta_to_cdna.assert_called_once_with(
+                self.fasta_path, 'cdna', gene_infos, transcript_infos
             )
-            generate_intron_fasta.assert_called_once_with(
-                sorted_fasta_path,
-                sorted_gtf_path,
+            split_genomic_fasta_to_intron.assert_called_once_with(
+                self.fasta_path,
                 'intron',
-                chromosomes=chromosomes,
+                gene_infos,
+                transcript_infos,
                 flank=30
             )
             self.assertEqual(2, create_t2c.call_count)
@@ -1438,3 +1438,4 @@ def test_ref_lamanno_overwrite(self):
             kallisto_index.assert_called_once_with(
                 combined_path, index_path, k=31
             )
+            split_and_index.assert_not_called()
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 9440668..a856799 100755
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,7 +1,5 @@
-import gzip
 import os
 import subprocess as sp
-import uuid
 from unittest import mock, TestCase
 from unittest.mock import call
 from unittest.mock import ANY
@@ -9,7 +7,7 @@
 import anndata
 
 import kb_python.utils as utils
-from kb_python.config import CHUNK_SIZE, UnsupportedOSException
+from kb_python.config import UnsupportedOSException
 from tests.mixins import TestMixin
 
 
@@ -20,44 +18,6 @@ def test_update_filename(self):
             'output.s.c.bus', utils.update_filename('output.s.bus', 'c')
         )
 
-    def test_open_as_text_textfile(self):
-        path = os.path.join(self.temp_dir, '{}.txt'.format(uuid.uuid4()))
-        with utils.open_as_text(path, 'w') as f:
-            f.write('TESTING')
-        self.assertTrue(os.path.exists(path))
-        with utils.open_as_text(path, 'r') as f:
-            self.assertEqual(f.read(), 'TESTING')
-
-    def test_open_as_text_gzip(self):
-        path = os.path.join(self.temp_dir, '{}.gz'.format(uuid.uuid4()))
-        with utils.open_as_text(path, 'w') as f:
-            f.write('TESTING')
-        self.assertTrue(os.path.exists(path))
-        with utils.open_as_text(path, 'r') as f:
-            self.assertEqual(f.read(), 'TESTING')
-
-    def test_decompress_gzip(self):
-        filename = str(uuid.uuid4())
-        gzip_path = os.path.join(self.temp_dir, '{}.gz'.format(filename))
-        out_path = os.path.join(self.temp_dir, filename)
-        with gzip.open(gzip_path, 'wt') as f:
-            f.write('TESTING\nTEST')
-        self.assertEqual(out_path, utils.decompress_gzip(gzip_path, out_path))
-        self.assertTrue(os.path.exists(out_path))
-        with open(out_path, 'r') as f:
-            self.assertEqual('TESTING\nTEST', f.read())
-
-    def test_compress_gzip(self):
-        filename = str(uuid.uuid4())
-        file_path = os.path.join(self.temp_dir, filename)
-        out_path = os.path.join(self.temp_dir, '{}.gz'.format(filename))
-        with open(file_path, 'w') as f:
-            f.write('TESTING\nTEST')
-        self.assertEqual(out_path, utils.compress_gzip(file_path, out_path))
-        self.assertTrue(os.path.exists(out_path))
-        with gzip.open(out_path, 'rt') as f:
-            self.assertEqual('TESTING\nTEST', f.read())
-
     def test_make_directory(self):
         with mock.patch('kb_python.utils.os.makedirs') as makedirs:
             utils.make_directory('path')
@@ -122,33 +82,6 @@ def test_whitelist_provided(self):
         self.assertTrue(utils.whitelist_provided('10xv2'))
         self.assertFalse(utils.whitelist_provided('UNSUPPORTED'))
 
-    def test_download_file(self):
-        with mock.patch('kb_python.utils.requests.get') as get,\
-            mock.patch('kb_python.utils.tqdm') as tqdm:
-            path = os.path.join(self.temp_dir, str(uuid.uuid4()))
-            get.return_value.headers = {'Content-Length': str(1024 * 2)}
-            get.return_value.iter_content.return_value = [
-                b'1' * 1024, b'2' * 1024
-            ]
-            self.assertEqual(path, utils.download_file('remote/path', path))
-            get.assert_called_once_with('remote/path', stream=True)
-            tqdm.assert_called_once_with(
-                unit='B',
-                total=1024 * 2,
-                unit_divisor=1024,
-                unit_scale=True,
-                ascii=True
-            )
-            get.return_value.iter_content.assert_called_once_with(
-                chunk_size=CHUNK_SIZE
-            )
-            self.assertEqual(2, tqdm.return_value.update.call_count)
-            tqdm.return_value.update.assert_has_calls([call(1024), call(1024)])
-            tqdm.return_value.close.assert_called_once_with()
-
-            with open(path, 'r') as f:
-                self.assertEqual(('1' * 1024) + ('2' * 1024), f.read())
-
     def test_stream_file(self):
         with mock.patch('kb_python.utils.PLATFORM', 'linux'),\
             mock.patch('kb_python.utils.os') as os,\
@@ -176,10 +109,6 @@ def test_stream_file_windows(self):
             threading.thread.assert_not_called()
             urlretrieve.assert_not_called()
 
-    def test_get_temporary_filename(self):
-        path = utils.get_temporary_filename()
-        self.assertTrue(os.path.exists(path))
-
     def test_read_t2g(self):
         t2g = utils.read_t2g(self.t2g_path)
         self.assertEqual(16457, len(t2g))
@@ -289,23 +218,3 @@ def test_move_file(self):
     def test_copy_whitelist(self):
         whitelist_path = utils.copy_whitelist('10xv1', self.temp_dir)
         self.assertTrue(os.path.exists(whitelist_path))
-
-    def test_concatenate_files(self):
-        temp_dir = self.temp_dir
-        file1_path = os.path.join(temp_dir, str(uuid.uuid4()))
-        file2_path = os.path.join(temp_dir, '{}.gz'.format(uuid.uuid4()))
-
-        with open(file1_path, 'w') as f:
-            f.write('TEST1')
-        with gzip.open(file2_path, 'wt') as f:
-            f.write('TEST2')
-
-        out_path = utils.concatenate_files(
-            file1_path,
-            file2_path,
-            out_path=os.path.join(temp_dir, str(uuid.uuid4())),
-            temp_dir=self.temp_dir
-        )
-
-        with open(out_path, 'r') as f:
-            self.assertEqual(f.read(), 'TEST1\nTEST2\n')