Skip to content

Commit

Permalink
rename download_or_stream_fastqs to stream_fastqs (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
Lioscro authored Oct 30, 2019
1 parent e9b9760 commit 91a0da7
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 43 deletions.
17 changes: 14 additions & 3 deletions kb_python/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,18 @@ def bustools_whitelist(bus_path, out_path):
return {'whitelist': out_path}


def download_or_stream_fastqs(fastqs, temp_dir='tmp'):
def stream_fastqs(fastqs, temp_dir='tmp'):
"""Given a list of fastqs (that may be local or remote paths), stream any
remote files. Internally, calls stream_file for each remote path.
:param fastqs: list of (remote or local) fastq paths
:type fastqs: list
:param temp_dir: temporary directory
:type temp_dir: str
:return: all remote paths substituted with a local path
:rtype: list
"""
return [
stream_file(fastq, os.path.join(temp_dir, os.path.basename(fastq)))
if urlparse(fastq).scheme in ('http', 'https', 'ftp', 'ftps') else fastq
Expand Down Expand Up @@ -393,7 +404,7 @@ def count(
if any(not os.path.exists(path)
for name, path in bus_result.items()) or overwrite:
# Pipe any remote files.
fastqs = download_or_stream_fastqs(fastqs, temp_dir=temp_dir)
fastqs = stream_fastqs(fastqs, temp_dir=temp_dir)
bus_result = kallisto_bus(
fastqs, index_path, technology, unfiltered_dir, threads=threads
)
Expand Down Expand Up @@ -590,7 +601,7 @@ def count_lamanno(
}
if any(not os.path.exists(path)
for name, path in bus_result.items()) or overwrite:
fastqs = download_or_stream_fastqs(fastqs, temp_dir=temp_dir)
fastqs = stream_fastqs(fastqs, temp_dir=temp_dir)
bus_result = kallisto_bus(
fastqs, index_path, technology, out_dir, threads=threads
)
Expand Down
78 changes: 38 additions & 40 deletions tests/test_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,18 +113,17 @@ def test_convert_matrix_to_h5ad(self):
)
self.assertTrue(os.path.exists(out_path))

def test_download_or_stream_fastqs_local(self):
def test_stream_fastqs_local(self):
with mock.patch('kb_python.count.stream_file') as stream_file:
temp_dir = tempfile.mkdtemp()
fastqs = ['path/to/file1.gz', 'path/to/file2.gz']
stream_file.side_effect = ['FILE 1', 'FILE 2']
self.assertEqual(
fastqs,
count.download_or_stream_fastqs(fastqs, temp_dir=temp_dir)
fastqs, count.stream_fastqs(fastqs, temp_dir=temp_dir)
)
stream_file.assert_not_called()

def test_download_or_stream_fastqs_remote(self):
def test_stream_fastqs_remote(self):
with mock.patch('kb_python.count.stream_file') as stream_file:
temp_dir = tempfile.mkdtemp()
fastqs = ['http://path/to/file1.gz', 'https://path/to/file2.gz']
Expand All @@ -133,9 +132,8 @@ def test_download_or_stream_fastqs_remote(self):
for fastq in fastqs
]
stream_file.side_effect = ['FILE 1', 'FILE 2']
self.assertEqual([
'FILE 1', 'FILE 2'
], count.download_or_stream_fastqs(fastqs, temp_dir=temp_dir))
self.assertEqual(['FILE 1', 'FILE 2'],
count.stream_fastqs(fastqs, temp_dir=temp_dir))
self.assertEqual(2, stream_file.call_count)
stream_file.assert_has_calls([
call(fastqs[0], local_fastqs[0]),
Expand Down Expand Up @@ -165,7 +163,7 @@ def test_copy_or_create_whitelist_not_provided(self):
)

def test_count_with_whitelist(self):
with mock.patch('kb_python.count.download_or_stream_fastqs') as download_or_stream_fastqs,\
with mock.patch('kb_python.count.stream_fastqs') as stream_fastqs,\
mock.patch('kb_python.count.kallisto_bus') as kallisto_bus,\
mock.patch('kb_python.count.bustools_sort') as bustools_sort,\
mock.patch('kb_python.count.bustools_inspect') as bustools_inspect,\
Expand All @@ -186,7 +184,7 @@ def test_count_with_whitelist(self):
bus_s_path = os.path.join(temp_dir, BUS_S_FILENAME)
bus_sc_path = os.path.join(temp_dir, BUS_SC_FILENAME)
bus_scs_path = os.path.join(out_dir, BUS_SCS_FILENAME)
download_or_stream_fastqs.return_value = self.fastqs
stream_fastqs.return_value = self.fastqs
kallisto_bus.return_value = {
'bus': bus_path,
'ecmap': ecmap_path,
Expand Down Expand Up @@ -226,7 +224,7 @@ def test_count_with_whitelist(self):
threads=threads,
memory=memory
))
download_or_stream_fastqs.assert_called_once_with(
stream_fastqs.assert_called_once_with(
self.fastqs, temp_dir=temp_dir
)
kallisto_bus.assert_called_once_with(
Expand Down Expand Up @@ -268,7 +266,7 @@ def test_count_with_whitelist(self):
convert_matrix_to_h5ad.assert_not_called()

def test_count_loom(self):
with mock.patch('kb_python.count.download_or_stream_fastqs') as download_or_stream_fastqs,\
with mock.patch('kb_python.count.stream_fastqs') as stream_fastqs,\
mock.patch('kb_python.count.kallisto_bus') as kallisto_bus,\
mock.patch('kb_python.count.bustools_sort') as bustools_sort,\
mock.patch('kb_python.count.bustools_inspect') as bustools_inspect,\
Expand All @@ -290,7 +288,7 @@ def test_count_loom(self):
bus_sc_path = os.path.join(temp_dir, BUS_SC_FILENAME)
bus_scs_path = os.path.join(out_dir, BUS_SCS_FILENAME)
loom_path = mock.MagicMock()
download_or_stream_fastqs.return_value = self.fastqs
stream_fastqs.return_value = self.fastqs
kallisto_bus.return_value = {
'bus': bus_path,
'ecmap': ecmap_path,
Expand Down Expand Up @@ -333,7 +331,7 @@ def test_count_loom(self):
memory=memory,
loom=True,
))
download_or_stream_fastqs.assert_called_once_with(
stream_fastqs.assert_called_once_with(
self.fastqs, temp_dir=temp_dir
)
kallisto_bus.assert_called_once_with(
Expand Down Expand Up @@ -382,7 +380,7 @@ def test_count_loom(self):
convert_matrix_to_h5ad.assert_not_called()

def test_count_h5ad(self):
with mock.patch('kb_python.count.download_or_stream_fastqs') as download_or_stream_fastqs,\
with mock.patch('kb_python.count.stream_fastqs') as stream_fastqs,\
mock.patch('kb_python.count.kallisto_bus') as kallisto_bus,\
mock.patch('kb_python.count.bustools_sort') as bustools_sort,\
mock.patch('kb_python.count.bustools_inspect') as bustools_inspect,\
Expand All @@ -404,7 +402,7 @@ def test_count_h5ad(self):
bus_sc_path = os.path.join(temp_dir, BUS_SC_FILENAME)
bus_scs_path = os.path.join(out_dir, BUS_SCS_FILENAME)
h5ad_path = mock.MagicMock()
download_or_stream_fastqs.return_value = self.fastqs
stream_fastqs.return_value = self.fastqs
kallisto_bus.return_value = {
'bus': bus_path,
'ecmap': ecmap_path,
Expand Down Expand Up @@ -447,7 +445,7 @@ def test_count_h5ad(self):
memory=memory,
h5ad=True,
))
download_or_stream_fastqs.assert_called_once_with(
stream_fastqs.assert_called_once_with(
self.fastqs, temp_dir=temp_dir
)
kallisto_bus.assert_called_once_with(
Expand Down Expand Up @@ -496,7 +494,7 @@ def test_count_h5ad(self):
)

def test_count_filter(self):
with mock.patch('kb_python.count.download_or_stream_fastqs') as download_or_stream_fastqs,\
with mock.patch('kb_python.count.stream_fastqs') as stream_fastqs,\
mock.patch('kb_python.count.kallisto_bus') as kallisto_bus,\
mock.patch('kb_python.count.bustools_sort') as bustools_sort,\
mock.patch('kb_python.count.bustools_inspect') as bustools_inspect,\
Expand Down Expand Up @@ -531,7 +529,7 @@ def test_count_filter(self):
)
filtered_temp_bus_path = os.path.join(temp_dir, BUS_SCS_FILENAME)
filtered_bus_path = os.path.join(filtered_dir, BUS_SCS_FILENAME)
download_or_stream_fastqs.return_value = self.fastqs
stream_fastqs.return_value = self.fastqs
kallisto_bus.return_value = {
'bus': bus_path,
'ecmap': ecmap_path,
Expand Down Expand Up @@ -596,7 +594,7 @@ def test_count_filter(self):
threads=threads,
memory=memory
))
download_or_stream_fastqs.assert_called_once_with(
stream_fastqs.assert_called_once_with(
self.fastqs, temp_dir=temp_dir
)
kallisto_bus.assert_called_once_with(
Expand Down Expand Up @@ -663,7 +661,7 @@ def test_count_filter(self):
convert_matrix_to_h5ad.assert_not_called()

def test_count_filter_loom(self):
with mock.patch('kb_python.count.download_or_stream_fastqs') as download_or_stream_fastqs,\
with mock.patch('kb_python.count.stream_fastqs') as stream_fastqs,\
mock.patch('kb_python.count.kallisto_bus') as kallisto_bus,\
mock.patch('kb_python.count.bustools_sort') as bustools_sort,\
mock.patch('kb_python.count.bustools_inspect') as bustools_inspect,\
Expand Down Expand Up @@ -700,7 +698,7 @@ def test_count_filter_loom(self):
filtered_temp_bus_path = os.path.join(temp_dir, BUS_SCS_FILENAME)
filtered_bus_path = os.path.join(filtered_dir, BUS_SCS_FILENAME)
filtered_loom_path = mock.MagicMock()
download_or_stream_fastqs.return_value = self.fastqs
stream_fastqs.return_value = self.fastqs
kallisto_bus.return_value = {
'bus': bus_path,
'ecmap': ecmap_path,
Expand Down Expand Up @@ -770,7 +768,7 @@ def test_count_filter_loom(self):
memory=memory,
loom=True
))
download_or_stream_fastqs.assert_called_once_with(
stream_fastqs.assert_called_once_with(
self.fastqs, temp_dir=temp_dir
)
kallisto_bus.assert_called_once_with(
Expand Down Expand Up @@ -857,7 +855,7 @@ def test_count_filter_loom(self):
convert_matrix_to_h5ad.assert_not_called()

def test_count_filter_h5ad(self):
with mock.patch('kb_python.count.download_or_stream_fastqs') as download_or_stream_fastqs,\
with mock.patch('kb_python.count.stream_fastqs') as stream_fastqs,\
mock.patch('kb_python.count.kallisto_bus') as kallisto_bus,\
mock.patch('kb_python.count.bustools_sort') as bustools_sort,\
mock.patch('kb_python.count.bustools_inspect') as bustools_inspect,\
Expand Down Expand Up @@ -894,7 +892,7 @@ def test_count_filter_h5ad(self):
filtered_temp_bus_path = os.path.join(temp_dir, BUS_SCS_FILENAME)
filtered_bus_path = os.path.join(filtered_dir, BUS_SCS_FILENAME)
filtered_h5ad_path = mock.MagicMock()
download_or_stream_fastqs.return_value = self.fastqs
stream_fastqs.return_value = self.fastqs
kallisto_bus.return_value = {
'bus': bus_path,
'ecmap': ecmap_path,
Expand Down Expand Up @@ -964,7 +962,7 @@ def test_count_filter_h5ad(self):
memory=memory,
h5ad=True
))
download_or_stream_fastqs.assert_called_once_with(
stream_fastqs.assert_called_once_with(
self.fastqs, temp_dir=temp_dir
)
kallisto_bus.assert_called_once_with(
Expand Down Expand Up @@ -1051,7 +1049,7 @@ def test_count_filter_h5ad(self):
convert_matrix_to_loom.assert_not_called()

def test_count_without_whitelist(self):
with mock.patch('kb_python.count.download_or_stream_fastqs') as download_or_stream_fastqs,\
with mock.patch('kb_python.count.stream_fastqs') as stream_fastqs,\
mock.patch('kb_python.count.kallisto_bus') as kallisto_bus,\
mock.patch('kb_python.count.bustools_sort') as bustools_sort,\
mock.patch('kb_python.count.bustools_inspect') as bustools_inspect,\
Expand All @@ -1072,7 +1070,7 @@ def test_count_without_whitelist(self):
bus_s_path = os.path.join(temp_dir, BUS_S_FILENAME)
bus_sc_path = os.path.join(temp_dir, BUS_SC_FILENAME)
bus_scs_path = os.path.join(out_dir, BUS_SCS_FILENAME)
download_or_stream_fastqs.return_value = self.fastqs
stream_fastqs.return_value = self.fastqs
kallisto_bus.return_value = {
'bus': bus_path,
'ecmap': ecmap_path,
Expand Down Expand Up @@ -1113,7 +1111,7 @@ def test_count_without_whitelist(self):
threads=threads,
memory=memory
))
download_or_stream_fastqs.assert_called_once_with(
stream_fastqs.assert_called_once_with(
self.fastqs, temp_dir=temp_dir
)
kallisto_bus.assert_called_once_with(
Expand Down Expand Up @@ -1157,7 +1155,7 @@ def test_count_without_whitelist(self):
convert_matrix_to_h5ad.assert_not_called()

def test_count_lamanno_with_whitelist(self):
with mock.patch('kb_python.count.download_or_stream_fastqs') as download_or_stream_fastqs,\
with mock.patch('kb_python.count.stream_fastqs') as stream_fastqs,\
mock.patch('kb_python.count.kallisto_bus') as kallisto_bus,\
mock.patch('kb_python.count.bustools_sort') as bustools_sort,\
mock.patch('kb_python.count.bustools_inspect') as bustools_inspect,\
Expand Down Expand Up @@ -1193,7 +1191,7 @@ def test_count_lamanno_with_whitelist(self):
)
cdna_t2c_path = mock.MagicMock()
intron_t2c_path = mock.MagicMock()
download_or_stream_fastqs.return_value = self.fastqs
stream_fastqs.return_value = self.fastqs
kallisto_bus.return_value = {
'bus': bus_path,
'ecmap': ecmap_path,
Expand Down Expand Up @@ -1293,7 +1291,7 @@ def test_count_lamanno_with_whitelist(self):
threads=threads,
memory=memory
))
download_or_stream_fastqs.assert_called_once_with(
stream_fastqs.assert_called_once_with(
self.fastqs, temp_dir=temp_dir
)
kallisto_bus.assert_called_once_with(
Expand Down Expand Up @@ -1356,7 +1354,7 @@ def test_count_lamanno_with_whitelist(self):
overlay_anndatas.assert_not_called()

def test_count_lamanno_loom(self):
with mock.patch('kb_python.count.download_or_stream_fastqs') as download_or_stream_fastqs,\
with mock.patch('kb_python.count.stream_fastqs') as stream_fastqs,\
mock.patch('kb_python.count.kallisto_bus') as kallisto_bus,\
mock.patch('kb_python.count.bustools_sort') as bustools_sort,\
mock.patch('kb_python.count.bustools_inspect') as bustools_inspect,\
Expand Down Expand Up @@ -1397,7 +1395,7 @@ def test_count_lamanno_loom(self):
adata = mock.MagicMock()
loom_path = os.path.join(counts_dir, '{}.loom'.format(ADATA_PREFIX))
adata.write_loom.return_value = loom_path
download_or_stream_fastqs.return_value = self.fastqs
stream_fastqs.return_value = self.fastqs
kallisto_bus.return_value = {
'bus': bus_path,
'ecmap': ecmap_path,
Expand Down Expand Up @@ -1503,7 +1501,7 @@ def test_count_lamanno_loom(self):
memory=memory,
loom=True
))
download_or_stream_fastqs.assert_called_once_with(
stream_fastqs.assert_called_once_with(
self.fastqs, temp_dir=temp_dir
)
kallisto_bus.assert_called_once_with(
Expand Down Expand Up @@ -1589,7 +1587,7 @@ def test_count_lamanno_loom(self):
adata.write.assert_not_called()

def test_count_lamanno_h5ad(self):
with mock.patch('kb_python.count.download_or_stream_fastqs') as download_or_stream_fastqs,\
with mock.patch('kb_python.count.stream_fastqs') as stream_fastqs,\
mock.patch('kb_python.count.kallisto_bus') as kallisto_bus,\
mock.patch('kb_python.count.bustools_sort') as bustools_sort,\
mock.patch('kb_python.count.bustools_inspect') as bustools_inspect,\
Expand Down Expand Up @@ -1630,7 +1628,7 @@ def test_count_lamanno_h5ad(self):
adata = mock.MagicMock()
h5ad_path = os.path.join(counts_dir, '{}.h5ad'.format(ADATA_PREFIX))
adata.write.return_value = h5ad_path
download_or_stream_fastqs.return_value = self.fastqs
stream_fastqs.return_value = self.fastqs
kallisto_bus.return_value = {
'bus': bus_path,
'ecmap': ecmap_path,
Expand Down Expand Up @@ -1736,7 +1734,7 @@ def test_count_lamanno_h5ad(self):
memory=memory,
h5ad=True
))
download_or_stream_fastqs.assert_called_once_with(
stream_fastqs.assert_called_once_with(
self.fastqs, temp_dir=temp_dir
)
kallisto_bus.assert_called_once_with(
Expand Down Expand Up @@ -1822,7 +1820,7 @@ def test_count_lamanno_h5ad(self):
adata.write.assert_called_once_with(h5ad_path)

def test_count_lamanno_without_whitelist(self):
with mock.patch('kb_python.count.download_or_stream_fastqs') as download_or_stream_fastqs,\
with mock.patch('kb_python.count.stream_fastqs') as stream_fastqs,\
mock.patch('kb_python.count.kallisto_bus') as kallisto_bus,\
mock.patch('kb_python.count.bustools_sort') as bustools_sort,\
mock.patch('kb_python.count.bustools_inspect') as bustools_inspect,\
Expand Down Expand Up @@ -1858,7 +1856,7 @@ def test_count_lamanno_without_whitelist(self):
)
cdna_t2c_path = mock.MagicMock()
intron_t2c_path = mock.MagicMock()
download_or_stream_fastqs.return_value = self.fastqs
stream_fastqs.return_value = self.fastqs
kallisto_bus.return_value = {
'bus': bus_path,
'ecmap': ecmap_path,
Expand Down Expand Up @@ -1959,7 +1957,7 @@ def test_count_lamanno_without_whitelist(self):
threads=threads,
memory=memory
))
download_or_stream_fastqs.assert_called_once_with(
stream_fastqs.assert_called_once_with(
self.fastqs, temp_dir=temp_dir
)
kallisto_bus.assert_called_once_with(
Expand Down

0 comments on commit 91a0da7

Please sign in to comment.