From c02d4cc3052467893c327f728c0da8e4c5d930e4 Mon Sep 17 00:00:00 2001 From: Fritz Lekschas <932103+flekschas@users.noreply.github.com> Date: Wed, 9 Dec 2020 21:43:19 -0500 Subject: [PATCH 1/5] Add a new tile-based indexing strategy for beddb files Genome wide this can lead to 20x faster queries but it increases the file size by a factor of 2.5 --- clodius/cli/aggregate.py | 351 ++++++++++++++++++++++++++++++++++----- clodius/tiles/beddb.py | 18 +- 2 files changed, 327 insertions(+), 42 deletions(-) diff --git a/clodius/cli/aggregate.py b/clodius/cli/aggregate.py index e086babd..33004353 100644 --- a/clodius/cli/aggregate.py +++ b/clodius/cli/aggregate.py @@ -465,9 +465,16 @@ def _bedfile( delimiter, chromsizes_filename, offset, + sqlite_cache_size=500, # 500 MB + sqlite_batch_size=100000, + index_strategy='range-index', + verbose=False, ): BEDDB_VERSION = 3 + if verbose: + print(f'BEDDB VERSION: {BEDDB_VERSION}') + if output_file is None: output_file = filepath + ".beddb" else: @@ -550,7 +557,9 @@ def line_to_np_array(line): dset = [] - print("delimiter:", delimiter) + if verbose: + print("delimiter:", delimiter) + if has_header: line = bed_file.readline() header = line.strip().split(delimiter) @@ -610,7 +619,9 @@ def line_to_np_array(line): import sqlite3 sqlite3.register_adapter(np.int64, lambda val: int(val)) - print("output_file:", output_file, "header:", header) + if verbose: + print("output_file:", output_file, "header:", header) + conn = sqlite3.connect(output_file) # store some meta data @@ -629,19 +640,21 @@ def line_to_np_array(line): ) # max_width = tile_size * 2 ** max_zoom - uid_to_entry = {} + uid_to_interval = {} intervals = [] # store each bed file entry as an interval for d in dset: uid = d["uid"] - uid_to_entry[uid] = d + uid_to_interval[uid] = d intervals += [(d["startPos"], d["endPos"], uid)] - tile_width = tile_size - c = conn.cursor() + c.execute("PRAGMA synchronous = OFF;") + c.execute("PRAGMA journal_mode = OFF;") + c.execute(f"PRAGMA 
cache_size = {int(sqlite_cache_size * 1000)};") + c.execute( """ CREATE TABLE intervals @@ -659,6 +672,74 @@ def line_to_np_array(line): """ ) + sorted_intervals = sorted( + intervals, key=lambda x: -uid_to_interval[x[-1]]["importance"] + ) + + if verbose: + print("max_per_tile:", max_per_tile) + + tile_counts = col.defaultdict(int) + + if index_strategy == 'tile-index': + _bedfile_tile_index( + conn, + c, + sorted_intervals, + uid_to_interval, + max_zoom, + tile_size, + tile_counts, + max_per_tile, + sqlite_cache_size, + sqlite_batch_size, + verbose, + ) + else: + _bedfile_range_index( + conn, + c, + sorted_intervals, + uid_to_interval, + max_zoom, + tile_size, + tile_counts, + max_per_tile, + sqlite_cache_size, + sqlite_batch_size, + verbose, + ) + + conn.commit() + + c.execute("ANALYZE;") + + conn.commit() + + c.close() + + return True + + +def _bedfile_range_index( + conn, + c, + sorted_intervals, + uid_to_interval, + max_zoom, + tile_size, + tile_counts, + max_per_tile, + sqlite_cache_size=500, # 500 MB + sqlite_batch_size=100000, + verbose=False, +): + """Traditional beddb format + """ + + if verbose: + print('Indexing strategy: range-based (default)') + c.execute( """ CREATE VIRTUAL TABLE position_index USING rtree( @@ -668,25 +749,36 @@ def line_to_np_array(line): """ ) - curr_zoom = 0 counter = 0 - max_viewable_zoom = max_zoom + if verbose: + print("max_per_tile:", max_per_tile) - if max_zoom is not None and max_zoom < max_zoom: - max_viewable_zoom = max_zoom + tile_counts = col.defaultdict(int) - sorted_intervals = sorted( - intervals, key=lambda x: -uid_to_entry[x[-1]]["importance"] - ) - # print('si:', sorted_intervals[:10]) - print("max_per_tile:", max_per_tile) + interval_inserts = [] + position_index_inserts = [] - tile_counts = col.defaultdict(int) + def batch_insert(conn, c, interval_inserts, position_index_inserts): + if verbose > 0: + print(f"Insert batch ({counter})") + + with transaction(conn): + c.executemany( + "INSERT INTO intervals VALUES 
(?,?,?,?,?,?,?,?,?)", interval_inserts + ) + c.executemany( + "INSERT INTO position_index VALUES (?,?,?,?,?)", position_index_inserts + ) + + interval_inserts.clear() + position_index_inserts.clear() for interval in sorted_intervals: + curr_zoom = 0 + # go through each interval from most important to least - while curr_zoom <= max_viewable_zoom: + while curr_zoom <= max_zoom: # try to place it in the highest zoom level and go down from there tile_width = tile_size * 2 ** (max_zoom - curr_zoom) @@ -735,14 +827,9 @@ def line_to_np_array(line): if space_available: # there's available space - value = uid_to_entry[interval[-1]] + value = uid_to_interval[interval[-1]] - # one extra question mark for the primary key - exec_statement = "INSERT INTO intervals VALUES (?,?,?,?,?,?,?,?,?)" - - c.execute( - exec_statement, - # primary key, zoomLevel, startPos, endPos, chrOffset, line + interval_inserts.append( ( counter, curr_zoom, @@ -756,14 +843,14 @@ def line_to_np_array(line): ), ) - if counter % 1000 == 0: - print("counter:", counter, value["endPos"] - value["startPos"]) - - exec_statement = "INSERT INTO position_index VALUES (?,?,?,?,?)" - c.execute( - exec_statement, - # add counter as a primary key - (counter, curr_zoom, curr_zoom, value["startPos"], value["endPos"]), + position_index_inserts.append( + ( + counter, + curr_zoom, + curr_zoom, + value["startPos"], + value["endPos"] + ), ) counter += 1 @@ -771,9 +858,149 @@ def line_to_np_array(line): curr_zoom += 1 - curr_zoom = 0 + if len(interval_inserts) >= sqlite_batch_size: + batch_insert(conn, c, interval_inserts, position_index_inserts) + + batch_insert(conn, c, interval_inserts, position_index_inserts) + + +def _bedfile_tile_index( + conn, + c, + sorted_intervals, + uid_to_interval, + max_zoom, + tile_size, + tile_counts, + max_per_tile, + sqlite_cache_size=500, # 500 MB + sqlite_batch_size=100000, + verbose=False, +): + if verbose: + print('Indexing strategy: tile-based') + + row = c.execute("SELECT * from 
tileset_info").fetchone() + version = row[next(zip(*c.description)).index("version")] + c.execute( + f""" + UPDATE tileset_info + SET version = '{version}t' + WHERE version = '{version}' + """ + ) conn.commit() - return True + + c.execute( + """ + CREATE TABLE tiles + ( + id int, + intervalId int, + PRIMARY KEY (id, intervalId) + ) + """ + ) + + # I.e., tiles_cumsum[3] is the number of tiles with zoomlevels lower than 3 + tiles_cumsum = np.cumsum([0] + [2**x for x in range(max_zoom + 1)]) + + interval_inserts = [] + tile_inserts = [] + + def batch_insert(conn, c, interval_inserts, tile_inserts, counter): + if verbose > 0: + print(f"Insert batch ({counter})") + + with transaction(conn): + c.executemany( + "INSERT INTO intervals VALUES (?,?,?,?,?,?,?,?,?)", interval_inserts + ) + c.executemany( + "INSERT INTO tiles VALUES (?,?)", tile_inserts + ) + + interval_inserts.clear() + tile_inserts.clear() + + for interval_idx, interval in enumerate(sorted_intervals): + curr_zoom = 0 + inserted = False + # go through each interval from most important to least + while curr_zoom <= max_zoom: + # try to place it in the highest zoom level and go down from there + tile_width = tile_size * 2 ** (max_zoom - curr_zoom) + + curr_pos = interval[0] + space_available = True + + # if we have not yet inserted the interval somewhere + if not inserted: + # check if there's space at this zoom level + while curr_pos < interval[1]: + curr_tile = math.floor(curr_pos / tile_width) + tile_id = f"{curr_zoom}.{curr_tile}" + + # if any of the overlapping tiles is already filled up, + # lets go to the next zoom level by setting `space_available` + # to false + if tile_counts[tile_id] >= max_per_tile: + space_available = False + break + + curr_pos += tile_width + + # if there is, then increment the tile counters + if not inserted and space_available: + curr_pos = interval[0] + while curr_pos < interval[1]: + curr_tile = math.floor(curr_pos / tile_width) + tile_id = "{}.{}".format(curr_zoom, curr_tile) 
+ + tile_counts[tile_id] += 1 + + curr_pos += tile_width + + if inserted or space_available: + # there's available space + if not inserted: + value = uid_to_interval[interval[-1]] + interval_inserts.append( + ( + interval_idx, + curr_zoom, + value["importance"], + value["startPos"], + value["endPos"], + value["chrOffset"], + value["uid"], + value["name"], + value["fields"], + ), + ) + if verbose and interval_idx == 0: + print(f'Interval 0 first appears at zoom level {curr_zoom}') + + curr_pos = interval[0] + while curr_pos < interval[1]: + curr_tile_x = math.floor(curr_pos / tile_width) + tile_idx = tiles_cumsum[curr_zoom] + curr_tile_x + + tile_inserts.append((tile_idx, interval_idx)) + + if verbose and interval_idx == 0: + print(f'Interval 0 is added to {curr_zoom}.{curr_tile_x}') + + curr_pos += tile_width + + inserted = True + + curr_zoom += 1 + + if len(interval_inserts) >= sqlite_batch_size: + batch_insert(conn, c, interval_inserts, tile_inserts, interval_idx) + + batch_insert(conn, c, interval_inserts, tile_inserts, len(sorted_intervals)) ############################################################################### @@ -1431,59 +1658,98 @@ def bedgraph( @click.option( "--output-file", "-o", - default=None, help="The default output file name to use. If this isn't " "specified, clodius will replace the current extension " "with .multires.bed", + default=None, + show_default=True, ) @click.option( "--assembly", "-a", help="The genome assembly that this file was created against", default="hg19", + show_default=True, ) @click.option( "--importance-column", + "-i", help="The column (1-based) containing information about how important " "that row is. If it's absent, then use the length of the region. 
" "If the value is equal to `random`, then a random value will be " "used for the importance (effectively leading to random sampling)", + default="random", + show_default=True, ) @click.option( "--has-header/--no-header", help="Does this file have a header that we should ignore", default=False, + show_default=True, ) @click.option( "--chromosome", - default=None, + "-c", help="Only extract values for a particular chromosome. " "Use all chromosomes if not set.", + default=None, + show_default=True, ) @click.option( "--max-per-tile", - default=100, + "-m", type=int, + default=100, + show_default=True, help="The maximum number of entries to store per tile", ) @click.option( "--tile-size", - default=1024, + "-s", help="The number of nucleotides that the highest resolution tiles " "should span. This determines the maximum zoom level", + default=1024, + show_default=True, ) @click.option("--delimiter", default=None, type=str) @click.option( "--chromsizes-filename", help="A file containing chromosome sizes and order", default=None, + show_default=True, ) @click.option( "--offset", help="Apply an offset to all the coordinates in this file", type=int, default=0, + show_default=True, ) +@click.option( + "--sqlite-cache-size", + help="The SQLite cache size in MB. The higher " + + "the faster the aggregation gets but more memory will be required", + type=int, + default=500, + show_default=True, +) +@click.option( + "--sqlite-batch-size", + help="The number of entries inserted into SQLite at once. 
The higher " + + "the faster the aggregation gets but more memory will be required", + type=int, + default=100000, + show_default=True, +) +@click.option( + "--tile-index/--no-tile-index", + help="Tile-based indexing speeds up interval queries by up to 20x at " + +"the expensive of a 2.5x larger filesize", + type=bool, + default=False, + show_default=True, +) +@click.option("-v", "--verbose", count=True, help="Increase log statements") def bedfile( filepath, output_file, @@ -1496,7 +1762,12 @@ def bedfile( delimiter, chromsizes_filename, offset, + sqlite_cache_size, + sqlite_batch_size, + tile_index, + verbose ): + index_strategy = 'tile-index' if tile_index else 'range-index' _bedfile( filepath, output_file, @@ -1509,6 +1780,10 @@ def bedfile( delimiter, chromsizes_filename, offset, + sqlite_cache_size, + sqlite_batch_size, + index_strategy, + verbose ) diff --git a/clodius/tiles/beddb.py b/clodius/tiles/beddb.py index 160d8a56..11b6dd37 100644 --- a/clodius/tiles/beddb.py +++ b/clodius/tiles/beddb.py @@ -12,7 +12,10 @@ def tileset_info(db_file): if "version" not in colnames: version = 1 else: - version = int(row[colnames.index("version")]) + try: + version = int(row[colnames.index("version")]) + except ValueError: + version = row[colnames.index("version")] if "header" not in colnames: header = "" @@ -153,10 +156,17 @@ def get_1D_tiles(db_file, zoom, tile_x_pos, num_tiles=1): zoom, tile_start_pos, tile_end_pos ) - # import time - # t1 = time.time() + if version == '3t': + tile_id = sum([2**x for x in range(zoom)]) + tile_x_pos + query = f""" + SELECT startPos, endPos, chrOffset, importance, fields, uid, name + FROM intervals, tiles + WHERE + tiles.id = {tile_id} AND + tiles.intervalId = intervals.id + """ + rows = c.execute(query).fetchall() - # t2 = time.time() new_rows = [] From eb50e3812d8bcf9a68f85467d2b8ce0f3b3beef0 Mon Sep 17 00:00:00 2001 From: Fritz Lekschas <932103+flekschas@users.noreply.github.com> Date: Wed, 9 Dec 2020 21:50:52 -0500 Subject: [PATCH 
2/5] Blackification --- clodius/cli/aggregate.py | 40 ++++++++++++++++------------------------ clodius/tiles/beddb.py | 4 ++-- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/clodius/cli/aggregate.py b/clodius/cli/aggregate.py index 75d00618..8b882b8a 100644 --- a/clodius/cli/aggregate.py +++ b/clodius/cli/aggregate.py @@ -468,13 +468,13 @@ def _bedfile( offset, sqlite_cache_size=500, # 500 MB sqlite_batch_size=100000, - index_strategy='range-index', + index_strategy="range-index", verbose=False, ): BEDDB_VERSION = 3 if verbose: - print(f'BEDDB VERSION: {BEDDB_VERSION}') + print(f"BEDDB VERSION: {BEDDB_VERSION}") if output_file is None: output_file = filepath + ".beddb" @@ -682,7 +682,7 @@ def line_to_np_array(line): tile_counts = col.defaultdict(int) - if index_strategy == 'tile-index': + if index_strategy == "tile-index": _bedfile_tile_index( conn, c, @@ -739,7 +739,7 @@ def _bedfile_range_index( """ if verbose: - print('Indexing strategy: range-based (default)') + print("Indexing strategy: range-based (default)") c.execute( """ @@ -841,17 +841,11 @@ def batch_insert(conn, c, interval_inserts, position_index_inserts): value["uid"], value["name"], value["fields"], - ), + ) ) position_index_inserts.append( - ( - counter, - curr_zoom, - curr_zoom, - value["startPos"], - value["endPos"] - ), + (counter, curr_zoom, curr_zoom, value["startPos"], value["endPos"]) ) counter += 1 @@ -879,7 +873,7 @@ def _bedfile_tile_index( verbose=False, ): if verbose: - print('Indexing strategy: tile-based') + print("Indexing strategy: tile-based") row = c.execute("SELECT * from tileset_info").fetchone() version = row[next(zip(*c.description)).index("version")] @@ -904,7 +898,7 @@ def _bedfile_tile_index( ) # I.e., tiles_cumsum[3] is the number of tiles with zoomlevels lower than 3 - tiles_cumsum = np.cumsum([0] + [2**x for x in range(max_zoom + 1)]) + tiles_cumsum = np.cumsum([0] + [2 ** x for x in range(max_zoom + 1)]) interval_inserts = [] tile_inserts = [] @@ -917,9 
+911,7 @@ def batch_insert(conn, c, interval_inserts, tile_inserts, counter): c.executemany( "INSERT INTO intervals VALUES (?,?,?,?,?,?,?,?,?)", interval_inserts ) - c.executemany( - "INSERT INTO tiles VALUES (?,?)", tile_inserts - ) + c.executemany("INSERT INTO tiles VALUES (?,?)", tile_inserts) interval_inserts.clear() tile_inserts.clear() @@ -977,10 +969,10 @@ def batch_insert(conn, c, interval_inserts, tile_inserts, counter): value["uid"], value["name"], value["fields"], - ), + ) ) if verbose and interval_idx == 0: - print(f'Interval 0 first appears at zoom level {curr_zoom}') + print(f"Interval 0 first appears at zoom level {curr_zoom}") curr_pos = interval[0] while curr_pos < interval[1]: @@ -990,7 +982,7 @@ def batch_insert(conn, c, interval_inserts, tile_inserts, counter): tile_inserts.append((tile_idx, interval_idx)) if verbose and interval_idx == 0: - print(f'Interval 0 is added to {curr_zoom}.{curr_tile_x}') + print(f"Interval 0 is added to {curr_zoom}.{curr_tile_x}") curr_pos += tile_width @@ -1745,7 +1737,7 @@ def bedgraph( @click.option( "--tile-index/--no-tile-index", help="Tile-based indexing speeds up interval queries by up to 20x at " - +"the expensive of a 2.5x larger filesize", + + "the expensive of a 2.5x larger filesize", type=bool, default=False, show_default=True, @@ -1766,9 +1758,9 @@ def bedfile( sqlite_cache_size, sqlite_batch_size, tile_index, - verbose + verbose, ): - index_strategy = 'tile-index' if tile_index else 'range-index' + index_strategy = "tile-index" if tile_index else "range-index" _bedfile( filepath, output_file, @@ -1784,7 +1776,7 @@ def bedfile( sqlite_cache_size, sqlite_batch_size, index_strategy, - verbose + verbose, ) diff --git a/clodius/tiles/beddb.py b/clodius/tiles/beddb.py index 11b6dd37..43e7ea9b 100644 --- a/clodius/tiles/beddb.py +++ b/clodius/tiles/beddb.py @@ -156,8 +156,8 @@ def get_1D_tiles(db_file, zoom, tile_x_pos, num_tiles=1): zoom, tile_start_pos, tile_end_pos ) - if version == '3t': - tile_id = 
sum([2**x for x in range(zoom)]) + tile_x_pos + if version == "3t": + tile_id = sum([2 ** x for x in range(zoom)]) + tile_x_pos query = f""" SELECT startPos, endPos, chrOffset, importance, fields, uid, name FROM intervals, tiles WHERE From a0523f5d5ee0d1277025bba0777749010367d75f Mon Sep 17 00:00:00 2001 From: Fritz Lekschas <932103+flekschas@users.noreply.github.com> Date: Wed, 9 Dec 2020 21:51:44 -0500 Subject: [PATCH 3/5] Update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3c497f7..a3bc70c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ v0.16.0 +- Added a tile-based indexing strategy for `.beddb` files, which can speed up queries by up to 20x at the expense of increasing the file size by a factor of 2.5 + +v0.16.0 + - No default assembly v0.15.2 From 8fc7cad6b7616e68a51b053585c1ae1c3913ace0 Mon Sep 17 00:00:00 2001 From: Fritz Lekschas <932103+flekschas@users.noreply.github.com> Date: Mon, 14 Dec 2020 11:22:55 -0500 Subject: [PATCH 4/5] Improve code comments for documentation and remove debug logs --- clodius/cli/aggregate.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/clodius/cli/aggregate.py b/clodius/cli/aggregate.py index 8b882b8a..39bde5b8 100644 --- a/clodius/cli/aggregate.py +++ b/clodius/cli/aggregate.py @@ -943,7 +943,11 @@ def batch_insert(conn, c, interval_inserts, tile_inserts, counter): curr_pos += tile_width - # if there is, then increment the tile counters + # If there is, then increment the tile counters + # Note, the tile count should only be incremented when space is + # available and we have not yet inserted the interval. In other + # words, only the first instance of where an interval is inserted + # counts! 
if not inserted and space_available: curr_pos = interval[0] while curr_pos < interval[1]: @@ -955,7 +959,10 @@ def batch_insert(conn, c, interval_inserts, tile_inserts, counter): curr_pos += tile_width if inserted or space_available: - # there's available space + # If there's available space, we will insert the interval + # Note, that we only want to insert the interval exactly once + # and skip subsequent inserts by checking if `inserted` is + # false if not inserted: value = uid_to_interval[interval[-1]] interval_inserts.append( @@ -971,9 +978,9 @@ def batch_insert(conn, c, interval_inserts, tile_inserts, counter): value["fields"], ) ) - if verbose and interval_idx == 0: - print(f"Interval 0 first appears at zoom level {curr_zoom}") + # The following while-loop is necessary to ensure that tiles at + # higher zoom level also contain the interval curr_pos = interval[0] while curr_pos < interval[1]: curr_tile_x = math.floor(curr_pos / tile_width) @@ -981,9 +988,6 @@ def batch_insert(conn, c, interval_inserts, tile_inserts, counter): tile_inserts.append((tile_idx, interval_idx)) - if verbose and interval_idx == 0: - print(f"Interval 0 is added to {curr_zoom}.{curr_tile_x}") - curr_pos += tile_width inserted = True From 74c21043dfbf46fd9cbf55622629ce35668e25c9 Mon Sep 17 00:00:00 2001 From: Fritz Lekschas <932103+flekschas@users.noreply.github.com> Date: Mon, 14 Dec 2020 14:58:09 -0500 Subject: [PATCH 5/5] Added a test for tile index beddb files --- test/bedfile_test.py | 100 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/test/bedfile_test.py b/test/bedfile_test.py index 4bba8dba..a320c6ac 100644 --- a/test/bedfile_test.py +++ b/test/bedfile_test.py @@ -108,6 +108,8 @@ def test_gene_annotations(): rows = ctb.tiles(f.name, ["x.11.112"])[0][1] assert rows[0]["fields"][3] == "Lrp1b" + os.remove(f.name) + def test_random_importance(): # check that when aggregating using random importance, all values that @@ -165,6 +167,7 @@ def 
test_random_importance(): for key, value in found.items(): assert value + os.remove(f.name) pass @@ -272,3 +275,100 @@ def test_float_importance(): ], ) # TODO: Make assertions about result + + os.remove(f.name) + + +def test_tile_index(): + f = tempfile.NamedTemporaryFile(delete=False) + + runner = clt.CliRunner() + input_file = op.join(testdir, "sample_data", "test_float_importance.bed") + + runner.invoke( + cca.bedfile, + [ + input_file, + "--max-per-tile", + "2", + "--importance-column", + "4", + "--assembly", + "hg38", + "--no-header", + "--tile-index", + "--output-file", + f.name, + ], + ) + + rows = ctb.tiles(f.name, ["x.0.0"])[0][1] + + for row in rows: + assert row["fields"][0] == "chr20" + + conn = sqlite3.connect(f.name) + c = conn.cursor() + + rows = c.execute("SELECT * from tiles;").fetchall() + assert len(rows) == 2 * 39 + 2 * 38 + 37 + + rows = c.execute("SELECT * from intervals;").fetchall() + assert len(rows) == 5 + + os.remove(f.name) + + + +def test_compare_tile_vs_range_index(): + f_tile = tempfile.NamedTemporaryFile(delete=False) + f_range = tempfile.NamedTemporaryFile(delete=False) + + runner = clt.CliRunner() + input_file = op.join(testdir, "sample_data", "test_float_importance.bed") + + runner.invoke( + cca.bedfile, + [ + input_file, + "--max-per-tile", + "2", + "--importance-column", + "4", + "--assembly", + "hg38", + "--no-header", + "--tile-index", + "--output-file", + f_tile.name, + ], + ) + + runner.invoke( + cca.bedfile, + [ + input_file, + "--max-per-tile", + "2", + "--importance-column", + "4", + "--assembly", + "hg38", + "--no-header", + "--output-file", + f_range.name, + ], + ) + + rows_tile = ctb.tiles(f_tile.name, ["x.1.1"])[0][1] + assert len(rows_tile) == 4 + rows_range = ctb.tiles(f_tile.name, ["x.1.1"])[0][1] + assert len(rows_range) == 4 + + rows_tile = ctb.tiles(f_tile.name, ["x.2.2"])[0][1] + assert len(rows_tile) == 5 + rows_range = ctb.tiles(f_tile.name, ["x.2.2"])[0][1] + assert len(rows_range) == 5 + + 
os.remove(f_tile.name) + os.remove(f_range.name) \ No newline at end of file