Skip to content

Commit

Permalink
Update indptr calculation
Browse files (browse the repository at this point in the history)
  • Loading branch information
yul.kk authored and lucas(김광섭) committed Nov 11, 2020
1 parent fbb1a14 commit 8add02e
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions buffalo/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,10 +369,12 @@ def _build_compressed_triplets(self, db, job_files, num_lines, max_key, is_colwi
assert data_index + total_records <= num_lines, 'Requests data size(%s) exceed capacity(%s)' % (data_index + total_records, num_lines)
db['key'][data_index:data_index + total_records] = I
db['val'][data_index:data_index + total_records] = V
- indptr = [data_index for j in range(U[0] - prev_key)]
- indptr += [data_index + i
- for i in range(1, total_records)
- for j in range(U[i] - U[i - 1])]
+ diff = U[1:] - U[:-1]
+ max_diff = np.amax(diff) if len(diff) else 0
+ indptr = [data_index for _ in range(U[0] - prev_key)]
+ for i in range(max_diff):
+ indptr += (np.where(diff > i)[0] + data_index + 1).tolist()
+ indptr.sort()
db['indptr'][indptr_index:indptr_index + len(indptr)] = indptr
assert indptr_index + len(indptr) <= max_key
data_index += total_records
Expand Down

0 comments on commit 8add02e

Please sign in to comment.