Skip to content

Commit

Permalink
release gil more (pandas-dev#29322)
Browse files: browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Nov 4, 2019
1 parent 8eb4ba6 commit 7ba9eb6
Show file tree
Hide file tree
Showing 3 changed files with 252 additions and 241 deletions.
62 changes: 32 additions & 30 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def diff_2d(ndarray[diff_t, ndim=2] arr,
Py_ssize_t periods, int axis):
cdef:
Py_ssize_t i, j, sx, sy, start, stop
bint f_contig = arr.flags.f_contiguous

# Disable for unsupported dtype combinations,
# see https://github.com/cython/cython/issues/2646
Expand All @@ -37,40 +38,41 @@ def diff_2d(ndarray[diff_t, ndim=2] arr,
# We put this inside an indented else block to avoid cython build
# warnings about unreachable code
sx, sy = (<object>arr).shape
if arr.flags.f_contiguous:
if axis == 0:
if periods >= 0:
start, stop = periods, sx
with nogil:
if f_contig:
if axis == 0:
if periods >= 0:
start, stop = periods, sx
else:
start, stop = 0, sx + periods
for j in range(sy):
for i in range(start, stop):
out[i, j] = arr[i, j] - arr[i - periods, j]
else:
start, stop = 0, sx + periods
for j in range(sy):
for i in range(start, stop):
out[i, j] = arr[i, j] - arr[i - periods, j]
if periods >= 0:
start, stop = periods, sy
else:
start, stop = 0, sy + periods
for j in range(start, stop):
for i in range(sx):
out[i, j] = arr[i, j] - arr[i, j - periods]
else:
if periods >= 0:
start, stop = periods, sy
if axis == 0:
if periods >= 0:
start, stop = periods, sx
else:
start, stop = 0, sx + periods
for i in range(start, stop):
for j in range(sy):
out[i, j] = arr[i, j] - arr[i - periods, j]
else:
start, stop = 0, sy + periods
for j in range(start, stop):
if periods >= 0:
start, stop = periods, sy
else:
start, stop = 0, sy + periods
for i in range(sx):
out[i, j] = arr[i, j] - arr[i, j - periods]
else:
if axis == 0:
if periods >= 0:
start, stop = periods, sx
else:
start, stop = 0, sx + periods
for i in range(start, stop):
for j in range(sy):
out[i, j] = arr[i, j] - arr[i - periods, j]
else:
if periods >= 0:
start, stop = periods, sy
else:
start, stop = 0, sy + periods
for i in range(sx):
for j in range(start, stop):
out[i, j] = arr[i, j] - arr[i, j - periods]
for j in range(start, stop):
out[i, j] = arr[i, j] - arr[i, j - periods]


# ----------------------------------------------------------------------
Expand Down
152 changes: 79 additions & 73 deletions pandas/_libs/join.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,14 @@ def inner_join(const int64_t[:] left, const int64_t[:] right,
left_sorter, left_count = groupsort_indexer(left, max_groups)
right_sorter, right_count = groupsort_indexer(right, max_groups)

# First pass, determine size of result set, do not use the NA group
for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]
with nogil:
# First pass, determine size of result set, do not use the NA group
for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]

if rc > 0 and lc > 0:
count += lc * rc
if rc > 0 and lc > 0:
count += lc * rc

# exclude the NA group
left_pos = left_count[0]
Expand All @@ -44,19 +45,20 @@ def inner_join(const int64_t[:] left, const int64_t[:] right,
left_indexer = np.empty(count, dtype=np.int64)
right_indexer = np.empty(count, dtype=np.int64)

for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]

if rc > 0 and lc > 0:
for j in range(lc):
offset = position + j * rc
for k in range(rc):
left_indexer[offset + k] = left_pos + j
right_indexer[offset + k] = right_pos + k
position += lc * rc
left_pos += lc
right_pos += rc
with nogil:
for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]

if rc > 0 and lc > 0:
for j in range(lc):
offset = position + j * rc
for k in range(rc):
left_indexer[offset + k] = left_pos + j
right_indexer[offset + k] = right_pos + k
position += lc * rc
left_pos += lc
right_pos += rc

return (_get_result_indexer(left_sorter, left_indexer),
_get_result_indexer(right_sorter, right_indexer))
Expand All @@ -79,12 +81,13 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right,
left_sorter, left_count = groupsort_indexer(left, max_groups)
right_sorter, right_count = groupsort_indexer(right, max_groups)

# First pass, determine size of result set, do not use the NA group
for i in range(1, max_groups + 1):
if right_count[i] > 0:
count += left_count[i] * right_count[i]
else:
count += left_count[i]
with nogil:
# First pass, determine size of result set, do not use the NA group
for i in range(1, max_groups + 1):
if right_count[i] > 0:
count += left_count[i] * right_count[i]
else:
count += left_count[i]

# exclude the NA group
left_pos = left_count[0]
Expand All @@ -93,24 +96,25 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right,
left_indexer = np.empty(count, dtype=np.int64)
right_indexer = np.empty(count, dtype=np.int64)

for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]
with nogil:
for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]

if rc == 0:
for j in range(lc):
left_indexer[position + j] = left_pos + j
right_indexer[position + j] = -1
position += lc
else:
for j in range(lc):
offset = position + j * rc
for k in range(rc):
left_indexer[offset + k] = left_pos + j
right_indexer[offset + k] = right_pos + k
position += lc * rc
left_pos += lc
right_pos += rc
if rc == 0:
for j in range(lc):
left_indexer[position + j] = left_pos + j
right_indexer[position + j] = -1
position += lc
else:
for j in range(lc):
offset = position + j * rc
for k in range(rc):
left_indexer[offset + k] = left_pos + j
right_indexer[offset + k] = right_pos + k
position += lc * rc
left_pos += lc
right_pos += rc

left_indexer = _get_result_indexer(left_sorter, left_indexer)
right_indexer = _get_result_indexer(right_sorter, right_indexer)
Expand Down Expand Up @@ -149,15 +153,16 @@ def full_outer_join(const int64_t[:] left, const int64_t[:] right,
left_sorter, left_count = groupsort_indexer(left, max_groups)
right_sorter, right_count = groupsort_indexer(right, max_groups)

# First pass, determine size of result set, do not use the NA group
for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]
with nogil:
# First pass, determine size of result set, do not use the NA group
for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]

if rc > 0 and lc > 0:
count += lc * rc
else:
count += lc + rc
if rc > 0 and lc > 0:
count += lc * rc
else:
count += lc + rc

# exclude the NA group
left_pos = left_count[0]
Expand All @@ -166,29 +171,30 @@ def full_outer_join(const int64_t[:] left, const int64_t[:] right,
left_indexer = np.empty(count, dtype=np.int64)
right_indexer = np.empty(count, dtype=np.int64)

for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]

if rc == 0:
for j in range(lc):
left_indexer[position + j] = left_pos + j
right_indexer[position + j] = -1
position += lc
elif lc == 0:
for j in range(rc):
left_indexer[position + j] = -1
right_indexer[position + j] = right_pos + j
position += rc
else:
for j in range(lc):
offset = position + j * rc
for k in range(rc):
left_indexer[offset + k] = left_pos + j
right_indexer[offset + k] = right_pos + k
position += lc * rc
left_pos += lc
right_pos += rc
with nogil:
for i in range(1, max_groups + 1):
lc = left_count[i]
rc = right_count[i]

if rc == 0:
for j in range(lc):
left_indexer[position + j] = left_pos + j
right_indexer[position + j] = -1
position += lc
elif lc == 0:
for j in range(rc):
left_indexer[position + j] = -1
right_indexer[position + j] = right_pos + j
position += rc
else:
for j in range(lc):
offset = position + j * rc
for k in range(rc):
left_indexer[offset + k] = left_pos + j
right_indexer[offset + k] = right_pos + k
position += lc * rc
left_pos += lc
right_pos += rc

return (_get_result_indexer(left_sorter, left_indexer),
_get_result_indexer(right_sorter, right_indexer))
Expand Down
Loading

0 comments on commit 7ba9eb6

Please sign in to comment.