Skip to content

Commit

Permalink
Add loop peeling, too
Browse files Browse the repository at this point in the history
  • Loading branch information
brandtbucher committed Sep 13, 2024
2 parents eb54546 + ee3b5e3 commit e2ba0e8
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 16 deletions.
22 changes: 11 additions & 11 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,8 +1025,8 @@ def testfunc(n):

uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
uop_names = [uop[0] for uop in uops_and_operands]
self.assertEqual(uop_names.count("_PUSH_FRAME"), 2)
self.assertEqual(uop_names.count("_RETURN_VALUE"), 2)
self.assertEqual(uop_names.count("_PUSH_FRAME"), 4)
self.assertEqual(uop_names.count("_RETURN_VALUE"), 4)
self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0)
self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
# sequential calls: max(12, 13) == 13
Expand All @@ -1052,8 +1052,8 @@ def testfunc(n):

uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
uop_names = [uop[0] for uop in uops_and_operands]
self.assertEqual(uop_names.count("_PUSH_FRAME"), 2)
self.assertEqual(uop_names.count("_RETURN_VALUE"), 2)
self.assertEqual(uop_names.count("_PUSH_FRAME"), 4)
self.assertEqual(uop_names.count("_RETURN_VALUE"), 4)
self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0)
self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
# nested calls: 15 + 12 == 27
Expand Down Expand Up @@ -1087,8 +1087,8 @@ def testfunc(n):

uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
uop_names = [uop[0] for uop in uops_and_operands]
self.assertEqual(uop_names.count("_PUSH_FRAME"), 4)
self.assertEqual(uop_names.count("_RETURN_VALUE"), 4)
self.assertEqual(uop_names.count("_PUSH_FRAME"), 8)
self.assertEqual(uop_names.count("_RETURN_VALUE"), 8)
self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0)
self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
# max(12, 18 + max(12, 13)) == 31
Expand Down Expand Up @@ -1123,8 +1123,8 @@ def testfunc(n):

uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
uop_names = [uop[0] for uop in uops_and_operands]
self.assertEqual(uop_names.count("_PUSH_FRAME"), 4)
self.assertEqual(uop_names.count("_RETURN_VALUE"), 4)
self.assertEqual(uop_names.count("_PUSH_FRAME"), 8)
self.assertEqual(uop_names.count("_RETURN_VALUE"), 8)
self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0)
self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
# max(18 + max(12, 13), 12) == 31
Expand Down Expand Up @@ -1167,8 +1167,8 @@ def testfunc(n):

uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
uop_names = [uop[0] for uop in uops_and_operands]
self.assertEqual(uop_names.count("_PUSH_FRAME"), 15)
self.assertEqual(uop_names.count("_RETURN_VALUE"), 15)
self.assertEqual(uop_names.count("_PUSH_FRAME"), 22)
self.assertEqual(uop_names.count("_RETURN_VALUE"), 20)

self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0)
self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
Expand Down Expand Up @@ -1408,7 +1408,7 @@ class Bar:
self.assertIsNotNone(ex)
self.assertEqual(res, 219)
guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
self.assertEqual(guard_type_version_count, 2)
self.assertEqual(guard_type_version_count, 4)


@unittest.expectedFailure
Expand Down
36 changes: 33 additions & 3 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -579,8 +579,30 @@ translate_bytecode_to_trace(

if (!first && instr == initial_instr) {
// We have looped around to the start:
RESERVE(1);
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
int end = trace_length;
assert(trace[0].opcode == _START_EXECUTOR);
for (int i = 1; i < end; i++) {
_PyUOpInstruction *uop = &trace[i];
int stubs = 0;
if (OPCODE_HAS_DEOPT(opcode)) {
stubs++;
}
if (OPCODE_HAS_ERROR(opcode)) {
stubs++;
}
if (OPCODE_HAS_EXIT(opcode)) {
stubs++;
}
// uop + stubs + _JUMP_TO_TOP all need to fit. Don't use
// RESERVE since we need to insert _JUMP_TO_TOP once
// we're out of space:
if (max_length < trace_length + 1 + stubs + 1) {
break;
}
max_length -= stubs;
ADD_TO_TRACE(uop->opcode, uop->oparg, uop->operand, uop->target);
}
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, trace_length - end + 1);
goto done;
}

Expand Down Expand Up @@ -990,6 +1012,14 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
int32_t current_popped = -1;
int32_t current_exit_op = -1;
/* Leaving in NOPs slows down the interpreter and messes up the stats */
_PyUOpInstruction *jump_to_top = &buffer[length - 1];
if (jump_to_top->opcode == _JUMP_TO_TOP) {
int nops = 0;
for (int i = 0; i < (int)jump_to_top->target; i++) {
nops += (buffer[i].opcode == _NOP);
}
jump_to_top->target -= nops;
}
_PyUOpInstruction *copy_to = &buffer[0];
for (int i = 0; i < length; i++) {
_PyUOpInstruction *inst = &buffer[i];
Expand Down Expand Up @@ -1048,7 +1078,7 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
if (opcode == _JUMP_TO_TOP) {
assert(buffer[0].opcode == _START_EXECUTOR);
buffer[i].format = UOP_FORMAT_JUMP;
buffer[i].jump_target = 1;
buffer[i].jump_target = target;
}
}
return next_spare;
Expand Down
26 changes: 25 additions & 1 deletion Python/optimizer_bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,10 @@ dummy_func(void) {
(void)callable;

PyCodeObject *co = NULL;
assert((this_instr + 2)->opcode == _PUSH_FRAME);
if ((this_instr + 2)->opcode != _PUSH_FRAME) {
ctx->done = true;
break;
}
uint64_t push_operand = (this_instr + 2)->operand;
if (push_operand & 1) {
co = (PyCodeObject *)(push_operand & ~1);
Expand Down Expand Up @@ -883,6 +886,27 @@ dummy_func(void) {
res = sym_new_const(ctx, Py_True);
}

// Optimizer rule for the _ITER_CHECK_LIST uop. The stack effect
// (iter -- iter) leaves `iter` in place.
op(_ITER_CHECK_LIST, (iter -- iter)) {
// If the symbol for `iter` already matches PyListIter_Type, rewrite this
// instruction to _NOP (presumably because the runtime check is then
// redundant -- confirm against sym_matches_type's contract).
if (sym_matches_type(iter, &PyListIter_Type)) {
REPLACE_OP(this_instr, _NOP, 0, 0);
}
// Always record PyListIter_Type on the symbol, whether or not the
// instruction was replaced above.
sym_set_type(iter, &PyListIter_Type);
}

// Optimizer rule for the _ITER_CHECK_RANGE uop. The stack effect
// (iter -- iter) leaves `iter` in place.
op(_ITER_CHECK_RANGE, (iter -- iter)) {
// If the symbol for `iter` already matches PyRangeIter_Type, rewrite this
// instruction to _NOP (presumably because the runtime check is then
// redundant -- confirm against sym_matches_type's contract).
if (sym_matches_type(iter, &PyRangeIter_Type)) {
REPLACE_OP(this_instr, _NOP, 0, 0);
}
// Always record PyRangeIter_Type on the symbol, whether or not the
// instruction was replaced above.
sym_set_type(iter, &PyRangeIter_Type);
}

// Optimizer rule for the _ITER_CHECK_TUPLE uop. The stack effect
// (iter -- iter) leaves `iter` in place.
op(_ITER_CHECK_TUPLE, (iter -- iter)) {
// If the symbol for `iter` already matches PyTupleIter_Type, rewrite this
// instruction to _NOP (presumably because the runtime check is then
// redundant -- confirm against sym_matches_type's contract).
if (sym_matches_type(iter, &PyTupleIter_Type)) {
REPLACE_OP(this_instr, _NOP, 0, 0);
}
// Always record PyTupleIter_Type on the symbol, whether or not the
// instruction was replaced above.
sym_set_type(iter, &PyTupleIter_Type);
}

// Optimizer rule for the _JUMP_TO_TOP uop; the stack effect (--) is empty.
op(_JUMP_TO_TOP, (--)) {
// Set the context's `done` flag. NOTE(review): presumably this stops the
// optimizer's pass over the trace at this instruction -- confirm where
// ctx->done is consumed.
ctx->done = true;
}
Expand Down
23 changes: 22 additions & 1 deletion Python/optimizer_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e2ba0e8

Please sign in to comment.