From b523cd22864af08904b02090b229abc6323af161 Mon Sep 17 00:00:00 2001
From: Brandt Bucher
Date: Tue, 2 Jul 2024 16:41:09 -0700
Subject: [PATCH] _DYNAMIC_EXIT on underflow

---
 Python/bytecodes.c           | 18 ++++++++++++--
 Python/executor_cases.c.h    | 12 +++++++++
 Python/generated_cases.c.h   | 48 ++++++++++++++++++++++++++++++++++--
 Python/optimizer.c           | 47 ++++++++++++++++++-----------------
 Python/optimizer_analysis.c  | 10 +++++---
 Python/optimizer_bytecodes.c | 14 ++++++++---
 Python/optimizer_cases.c.h   | 14 ++++++++---
 Python/optimizer_symbols.c   |  4 +--
 Python/specialize.c          |  4 +--
 9 files changed, 128 insertions(+), 43 deletions(-)

diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 4afce2cc3bea9d..9165b5063dfec7 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -941,6 +941,9 @@ dummy_func(
             _PyEval_FrameClearAndPop(tstate, dying);
             LOAD_SP();
             LOAD_IP(frame->return_offset);
+            #if TIER_TWO
+            frame->instr_ptr += frame->return_offset;
+            #endif
             res = retval;
             LLTRACE_RESUME_FRAME();
         }
@@ -1191,6 +1194,9 @@ dummy_func(
                        _PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR);
             #endif
             LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
+            #if TIER_TWO
+            frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_SEND;
+            #endif
             LOAD_SP();
             value = retval;
             LLTRACE_RESUME_FRAME();
@@ -2829,7 +2835,8 @@ dummy_func(
             }
             /* iterator ended normally */
             assert(next_instr[oparg].op.code == END_FOR ||
-                   next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
+                   next_instr[oparg].op.code == INSTRUMENTED_END_FOR ||
+                   next_instr[oparg].op.code == ENTER_EXECUTOR);
             PyStackRef_CLOSE(iter);
             STACK_SHRINK(1);
             /* Jump forward oparg, then skip following END_FOR and POP_TOP instruction */
@@ -2881,7 +2888,8 @@ dummy_func(
             }
             /* iterator ended normally */
             assert(next_instr[oparg].op.code == END_FOR ||
-                   next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
+                   next_instr[oparg].op.code == INSTRUMENTED_END_FOR ||
+                   next_instr[oparg].op.code == ENTER_EXECUTOR);
             STACK_SHRINK(1);
             PyStackRef_CLOSE(iter_stackref);
             /* Skip END_FOR and POP_TOP */
@@ -3566,6 +3574,9 @@ dummy_func(
             tstate->py_recursion_remaining--;
             LOAD_SP();
             LOAD_IP(0);
+            #if TIER_TWO
+            frame->instr_ptr += 0;
+            #endif
             LLTRACE_RESUME_FRAME();
         }

@@ -4349,6 +4360,9 @@ dummy_func(
             _PyThreadState_PopFrame(tstate, frame);
             frame = tstate->current_frame = prev;
             LOAD_IP(frame->return_offset);
+            #if TIER_TWO
+            frame->instr_ptr += frame->return_offset;
+            #endif
             LOAD_SP();
             LLTRACE_RESUME_FRAME();
         }
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 62654035e80f50..99b4ecfd46ba90 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -1145,6 +1145,9 @@
             _PyEval_FrameClearAndPop(tstate, dying);
             LOAD_SP();
             LOAD_IP(frame->return_offset);
+            #if TIER_TWO
+            frame->instr_ptr += frame->return_offset;
+            #endif
             res = retval;
             LLTRACE_RESUME_FRAME();
             stack_pointer[0] = res;
@@ -1336,6 +1339,9 @@
                        _PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR);
             #endif
             LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
+            #if TIER_TWO
+            frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_SEND;
+            #endif
             LOAD_SP();
             value = retval;
             LLTRACE_RESUME_FRAME();
@@ -4036,6 +4042,9 @@
             tstate->py_recursion_remaining--;
             LOAD_SP();
             LOAD_IP(0);
+            #if TIER_TWO
+            frame->instr_ptr += 0;
+            #endif
             LLTRACE_RESUME_FRAME();
             break;
         }
@@ -4808,6 +4817,9 @@
             _PyThreadState_PopFrame(tstate, frame);
             frame = tstate->current_frame = prev;
             LOAD_IP(frame->return_offset);
+            #if TIER_TWO
+            frame->instr_ptr += frame->return_offset;
+            #endif
             LOAD_SP();
             LLTRACE_RESUME_FRAME();
             stack_pointer[0] = res;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 3c643f637ab095..aaf081fbcf8936 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -1056,6 +1056,9 @@
             tstate->py_recursion_remaining--;
             LOAD_SP();
             LOAD_IP(0);
+            #if TIER_TWO
+            frame->instr_ptr += 0;
+            #endif
             LLTRACE_RESUME_FRAME();
         }
         DISPATCH();
@@ -1152,6 +1155,9 @@
             tstate->py_recursion_remaining--;
             LOAD_SP();
             LOAD_IP(0);
+            #if TIER_TWO
+            frame->instr_ptr += 0;
+            #endif
             LLTRACE_RESUME_FRAME();
         }
         DISPATCH();
@@ -2124,6 +2130,9 @@
             tstate->py_recursion_remaining--;
             LOAD_SP();
             LOAD_IP(0);
+            #if TIER_TWO
+            frame->instr_ptr += 0;
+            #endif
             LLTRACE_RESUME_FRAME();
         }
         DISPATCH();
@@ -2200,6 +2209,9 @@
             tstate->py_recursion_remaining--;
             LOAD_SP();
             LOAD_IP(0);
+            #if TIER_TWO
+            frame->instr_ptr += 0;
+            #endif
             LLTRACE_RESUME_FRAME();
         }
         DISPATCH();
@@ -3068,7 +3080,8 @@
             }
             /* iterator ended normally */
             assert(next_instr[oparg].op.code == END_FOR ||
-                   next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
+                   next_instr[oparg].op.code == INSTRUMENTED_END_FOR ||
+                   next_instr[oparg].op.code == ENTER_EXECUTOR);
             PyStackRef_CLOSE(iter);
             STACK_SHRINK(1);
             /* Jump forward oparg, then skip following END_FOR and POP_TOP instruction */
@@ -3125,6 +3138,9 @@
             tstate->py_recursion_remaining--;
             LOAD_SP();
             LOAD_IP(0);
+            #if TIER_TWO
+            frame->instr_ptr += 0;
+            #endif
             LLTRACE_RESUME_FRAME();
         }
         DISPATCH();
@@ -3736,7 +3752,8 @@
             }
             /* iterator ended normally */
             assert(next_instr[oparg].op.code == END_FOR ||
-                   next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
+                   next_instr[oparg].op.code == INSTRUMENTED_END_FOR ||
+                   next_instr[oparg].op.code == ENTER_EXECUTOR);
             STACK_SHRINK(1);
             PyStackRef_CLOSE(iter_stackref);
             /* Skip END_FOR and POP_TOP */
@@ -3977,6 +3994,9 @@
             _PyEval_FrameClearAndPop(tstate, dying);
             LOAD_SP();
             LOAD_IP(frame->return_offset);
+            #if TIER_TWO
+            frame->instr_ptr += frame->return_offset;
+            #endif
             res = retval;
             LLTRACE_RESUME_FRAME();
         }
@@ -4019,6 +4039,9 @@
             _PyEval_FrameClearAndPop(tstate, dying);
             LOAD_SP();
             LOAD_IP(frame->return_offset);
+            #if TIER_TWO
+            frame->instr_ptr += frame->return_offset;
+            #endif
             res = retval;
             LLTRACE_RESUME_FRAME();
         }
@@ -4084,6 +4107,9 @@
                        _PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR);
             #endif
             LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
+            #if TIER_TWO
+            frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_SEND;
+            #endif
             LOAD_SP();
             value = retval;
             LLTRACE_RESUME_FRAME();
@@ -4726,6 +4752,9 @@
             tstate->py_recursion_remaining--;
             LOAD_SP();
             LOAD_IP(0);
+            #if TIER_TWO
+            frame->instr_ptr += 0;
+            #endif
             LLTRACE_RESUME_FRAME();
         }
         DISPATCH();
@@ -5898,6 +5927,9 @@
             _PyEval_FrameClearAndPop(tstate, dying);
             LOAD_SP();
             LOAD_IP(frame->return_offset);
+            #if TIER_TWO
+            frame->instr_ptr += frame->return_offset;
+            #endif
             res = retval;
             LLTRACE_RESUME_FRAME();
         }
@@ -5932,6 +5964,9 @@
             _PyThreadState_PopFrame(tstate, frame);
             frame = tstate->current_frame = prev;
             LOAD_IP(frame->return_offset);
+            #if TIER_TWO
+            frame->instr_ptr += frame->return_offset;
+            #endif
             LOAD_SP();
             LLTRACE_RESUME_FRAME();
             stack_pointer[0] = res;
@@ -5961,6 +5996,9 @@
             _PyEval_FrameClearAndPop(tstate, dying);
             LOAD_SP();
             LOAD_IP(frame->return_offset);
+            #if TIER_TWO
+            frame->instr_ptr += frame->return_offset;
+            #endif
             res = retval;
             LLTRACE_RESUME_FRAME();
             stack_pointer[0] = res;
@@ -6088,6 +6126,9 @@
             tstate->py_recursion_remaining--;
             LOAD_SP();
             LOAD_IP(0);
+            #if TIER_TWO
+            frame->instr_ptr += 0;
+            #endif
             LLTRACE_RESUME_FRAME();
         }
         DISPATCH();
@@ -7040,6 +7081,9 @@
                        _PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR);
             #endif
             LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
+            #if TIER_TWO
+            frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_SEND;
+            #endif
             LOAD_SP();
             value = retval;
             LLTRACE_RESUME_FRAME();
diff --git a/Python/optimizer.c b/Python/optimizer.c
index ce8a36575cde1d..c0ccb4f2e1af12 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -178,6 +178,11 @@ _PyOptimizer_Optimize(
     _PyInterpreterFrame *frame, _Py_CODEUNIT *start,
     _PyStackRef *stack_pointer, _PyExecutorObject **executor_ptr)
 {
+    if (!PyCode_Check(frame->f_executable) ||
+        !PyFunction_Check(frame->f_funcobj))
+    {
+        return 0;
+    }
     PyCodeObject *code = _PyFrame_GetCode(frame);
     assert(PyCode_Check(code));
     PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -506,19 +511,25 @@ add_to_trace(
         return 0; \
     } \
     assert(func == NULL || func->func_code == (PyObject *)code); \
+    assert(code != NULL); \
    trace_stack[trace_stack_depth].func = func; \
    trace_stack[trace_stack_depth].code = code; \
    trace_stack[trace_stack_depth].instr = instr; \
    trace_stack_depth++;
 #define TRACE_STACK_POP() \
     if (trace_stack_depth <= 0) { \
-        Py_FatalError("Trace stack underflow\n"); \
+        func = NULL; \
+        code = NULL; \
+        instr = NULL; \
     } \
-    trace_stack_depth--; \
-    func = trace_stack[trace_stack_depth].func; \
-    code = trace_stack[trace_stack_depth].code; \
-    assert(func == NULL || func->func_code == (PyObject *)code); \
-    instr = trace_stack[trace_stack_depth].instr;
+    else { \
+        trace_stack_depth--; \
+        func = trace_stack[trace_stack_depth].func; \
+        code = trace_stack[trace_stack_depth].code; \
+        assert(func == NULL || func->func_code == (PyObject *)code); \
+        assert(code != NULL); \
+        instr = trace_stack[trace_stack_depth].instr; \
+    }

 /* Returns the length of the trace on success,
  * 0 if it failed to produce a worthwhile trace,
@@ -704,17 +715,6 @@ translate_bytecode_to_trace(
                 // Reserve space for nuops (+ _SET_IP + _EXIT_TRACE)
                 int nuops = expansion->nuops;
                 RESERVE(nuops + 1); /* One extra for exit */
-                int16_t last_op = expansion->uops[nuops-1].uop;
-                if (last_op == _RETURN_VALUE || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) {
-                    // Check for trace stack underflow now:
-                    // We can't bail e.g. in the middle of
-                    // LOAD_CONST + _RETURN_VALUE.
-                    if (trace_stack_depth == 0) {
-                        DPRINTF(2, "Trace stack underflow\n");
-                        OPT_STAT_INC(trace_stack_underflow);
-                        goto done;
-                    }
-                }
                 uint32_t orig_oparg = oparg;  // For OPARG_TOP/BOTTOM
                 for (int i = 0; i < nuops; i++) {
                     oparg = orig_oparg;
@@ -779,7 +779,9 @@ translate_bytecode_to_trace(
                                 operand = (uintptr_t)code | 1;
                             }
                             else {
-                                operand = 0;
+                                ADD_TO_TRACE(uop, oparg, 0, target);
+                                ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0);
+                                goto done;
                             }
                             ADD_TO_TRACE(uop, oparg, operand, target);
                             DPRINTF(2,
@@ -847,11 +849,9 @@ translate_bytecode_to_trace(
                         if (new_func != NULL) {
                             operand = (uintptr_t)new_func;
                         }
-                        else if (new_code != NULL) {
-                            operand = (uintptr_t)new_code | 1;
-                        }
                         else {
-                            operand = 0;
+                            assert(new_code != NULL);
+                            operand = (uintptr_t)new_code | 1;
                         }
                         ADD_TO_TRACE(uop, oparg, operand, target);
                         code = new_code;
@@ -909,7 +909,8 @@ translate_bytecode_to_trace(
     while (trace_stack_depth > 0) {
         TRACE_STACK_POP();
     }
-    assert(code == initial_code);
+    assert(code == NULL || code == initial_code);
+    code = initial_code;
     // Skip short traces where we can't even translate a single instruction:
     if (progress_needed) {
         OPT_STAT_INC(trace_too_short);
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index 8c866417478128..6887467cde4a11 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -448,10 +448,12 @@ optimize_uops(
             DPRINTF(1, "\nUnknown opcode in abstract interpreter\n");
             Py_UNREACHABLE();
         }
-        assert(ctx->frame != NULL);
-        DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
-        ctx->frame->stack_pointer = stack_pointer;
-        assert(STACK_LEVEL() >= 0);
+        // assert(ctx->frame != NULL);
+        if (ctx->frame) {
+            DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
+            ctx->frame->stack_pointer = stack_pointer;
+            assert(STACK_LEVEL() >= 0);
+        }
     }
     if (ctx->out_of_space) {
         DPRINTF(3, "\n");
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index c982e37182157a..236658d5a52df6 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -617,7 +617,9 @@ dummy_func(void) {
         SYNC_SP();
         ctx->frame->stack_pointer = stack_pointer;
         frame_pop(ctx);
-        stack_pointer = ctx->frame->stack_pointer;
+        if (ctx->frame) {
+            stack_pointer = ctx->frame->stack_pointer;
+        }
         res = retval;

         /* Stack space handling */
@@ -625,13 +627,14 @@
         assert(co != NULL);
         int framesize = co->co_framesize;
         assert(framesize > 0);
-        assert(framesize <= curr_space);
+        // assert(framesize <= curr_space);
         curr_space -= framesize;

         co = get_code(this_instr);
         if (co == NULL) {
             // might be impossible, but bailing is still safe
             ctx->done = true;
+            break;
         }
     }

@@ -639,7 +642,9 @@ dummy_func(void) {
         SYNC_SP();
         ctx->frame->stack_pointer = stack_pointer;
         frame_pop(ctx);
-        stack_pointer = ctx->frame->stack_pointer;
+        if (ctx->frame) {
+            stack_pointer = ctx->frame->stack_pointer;
+        }
         res = sym_new_unknown(ctx);

         /* Stack space handling */
@@ -647,13 +652,14 @@
         assert(co != NULL);
         int framesize = co->co_framesize;
         assert(framesize > 0);
-        assert(framesize <= curr_space);
+        // assert(framesize <= curr_space);
         curr_space -= framesize;

         co = get_code(this_instr);
         if (co == NULL) {
             // might be impossible, but bailing is still safe
             ctx->done = true;
+            break;
         }
     }

diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 4fa40ff861ba70..105c2354d74e67 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -601,19 +601,22 @@
             assert(WITHIN_STACK_BOUNDS());
             ctx->frame->stack_pointer = stack_pointer;
             frame_pop(ctx);
-            stack_pointer = ctx->frame->stack_pointer;
+            if (ctx->frame) {
+                stack_pointer = ctx->frame->stack_pointer;
+            }
             res = retval;

             /* Stack space handling */
             assert(corresponding_check_stack == NULL);
             assert(co != NULL);
             int framesize = co->co_framesize;
             assert(framesize > 0);
-            assert(framesize <= curr_space);
+            // assert(framesize <= curr_space);
             curr_space -= framesize;

             co = get_code(this_instr);
             if (co == NULL) {
                 // might be impossible, but bailing is still safe
                 ctx->done = true;
+                break;
             }
             stack_pointer[0] = res;
             stack_pointer += 1;
@@ -1967,19 +1970,22 @@
             _Py_UopsSymbol *res;
             ctx->frame->stack_pointer = stack_pointer;
             frame_pop(ctx);
-            stack_pointer = ctx->frame->stack_pointer;
+            if (ctx->frame) {
+                stack_pointer = ctx->frame->stack_pointer;
+            }
             res = sym_new_unknown(ctx);

             /* Stack space handling */
             assert(corresponding_check_stack == NULL);
             assert(co != NULL);
             int framesize = co->co_framesize;
             assert(framesize > 0);
-            assert(framesize <= curr_space);
+            // assert(framesize <= curr_space);
             curr_space -= framesize;

             co = get_code(this_instr);
             if (co == NULL) {
                 // might be impossible, but bailing is still safe
                 ctx->done = true;
+                break;
             }
             stack_pointer[0] = res;
             stack_pointer += 1;
diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c
index 40cbf95e3d6d39..1547f9065c97e0 100644
--- a/Python/optimizer_symbols.c
+++ b/Python/optimizer_symbols.c
@@ -415,8 +415,8 @@ _Py_uop_frame_pop(_Py_UOpsContext *ctx)
     _Py_UOpsAbstractFrame *frame = ctx->frame;
     ctx->n_consumed = frame->locals;
     ctx->curr_frame_depth--;
-    assert(ctx->curr_frame_depth >= 1);
-    ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];
+    assert(ctx->curr_frame_depth >= 0);
+    ctx->frame = ctx->curr_frame_depth >= 1 ? &ctx->frames[ctx->curr_frame_depth - 1] : NULL;

     return 0;
 }
diff --git a/Python/specialize.c b/Python/specialize.c
index c354a9079019ac..a058c9ee7eae22 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -2379,8 +2379,8 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg)
     }
     else if (tp == &PyGen_Type && oparg <= SHRT_MAX) {
         assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR ||
-               instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR
-        );
+               instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR ||
+               instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == ENTER_EXECUTOR);
         if (_PyInterpreterState_GET()->eval_frame) {
             SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_OTHER);
             goto failure;