From 81cd4f4375ebbb0d43eef7139791e740fc672ea6 Mon Sep 17 00:00:00 2001 From: Manjusaka Date: Sat, 28 Jun 2025 17:54:51 +0800 Subject: [PATCH 1/4] gh-134584: Eliminate redundant refcounting from _CALL_STR_1 Signed-off-by: Manjusaka --- Include/internal/pycore_opcode_metadata.h | 2 +- Lib/test/test_capi/test_opt.py | 14 ++++++++++ ...-06-28-17-54-27.gh-issue-134584.EXgPub.rst | 1 + Python/bytecodes.c | 10 +++---- Python/generated_cases.c.h | 27 ++++++++++--------- Python/optimizer_bytecodes.c | 3 ++- Python/optimizer_cases.c.h | 5 +++- 7 files changed, 40 insertions(+), 22 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-28-17-54-27.gh-issue-134584.EXgPub.rst diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index dd1bf2d1d2b51a..88f4880b090883 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1366,7 +1366,7 @@ _PyOpcode_macro_expansion[256] = { [CALL_NON_PY_GENERAL] = { .nuops = 3, .uops = { { _CHECK_IS_NOT_PY_CALLABLE, OPARG_SIMPLE, 3 }, { _CALL_NON_PY_GENERAL, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_PY_EXACT_ARGS] = { .nuops = 8, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_FUNCTION_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _CHECK_STACK_SPACE, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _INIT_CALL_PY_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_PY_GENERAL] = { .nuops = 6, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, - [CALL_STR_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_STR_1, OPARG_SIMPLE, 3 }, { _CALL_STR_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, + [CALL_STR_1] = { .nuops = 5, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_STR_1, OPARG_SIMPLE, 3 }, { _CALL_STR_1, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_TUPLE_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TUPLE_1, OPARG_SIMPLE, 3 }, { _CALL_TUPLE_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_TYPE_1] = { .nuops = 3, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TYPE_1, OPARG_SIMPLE, 3 }, { _CALL_TYPE_1, OPARG_SIMPLE, 3 } } }, [CHECK_EG_MATCH] = { .nuops = 1, .uops = { { _CHECK_EG_MATCH, OPARG_SIMPLE, 0 } } }, diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index e4c9a463855a69..f379c3b758d0cc 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1858,6 +1858,20 @@ def testfunc(n): self.assertNotIn("_GUARD_NOS_NULL", uops) self.assertNotIn("_GUARD_CALLABLE_STR_1", uops) + def test_call_str_1_pop_top(self): + def testfunc(n): + x = 0 + for _ in range(n): + t = str("") + x += 1 if len(t) == 0 else 0 + return x + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_CALL_STR_1", uops) + self.assertIn("_POP_TOP_NOP", uops) + def test_call_str_1_result_is_str(self): def testfunc(n): x = 0 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-28-17-54-27.gh-issue-134584.EXgPub.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-28-17-54-27.gh-issue-134584.EXgPub.rst new file mode 100644 index 00000000000000..638c170e262cab --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-28-17-54-27.gh-issue-134584.EXgPub.rst @@ -0,0 +1 @@ +Eliminate redundant refcounting from ``_CALL_STR_1``. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1a5a9ff13a23a5..486a78e9848cff 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4055,17 +4055,14 @@ dummy_func( DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type); } - op(_CALL_STR_1, (callable, null, arg -- res)) { + op(_CALL_STR_1, (callable, null, arg -- res, a)) { PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); + INPUTS_DEAD(); PyObject *res_o = PyObject_Str(arg_o); - DEAD(null); - DEAD(callable); - (void)callable; // Silence compiler warnings about unused variables - (void)null; - PyStackRef_CLOSE(arg); + a = arg; ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -4076,6 +4073,7 @@ dummy_func( _GUARD_NOS_NULL + _GUARD_CALLABLE_STR_1 + _CALL_STR_1 + + POP_TOP + _CHECK_PERIODIC; op(_GUARD_CALLABLE_TUPLE_1, (callable, unused, unused -- callable, unused, unused)) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8f7932f0033c6f..cc9aaefead5245 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4379,6 +4379,8 @@ _PyStackRef callable; _PyStackRef arg; _PyStackRef res; + _PyStackRef a; + _PyStackRef value; /* Skip 1 cache entry */ /* Skip 2 cache entries */ // _GUARD_NOS_NULL @@ -4406,41 +4408,40 @@ PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = PyObject_Str(arg_o); - stack_pointer = _PyFrame_GetStackPointer(frame); - (void)callable; - (void)null; stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(arg); + PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); + a = arg; if (res_o == NULL) { JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); } + // _POP_TOP + { + value = a; + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } // _CHECK_PERIODIC { _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); QSBR_QUIESCENT_STATE(tstate); if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); int err = _Py_HandlePending(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); if (err != 0) { JUMP_TO_LABEL(error); } - stack_pointer += -1; } } - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); DISPATCH(); } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 3182e8b3b70144..33ca6697053f8d 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -936,7 +936,7 @@ dummy_func(void) { } } - op(_CALL_STR_1, (unused, unused, arg -- res)) { + op(_CALL_STR_1, (unused, unused, arg -- res, a)) { if (sym_matches_type(arg, &PyUnicode_Type)) { // e.g. str('foo') or str(foo) where foo is known to be a string res = arg; @@ -944,6 +944,7 @@ dummy_func(void) { else { res = sym_new_type(ctx, &PyUnicode_Type); } + a = arg; } op(_CALL_ISINSTANCE, (unused, unused, instance, cls -- res)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 8d30df3aa7d429..ab1ecb887cb947 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2339,6 +2339,7 @@ case _CALL_STR_1: { JitOptRef arg; JitOptRef res; + JitOptRef a; arg = stack_pointer[-1]; if (sym_matches_type(arg, &PyUnicode_Type)) { res = arg; @@ -2346,8 +2347,10 @@ else { res = sym_new_type(ctx, &PyUnicode_Type); } + a = arg; stack_pointer[-3] = res; - stack_pointer += -2; + stack_pointer[-2] = a; + stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } From a8057aadb6a22c75429775949c7cb0b6db98eada Mon Sep 17 00:00:00 2001 From: Manjusaka Date: Sat, 28 Jun 2025 18:04:21 +0800 Subject: [PATCH 2/4] fix ci Signed-off-by: Manjusaka --- Python/executor_cases.c.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 46fc164a5b3bc2..37c2a50fe6bce1 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5443,32 +5443,26 @@ case _CALL_STR_1: { _PyStackRef arg; - _PyStackRef null; - _PyStackRef callable; _PyStackRef res; + _PyStackRef a; oparg = CURRENT_OPARG(); arg = stack_pointer[-1]; - null = stack_pointer[-2]; - callable = stack_pointer[-3]; PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = PyObject_Str(arg_o); - stack_pointer = _PyFrame_GetStackPointer(frame); - (void)callable; - (void)null; stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(arg); + PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); + a = arg; if (res_o == NULL) { JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; - stack_pointer += 1; + stack_pointer[1] = a; + stack_pointer += 2; assert(WITHIN_STACK_BOUNDS()); break; } From 5636c727b234198935739068c7cb3596a71d9a89 Mon Sep 17 00:00:00 2001 From: Manjusaka Date: Thu, 11 Dec 2025 23:35:24 +0800 Subject: [PATCH 3/4] fix review idea Signed-off-by: Manjusaka --- Python/bytecodes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 486a78e9848cff..0241f7230c10ba 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4060,9 +4060,9 @@ dummy_func( assert(oparg == 1); STAT_INC(CALL, hit); - INPUTS_DEAD(); PyObject *res_o = PyObject_Str(arg_o); a = arg; + INPUTS_DEAD(); ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } From 2dcd9eb647b38d2b8b26b4c20bda9d8ae1caf117 Mon Sep 17 00:00:00 2001 From: Manjusaka Date: Sun, 14 Dec 2025 16:44:56 +0800 Subject: [PATCH 4/4] fix review idea Signed-off-by: Manjusaka --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Python/bytecodes.c | 4 +++- Python/executor_cases.c.h | 4 +--- Python/generated_cases.c.h | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index eb50980dcf838e..ec2eb15151c982 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1129,7 +1129,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [CALL_NON_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_SYNC_SP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_SYNC_SP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, - [CALL_STR_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_STR_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_TUPLE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_TYPE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [CHECK_EG_MATCH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 092aa84c6b96d0..3bd19c82bc4eb4 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -274,7 +274,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_CALLABLE_TYPE_1] = HAS_DEOPT_FLAG, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_GUARD_CALLABLE_STR_1] = HAS_DEOPT_FLAG, - [_CALL_STR_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_STR_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_GUARD_CALLABLE_TUPLE_1] = HAS_DEOPT_FLAG, [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CHECK_AND_ALLOCATE_OBJECT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a0ccbc591f9d91..3e76dcd77561ce 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4046,9 +4046,11 @@ dummy_func( assert(oparg == 1); STAT_INC(CALL, hit); PyObject *res_o = PyObject_Str(arg_o); + if (res_o == NULL) { + ERROR_NO_POP(); + } a = arg; INPUTS_DEAD(); - ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 85bd30ffce8572..ff73b55bc6aaba 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -12777,13 +12777,11 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); - a = arg; if (res_o == NULL) { - stack_pointer += -3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); } + a = arg; res = PyStackRef_FromPyObjectSteal(res_o); _tos_cache1 = a; _tos_cache0 = res; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e634945d1d7370..1d8c01bd1b3501 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4000,10 +4000,10 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); - a = arg; if (res_o == NULL) { - JUMP_TO_LABEL(pop_3_error); + JUMP_TO_LABEL(error); } + a = arg; res = PyStackRef_FromPyObjectSteal(res_o); } // _POP_TOP