From d86950f1dde9cc469f1cd3eec23ae27ab69c0375 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 10 Mar 2026 18:38:58 -0400 Subject: [PATCH] gh-145779: Improve classmethod/staticmethod scaling in free-threaded build Add special cases for classmethod and staticmethod descriptors in _PyObject_GetMethodStackRef() to avoid calling tp_descr_get, which avoids reference count contention on the bound method and underlying callable. This improves scaling when calling classmethods and staticmethods from multiple threads. Refactor method_vectorcall in classobject.c into a new _PyObject_VectorcallPrepend() helper so that it can be used by by PyObject_VectorcallMethod as well. --- Include/internal/pycore_call.h | 8 ++ Include/internal/pycore_function.h | 5 + Include/internal/pycore_object.h | 2 +- ...10-22-38-40.gh-issue-145779.5375381d80.rst | 2 + Objects/call.c | 105 +++++++++++++++--- Objects/classobject.c | 49 +------- Objects/funcobject.c | 8 ++ Objects/object.c | 53 +++++++-- Python/ceval.c | 43 +++---- Tools/ftscalingbench/ftscalingbench.py | 21 ++++ 10 files changed, 193 insertions(+), 103 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-03-10-22-38-40.gh-issue-145779.5375381d80.rst diff --git a/Include/internal/pycore_call.h b/Include/internal/pycore_call.h index 4f4cf02f64b828..e544b4cf49d1fb 100644 --- a/Include/internal/pycore_call.h +++ b/Include/internal/pycore_call.h @@ -65,6 +65,14 @@ PyAPI_FUNC(PyObject*) _PyObject_CallMethod( const char *format, ...); +extern PyObject *_PyObject_VectorcallPrepend( + PyThreadState *tstate, + PyObject *callable, + PyObject *arg, + PyObject *const *args, + size_t nargsf, + PyObject *kwnames); + /* === Vectorcall protocol (PEP 590) ============================= */ // Call callable using tp_call. Arguments are like PyObject_Vectorcall(), diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index 9c2121f59a4a0c..99dacaf0fe7c54 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -46,6 +46,11 @@ static inline PyObject* _PyFunction_GET_BUILTINS(PyObject *func) { #define _PyFunction_GET_BUILTINS(func) _PyFunction_GET_BUILTINS(_PyObject_CAST(func)) +/* Get the callable wrapped by a classmethod. + Returns a borrowed reference. + The caller must ensure 'cm' is a classmethod object. */ +extern PyObject *_PyClassMethod_GetFunc(PyObject *cm); + /* Get the callable wrapped by a staticmethod. Returns a borrowed reference. The caller must ensure 'sm' is a staticmethod object. */ diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 8c241c7707d074..96685af02cd0e3 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -895,7 +895,7 @@ extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *, extern unsigned int _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out); -PyAPI_FUNC(int) _PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, +extern int _PyObject_GetMethodStackRef(PyThreadState *ts, _PyStackRef *self, PyObject *name, _PyStackRef *method); // Like PyObject_GetAttr but returns a _PyStackRef. For types, this can diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-10-22-38-40.gh-issue-145779.5375381d80.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-10-22-38-40.gh-issue-145779.5375381d80.rst new file mode 100644 index 00000000000000..9cd0263a107f16 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-10-22-38-40.gh-issue-145779.5375381d80.rst @@ -0,0 +1,2 @@ +Improve scaling of :func:`classmethod` and :func:`staticmethod` calls in +the free-threaded build by avoiding the descriptor ``__get__`` call. diff --git a/Objects/call.c b/Objects/call.c index 4b1b4bd52a2e56..9718642473103c 100644 --- a/Objects/call.c +++ b/Objects/call.c @@ -828,6 +828,60 @@ object_vacall(PyThreadState *tstate, PyObject *base, return result; } +PyObject * +_PyObject_VectorcallPrepend(PyThreadState *tstate, PyObject *callable, + PyObject *arg, PyObject *const *args, + size_t nargsf, PyObject *kwnames) +{ + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); + assert(nargs == 0 || args[nargs-1]); + + PyObject *result; + if (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET) { + /* PY_VECTORCALL_ARGUMENTS_OFFSET is set, so we are allowed to mutate the vector */ + PyObject **newargs = (PyObject**)args - 1; + nargs += 1; + PyObject *tmp = newargs[0]; + newargs[0] = arg; + assert(newargs[nargs-1]); + result = _PyObject_VectorcallTstate(tstate, callable, newargs, + nargs, kwnames); + newargs[0] = tmp; + } + else { + Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames); + Py_ssize_t totalargs = nargs + nkwargs; + if (totalargs == 0) { + return _PyObject_VectorcallTstate(tstate, callable, &arg, 1, NULL); + } + + PyObject *newargs_stack[_PY_FASTCALL_SMALL_STACK]; + PyObject **newargs; + if (totalargs <= (Py_ssize_t)Py_ARRAY_LENGTH(newargs_stack) - 1) { + newargs = newargs_stack; + } + else { + newargs = PyMem_Malloc((totalargs+1) * sizeof(PyObject *)); + if (newargs == NULL) { + _PyErr_NoMemory(tstate); + return NULL; + } + } + /* use borrowed references */ + newargs[0] = arg; + /* bpo-37138: since totalargs > 0, it's impossible that args is NULL. + * We need this, since calling memcpy() with a NULL pointer is + * undefined behaviour. */ + assert(args != NULL); + memcpy(newargs + 1, args, totalargs * sizeof(PyObject *)); + result = _PyObject_VectorcallTstate(tstate, callable, + newargs, nargs+1, kwnames); + if (newargs != newargs_stack) { + PyMem_Free(newargs); + } + } + return result; +} PyObject * PyObject_VectorcallMethod(PyObject *name, PyObject *const *args, @@ -838,31 +892,44 @@ PyObject_VectorcallMethod(PyObject *name, PyObject *const *args, assert(PyVectorcall_NARGS(nargsf) >= 1); PyThreadState *tstate = _PyThreadState_GET(); - _PyCStackRef method; + _PyCStackRef self, method; + _PyThreadState_PushCStackRef(tstate, &self); _PyThreadState_PushCStackRef(tstate, &method); /* Use args[0] as "self" argument */ - int unbound = _PyObject_GetMethodStackRef(tstate, args[0], name, &method.ref); - if (PyStackRef_IsNull(method.ref)) { + self.ref = PyStackRef_FromPyObjectBorrow(args[0]); + int unbound = _PyObject_GetMethodStackRef(tstate, &self.ref, name, &method.ref); + if (unbound < 0) { _PyThreadState_PopCStackRef(tstate, &method); + _PyThreadState_PopCStackRef(tstate, &self); return NULL; } + PyObject *callable = PyStackRef_AsPyObjectBorrow(method.ref); + PyObject *self_obj = PyStackRef_AsPyObjectBorrow(self.ref); + PyObject *result; - if (unbound) { + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_METHOD, callable); + if (self_obj == NULL) { + /* Skip "self". We can keep PY_VECTORCALL_ARGUMENTS_OFFSET since + * args[-1] in the onward call is args[0] here. */ + result = _PyObject_VectorcallTstate(tstate, callable, + args + 1, nargsf - 1, kwnames); + } + else if (self_obj == args[0]) { /* We must remove PY_VECTORCALL_ARGUMENTS_OFFSET since * that would be interpreted as allowing to change args[-1] */ - nargsf &= ~PY_VECTORCALL_ARGUMENTS_OFFSET; + result = _PyObject_VectorcallTstate(tstate, callable, args, + nargsf & ~PY_VECTORCALL_ARGUMENTS_OFFSET, + kwnames); } else { - /* Skip "self". We can keep PY_VECTORCALL_ARGUMENTS_OFFSET since - * args[-1] in the onward call is args[0] here. */ - args++; - nargsf--; + /* classmethod: self_obj is the type, not args[0]. Replace + * args[0] with self_obj and call the underlying callable. */ + result = _PyObject_VectorcallPrepend(tstate, callable, self_obj, + args + 1, nargsf - 1, kwnames); } - EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_METHOD, callable); - PyObject *result = _PyObject_VectorcallTstate(tstate, callable, - args, nargsf, kwnames); _PyThreadState_PopCStackRef(tstate, &method); + _PyThreadState_PopCStackRef(tstate, &self); return result; } @@ -875,22 +942,26 @@ PyObject_CallMethodObjArgs(PyObject *obj, PyObject *name, ...) return null_error(tstate); } - _PyCStackRef method; + _PyCStackRef self, method; + _PyThreadState_PushCStackRef(tstate, &self); _PyThreadState_PushCStackRef(tstate, &method); - int is_method = _PyObject_GetMethodStackRef(tstate, obj, name, &method.ref); - if (PyStackRef_IsNull(method.ref)) { + self.ref = PyStackRef_FromPyObjectBorrow(obj); + int res = _PyObject_GetMethodStackRef(tstate, &self.ref, name, &method.ref); + if (res < 0) { _PyThreadState_PopCStackRef(tstate, &method); + _PyThreadState_PopCStackRef(tstate, &self); return NULL; } PyObject *callable = PyStackRef_AsPyObjectBorrow(method.ref); - obj = is_method ? obj : NULL; + PyObject *self_obj = PyStackRef_AsPyObjectBorrow(self.ref); va_list vargs; va_start(vargs, name); - PyObject *result = object_vacall(tstate, obj, callable, vargs); + PyObject *result = object_vacall(tstate, self_obj, callable, vargs); va_end(vargs); _PyThreadState_PopCStackRef(tstate, &method); + _PyThreadState_PopCStackRef(tstate, &self); return result; } diff --git a/Objects/classobject.c b/Objects/classobject.c index 4c99c194df53a5..238f1c1dad7d86 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -52,54 +52,7 @@ method_vectorcall(PyObject *method, PyObject *const *args, PyThreadState *tstate = _PyThreadState_GET(); PyObject *self = PyMethod_GET_SELF(method); PyObject *func = PyMethod_GET_FUNCTION(method); - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); - assert(nargs == 0 || args[nargs-1]); - - PyObject *result; - if (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET) { - /* PY_VECTORCALL_ARGUMENTS_OFFSET is set, so we are allowed to mutate the vector */ - PyObject **newargs = (PyObject**)args - 1; - nargs += 1; - PyObject *tmp = newargs[0]; - newargs[0] = self; - assert(newargs[nargs-1]); - result = _PyObject_VectorcallTstate(tstate, func, newargs, - nargs, kwnames); - newargs[0] = tmp; - } - else { - Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames); - Py_ssize_t totalargs = nargs + nkwargs; - if (totalargs == 0) { - return _PyObject_VectorcallTstate(tstate, func, &self, 1, NULL); - } - - PyObject *newargs_stack[_PY_FASTCALL_SMALL_STACK]; - PyObject **newargs; - if (totalargs <= (Py_ssize_t)Py_ARRAY_LENGTH(newargs_stack) - 1) { - newargs = newargs_stack; - } - else { - newargs = PyMem_Malloc((totalargs+1) * sizeof(PyObject *)); - if (newargs == NULL) { - _PyErr_NoMemory(tstate); - return NULL; - } - } - /* use borrowed references */ - newargs[0] = self; - /* bpo-37138: since totalargs > 0, it's impossible that args is NULL. - * We need this, since calling memcpy() with a NULL pointer is - * undefined behaviour. */ - assert(args != NULL); - memcpy(newargs + 1, args, totalargs * sizeof(PyObject *)); - result = _PyObject_VectorcallTstate(tstate, func, - newargs, nargs+1, kwnames); - if (newargs != newargs_stack) { - PyMem_Free(newargs); - } - } - return result; + return _PyObject_VectorcallPrepend(tstate, func, self, args, nargsf, kwnames); } diff --git a/Objects/funcobject.c b/Objects/funcobject.c index fc32826fb3a861..ee25a48dd6f406 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1470,6 +1470,7 @@ cm_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (cm == NULL) { return NULL; } + _PyObject_SetDeferredRefcount((PyObject *)cm); if (cm_set_callable(cm, callable) < 0) { Py_DECREF(cm); return NULL; @@ -1906,6 +1907,13 @@ PyStaticMethod_New(PyObject *callable) return (PyObject *)sm; } +PyObject * +_PyClassMethod_GetFunc(PyObject *self) +{ + classmethod *cm = _PyClassMethod_CAST(self); + return cm->cm_callable; +} + PyObject * _PyStaticMethod_GetFunc(PyObject *self) { diff --git a/Objects/object.c b/Objects/object.c index e405963614689f..ae6ad558ff6c37 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -10,6 +10,7 @@ #include "pycore_descrobject.h" // _PyMethodWrapper_Type #include "pycore_dict.h" // _PyObject_MaterializeManagedDict() #include "pycore_floatobject.h" // _PyFloat_DebugMallocStats() +#include "pycore_function.h" // _PyClassMethod_GetFunc() #include "pycore_freelist.h" // _PyObject_ClearFreeLists() #include "pycore_genobject.h" // _PyAsyncGenAThrow_Type #include "pycore_hamt.h" // _PyHamtItems_Type @@ -1741,27 +1742,39 @@ _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) return 0; } +// Look up a method on `self` by `name`. +// +// On success, `*method` is set and the function returns 0 or 1. If the +// return value is 1, the call is an unbound method and `*self` is the +// "self" or "cls" argument to pass. If the return value is 0, the call is +// a regular function and `*self` is cleared. +// +// On error, returns -1, clears `*self`, and sets an exception. int -_PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, +_PyObject_GetMethodStackRef(PyThreadState *ts, _PyStackRef *self, PyObject *name, _PyStackRef *method) { int meth_found = 0; + PyObject *obj = PyStackRef_AsPyObjectBorrow(*self); assert(PyStackRef_IsNull(*method)); PyTypeObject *tp = Py_TYPE(obj); if (!_PyType_IsReady(tp)) { if (PyType_Ready(tp) < 0) { - return 0; + PyStackRef_CLEAR(*self); + return -1; } } if (tp->tp_getattro != PyObject_GenericGetAttr || !PyUnicode_CheckExact(name)) { PyObject *res = PyObject_GetAttr(obj, name); + PyStackRef_CLEAR(*self); if (res != NULL) { *method = PyStackRef_FromPyObjectSteal(res); + return 0; } - return 0; + return -1; } _PyType_LookupStackRefAndVersion(tp, name, method); @@ -1776,10 +1789,12 @@ _PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, if (f != NULL && PyDescr_IsData(descr)) { PyObject *value = f(descr, obj, (PyObject *)Py_TYPE(obj)); PyStackRef_CLEAR(*method); + PyStackRef_CLEAR(*self); if (value != NULL) { *method = PyStackRef_FromPyObjectSteal(value); + return 0; } - return 0; + return -1; } } } @@ -1787,9 +1802,9 @@ _PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && _PyObject_TryGetInstanceAttribute(obj, name, &attr)) { if (attr != NULL) { - PyStackRef_CLEAR(*method); - *method = PyStackRef_FromPyObjectSteal(attr); - return 0; + PyStackRef_XSETREF(*method, PyStackRef_FromPyObjectSteal(attr)); + PyStackRef_CLEAR(*self); + return 0; } dict = NULL; } @@ -1810,9 +1825,11 @@ _PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, int found = _PyDict_GetMethodStackRef((PyDictObject *)dict, name, method); if (found < 0) { assert(PyStackRef_IsNull(*method)); + PyStackRef_CLEAR(*self); return -1; } else if (found) { + PyStackRef_CLEAR(*self); return 0; } } @@ -1823,16 +1840,31 @@ _PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, } if (f != NULL) { - PyObject *value = f(descr, obj, (PyObject *)Py_TYPE(obj)); + if (Py_IS_TYPE(descr, &PyClassMethod_Type)) { + PyObject *callable = _PyClassMethod_GetFunc(descr); + PyStackRef_XSETREF(*method, PyStackRef_FromPyObjectNew(callable)); + PyStackRef_XSETREF(*self, PyStackRef_FromPyObjectNew((PyObject *)tp)); + return 1; + } + else if (Py_IS_TYPE(descr, &PyStaticMethod_Type)) { + PyObject *callable = _PyStaticMethod_GetFunc(descr); + PyStackRef_XSETREF(*method, PyStackRef_FromPyObjectNew(callable)); + PyStackRef_CLEAR(*self); + return 0; + } + PyObject *value = f(descr, obj, (PyObject *)tp); PyStackRef_CLEAR(*method); + PyStackRef_CLEAR(*self); if (value) { *method = PyStackRef_FromPyObjectSteal(value); + return 0; } - return 0; + return -1; } if (descr != NULL) { assert(!PyStackRef_IsNull(*method)); + PyStackRef_CLEAR(*self); return 0; } @@ -1842,7 +1874,8 @@ _PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj, _PyObject_SetAttributeErrorContext(obj, name); assert(PyStackRef_IsNull(*method)); - return 0; + PyStackRef_CLEAR(*self); + return -1; } diff --git a/Python/ceval.c b/Python/ceval.c index 950050a6027116..16d5c6cb2c667a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -437,13 +437,15 @@ _PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys) // - Atomically check for a key and get its value without error handling. // - Don't cause key creation or resizing in dict subclasses like // collections.defaultdict that define __missing__ (or similar). - _PyCStackRef cref; - _PyThreadState_PushCStackRef(tstate, &cref); - int meth_found = _PyObject_GetMethodStackRef(tstate, map, &_Py_ID(get), &cref.ref); - PyObject *get = PyStackRef_AsPyObjectBorrow(cref.ref); - if (get == NULL) { + _PyCStackRef self, method; + _PyThreadState_PushCStackRef(tstate, &self); + _PyThreadState_PushCStackRef(tstate, &method); + self.ref = PyStackRef_FromPyObjectBorrow(map); + int res = _PyObject_GetMethodStackRef(tstate, &self.ref, &_Py_ID(get), &method.ref); + if (res < 0) { goto fail; } + PyObject *get = PyStackRef_AsPyObjectBorrow(method.ref); seen = PySet_New(NULL); if (seen == NULL) { goto fail; @@ -467,9 +469,10 @@ _PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys) } goto fail; } - PyObject *args[] = { map, key, dummy }; + PyObject *self_obj = PyStackRef_AsPyObjectBorrow(self.ref); + PyObject *args[] = { self_obj, key, dummy }; PyObject *value = NULL; - if (meth_found) { + if (!PyStackRef_IsNull(self.ref)) { value = PyObject_Vectorcall(get, args, 3, NULL); } else { @@ -490,12 +493,14 @@ _PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys) } // Success: done: - _PyThreadState_PopCStackRef(tstate, &cref); + _PyThreadState_PopCStackRef(tstate, &method); + _PyThreadState_PopCStackRef(tstate, &self); Py_DECREF(seen); Py_DECREF(dummy); return values; fail: - _PyThreadState_PopCStackRef(tstate, &cref); + _PyThreadState_PopCStackRef(tstate, &method); + _PyThreadState_PopCStackRef(tstate, &self); Py_XDECREF(seen); Py_XDECREF(dummy); Py_XDECREF(values); @@ -1022,24 +1027,8 @@ _Py_LoadAttr_StackRefSteal( { _PyCStackRef method; _PyThreadState_PushCStackRef(tstate, &method); - int is_meth = _PyObject_GetMethodStackRef(tstate, PyStackRef_AsPyObjectBorrow(owner), name, &method.ref); - if (is_meth) { - /* We can bypass temporary bound method object. - meth is unbound method and obj is self. - meth | self | arg1 | ... | argN - */ - assert(!PyStackRef_IsNull(method.ref)); // No errors on this branch - self_or_null[0] = owner; // Transfer ownership - return _PyThreadState_PopCStackRefSteal(tstate, &method); - } - /* meth is not an unbound method (but a regular attr, or - something was returned by a descriptor protocol). Set - the second element of the stack to NULL, to signal - CALL that it's not a method call. - meth | NULL | arg1 | ... | argN - */ - PyStackRef_CLOSE(owner); - self_or_null[0] = PyStackRef_NULL; + *self_or_null = owner; + _PyObject_GetMethodStackRef(tstate, self_or_null, name, &method.ref); return _PyThreadState_PopCStackRefSteal(tstate, &method); } diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py index 8d8bbc88e7f30a..bcbd61f601a7d3 100644 --- a/Tools/ftscalingbench/ftscalingbench.py +++ b/Tools/ftscalingbench/ftscalingbench.py @@ -258,6 +258,27 @@ def method(self): obj.method() +class MyClassMethod: + @classmethod + def my_classmethod(cls): + return cls + + @staticmethod + def my_staticmethod(): + pass + +@register_benchmark +def classmethod_call(): + obj = MyClassMethod() + for _ in range(1000 * WORK_SCALE): + obj.my_classmethod() + +@register_benchmark +def staticmethod_call(): + obj = MyClassMethod() + for _ in range(1000 * WORK_SCALE): + obj.my_staticmethod() + @register_benchmark def deepcopy(): x = {'list': [1, 2], 'tuple': (1, None)}