From a812905eac488aba4a051c32c4124897f073a407 Mon Sep 17 00:00:00 2001 From: "T. Wouters" Date: Wed, 11 Mar 2026 15:46:16 +0100 Subject: [PATCH 1/2] gh-142183: Cache one datachunk per tstate to prevent alloc/dealloc thrashing (GH-145789) Cache one datachunk per tstate to prevent alloc/dealloc thrashing when repeatedly hitting the same call depth at exactly the wrong boundary. --------- (cherry picked from commit 706fd4ec08acbf1b1def3630017ebe55d224adfa) Co-authored-by: T. Wouters Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- Include/cpython/pystate.h | 1 + ...-03-11-00-13-59.gh-issue-142183.2iVhJH.rst | 1 + Python/pystate.c | 30 ++++++++++++++++--- 3 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-03-11-00-13-59.gh-issue-142183.2iVhJH.rst diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index f428396411c5e5..3f5e054075c233 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -180,6 +180,7 @@ struct _ts { _PyStackChunk *datastack_chunk; PyObject **datastack_top; PyObject **datastack_limit; + _PyStackChunk *datastack_cached_chunk; /* XXX signal handlers should also be here */ /* The following fields are here to avoid allocation during init. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-11-00-13-59.gh-issue-142183.2iVhJH.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-11-00-13-59.gh-issue-142183.2iVhJH.rst new file mode 100644 index 00000000000000..827224dc71e827 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-11-00-13-59.gh-issue-142183.2iVhJH.rst @@ -0,0 +1 @@ +Avoid a pathological case where repeated calls at a specific stack depth could be significantly slower. diff --git a/Python/pystate.c b/Python/pystate.c index 7188e5bf361fa5..2b67a40fe4f787 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1575,6 +1575,7 @@ init_threadstate(_PyThreadStateImpl *_tstate, tstate->datastack_chunk = NULL; tstate->datastack_top = NULL; tstate->datastack_limit = NULL; + tstate->datastack_cached_chunk = NULL; tstate->what_event = -1; tstate->current_executor = NULL; tstate->dict_global_version = 0; @@ -1714,6 +1715,11 @@ clear_datastack(PyThreadState *tstate) _PyObject_VirtualFree(chunk, chunk->size); chunk = prev; } + if (tstate->datastack_cached_chunk != NULL) { + _PyObject_VirtualFree(tstate->datastack_cached_chunk, + tstate->datastack_cached_chunk->size); + tstate->datastack_cached_chunk = NULL; + } } void @@ -3029,9 +3035,20 @@ push_chunk(PyThreadState *tstate, int size) while (allocate_size < (int)sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) { allocate_size *= 2; } - _PyStackChunk *new = allocate_chunk(allocate_size, tstate->datastack_chunk); - if (new == NULL) { - return NULL; + _PyStackChunk *new; + if (tstate->datastack_cached_chunk != NULL + && (size_t)allocate_size <= tstate->datastack_cached_chunk->size) + { + new = tstate->datastack_cached_chunk; + tstate->datastack_cached_chunk = NULL; + new->previous = tstate->datastack_chunk; + new->top = 0; + } + else { + new = allocate_chunk(allocate_size, tstate->datastack_chunk); + if (new == NULL) { + return NULL; + } } if (tstate->datastack_chunk) { tstate->datastack_chunk->top = tstate->datastack_top - @@ -3067,12 +3084,17 @@ _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame) if (base == &tstate->datastack_chunk->data[0]) { _PyStackChunk *chunk = tstate->datastack_chunk; _PyStackChunk *previous = chunk->previous; + _PyStackChunk *cached = tstate->datastack_cached_chunk; // push_chunk ensures that the root chunk is never popped: assert(previous); tstate->datastack_top = &previous->data[previous->top]; tstate->datastack_chunk = previous; - _PyObject_VirtualFree(chunk, chunk->size); tstate->datastack_limit = (PyObject **)(((char *)previous) + previous->size); + chunk->previous = NULL; + if (cached != NULL) { + _PyObject_VirtualFree(cached, cached->size); + } + tstate->datastack_cached_chunk = chunk; } else { assert(tstate->datastack_top); From a07bf7de1531b8a09eb1782f14c63ca71ab2bde4 Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Wed, 11 Mar 2026 16:46:36 +0100 Subject: [PATCH 2/2] Move new _ts member to the end to not mess up remote debuggers' ideas of the struct's layout. (The struct is only created by the runtime, and the new field only used by the runtime, so it should be safe.) --- Include/cpython/pystate.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 3f5e054075c233..c1b97d428e2e9b 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -180,7 +180,6 @@ struct _ts { _PyStackChunk *datastack_chunk; PyObject **datastack_top; PyObject **datastack_limit; - _PyStackChunk *datastack_cached_chunk; /* XXX signal handlers should also be here */ /* The following fields are here to avoid allocation during init. @@ -209,6 +208,8 @@ struct _ts { */ PyObject *threading_local_sentinel; _PyRemoteDebuggerSupport remote_debugger_support; + + _PyStackChunk *datastack_cached_chunk; }; /* other API */