Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Include/internal/pycore_interp_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ extern "C" {
#include "pycore_structs.h" // PyHamtObject
#include "pycore_tstate.h" // _PyThreadStateImpl
#include "pycore_typedefs.h" // _PyRuntimeState
#include "pycore_uop.h" // _PyBloomFilter

#define CODE_MAX_WATCHERS 8
#define CONTEXT_MAX_WATCHERS 8
Expand Down Expand Up @@ -972,7 +973,10 @@ struct _is {

// Optimization configuration (thresholds and flags for JIT and interpreter)
_PyOptimizationConfig opt_config;
struct _PyExecutorObject *executor_list_head;
_PyBloomFilter *executor_blooms; // Contiguous bloom filter array
struct _PyExecutorObject **executor_ptrs; // Corresponding executor pointer array
size_t executor_count; // Number of valid executors
size_t executor_capacity; // Array capacity
struct _PyExecutorObject *executor_deletion_list_head;
struct _PyExecutorObject *cold_executor;
struct _PyExecutorObject *cold_dynamic_executor;
Expand Down
6 changes: 3 additions & 3 deletions Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ typedef struct {
bool cold;
uint8_t pending_deletion;
int32_t index; // Index of ENTER_EXECUTOR (if code isn't NULL, below).
_PyBloomFilter bloom;
_PyExecutorLinkListNode links;
int32_t bloom_array_idx; // Index in interp->executor_blooms/executor_ptrs.
_PyExecutorLinkListNode links; // Used by deletion list.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this necessary now? We can traverse all executors using the executor_ptrs array.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need it to save deletion list:

cpython/Python/optimizer.c

Lines 332 to 338 in 08a018e

static void
uop_dealloc(PyObject *op) {
_PyExecutorObject *self = _PyExecutorObject_CAST(op);
executor_invalidate(op);
assert(self->vm_data.code == NULL);
add_to_pending_deletion_list(self);
}

PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR).
} _PyVMData;

Expand Down Expand Up @@ -157,7 +157,7 @@ typedef struct _PyExecutorObject {
// Export for '_opcode' shared extension (JIT compiler).
PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset);

void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
int _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
void _Py_ExecutorDetach(_PyExecutorObject *);
void _Py_BloomFilter_Init(_PyBloomFilter *);
void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
Expand Down
4 changes: 2 additions & 2 deletions InternalDocs/jit.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ and execution returns to the adaptive interpreter.
## Invalidating Executors

In addition to being stored on the code object, each executor is also
inserted into a list of all executors, which is stored in the interpreter
state's `executor_list_head` field. This list is used when it is necessary
inserted into contiguous arrays (`executor_blooms` and `executor_ptrs`)
stored in the interpreter state. These arrays are used when it is necessary
to invalidate executors because values they used in their construction may
have changed.

Expand Down
6 changes: 2 additions & 4 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,10 +278,8 @@ get_jit_code_ranges(PyObject *self, PyObject *Py_UNUSED(args))
if (interp == NULL) {
return ranges;
}
for (_PyExecutorObject *exec = interp->executor_list_head;
exec != NULL;
exec = exec->vm_data.links.next)
{
for (size_t i = 0; i < interp->executor_count; i++) {
_PyExecutorObject *exec = interp->executor_ptrs[i];
if (exec->jit_code == NULL || exec->jit_size == 0) {
continue;
}
Expand Down
1 change: 0 additions & 1 deletion Objects/funcobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "pycore_setobject.h" // _PySet_NextEntry()
#include "pycore_stats.h"
#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS()
#include "pycore_optimizer.h" // _Py_Executors_InvalidateDependency

static const char *
func_event_name(PyFunction_WatchEvent event) {
Expand Down
19 changes: 18 additions & 1 deletion Python/jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,23 @@ jit_error(const char *message)

static size_t _Py_jit_shim_size = 0;

static int
address_in_executor_array(_PyExecutorObject **ptrs, size_t count, uintptr_t addr)
{
for (size_t i = 0; i < count; i++) {
_PyExecutorObject *exec = ptrs[i];
if (exec->jit_code == NULL || exec->jit_size == 0) {
continue;
}
uintptr_t start = (uintptr_t)exec->jit_code;
uintptr_t end = start + exec->jit_size;
if (addr >= start && addr < end) {
return 1;
}
}
return 0;
}

static int
address_in_executor_list(_PyExecutorObject *head, uintptr_t addr)
{
Expand Down Expand Up @@ -94,7 +111,7 @@ _PyJIT_AddressInJitCode(PyInterpreterState *interp, uintptr_t addr)
return 1;
}
}
if (address_in_executor_list(interp->executor_list_head, addr)) {
if (address_in_executor_array(interp->executor_ptrs, interp->executor_count, addr)) {
return 1;
}
if (address_in_executor_list(interp->executor_deletion_list_head, addr)) {
Expand Down
121 changes: 63 additions & 58 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1379,7 +1379,10 @@ make_executor_from_uops(_PyThreadStateImpl *tstate, _PyUOpInstruction *buffer, i
// linking of executor. Otherwise, the GC tries to untrack a
// still untracked object during dealloc.
_PyObject_GC_TRACK(executor);
_Py_ExecutorInit(executor, dependencies);
if (_Py_ExecutorInit(executor, dependencies) < 0) {
Py_DECREF(executor);
return NULL;
}
#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
int lltrace = 0;
Expand Down Expand Up @@ -1646,59 +1649,63 @@ bloom_filter_may_contain(_PyBloomFilter *bloom, _PyBloomFilter *hashes)
return true;
}

static void
link_executor(_PyExecutorObject *executor)
static int
link_executor(_PyExecutorObject *executor, const _PyBloomFilter *bloom)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyExecutorLinkListNode *links = &executor->vm_data.links;
_PyExecutorObject *head = interp->executor_list_head;
if (head == NULL) {
interp->executor_list_head = executor;
links->previous = NULL;
links->next = NULL;
}
else {
assert(head->vm_data.links.previous == NULL);
links->previous = NULL;
links->next = head;
head->vm_data.links.previous = executor;
interp->executor_list_head = executor;
}
/* executor_list_head must be first in list */
assert(interp->executor_list_head->vm_data.links.previous == NULL);
if (interp->executor_count == interp->executor_capacity) {
size_t new_cap = interp->executor_capacity ? interp->executor_capacity * 2 : 64;
_PyBloomFilter *new_blooms = PyMem_Realloc(
interp->executor_blooms, new_cap * sizeof(_PyBloomFilter));
if (new_blooms == NULL) {
return -1;
}
_PyExecutorObject **new_ptrs = PyMem_Realloc(
interp->executor_ptrs, new_cap * sizeof(_PyExecutorObject *));
if (new_ptrs == NULL) {
/* Revert blooms realloc — the old pointer may have been freed by
* a successful realloc, but new_blooms is the valid pointer. */
interp->executor_blooms = new_blooms;
return -1;
}
interp->executor_blooms = new_blooms;
interp->executor_ptrs = new_ptrs;
interp->executor_capacity = new_cap;
}
size_t idx = interp->executor_count++;
interp->executor_blooms[idx] = *bloom;
interp->executor_ptrs[idx] = executor;
executor->vm_data.bloom_array_idx = (int32_t)idx;
return 0;
}

static void
unlink_executor(_PyExecutorObject *executor)
{
_PyExecutorLinkListNode *links = &executor->vm_data.links;
_PyExecutorObject *next = links->next;
_PyExecutorObject *prev = links->previous;
if (next != NULL) {
next->vm_data.links.previous = prev;
}
if (prev != NULL) {
prev->vm_data.links.next = next;
}
else {
// prev == NULL implies that executor is the list head
PyInterpreterState *interp = PyInterpreterState_Get();
assert(interp->executor_list_head == executor);
interp->executor_list_head = next;
PyInterpreterState *interp = PyInterpreterState_Get();
int32_t idx = executor->vm_data.bloom_array_idx;
assert(idx >= 0 && (size_t)idx < interp->executor_count);
size_t last = --interp->executor_count;
if ((size_t)idx != last) {
/* Swap-remove: move the last element into the vacated slot */
interp->executor_blooms[idx] = interp->executor_blooms[last];
interp->executor_ptrs[idx] = interp->executor_ptrs[last];
interp->executor_ptrs[idx]->vm_data.bloom_array_idx = idx;
}
executor->vm_data.bloom_array_idx = -1;
}

/* This must be called by optimizers before using the executor */
void
int
_Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_set)
{
executor->vm_data.valid = true;
executor->vm_data.pending_deletion = 0;
executor->vm_data.code = NULL;
for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
executor->vm_data.bloom.bits[i] = dependency_set->bits[i];
if (link_executor(executor, dependency_set) < 0) {
return -1;
}
link_executor(executor);
return 0;
}

static _PyExecutorObject *
Expand Down Expand Up @@ -1809,35 +1816,36 @@ void
_Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj)
{
assert(executor->vm_data.valid);
_Py_BloomFilter_Add(&executor->vm_data.bloom, obj);
PyInterpreterState *interp = _PyInterpreterState_GET();
int32_t idx = executor->vm_data.bloom_array_idx;
assert(idx >= 0 && (size_t)idx < interp->executor_count);
_Py_BloomFilter_Add(&interp->executor_blooms[idx], obj);
}

/* Invalidate all executors that depend on `obj`
* May cause other executors to be invalidated as well
* May cause other executors to be invalidated as well.
* Uses contiguous bloom filter array for cache-friendly scanning.
*/
void
_Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation)
{
_PyBloomFilter obj_filter;
_Py_BloomFilter_Init(&obj_filter);
_Py_BloomFilter_Add(&obj_filter, obj);
/* Walk the list of executors */
/* TO DO -- Use a tree to avoid traversing as many objects */
/* Scan contiguous bloom filter array */
PyObject *invalidate = PyList_New(0);
if (invalidate == NULL) {
goto error;
}
/* Clearing an executor can clear others, so we need to make a list of
* executors to invalidate first */
for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
assert(exec->vm_data.valid);
_PyExecutorObject *next = exec->vm_data.links.next;
if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter) &&
PyList_Append(invalidate, (PyObject *)exec))
for (size_t i = 0; i < interp->executor_count; i++) {
assert(interp->executor_ptrs[i]->vm_data.valid);
if (bloom_filter_may_contain(&interp->executor_blooms[i], &obj_filter) &&
PyList_Append(invalidate, (PyObject *)interp->executor_ptrs[i]))
{
goto error;
}
exec = next;
}
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
PyObject *exec = PyList_GET_ITEM(invalidate, i);
Expand All @@ -1859,8 +1867,9 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is
void
_Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
{
while (interp->executor_list_head) {
_PyExecutorObject *executor = interp->executor_list_head;
while (interp->executor_count > 0) {
/* Invalidate from the end to avoid repeated swap-remove shifts */
_PyExecutorObject *executor = interp->executor_ptrs[interp->executor_count - 1];
assert(executor->vm_data.valid);
if (executor->vm_data.code) {
// Clear the entire code object so its co_executors array be freed:
Expand All @@ -1878,27 +1887,24 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
void
_Py_Executors_InvalidateCold(PyInterpreterState *interp)
{
/* Walk the list of executors */
/* TO DO -- Use a tree to avoid traversing as many objects */
/* Scan contiguous executor array */
PyObject *invalidate = PyList_New(0);
if (invalidate == NULL) {
goto error;
}

/* Clearing an executor can deallocate others, so we need to make a list of
* executors to invalidate first */
for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
for (size_t i = 0; i < interp->executor_count; i++) {
_PyExecutorObject *exec = interp->executor_ptrs[i];
assert(exec->vm_data.valid);
_PyExecutorObject *next = exec->vm_data.links.next;

if (exec->vm_data.cold && PyList_Append(invalidate, (PyObject *)exec) < 0) {
goto error;
}
else {
exec->vm_data.cold = true;
}

exec = next;
}
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
PyObject *exec = PyList_GET_ITEM(invalidate, i);
Expand Down Expand Up @@ -2142,9 +2148,8 @@ _PyDumpExecutors(FILE *out)
fprintf(out, " rankdir = \"LR\"\n\n");
fprintf(out, " node [colorscheme=greys9]\n");
PyInterpreterState *interp = PyInterpreterState_Get();
for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
executor_to_gv(exec, out);
exec = exec->vm_data.links.next;
for (size_t i = 0; i < interp->executor_count; i++) {
executor_to_gv(interp->executor_ptrs[i], out);
}
fprintf(out, "}\n\n");
return 0;
Expand Down
6 changes: 6 additions & 0 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -1761,6 +1761,12 @@ finalize_modules(PyThreadState *tstate)
interp->compiling = false;
#ifdef _Py_TIER2
_Py_Executors_InvalidateAll(interp, 0);
PyMem_Free(interp->executor_blooms);
PyMem_Free(interp->executor_ptrs);
interp->executor_blooms = NULL;
interp->executor_ptrs = NULL;
interp->executor_count = 0;
interp->executor_capacity = 0;
#endif

// Stop watching __builtin__ modifications
Expand Down
5 changes: 4 additions & 1 deletion Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,10 @@ init_interpreter(PyInterpreterState *interp,
interp->_code_object_generation = 0;
interp->jit = false;
interp->compiling = false;
interp->executor_list_head = NULL;
interp->executor_blooms = NULL;
interp->executor_ptrs = NULL;
interp->executor_count = 0;
interp->executor_capacity = 0;
interp->executor_deletion_list_head = NULL;
interp->executor_creation_counter = JIT_CLEANUP_THRESHOLD;

Expand Down
Loading