Skip to content

Commit d76df75

Browse files
authored
gh-145615: Fix mimalloc page leak in the free-threaded build (gh-145626)
Fix three issues that caused mimalloc pages to be leaked until the owning thread exited: 1. In _PyMem_mi_page_maybe_free(), move pages out of the full queue when relying on QSBR to defer freeing the page. Pages in the full queue are never searched by mi_page_queue_find_free_ex(), so a page left there is unusable for allocations. 2. Move _PyMem_mi_page_clear_qsbr() from _mi_page_free_collect() to _mi_page_thread_free_collect() where it only fires when all blocks on the page are free (used == 0). The previous placement was too broad: it cleared QSBR state whenever local_free was non-NULL, but _mi_page_free_collect() is called from non-allocation paths (e.g., page visiting in mi_heap_visit_blocks) where the page is not being reused. 3. In _PyMem_mi_page_maybe_free(), use the page's heap tld to find the correct thread state for QSBR list insertion instead of PyThreadState_GET(). During stop-the-world pauses, the function may process pages belonging to other threads, so the current thread state is not necessarily the owner of the page.
1 parent 171133a commit d76df75

File tree

5 files changed

+40
-10
lines changed

5 files changed

+40
-10
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fixed a memory leak in the :term:`free-threaded build` where mimalloc pages
2+
could become permanently unreclaimable until the owning thread exited.

Objects/mimalloc/heap.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,10 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
100100
// note: this will free retired pages as well.
101101
bool freed = _PyMem_mi_page_maybe_free(page, pq, collect >= MI_FORCE);
102102
if (!freed && collect == MI_ABANDON) {
103-
_mi_page_abandon(page, pq);
103+
// _PyMem_mi_page_maybe_free may have moved the page to a different
104+
// page queue, so we need to re-fetch the correct queue.
105+
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size));
106+
_mi_page_abandon(page, &heap->pages[bin]);
104107
}
105108
}
106109
else if (collect == MI_ABANDON) {

Objects/mimalloc/page.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,13 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
213213

214214
// update counts now
215215
page->used -= count;
216+
217+
if (page->used == 0) {
218+
// The page may have had a QSBR goal set from a previous point when it
219+
// was all-free. That goal is no longer valid because the page was
220+
// allocated from and then freed again by other threads.
221+
_PyMem_mi_page_clear_qsbr(page);
222+
}
216223
}
217224

218225
void _mi_page_free_collect(mi_page_t* page, bool force) {
@@ -225,9 +232,6 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
225232

226233
// and the local free list
227234
if (page->local_free != NULL) {
228-
// any previous QSBR goals are no longer valid because we reused the page
229-
_PyMem_mi_page_clear_qsbr(page);
230-
231235
if mi_likely(page->free == NULL) {
232236
// usual case
233237
page->free = page->local_free;

Objects/mimalloc/segment.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,6 +1286,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
12861286
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
12871287
#ifdef Py_GIL_DISABLED
12881288
page->qsbr_goal = 0;
1289+
mi_assert_internal(page->qsbr_node.next == NULL);
12891290
#endif
12901291
segment->abandoned--;
12911292
slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce!
@@ -1361,6 +1362,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
13611362
// if everything free by now, free the page
13621363
#ifdef Py_GIL_DISABLED
13631364
page->qsbr_goal = 0;
1365+
mi_assert_internal(page->qsbr_node.next == NULL);
13641366
#endif
13651367
slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing
13661368
}

Objects/obmalloc.c

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,12 @@ should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
151151
}
152152
return false;
153153
}
154+
155+
static _PyThreadStateImpl *
156+
tstate_from_heap(mi_heap_t *heap)
157+
{
158+
return _Py_CONTAINER_OF(heap->tld, _PyThreadStateImpl, mimalloc.tld);
159+
}
154160
#endif
155161

156162
static bool
@@ -159,23 +165,35 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
159165
#ifdef Py_GIL_DISABLED
160166
assert(mi_page_all_free(page));
161167
if (page->use_qsbr) {
162-
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
163-
if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
168+
struct _qsbr_thread_state *qsbr = ((_PyThreadStateImpl *)PyThreadState_GET())->qsbr;
169+
if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(qsbr, page->qsbr_goal)) {
164170
_PyMem_mi_page_clear_qsbr(page);
165171
_mi_page_free(page, pq, force);
166172
return true;
167173
}
168174

175+
// gh-145615: since we are not freeing this page yet, we want to
176+
// make it available for allocations. Note that the QSBR goal and
177+
// linked list node remain set even if the page is later used for
178+
// an allocation. We only detect and clear the QSBR goal when the
179+
// page becomes empty again (used == 0).
180+
if (mi_page_is_in_full(page)) {
181+
_mi_page_unfull(page);
182+
}
183+
169184
_PyMem_mi_page_clear_qsbr(page);
170185
page->retire_expire = 0;
171186

172-
if (should_advance_qsbr_for_page(tstate->qsbr, page)) {
173-
page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared);
187+
if (should_advance_qsbr_for_page(qsbr, page)) {
188+
page->qsbr_goal = _Py_qsbr_advance(qsbr->shared);
174189
}
175190
else {
176-
page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared);
191+
page->qsbr_goal = _Py_qsbr_shared_next(qsbr->shared);
177192
}
178193

194+
// We may be freeing a page belonging to a different thread during a
195+
// stop-the-world event. Find the _PyThreadStateImpl for the page.
196+
_PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
179197
llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
180198
return false;
181199
}
@@ -192,7 +210,8 @@ _PyMem_mi_page_reclaimed(mi_page_t *page)
192210
if (page->qsbr_goal != 0) {
193211
if (mi_page_all_free(page)) {
194212
assert(page->qsbr_node.next == NULL);
195-
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
213+
_PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
214+
assert(tstate == (_PyThreadStateImpl *)_PyThreadState_GET());
196215
page->retire_expire = 0;
197216
llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
198217
}

0 commit comments

Comments
 (0)